In [None]:
pip install pandas geopandas matplotlib shapely fuzzywuzzy

In [6]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from shapely.geometry import Point
from fuzzywuzzy import process
import os

# File paths
# The project root defaults to the original author's OneDrive folder. Set the
# MIGRANT_LOANS_ROOT environment variable to run the notebook elsewhere; with
# the variable unset, the resulting paths are identical to the original ones.
project_root = os.environ.get(
    'MIGRANT_LOANS_ROOT',
    '/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans',
)
data_path = os.path.join(project_root, '02_Output', 'People of India', 'population.csv')
shapefile_path = os.path.join(project_root, '01_Raw', 'Shapefiles', 'shrug_state_shapefile', 'state.shp')
output_dir = os.path.join(project_root, '04_Results', 'People of India', 'Population Migration')

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load datasets
migration_data = pd.read_csv(data_path)
india_states = gpd.read_file(shapefile_path)

# Reproject the state polygons to EPSG:4326 so every map is drawn in one CRS
india_states = india_states.to_crs("EPSG:4326")

# Candidate list for fuzzy matching: lower-cased shapefile state names.
# Missing names are dropped so the matcher only ever compares strings.
state_names = india_states['s_name'].dropna().str.lower().tolist()
def fuzzy_match(name, threshold=80):
    """Return the shapefile state name that best matches ``name``.

    Parameters
    ----------
    name : str
        Raw state name from the data. Non-string values (``None``, ``NaN``)
        and blank strings are treated as unmatched instead of raising.
    threshold : int, optional
        The best candidate is accepted only when its score is strictly
        greater than this value (default 80).

    Returns
    -------
    str or None
        The matching entry of ``state_names`` (lower-cased), or ``None`` when
        the input is unusable or no candidate scores above ``threshold``.
    """
    # Missing cells arrive as NaN floats or None; `.lower()` would raise on them.
    if not isinstance(name, str) or not name.strip():
        return None

    best = process.extractOne(name.lower(), state_names)
    # extractOne yields None when there is nothing to compare against.
    if best is None:
        return None

    match, score = best
    return match if score > threshold else None

# Map every origin and destination state in the data onto a shapefile state name
migration_data['origin_matched'] = migration_data['state'].apply(fuzzy_match)
migration_data['destination_matched'] = migration_data['emigrated_immigrated_state'].apply(fuzzy_match)

# Debugging: Print unmatched states
unmatched_origins = migration_data.loc[migration_data['origin_matched'].isnull(), 'state'].unique()
unmatched_destinations = migration_data.loc[
    migration_data['destination_matched'].isnull(), 'emigrated_immigrated_state'
].unique()
print("Unmatched Origin States:", unmatched_origins)
print("Unmatched Destination States:", unmatched_destinations)

# Lower-cased join key, computed once instead of on every loop iteration
india_states['s_name_lower'] = india_states['s_name'].str.lower()
state_geometries = india_states[['s_name_lower', 'geometry']]

# Iterate through all unique origin states
unique_origins = migration_data['origin_matched'].dropna().unique()

for origin_state in unique_origins:
    filtered_data = migration_data[migration_data['origin_matched'] == origin_state]

    # Attach destination geometry. The left join keeps rows whose destination
    # has no shapefile match; those rows end up with a missing geometry.
    filtered_data = filtered_data.merge(
        state_geometries,
        left_on='destination_matched',
        right_on='s_name_lower',
        how='left',
    )

    # Get origin state geometry
    origin_geometry = india_states[india_states['s_name_lower'] == origin_state].geometry.iloc[0]

    # Largest flow out of this origin; every arrow width is relative to it
    max_migration = filtered_data['migration'].max()

    # Plot the map
    fig, ax = plt.subplots(figsize=(15, 15))
    india_states.plot(ax=ax, color='#D4E6F1', edgecolor='black')

    # Add arrows for migration (only possible when the origin has a usable shape)
    if origin_geometry is not None and not origin_geometry.is_empty:
        origin_point = origin_geometry.centroid
        for _, row in filtered_data.iterrows():
            destination_geometry = row['geometry']
            # Skip destinations that are not on the map (missing or empty geometry)
            if not hasattr(destination_geometry, 'centroid') or destination_geometry.is_empty:
                continue
            destination_point = destination_geometry.centroid

            # Scale arrow thickness; fall back to a fixed width when there is
            # no positive maximum to divide by
            if max_migration > 0:
                migration_scale = row['migration'] / max_migration * 10
            else:
                migration_scale = 1.0
            arrow = FancyArrowPatch(
                (origin_point.x, origin_point.y),
                (destination_point.x, destination_point.y),
                connectionstyle="arc3,rad=0.3",
                arrowstyle="fancy", color="darkblue", alpha=0.8, lw=migration_scale,
            )
            ax.add_patch(arrow)

    # Add labels for clarity (drawn on `ax` explicitly, not the pyplot current axes)
    for _, row in india_states.iterrows():
        if row['geometry']:
            centroid = row['geometry'].centroid
            ax.text(centroid.x, centroid.y, row['s_name'], fontsize=8, ha='center', color='darkgreen')

    # Final touches
    ax.set_title(f"Migration from {origin_state.title()} to Other States", fontsize=16, color='darkblue')
    ax.set_axis_off()

    # Save the plot, then close the figure so the loop does not accumulate open figures
    output_path = os.path.join(output_dir, f"migration_from_{origin_state.replace(' ', '_')}.png")
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

print("All migration maps have been saved.")


Unmatched Origin States: ['Telangana']
Unmatched Destination States: ['Foreign' 'Telangana' 'Not Applicable']
All migration maps have been saved.


In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from shapely.geometry import Point
from fuzzywuzzy import process
import os

# File paths
# The project root defaults to the original author's OneDrive folder. Set the
# MIGRANT_LOANS_ROOT environment variable to run the notebook elsewhere; with
# the variable unset, the resulting paths are identical to the original ones.
project_root = os.environ.get(
    'MIGRANT_LOANS_ROOT',
    '/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans',
)
data_path = os.path.join(project_root, '02_Output', 'People of India', 'population_with_caste.csv')
shapefile_path = os.path.join(project_root, '01_Raw', 'Shapefiles', 'shrug_state_shapefile', 'state.shp')
output_dir = os.path.join(project_root, '04_Results', 'People of India', 'Population with Caste')

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load datasets
migration_data = pd.read_csv(data_path)
india_states = gpd.read_file(shapefile_path)

# Reproject the state polygons to EPSG:4326 so every map is drawn in one CRS
india_states = india_states.to_crs("EPSG:4326")

# Candidate list for fuzzy matching: lower-cased shapefile state names.
# Missing names are dropped so the matcher only ever compares strings.
state_names = india_states['s_name'].dropna().str.lower().tolist()
def fuzzy_match(name, threshold=80):
    """Return the shapefile state name that best matches ``name``.

    Parameters
    ----------
    name : str
        Raw state name from the data. Non-string values (``None``, ``NaN``)
        and blank strings are treated as unmatched instead of raising.
    threshold : int, optional
        The best candidate is accepted only when its score is strictly
        greater than this value (default 80).

    Returns
    -------
    str or None
        The matching entry of ``state_names`` (lower-cased), or ``None`` when
        the input is unusable or no candidate scores above ``threshold``.
    """
    # Missing cells arrive as NaN floats or None; `.lower()` would raise on them.
    if not isinstance(name, str) or not name.strip():
        return None

    best = process.extractOne(name.lower(), state_names)
    # extractOne yields None when there is nothing to compare against.
    if best is None:
        return None

    match, score = best
    return match if score > threshold else None

# Map every origin and destination state in the data onto a shapefile state name
migration_data['origin_matched'] = migration_data['state'].apply(fuzzy_match)
migration_data['destination_matched'] = migration_data['emigrated_immigrated_state'].apply(fuzzy_match)

# Debugging: Print unmatched states
unmatched_origins = migration_data.loc[migration_data['origin_matched'].isnull(), 'state'].unique()
unmatched_destinations = migration_data.loc[
    migration_data['destination_matched'].isnull(), 'emigrated_immigrated_state'
].unique()
print("Unmatched Origin States:", unmatched_origins)
print("Unmatched Destination States:", unmatched_destinations)

# Assign a colour to each caste category. Missing categories are excluded here
# and drawn in a neutral fallback colour below. Indices wrap around the 10
# tab10 colours so that an 11th category does not collapse onto the last one.
caste_categories = migration_data['caste_category'].dropna().unique()
colors = plt.cm.tab10([i % 10 for i in range(len(caste_categories))])
color_map = dict(zip(caste_categories, colors))
missing_caste_color = 'grey'

# Lower-cased join key, computed once instead of on every loop iteration
india_states['s_name_lower'] = india_states['s_name'].str.lower()
state_geometries = india_states[['s_name_lower', 'geometry']]

# Iterate through all unique origin states
unique_origins = migration_data['origin_matched'].dropna().unique()

for origin_state in unique_origins:
    filtered_data = migration_data[migration_data['origin_matched'] == origin_state]

    # Attach destination geometry. The left join keeps rows whose destination
    # has no shapefile match; those rows end up with a missing geometry.
    filtered_data = filtered_data.merge(
        state_geometries,
        left_on='destination_matched',
        right_on='s_name_lower',
        how='left',
    )

    # Get origin state geometry
    origin_geometry = india_states[india_states['s_name_lower'] == origin_state].geometry.iloc[0]

    # Largest flow out of this origin; every arrow width is relative to it
    max_migration = filtered_data['migration'].max()

    # Plot the map
    fig, ax = plt.subplots(figsize=(15, 15))
    india_states.plot(ax=ax, color='#E5F5E0', edgecolor='black')

    # Add arrows for migration (only possible when the origin has a usable shape)
    if origin_geometry is not None and not origin_geometry.is_empty:
        origin_point = origin_geometry.centroid
        for _, row in filtered_data.iterrows():
            destination_geometry = row['geometry']
            # Skip destinations that are not on the map (missing or empty geometry)
            if not hasattr(destination_geometry, 'centroid') or destination_geometry.is_empty:
                continue
            destination_point = destination_geometry.centroid

            # Colour by caste; rows without a known category use the fallback
            caste_color = color_map.get(row['caste_category'], missing_caste_color)

            # Scale arrow thickness; fall back to a fixed width when there is
            # no positive maximum to divide by
            if max_migration > 0:
                migration_scale = row['migration'] / max_migration * 10
            else:
                migration_scale = 1.0
            arrow = FancyArrowPatch(
                (origin_point.x, origin_point.y),
                (destination_point.x, destination_point.y),
                connectionstyle="arc3,rad=0.3",
                arrowstyle="fancy", color=caste_color, alpha=0.8, lw=migration_scale,
            )
            ax.add_patch(arrow)

    # Add legend for caste categories (proxy lines, one per category)
    handles = [plt.Line2D([0], [0], color=color_map[caste], lw=2, label=caste) for caste in caste_categories]
    ax.legend(handles=handles, loc='upper left', title="Caste Categories", fontsize=10)

    # Add labels for clarity (drawn on `ax` explicitly, not the pyplot current axes)
    for _, row in india_states.iterrows():
        if row['geometry']:
            centroid = row['geometry'].centroid
            ax.text(centroid.x, centroid.y, row['s_name'], fontsize=8, ha='center', color='darkgreen')

    # Final touches
    ax.set_title(f"Migration from {origin_state.title()} to Other States by Caste", fontsize=16, color='darkblue')
    ax.set_axis_off()

    # Save the plot, then close the figure so the loop does not accumulate open figures
    output_path = os.path.join(output_dir, f"migration_from_{origin_state.replace(' ', '_')}_by_caste.png")
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

print("All migration maps by caste have been saved.")


In [None]:
# Updated map generation loop: arrows are colour-coded by migration reason.
# This cell reuses `migration_data`, `india_states`, `unique_origins` and
# `output_dir` from the cell that was run before it.

# NOTE(review): `reasons` and `reason_colors` were not defined anywhere in the
# notebook, so this cell raised NameError on a fresh kernel. They are derived
# from the data here; confirm the palette matches the intended figure.
# Indices wrap around the 20 tab20 colours so extra reasons still get a colour.
reasons = migration_data['emigration_immigration_reason'].dropna().unique()
reason_palette = plt.cm.tab20([i % 20 for i in range(len(reasons))])
reason_colors = dict(zip(reasons, reason_palette))
missing_reason_color = 'grey'

# Lower-cased join key, computed once instead of on every loop iteration
india_states['s_name_lower'] = india_states['s_name'].str.lower()
state_geometries = india_states[['s_name_lower', 'geometry']]

for origin_state in unique_origins:
    filtered_data = migration_data[migration_data['origin_matched'] == origin_state]

    # Attach destination geometry. The left join keeps rows whose destination
    # has no shapefile match; those rows end up with a missing geometry.
    filtered_data = filtered_data.merge(
        state_geometries,
        left_on='destination_matched',
        right_on='s_name_lower',
        how='left',
    )

    # Get origin state geometry
    origin_geometry = india_states[india_states['s_name_lower'] == origin_state].geometry.iloc[0]

    # Largest flow out of this origin; every arrow width is relative to it
    max_migration = filtered_data['migration'].max()

    # Plot the map
    fig, ax = plt.subplots(figsize=(15, 15))
    india_states.plot(ax=ax, color='white', edgecolor='black')  # White background for the map

    # Add arrows for migration, color-coded by migration reason
    if origin_geometry is not None and not origin_geometry.is_empty:
        origin_point = origin_geometry.centroid
        for _, row in filtered_data.iterrows():
            destination_geometry = row['geometry']
            # Skip destinations that are not on the map (missing or empty geometry)
            if not hasattr(destination_geometry, 'centroid') or destination_geometry.is_empty:
                continue
            destination_point = destination_geometry.centroid

            # Scale arrow thickness; fall back to a fixed width when there is
            # no positive maximum to divide by
            if max_migration > 0:
                migration_scale = row['migration'] / max_migration * 10
            else:
                migration_scale = 1.0
            # Rows without a recorded reason use the neutral fallback colour
            arrow_color = reason_colors.get(row['emigration_immigration_reason'], missing_reason_color)
            arrow = FancyArrowPatch(
                (origin_point.x, origin_point.y),
                (destination_point.x, destination_point.y),
                connectionstyle="arc3,rad=0.3",
                arrowstyle="fancy", color=arrow_color, alpha=0.8, lw=migration_scale,
            )
            ax.add_patch(arrow)

    # Add labels for clarity (drawn on `ax` explicitly, not the pyplot current axes)
    for _, row in india_states.iterrows():
        if row['geometry']:
            centroid = row['geometry'].centroid
            ax.text(centroid.x, centroid.y, row['s_name'], fontsize=8, ha='center', color='darkgreen')

    # Add legend for migration reasons (compact layout)
    handles = [plt.Line2D([0], [0], color=reason_colors[reason], lw=4, label=reason) for reason in reasons]
    legend = ax.legend(
        handles=handles,
        title="Migration Reasons",
        fontsize=8, title_fontsize=10,
        loc='lower center',
        bbox_to_anchor=(0.5, -0.2),  # Position legend below map
        ncol=3,  # Arrange in 3 columns
        frameon=False
    )

    # Final touches
    ax.set_title(f"Migration from {origin_state.title()} by Reason", fontsize=16, color='darkblue')
    ax.set_axis_off()

    # Save the plot, then close the figure so the loop does not accumulate open figures
    output_path = os.path.join(output_dir, f"migration_from_{origin_state.replace(' ', '_')}.png")
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

print("All migration maps by reason have been saved.")


# Formal Borrowings

In [5]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from shapely.geometry import Point
from fuzzywuzzy import process
import os

# File paths
# The project root defaults to the original author's OneDrive folder. Set the
# MIGRANT_LOANS_ROOT environment variable to run the notebook elsewhere; with
# the variable unset, the resulting paths are identical to the original ones.
project_root = os.environ.get(
    'MIGRANT_LOANS_ROOT',
    '/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans',
)
data_path = os.path.join(project_root, '02_Output', 'People of India', 'population.csv')
migration_loans_path = os.path.join(project_root, '02_Output', 'People of India', 'mig_state_loans.csv')
shapefile_path = os.path.join(project_root, '01_Raw', 'Shapefiles', 'shrug_state_shapefile', 'state.shp')
output_dir = os.path.join(project_root, '04_Results', 'People of India', 'Population Migration', 'Formal')
borrowings_output_dir = os.path.join(project_root, '04_Results', 'CPHS with Loan', 'Total Population')

# Ensure output directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(borrowings_output_dir, exist_ok=True)

# Load datasets
migration_data = pd.read_csv(data_path)
migration_loans_data = pd.read_csv(migration_loans_path)
india_states = gpd.read_file(shapefile_path)

# Reproject the state polygons to EPSG:4326 so every map is drawn in one CRS
india_states = india_states.to_crs("EPSG:4326")

# Candidate list for fuzzy matching: lower-cased shapefile state names.
# Missing names are dropped so the matcher only ever compares strings.
state_names = india_states['s_name'].dropna().str.lower().tolist()
def fuzzy_match(name, threshold=80):
    """Return the shapefile state name that best matches ``name``.

    Parameters
    ----------
    name : str
        Raw state name from the data. Non-string values (``None``, ``NaN``)
        and blank strings are treated as unmatched instead of raising.
    threshold : int, optional
        The best candidate is accepted only when its score is strictly
        greater than this value (default 80).

    Returns
    -------
    str or None
        The matching entry of ``state_names`` (lower-cased), or ``None`` when
        the input is unusable or no candidate scores above ``threshold``.
    """
    # Missing cells arrive as NaN floats or None; `.lower()` would raise on them.
    if not isinstance(name, str) or not name.strip():
        return None

    best = process.extractOne(name.lower(), state_names)
    # extractOne yields None when there is nothing to compare against.
    if best is None:
        return None

    match, score = best
    return match if score > threshold else None

# Map state names in both datasets onto shapefile state names
migration_data['origin_matched'] = migration_data['state'].apply(fuzzy_match)
migration_data['destination_matched'] = migration_data['emigrated_immigrated_state'].apply(fuzzy_match)
migration_loans_data['state_matched'] = migration_loans_data['state'].apply(fuzzy_match)

# Debugging: Print unmatched states
unmatched_origins = migration_data.loc[migration_data['origin_matched'].isnull(), 'state'].unique()
unmatched_destinations = migration_data.loc[
    migration_data['destination_matched'].isnull(), 'emigrated_immigrated_state'
].unique()
unmatched_loans = migration_loans_data.loc[migration_loans_data['state_matched'].isnull(), 'state'].unique()
print("Unmatched Origin States:", unmatched_origins)
print("Unmatched Destination States:", unmatched_destinations)
print("Unmatched Loan States:", unmatched_loans)

# Merge migration loans data with shapefile data.
# Loan rows whose state could not be matched have no polygon to join to, so
# they are dropped before the merge.
# NOTE(review): assumes one loan row per state; several rows per state would
# duplicate that state's polygon in the merged frame. Confirm against the CSV.
india_states['s_name_lower'] = india_states['s_name'].str.lower()
india_states = india_states.merge(
    migration_loans_data[['state_matched', 'formal_borr']].dropna(subset=['state_matched']),
    left_on='s_name_lower',
    right_on='state_matched',
    how='left'
)

# Debugging: Check for states without data
missing_states = india_states[india_states['formal_borr'].isnull()]['s_name']
print("States without data for formal borrowings:", missing_states.tolist())

# States with no borrowing value are drawn in grey instead of being left out
no_data_style = {'color': 'lightgrey'}
state_geometries = india_states[['s_name_lower', 'geometry']]

# Iterate through all unique origin states
unique_origins = migration_data['origin_matched'].dropna().unique()

for origin_state in unique_origins:
    filtered_data = migration_data[migration_data['origin_matched'] == origin_state]

    # Attach destination geometry. The left join keeps rows whose destination
    # has no shapefile match; those rows end up with a missing geometry.
    filtered_data = filtered_data.merge(
        state_geometries,
        left_on='destination_matched',
        right_on='s_name_lower',
        how='left',
    )

    # Get origin state geometry
    origin_geometry = india_states[india_states['s_name_lower'] == origin_state].geometry.iloc[0]

    # Largest flow out of this origin; every arrow width is relative to it
    max_migration = filtered_data['migration'].max()

    # Plot the map
    fig, ax = plt.subplots(figsize=(15, 15))

    # Heatmap for formal borrowings
    india_states.plot(
        column='formal_borr',
        cmap='YlOrRd',
        legend=True,
        legend_kwds={'label': "Average Formal Borrowings", 'shrink': 0.6},
        missing_kwds=no_data_style,
        ax=ax
    )

    # Add migration arrows (only possible when the origin has a usable shape)
    if origin_geometry is not None and not origin_geometry.is_empty:
        origin_point = origin_geometry.centroid
        for _, row in filtered_data.iterrows():
            destination_geometry = row['geometry']
            # Skip destinations that are not on the map (missing or empty geometry)
            if not hasattr(destination_geometry, 'centroid') or destination_geometry.is_empty:
                continue
            destination_point = destination_geometry.centroid

            # Scale arrow thickness; fall back to a fixed width when there is
            # no positive maximum to divide by
            if max_migration > 0:
                migration_scale = row['migration'] / max_migration * 10
            else:
                migration_scale = 1.0
            arrow = FancyArrowPatch(
                (origin_point.x, origin_point.y),
                (destination_point.x, destination_point.y),
                connectionstyle="arc3,rad=0.3",
                arrowstyle="fancy", color="darkblue", alpha=0.8, lw=migration_scale,
            )
            ax.add_patch(arrow)

    # Add labels for clarity. `ax.text` targets the map axes explicitly, which
    # matters here because the colour bar adds a second axes to the figure.
    for _, row in india_states.iterrows():
        if row['geometry']:
            centroid = row['geometry'].centroid
            ax.text(centroid.x, centroid.y, row['s_name'], fontsize=8, ha='center', color='darkgreen')

    # Final touches
    ax.set_title(f"Migration from {origin_state.title()} to Other States", fontsize=16, color='darkblue')
    ax.set_axis_off()

    # Save the plot, then close the figure so the loop does not accumulate open figures
    output_path = os.path.join(output_dir, f"migration_from_{origin_state.replace(' ', '_')}.png")
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

print("All migration maps have been saved.")

# Plot heatmap for formal borrowings only
fig, ax = plt.subplots(figsize=(15, 15))

india_states.plot(
    column='formal_borr',
    cmap='YlOrRd',
    legend=True,
    legend_kwds={'label': "Average Formal Borrowings", 'shrink': 0.6},
    missing_kwds=no_data_style,
    ax=ax
)

# Add labels for state names
for _, row in india_states.iterrows():
    if row['geometry']:
        centroid = row['geometry'].centroid
        ax.text(
            centroid.x,
            centroid.y,
            row['s_name'],
            fontsize=8,
            ha='center',
            color='darkgreen',
        )

# Final touches
ax.set_title("Heatmap of Formal Borrowings by State", fontsize=16, color='darkblue')
ax.set_axis_off()

# Save the heatmap
heatmap_output_path = os.path.join(borrowings_output_dir, "formal_borrowings_heatmap.png")
fig.savefig(heatmap_output_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print("Heatmap of formal borrowings saved successfully.")


Unmatched Origin States: ['Telangana']
Unmatched Destination States: ['Foreign' 'Telangana' 'Not Applicable']
Unmatched Loan States: ['Telangana']
States without data for formal borrowings: ['Arunachal Pradesh', 'Nagaland', 'Manipur', 'Mizoram', 'Daman and Diu', 'Dadra and Nagar Haveli', 'Lakshadweep', 'Andaman and Nicobar Islands']
All migration maps have been saved.
Heatmap of formal borrowings saved successfully.


# Informal Borrowings

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from shapely.geometry import Point
from fuzzywuzzy import process
import os

# File paths
# The project root defaults to the original author's OneDrive folder. Set the
# MIGRANT_LOANS_ROOT environment variable to run the notebook elsewhere; with
# the variable unset, the resulting paths are identical to the original ones.
project_root = os.environ.get(
    'MIGRANT_LOANS_ROOT',
    '/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans',
)
data_path = os.path.join(project_root, '02_Output', 'People of India', 'population.csv')
migration_loans_path = os.path.join(project_root, '02_Output', 'People of India', 'mig_state_loans.csv')
shapefile_path = os.path.join(project_root, '01_Raw', 'Shapefiles', 'shrug_state_shapefile', 'state.shp')
output_dir = os.path.join(project_root, '04_Results', 'People of India', 'Population Migration', 'Informal')
borrowings_output_dir = os.path.join(project_root, '04_Results', 'CPHS with Loan', 'Total Population')

# Ensure output directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(borrowings_output_dir, exist_ok=True)

# Load datasets
migration_data = pd.read_csv(data_path)
migration_loans_data = pd.read_csv(migration_loans_path)
india_states = gpd.read_file(shapefile_path)

# Reproject the state polygons to EPSG:4326 so every map is drawn in one CRS
india_states = india_states.to_crs("EPSG:4326")

# Candidate list for fuzzy matching: lower-cased shapefile state names.
# Missing names are dropped so the matcher only ever compares strings.
state_names = india_states['s_name'].dropna().str.lower().tolist()
def fuzzy_match(name, threshold=80):
    """Return the shapefile state name that best matches ``name``.

    Parameters
    ----------
    name : str
        Raw state name from the data. Non-string values (``None``, ``NaN``)
        and blank strings are treated as unmatched instead of raising.
    threshold : int, optional
        The best candidate is accepted only when its score is strictly
        greater than this value (default 80).

    Returns
    -------
    str or None
        The matching entry of ``state_names`` (lower-cased), or ``None`` when
        the input is unusable or no candidate scores above ``threshold``.
    """
    # Missing cells arrive as NaN floats or None; `.lower()` would raise on them.
    if not isinstance(name, str) or not name.strip():
        return None

    best = process.extractOne(name.lower(), state_names)
    # extractOne yields None when there is nothing to compare against.
    if best is None:
        return None

    match, score = best
    return match if score > threshold else None

# Map state names in both datasets onto shapefile state names
migration_data['origin_matched'] = migration_data['state'].apply(fuzzy_match)
migration_data['destination_matched'] = migration_data['emigrated_immigrated_state'].apply(fuzzy_match)
migration_loans_data['state_matched'] = migration_loans_data['state'].apply(fuzzy_match)

# Debugging: Print unmatched states
unmatched_origins = migration_data.loc[migration_data['origin_matched'].isnull(), 'state'].unique()
unmatched_destinations = migration_data.loc[
    migration_data['destination_matched'].isnull(), 'emigrated_immigrated_state'
].unique()
unmatched_loans = migration_loans_data.loc[migration_loans_data['state_matched'].isnull(), 'state'].unique()
print("Unmatched Origin States:", unmatched_origins)
print("Unmatched Destination States:", unmatched_destinations)
print("Unmatched Loan States:", unmatched_loans)

# Merge migration loans data with shapefile data.
# Loan rows whose state could not be matched have no polygon to join to, so
# they are dropped before the merge.
# NOTE(review): assumes one loan row per state; several rows per state would
# duplicate that state's polygon in the merged frame. Confirm against the CSV.
india_states['s_name_lower'] = india_states['s_name'].str.lower()
india_states = india_states.merge(
    migration_loans_data[['state_matched', 'informal_borr']].dropna(subset=['state_matched']),
    left_on='s_name_lower',
    right_on='state_matched',
    how='left'
)

# Debugging: Check for states without data
missing_states = india_states[india_states['informal_borr'].isnull()]['s_name']
print("States without data for informal borrowings:", missing_states.tolist())

# States with no borrowing value are drawn in grey instead of being left out
no_data_style = {'color': 'lightgrey'}
state_geometries = india_states[['s_name_lower', 'geometry']]

# Iterate through all unique origin states
unique_origins = migration_data['origin_matched'].dropna().unique()

for origin_state in unique_origins:
    filtered_data = migration_data[migration_data['origin_matched'] == origin_state]

    # Attach destination geometry. The left join keeps rows whose destination
    # has no shapefile match; those rows end up with a missing geometry.
    filtered_data = filtered_data.merge(
        state_geometries,
        left_on='destination_matched',
        right_on='s_name_lower',
        how='left',
    )

    # Get origin state geometry
    origin_geometry = india_states[india_states['s_name_lower'] == origin_state].geometry.iloc[0]

    # Largest flow out of this origin; every arrow width is relative to it
    max_migration = filtered_data['migration'].max()

    # Plot the map
    fig, ax = plt.subplots(figsize=(15, 15))

    # Heatmap for informal borrowings (the colour bar label previously said "Formal")
    india_states.plot(
        column='informal_borr',
        cmap='Purples',
        legend=True,
        legend_kwds={'label': "Average Informal Borrowings", 'shrink': 0.6},
        missing_kwds=no_data_style,
        ax=ax
    )

    # Add migration arrows (only possible when the origin has a usable shape)
    if origin_geometry is not None and not origin_geometry.is_empty:
        origin_point = origin_geometry.centroid
        for _, row in filtered_data.iterrows():
            destination_geometry = row['geometry']
            # Skip destinations that are not on the map (missing or empty geometry)
            if not hasattr(destination_geometry, 'centroid') or destination_geometry.is_empty:
                continue
            destination_point = destination_geometry.centroid

            # Scale arrow thickness; fall back to a fixed width when there is
            # no positive maximum to divide by
            if max_migration > 0:
                migration_scale = row['migration'] / max_migration * 10
            else:
                migration_scale = 1.0
            arrow = FancyArrowPatch(
                (origin_point.x, origin_point.y),
                (destination_point.x, destination_point.y),
                connectionstyle="arc3,rad=0.3",
                arrowstyle="fancy", color="darkblue", alpha=0.8, lw=migration_scale,
            )
            ax.add_patch(arrow)

    # Add labels for clarity. `ax.text` targets the map axes explicitly, which
    # matters here because the colour bar adds a second axes to the figure.
    for _, row in india_states.iterrows():
        if row['geometry']:
            centroid = row['geometry'].centroid
            ax.text(centroid.x, centroid.y, row['s_name'], fontsize=8, ha='center', color='darkgreen')

    # Final touches
    ax.set_title(f"Migration from {origin_state.title()} to Other States", fontsize=16, color='darkblue')
    ax.set_axis_off()

    # Save the plot, then close the figure so the loop does not accumulate open figures
    output_path = os.path.join(output_dir, f"migration_from_{origin_state.replace(' ', '_')}.png")
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

print("All migration maps have been saved.")

# Plot heatmap for informal borrowings only
fig, ax = plt.subplots(figsize=(15, 15))

india_states.plot(
    column='informal_borr',
    cmap='Purples',
    legend=True,
    legend_kwds={'label': "Average Informal Borrowings", 'shrink': 0.6},
    missing_kwds=no_data_style,
    ax=ax
)

# Add labels for state names
for _, row in india_states.iterrows():
    if row['geometry']:
        centroid = row['geometry'].centroid
        ax.text(
            centroid.x,
            centroid.y,
            row['s_name'],
            fontsize=8,
            ha='center',
            color='darkgreen',
        )

# Final touches
ax.set_title("Heatmap of Informal Borrowings by State", fontsize=16, color='darkblue')
ax.set_axis_off()

# Save the heatmap
heatmap_output_path = os.path.join(borrowings_output_dir, "informal_borrowings_heatmap.png")
fig.savefig(heatmap_output_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print("Heatmap of informal borrowings saved successfully.")
