# Viewing the shapefile

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Location of the state-boundary shapefile on disk
shapefile_path = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/01_Raw/Shapefiles/shrug_state_shapefile/state.shp"

# Read the shapefile into a GeoDataFrame
gdf = gpd.read_file(shapefile_path)

# Preview the leading rows of the attribute table
print("First few rows of the shapefile data:", gdf.head(), sep="\n")

# List the attribute columns that are available
print("\nColumn names:", gdf.columns, sep="\n")

# Draw the geometries, then decorate the Axes that the plot call returns
ax = gdf.plot(edgecolor='black', figsize=(10, 10))
ax.set_title("Map of the Shapefile")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()


# Code for Generating India Heatmaps of Total Migration

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from fuzzywuzzy import process
import os

# Purpose: for each census year, read the per-state migration CSV, attach it
# to the state polygons, and save one map coloured and labelled by
# total_migration as a PNG in output_folder.

# File paths
shapefile_path = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/01_Raw/Shapefiles/shrug_state_shapefile/state.shp"
data_folder = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/02_Output/Migrants"
output_folder = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/04_Results/Total Migration"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Years to process
years = [2001, 2011]

# Load the shapefile and bring state names to a trimmed, lower-case form so
# they compare cleanly with the (equally normalised) names in the CSVs
gdf = gpd.read_file(shapefile_path)
gdf['s_name'] = gdf['s_name'].str.strip().str.lower()

# Candidate names for fuzzy matching. The shapefile is not modified inside the
# loop, so this is computed once rather than once per year.
state_choices = gdf['s_name'].unique()

for year in years:
    dataset_path = os.path.join(data_folder, f"{year}/{year}_migrant_india_no.csv")
    df = pd.read_csv(dataset_path)

    # Preprocess state names
    df['state'] = df['state'].str.strip().str.lower()

    # Fuzzy match each dataset state name to its closest shapefile name.
    # NOTE(review): extractOne is called without a score cutoff, so it
    # presumably returns its best candidate however weak the match is;
    # confirm that every dataset name maps to the intended state.
    df['matched_state'] = df['state'].apply(lambda x: process.extractOne(x, state_choices)[0])

    # Left merge: every shapefile polygon is kept, and polygons without a
    # matching dataset row get NaN in the dataset columns
    merged_gdf = gdf.merge(df, left_on='s_name', right_on='matched_state', how='left')

    # Min-max scaled copy of total_migration. It is not consumed by the plot
    # below, which colours by the raw total_migration column.
    merged_gdf['normalized_migration'] = (
        (merged_gdf['total_migration'] - merged_gdf['total_migration'].min()) /
        (merged_gdf['total_migration'].max() - merged_gdf['total_migration'].min())
    )

    # Create the plot
    fig, ax = plt.subplots(1, 1, figsize=(15, 12))

    # Plot the base map with a blue color scheme
    merged_gdf.plot(
        ax=ax, column='total_migration', cmap='Blues', edgecolor='white', linewidth=0.5,
        legend=True, legend_kwds={'label': "Total Migration", 'orientation': "vertical", 'shrink': 0.8}
    )

    # Add labels for each state
    for idx, row in merged_gdf.iterrows():
        # Polygons with no dataset match carry NaN here; int(NaN) raises
        # ValueError, so those polygons are left unlabelled instead.
        if pd.isna(row['total_migration']):
            continue
        x, y = row.geometry.centroid.x, row.geometry.centroid.y
        ax.text(
            x, y, f"{int(row['total_migration']):,}", fontsize=8, ha='center', color='black', weight='bold',
            bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.3', alpha=0.7)
        )

    # Title and credits
    ax.set_title(f"Total Migration by State ({year})", fontsize=18, fontweight='bold')
    ax.annotate(f"Source: Census Dataset {year}", xy=(0.1, 0.08), xycoords='figure fraction', fontsize=10, color='gray')
    ax.axis('off')

    # Save the plot as a PNG file, then release this specific figure so that
    # figures do not accumulate across loop iterations
    output_path = os.path.join(output_folder, f"total_migration_{year}.png")
    fig.tight_layout()
    fig.savefig(output_path, dpi=300)
    plt.close(fig)

    print(f"Saved map for {year} at {output_path}")


# Code for Generating Heatmaps for Migration with Duration of Stay

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from fuzzywuzzy import process
import os

# Purpose: for every census year and every duration-of-residence category,
# save one state-level map of total_migration as a PNG in output_folder.

# Input and output locations
shapefile_path = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/01_Raw/Shapefiles/shrug_state_shapefile/state.shp"
data_folder = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/02_Output/Migrants"
output_folder = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/04_Results/Migration with Duration"

# Create the destination directory when it is missing
os.makedirs(output_folder, exist_ok=True)

# Census years to map
years = [2001, 2011]

# Values of the duration_of_residence column; one map is produced per value
duration_categories = [
    "Duration of residence 1-4 years",
    "Duration of residence 10 years and above",
    "Duration of residence 5-9 years",
    "Duration of residence less than 1 year"
]

# Read the state polygons and put their names in trimmed, lower-case form
gdf = gpd.read_file(shapefile_path)
gdf['s_name'] = gdf['s_name'].str.strip().str.lower()

for year in years:
    dataset_path = os.path.join(data_folder, f"{year}/{year}_migrant_india_duration.csv")
    df = pd.read_csv(dataset_path)

    # Same trimming and lower-casing for the dataset's state names
    df['state'] = df['state'].str.strip().str.lower()

    # Map each dataset state name to the closest shapefile name
    state_choices = gdf['s_name'].unique()
    df['matched_state'] = df['state'].apply(
        lambda name: process.extractOne(name, state_choices)[0]
    )

    # One figure per duration category
    for duration in duration_categories:
        # Rows belonging to the current category only
        duration_df = df[df['duration_of_residence'] == duration]

        # Attach the category's rows to the polygons, keeping every polygon
        merged_gdf = gdf.merge(
            duration_df, left_on='s_name', right_on='matched_state', how='left'
        )

        # Min-max scale total_migration into a helper column
        merged_gdf['normalized_migration'] = (
            (merged_gdf['total_migration'] - merged_gdf['total_migration'].min()) /
            (merged_gdf['total_migration'].max() - merged_gdf['total_migration'].min())
        )

        # Canvas for this map
        fig, ax = plt.subplots(figsize=(15, 12))

        # Map coloured by total_migration, with a vertical colour bar
        merged_gdf.plot(
            ax=ax,
            column='total_migration',
            cmap='Blues',
            edgecolor='white',
            linewidth=0.5,
            legend=True,
            legend_kwds=dict(label="Total Migration", orientation="vertical", shrink=0.8),
        )

        # Write the value at each polygon's centroid; polygons without a value are skipped
        for geom, value in zip(merged_gdf['geometry'], merged_gdf['total_migration']):
            if pd.isna(value):
                continue
            centre = geom.centroid
            ax.text(
                centre.x, centre.y, f"{int(value):,}",
                fontsize=8, ha='center', color='black', weight='bold',
                bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.3', alpha=0.7)
            )

        # Heading, source note, and no axis frame
        ax.set_title(f"Total Migration by State ({year}): {duration}", fontsize=18, fontweight='bold')
        ax.annotate(f"Source: Census Dataset {year}", xy=(0.1, 0.08), xycoords='figure fraction', fontsize=10, color='gray')
        ax.axis('off')

        # File-name-friendly form of the category: spaces -> "_", dashes -> "to"
        duration_slug = duration.lower().translate(str.maketrans({" ": "_", "-": "to"}))
        output_path = os.path.join(output_folder, f"migration_{year}_{duration_slug}.png")

        # Write the PNG and close the figure
        plt.tight_layout()
        plt.savefig(output_path, dpi=300)
        plt.close()

        print(f"Saved map for {year} - {duration} at {output_path}")

# Code for Generating Heatmaps with Reason for Migration

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from fuzzywuzzy import process
import os

# Purpose: for every census year and every reason-for-migration column, save
# one state-level map as a PNG in a per-year folder under output_base_folder.

# Input and output locations
shapefile_path = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/01_Raw/Shapefiles/shrug_state_shapefile/state.shp"
data_folder = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/02_Output/Migrants"
output_base_folder = "/Users/bishmaybarik/Library/CloudStorage/OneDrive-ShivNadarInstitutionofEminence/Migrant_Loans/04_Results/Reason of Migration"

# Create the base destination directory when it is missing
os.makedirs(output_base_folder, exist_ok=True)

# Census years to map
years = [2001, 2011]

# Dataset column -> human-readable title used in the legend and heading
dependent_variables = {
    "people_for_work": "Migration Due to Work",
    "people_for_business": "Migration Due to Business",
    "people_for_education": "Migration Due to Education",
    "people_for_marriage": "Migration Due to Marriage",
    "people_after_birth": "Migration Due to Birth-related Reasons",
    "people_moved_with_hh": "Migration with Household",
    "people_other_reasons": "Migration Due to Other Reasons",
}

# Read the state polygons and put their names in trimmed, lower-case form
gdf = gpd.read_file(shapefile_path)
gdf['s_name'] = gdf['s_name'].str.strip().str.lower()

for year in years:
    # CSV holding this year's reason-for-migration counts
    dataset_path = os.path.join(data_folder, f"{year}/{year}_migrant_india_reason.csv")
    df = pd.read_csv(dataset_path)

    # Same trimming and lower-casing for the dataset's state names
    df['state'] = df['state'].str.strip().str.lower()

    # Map each dataset state name to the closest shapefile name
    state_choices = gdf['s_name'].unique()
    df['matched_state'] = df['state'].apply(
        lambda name: process.extractOne(name, state_choices)[0]
    )

    # Attach the dataset to the polygons, keeping every polygon
    merged_gdf = gdf.merge(df, left_on='s_name', right_on='matched_state', how='left')

    # Each year writes into its own sub-folder
    year_output_folder = os.path.join(output_base_folder, str(year))
    os.makedirs(year_output_folder, exist_ok=True)

    for variable, title in dependent_variables.items():
        # Min-max scale the current column into a helper column
        merged_gdf['normalized'] = (
            (merged_gdf[variable] - merged_gdf[variable].min()) /
            (merged_gdf[variable].max() - merged_gdf[variable].min())
        )

        # Canvas for this map
        fig, ax = plt.subplots(figsize=(15, 12))

        # Map coloured by the current column, with a vertical colour bar
        merged_gdf.plot(
            ax=ax,
            column=variable,
            cmap='Blues',
            edgecolor='white',
            linewidth=0.5,
            legend=True,
            legend_kwds=dict(label=title, orientation="vertical", shrink=0.8),
        )

        # Place a text label at each polygon's centroid; a missing value
        # yields an empty label string
        for geom, value in zip(merged_gdf['geometry'], merged_gdf[variable]):
            if pd.isna(value):
                label = ""
            else:
                label = f"{int(value):,}"
            centre = geom.centroid
            ax.text(
                centre.x, centre.y, label,
                fontsize=8, ha='center', color='black', weight='bold',
                bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.3', alpha=0.7)
            )

        # Heading, source note, and no axis frame
        ax.set_title(f"{title} by State ({year})", fontsize=18, fontweight='bold')
        ax.annotate(f"Source: Census Dataset {year}", xy=(0.1, 0.08), xycoords='figure fraction', fontsize=10, color='gray')
        ax.axis('off')

        # Write the PNG, named after the column, and close the figure
        output_path = os.path.join(year_output_folder, f"{variable}.png")
        plt.tight_layout()
        plt.savefig(output_path, dpi=300)
        plt.close()

        print(f"Saved map for {title} ({year}) at {output_path}")
