In [None]:
# Importing Packages
import pandas as pd
import geopandas as gpd

In [None]:
# Load the crimes-against-women table from the CSV in the working directory
crime_data = pd.read_csv(filepath_or_buffer="CrimesOnWomenData.csv")

In [None]:
# Load the boundary file from the working directory
geo_data = gpd.read_file(filename="IND_adm3.json")

In [None]:
# Spelling differences between the two sources, resolved to one shared label.
# Keys are matched AFTER upper-casing and trimming, so they must be upper case.
geo_name_fixes = {
    'ANDAMAN AND NICOBAR': 'ANDAMAN & NICOBAR ISLANDS',
    'DADRA AND NAGAR HAVELI': 'DADRA & NAGAR HAVELI',
    'DAMAN AND DIU': 'DAMAN & DIU',
    'JAMMU AND KASHMIR': 'JAMMU & KASHMIR',
    'NCT OF DELHI': 'DELHI'
}
crime_name_fixes = {
    'A & N ISLANDS': 'ANDAMAN & NICOBAR ISLANDS',
    'D & N HAVELI': 'DADRA & NAGAR HAVELI',
    'D&N HAVELI': 'DADRA & NAGAR HAVELI',
    'DELHI UT': 'DELHI'
}

# Crime table: discard the stray 'Unnamed: 0' column when it is present and
# rename 'State' to 'NAME_1', the key column shared with the spatial data.
crime_data = (
    crime_data
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .rename(columns={'State': 'NAME_1'})
)

# Normalise case and surrounding whitespace, then apply the spelling fixes.
crime_data['NAME_1'] = (
    crime_data['NAME_1'].str.upper().str.strip().replace(crime_name_fixes)
)
geo_data['NAME_1'] = (
    geo_data['NAME_1'].str.upper().str.strip().replace(geo_name_fixes)
)

In [None]:
# Crime categories summed into Total_Crimes (defined once, used twice below)
crime_columns = ['Rape', 'K&A', 'DD', 'AoW', 'AoM', 'DV', 'WT']

# Merge Geo + Crime data: every geometry row is kept (left join) and paired
# with every crime row that shares its NAME_1.
merged_data = geo_data.merge(crime_data, on="NAME_1", how="left")

# Fill missing values (geometry rows whose state has no crime record get 0)
merged_data = merged_data.fillna(0)

# Compute total crimes for each merged row
merged_data['Total_Crimes'] = merged_data[crime_columns].sum(axis=1)

# Compute total per crime category per state.
# The totals are taken from crime_data directly rather than from merged_data:
# the merge repeats each state's crime rows once per geometry row carrying that
# NAME_1, so grouping merged_data would multiply every state's figures by its
# number of geometry rows.
# NOTE(review): assumes "IND_adm3.json" holds several rows per state — confirm.
state_totals = crime_data.groupby('NAME_1')[crime_columns].sum()
state_totals['Total_Crimes'] = state_totals.sum(axis=1)

# Report one row per state present in the spatial data, sorted by name;
# states without any crime record get zeros, mirroring the left join above.
geo_states = pd.Index(sorted(geo_data['NAME_1'].dropna().unique()), name='NAME_1')
crime_sums = state_totals.reindex(geo_states, fill_value=0).reset_index()

In [None]:
# Save the per-state totals for use in Dash
crime_sums.to_csv("mergedcrime_data.csv", index=False)

# Optional: Download in Colab.
# google.colab can only be imported inside a Colab runtime; anywhere else the
# import fails, so the download step is skipped instead of crashing the cell
# after the CSV has already been written.
try:
    from google.colab import files
except ImportError:
    pass
else:
    files.download('mergedcrime_data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>