In [50]:
import pandas as pd
import geopandas as gpd
import numpy as np
import plotly.express as px
import os

#!pip install openpyxl geopandas boto3 requests matplotlib plotly nbformat

In [38]:
def read_file(file_path):
    """Load a tabular data file into a pandas DataFrame.

    The reader is chosen from the file extension, compared case-insensitively.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path to a ``.csv``, ``.xls`` or ``.xlsx`` file.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.

    Raises
    ------
    ValueError
        If the extension is not one of the supported formats.
    """
    # os.path.splitext returns a (root, ext) tuple and ext keeps its leading
    # dot, so take element 1 and strip the dot before comparing.
    file_extension = os.path.splitext(file_path)[1][1:].lower()

    # Read CSV
    if file_extension == 'csv':
        return pd.read_csv(file_path)

    # Read modern Excel workbooks
    elif file_extension == 'xlsx':
        return pd.read_excel(file_path, engine='openpyxl')

    # Read legacy Excel workbooks: openpyxl cannot open .xls, so let pandas
    # select the engine for that format.
    elif file_extension == 'xls':
        return pd.read_excel(file_path)

    else:
        raise ValueError(f"Unsupported file format: {file_extension}")
    

In [51]:
# Input workbook for this analysis.
# NOTE(review): absolute, machine-specific path; the notebook will not run
# elsewhere without editing it.
file_path = '/home/akshay/Documents/akshaygrows/Clustering/mumbai2data.xlsx'
# Load the workbook into a DataFrame via the extension-based reader.
dataframe = read_file(file_path)
# Preview the first rows (columns seen in the output: pincode, hub, cluster, l7_drr).
print(dataframe.head())


   pincode  hub cluster  l7_drr
0   400001  KRL  FRT 26     NaN
1   400002  KRL  KLB 22     NaN
2   400003  KRL  MSJ 25     NaN
3   400004  KRL  KLB 22     NaN
4   400005  KRL  FRT 26     NaN


In [40]:
# Path to the pincode boundary layer. Despite the "geojson" in the variable
# names, this points at a shapefile (.shp), not a GeoJSON file.
# NOTE(review): absolute, machine-specific path.
local_geojson_path = '/home/akshay/Documents/akshaygrows/India_Pincodes/india_pincodes.shp'

# Load the boundary layer into a GeoDataFrame
gdf_geojson = gpd.read_file(local_geojson_path)

In [44]:
# Ensure the pincode column datatype is consistent in both dataframes so the
# join keys compare equal.
# NOTE(review): if the shapefile stores pincodes as floats, astype(str) would
# yield values like '400001.0' that never match — confirm the source dtype.
dataframe['pincode'] = dataframe['pincode'].astype(str)
gdf_geojson['pincode'] = gdf_geojson['pincode'].astype(str)

# Merge the GeoDataFrame and DataFrame based on the pincode column.
# merge() defaults to an inner join, so pincodes missing from either side
# are dropped from the result.
merged_gdf = gdf_geojson.merge(dataframe, on='pincode')

# Treat missing l7_drr as 0, round up, and store as int. The filled column is
# assigned back explicitly: calling fillna(..., inplace=True) on a column
# selection is chained assignment, which does not update the parent frame
# under pandas Copy-on-Write and would leave NaNs for astype(int) to fail on.
merged_gdf['l7_drr'] = np.ceil(merged_gdf['l7_drr'].fillna(0)).astype(int)

# Clusters are categorical labels; keep them as strings.
merged_gdf['cluster'] = merged_gdf['cluster'].astype(str)

In [45]:
# Compute per-row centroids in a projected (metric) CRS, then convert them
# back to latitude/longitude for plotting.
# NOTE(review): the input file name and pincodes suggest Mumbai (~72.9°E),
# which lies in UTM zone 43N (EPSG:32643) rather than 44N — confirm whether
# zone 44N is intentional.
merged_gdf_projected = merged_gdf.to_crs(epsg=32644)  # Reproject to UTM zone 44N (EPSG:32644)
centroid = merged_gdf_projected.geometry.centroid   # Centroid of each geometry, in projected coordinates
centroid_wgs84 = centroid.to_crs(epsg=4326) # Centroids converted to WGS 84 (EPSG:4326) lat/lon

In [49]:
#function to generate graphs based on clusters or drr input
def plot_map(merged_gdf, preference='clusters'):
    """Build a Mapbox choropleth of pincode polygons.

    Parameters
    ----------
    merged_gdf : geopandas.GeoDataFrame
        Frame with a geometry column and the columns ``pincode``, ``l7_drr``
        and ``cluster``. It must have a CRS set, because the map centre is
        derived by reprojecting its geometries.
    preference : str, default 'clusters'
        ``'clusters'`` colours polygons by the ``cluster`` column using a
        discrete palette; ``'drr'`` colours them by ``l7_drr`` on a continuous
        scale spanning the column's min and max.

    Returns
    -------
    plotly.graph_objects.Figure or None
        The configured figure, or ``None`` (after printing a message) when
        ``preference`` is not one of the supported values.
    """
    # Validate first so an invalid preference never touches the data.
    if preference not in ('clusters', 'drr'):
        print("Invalid preference provided. Please choose 'clusters' or 'drr'.")
        return

    # Only the colouring differs between the two views; everything else is
    # shared and passed once below.
    if preference == 'clusters':
        color_options = dict(
            color='cluster',
            title="Clusters by Pincode",
            color_discrete_sequence=px.colors.qualitative.Set1,
        )
    else:
        color_options = dict(
            color='l7_drr',
            title="L7 DRR by Pincode",
            color_continuous_scale="RdBu_r",
            range_color=[merged_gdf['l7_drr'].min(), merged_gdf['l7_drr'].max()],
        )

    # Centre the map on the frame that was actually passed in (not on notebook
    # globals): centroids are computed in a projected CRS (UTM zone 44N) and
    # converted back to WGS 84 lat/lon.
    centroids = merged_gdf.to_crs(epsg=32644).geometry.centroid.to_crs(epsg=4326)

    fig = px.choropleth_mapbox(
        merged_gdf,
        geojson=merged_gdf.geometry,
        locations=merged_gdf.index,
        hover_data=['pincode', 'l7_drr', 'cluster'],
        mapbox_style="carto-positron",
        center={"lat": centroids.y.mean(), "lon": centroids.x.mean()},
        opacity=0.5,
        **color_options,
    )

    # Final size and camera; the zoom set here is the one that takes effect.
    fig.update_layout(width=800, height=600, mapbox=dict(bearing=0, pitch=0, zoom=9.5))
    return fig

In [None]:
# Example usage: build the DRR-coloured map and display it.
# plot_map returns None for an unrecognised preference, in which case
# fig.show() would raise; 'drr' is a supported value.
fig = plot_map(merged_gdf, preference='drr')
fig.show()
# Optional: export the figure as a standalone HTML file.
# fig.write_html('mumbai_drr_based.html')