## Install required Python packages
!pip install pandas
!pip install numpy
!pip install folium

In [1]:
# Import Python packages 
import pandas as pd
import numpy as np
import folium
from folium.features import DivIcon

In [2]:
# Read the employee records; the CSV must contain a PinCode column
# (PinCode is the only mandatory column).
empdf = pd.read_csv(filepath_or_buffer='employee_pincodes.csv')
# Preview the first five rows.
empdf.head(n=5)

Unnamed: 0,ID,PinCode,State_name,District_name
0,1,500035,Telangana,RANGAREDDY
1,2,500085,Telangana,HYDERABAD
2,3,506145,Telangana,Hanamkonda
3,4,500039,Telangana,RANGAREDDY
4,5,530049,Andhra Pradesh,VISAKHAPATNAM


In [3]:
# Read the India postal-code reference table (one row per place, with
# latitude/longitude).
# For postal codes of other countries see https://pypi.org/project/pgeocode/
pindf = pd.read_csv(filepath_or_buffer='India_postal_codes_with_lat_long.csv')
# Preview the first five rows.
pindf.head(n=5)

Unnamed: 0,country_code,postal_code,place_name,state_name,state_code,county_name,county_code,community_name,community_code,latitude,longitude,accuracy
0,IN,744301,Sawai,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,7.5166,93.6031,4
1,IN,744301,Lapathy,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.1833,92.7667,3
2,IN,744301,Mus,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.2333,92.7833,4
3,IN,744301,Carnicobar,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.1833,92.7667,3
4,IN,744301,Kakana,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.1167,92.8,4


In [4]:
# Data pre-processing
#
# 1. pin_latlon_df           : one row per postal code (mean lat/long, most common county)
# 2. emp_count_df            : number of employees per postal code
# 3. emp_count_latlon_df     : employee counts joined with the postal-code lookup
# 4. emp_count_latlon_loc_df : one row per county (mean lat/long, total employees)


def _most_common(values):
    """Return the most frequent non-null value of a Series, or NaN if there is none.

    Series.mode() returns an empty Series when every value is null, so indexing
    its first element unconditionally would raise for such groups.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# A single groupby pass computes all three per-postal-code aggregates; this
# avoids re-scanning the whole of pindf once per postal code.
pin_latlon_df = (
    pindf.groupby('postal_code')
    .agg(
        latitude=('latitude', 'mean'),
        longitude=('longitude', 'mean'),
        county_name=('county_name', _most_common),
    )
    .reset_index()
)

# value_counts() names its result after the column in pandas < 2.0 but 'count'
# in pandas >= 2.0, so name the Series and its index explicitly instead of
# renaming a column by its old label.
emp_count_df = (
    empdf['PinCode']
    .value_counts()
    .rename('emp_count')
    .rename_axis('postal_code')
    .to_frame()
)

# Left join: every employee postal code is kept; codes missing from the lookup
# get NaN latitude/longitude/county_name.
emp_count_latlon_df = emp_count_df.join(pin_latlon_df.set_index('postal_code'))

# Roll up to county level. groupby drops rows whose county_name is NaN.
emp_count_latlon_loc_df = (
    emp_count_latlon_df.groupby('county_name')
    .agg(
        latitude=('latitude', 'mean'),
        longitude=('longitude', 'mean'),
        emp_count=('emp_count', 'sum'),
    )
    .reset_index()
)
emp_count_latlon_loc_df.head()

Unnamed: 0,county_name,latitude,longitude,emp_count
0,Adilabad,19.121611,78.957587,23
1,Akola,20.497348,77.681077,6
2,Aligarh,27.601778,78.609485,1
3,Allahabad,25.494742,81.5273,1
4,Amravati,21.1283,77.5561,1


In [5]:
# Draw the per-county employee density on an interactive map.
# The initial view is centred on the mean of the county coordinates.
center_lat = emp_count_latlon_loc_df['latitude'].mean()
center_long = emp_count_latlon_loc_df['longitude'].mean()

m = folium.Map(location=[center_lat, center_long], zoom_start=5)

# Rows with any missing value are skipped before plotting.
for _, row in emp_count_latlon_loc_df.dropna().iterrows():
    county = row['county_name']
    count = row['emp_count']
    lat = row['latitude']
    lon = row['longitude']

    # Orange circle whose radius is the square root of the employee count.
    circle = folium.CircleMarker(
        location=[lat, lon],
        radius=np.sqrt(count),
        popup=f"Location: {county}, Employee count: {count}, Employee family count: {count*4}",
        tooltip=f"{county}: {count}",
        color="orange",
        fill_color="orange",
    )
    circle.add_to(m)

    # Text label showing the employee count at the same position.
    label = folium.Marker(
        location=[lat, lon],
        icon=DivIcon(
            icon_size=(100, 36),
            icon_anchor=(7, 15),
            html=f'<div style="font-size: 10pt; color : black">{count}</div>',
        ),
    )
    label.add_to(m)

# Last expression of the cell: render the map inline.
m

In [6]:
# Write the map built in the previous cell to an HTML file in the working directory.
m.save('Employee_distribution_India.html')