# Bihar Maps

In [None]:
# load libraries
!pip install pyarrow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from pathlib import Path
from matplotlib.ticker import FuncFormatter

In [None]:
# load the district boundary shapefile into a GeoDataFrame
file_path = Path('india_2011_district.shp')
geo_df = gpd.read_file(file_path)

# filter for Bihar; take an explicit copy because later cells drop columns
# and rewrite the 'district' column, and doing that on a boolean-mask slice
# of the national table raises pandas' SettingWithCopyWarning
geo_df = geo_df[geo_df['st_nm'] == "Bihar"].copy()

# now let's preview what our map looks like with no data in it
geo_df.plot()

In [None]:
# clean data
# Each step rebinds geo_df instead of using inplace=True: geo_df was produced
# by a boolean-mask filter, and mutating such a slice in place is what
# triggers pandas' SettingWithCopyWarning.

# drop the attribute columns that are not needed for mapping
geo_df = geo_df.drop(columns=['cartodb_id','censuscode','dt_cen_cd','st_cen_cd','st_nm'])

# upper-case the district names (the survey tables are upper-cased the same
# way before they are joined on this key)
geo_df['district'] = geo_df['district'].str.upper()

# sort by district, then use the district name as the index (the join key)
geo_df = (
    geo_df.sort_values(by='district')
    .rename(columns={'district': 'District'})
    .set_index("District")
)
geo_df.head()

In [None]:
# load in new csv file
file_path = Path('nss_bihar.csv')
demo_df = pd.read_csv(file_path)

# clean data
# upper-case district names so they match the upper-cased shapefile index
demo_df['District'] = demo_df['District'].str.upper()

# keep only the columns used below. The projection already excludes any
# stray "Unnamed: 0" index column, so no separate drop is needed (the old
# explicit drop raised KeyError when that column was absent). The copy makes
# demo_df an independent frame rather than a slice of the raw table.
demo_df = demo_df[['HHID','District','hh_size','rural_urban','Religion']].copy()
demo_df

## Mean Household Size

This section creates a choropleth map of the mean household size by district in Bihar.

In [None]:
# aggregate by district
df_hhsize = demo_df.groupby(['District'])['hh_size'].mean().to_frame()
df_hhsize.head()

In [None]:
# check that length of data matches
len(df_hhsize.index) == len(geo_df.index)

In [None]:
#check data types are the same before merging
df_hhsize.dtypes

In [None]:
#check data types are the same before merging
geo_df.dtypes

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged = geo_df.join(df_hhsize)
merged = gpd.GeoDataFrame(merged)
merged.head()

In [None]:
# NOTE(review): the two snippets below are disabled scratch code kept for
# reference; nothing in them runs.
# # Categorical
# gb_merged = merged.groupby(['District'])['Religion'].value_counts(normalize=True).to_frame(name = 'islam_pct').reset_index()
# gb_merged = gb_merged.set_index('District')
# gb_merged.head(100)

# Numeric
# gb_merged = merged.groupby(['District'])['hh_size'].mean().to_frame(name = 'Mean Household Size').reset_index()
# gb_merged = gb_merged.set_index('District')
# gb_merged.head(100)

In [None]:
# column dtypes of the joined geodataframe
merged.dtypes

In [None]:
# largest district mean; useful for choosing the colour-scale upper bound
merged['hh_size'].max()

In [None]:
# set a variable that will call whatever column we want to visualise on the map
variable = 'hh_size'

# set the range for the choropleth; values outside it are clipped to the
# ends of the colour scale
vmin, vmax = 0, 6

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; vmin/vmax are passed here as well so the polygons are coloured
# on the same scale that the colorbar below displays (previously the map was
# normalised to the data's own min/max while the colorbar showed 0..vmax)
merged.plot(column=variable, cmap='YlGnBu', vmin=vmin, vmax=vmax,
            linewidth=0.5, ax=ax, edgecolor='0.0')

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Mean Household Size', \
              fontdict={'fontsize': '20',
                        'fontweight' : '3'})

# create an annotation for the  data source
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
           xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
           fontsize=10, color='#555555')

# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# public API instead of assigning the private `_A` attribute
sm.set_array([])
comma_fmt = FuncFormatter(lambda x, p: format(int(x), ','))
# the mappable is not attached to any Axes, so tell Matplotlib explicitly
# which Axes the colorbar should take its space from
cbar = fig.colorbar(sm, ax=ax, format=comma_fmt)
cbar.set_label('Mean Household Size')

# this will save the figure as a high-res png. 
fig.savefig('hh_size.png', dpi=300)


## Religion Percentage

This section creates a choropleth map of the percentage of Hindu households by district in Bihar.

In [None]:
# aggregate by district: share of households of each religion within a district.
# value_counts(normalize=True) yields fractions in [0, 1]; scale by 100 so the
# 'Percentage' column really holds percentages, matching its name and the
# "Percent ..." title of the map drawn from it.
df_religion = (
    demo_df.groupby(['District'])['Religion']
    .value_counts(normalize=True)
    .mul(100)
    .to_frame(name = 'Percentage')
    .reset_index()
)
df_religion = df_religion.set_index('District')

# filter for Hinduism
# NOTE: a district with no "Hinduism" rows has no entry after this filter and
# will therefore show as missing (NaN) once joined onto the geodataframe
df_religion = df_religion[df_religion['Religion'] == "Hinduism"]
df_religion.head()

In [None]:
# check that length of data matches
len(df_religion.index) == len(geo_df.index)

In [None]:
#check data types are the same before merging
df_religion.dtypes

In [None]:
#check data types are the same before merging
geo_df.dtypes

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged_religion = geo_df.join(df_religion)
merged_religion = gpd.GeoDataFrame(merged_religion)
merged_religion.head()

In [None]:
# Categorical variable summarised as a per-district share

# set a variable that will call whatever column we want to visualise on the map
variable = 'Percentage'

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; legend=True lets geopandas draw the colorbar, and the colour
# scale is taken from the data itself (no fixed vmin/vmax is applied)
merged_religion.plot(column=variable, categorical=False,cmap='Blues', linewidth=0.6, ax=ax, edgecolor='0.2', legend=True)

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Percent Hindu Population', \
               fontdict={'fontsize': '20',
                         'fontweight' : '3'})

# create an annotation for the data source; cite the same survey source as the
# other maps in this notebook (the previous text pointed at a Maharashtra
# census page, which is not where these Bihar figures come from)
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
            xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
            fontsize=10, color='#555555')

# this will save the figure as a high-res png. 
fig.savefig('religion.png', dpi=300)

## Rural-Urban Percentage

In [None]:
# aggregate by district: share of rural vs. urban households within a district.
# value_counts(normalize=True) yields fractions in [0, 1]; scale by 100 so the
# 'Percentage' column really holds percentages, matching its name and the
# "Percent ..." title of the map drawn from it.
df_urban = (
    demo_df.groupby(['District'])['rural_urban']
    .value_counts(normalize=True)
    .mul(100)
    .to_frame(name = 'Percentage')
    .reset_index()
)
df_urban = df_urban.set_index('District')

# filter for urban households
# NOTE: a district with no "urban" rows has no entry after this filter and
# will therefore show as missing (NaN) once joined onto the geodataframe
df_urban = df_urban[df_urban['rural_urban'] == "urban"]
df_urban.head()

In [None]:
# check that length of data matches
len(df_urban.index) == len(geo_df.index)

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged_urban = geo_df.join(df_urban)
merged_urban  = gpd.GeoDataFrame(merged_urban)
merged_urban.head()

In [None]:
# Categorical variable summarised as a per-district share

# set a variable that will call whatever column we want to visualise on the map
variable = 'Percentage'

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; legend=True lets geopandas draw the colorbar, and the colour
# scale is taken from the data itself (no fixed vmin/vmax is applied)
merged_urban.plot(column=variable, categorical=False,cmap='Blues', linewidth=0.6, ax=ax, edgecolor='0.2', legend=True)

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Percent Urban Population', \
               fontdict={'fontsize': '20',
                         'fontweight' : '3'})

# create an annotation for the data source; cite the same survey source as the
# other maps in this notebook (the previous text pointed at a Maharashtra
# census page, which is not where these Bihar figures come from)
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
            xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
            fontsize=10, color='#555555')

# this will save the figure as a high-res png. 
fig.savefig('ruralurban.png', dpi=300)

## Mean Fruit Consumption

In [None]:
# load in new csv file
file_path = Path('qhat.csv')
qhat_df = pd.read_csv(file_path)
qhat_df.columns

In [None]:
qhat_df = qhat_df[['j','apple','guava','potato']]
qhat_df

In [None]:
# create a household-district map using NSS data
hh_dist_map = demo_df.set_index('HHID')
hh_dist_map = hh_dist_map[['District']]
hh_dist_map = hh_dist_map['District'].to_dict()

In [None]:
qhat_df['district'] = qhat_df['j'].map(hh_dist_map)
qhat_df

In [None]:
# aggregate apple by district
apple_df = qhat_df.groupby(['district'])['apple'].mean().to_frame()
apple_df['apple'] = apple_df['apple']/1000
apple_df.head()

In [None]:
# aggregate guava by district
guava_df = qhat_df.groupby(['district'])['guava'].mean().to_frame()
guava_df['guava'] = guava_df['guava']/1000
guava_df.head()

In [None]:
# aggregate tomato by district
potato_df = qhat_df.groupby(['district'])['potato'].mean().to_frame()
potato_df['potato'] = potato_df['potato']/1000
potato_df.head()

In [None]:
# check that length of data matches
len(potato_df.index) == len(geo_df.index)

In [None]:
# check that length of data matches
len(guava_df.index) == len(geo_df.index)

In [None]:
# check that length of data matches
len(potato_df.index) == len(geo_df.index)

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged_apple = geo_df.join(apple_df)
merged_apple = gpd.GeoDataFrame(merged_apple)
merged_apple.head()

In [None]:
merged_apple['apple'].max()

In [None]:
# set a variable that will call whatever column we want to visualise on the map
variable = 'apple'

# set the range for the choropleth; values outside it are clipped to the
# ends of the colour scale
vmin, vmax = 0, 1.5

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; vmin/vmax are passed here as well so the polygons are coloured
# on the same scale that the colorbar below displays (previously the map was
# normalised to the data's own min/max while the colorbar showed 0..vmax)
merged_apple.plot(column=variable, cmap='YlGnBu', vmin=vmin, vmax=vmax,
                  linewidth=0.5, ax=ax, edgecolor='0.0')

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Mean Monthly Apple Consumption', \
              fontdict={'fontsize': '20',
                        'fontweight' : '3'})

# create an annotation for the  data source
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
           xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
           fontsize=10, color='#555555')

# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# public API instead of assigning the private `_A` attribute
sm.set_array([])
comma_fmt = FuncFormatter(lambda x, p: format(round(x,4), ','))
# the mappable is not attached to any Axes, so tell Matplotlib explicitly
# which Axes the colorbar should take its space from
cbar = fig.colorbar(sm, ax=ax, format=comma_fmt)
cbar.set_label('Mean Apple Consumption (kg per Month)')

# this will save the figure as a high-res png. 
fig.savefig('apple.png', dpi=600, facecolor='white', transparent=False)

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged_guava = geo_df.join(guava_df)
merged_guava = gpd.GeoDataFrame(merged_guava)
merged_guava.head()

In [None]:
merged_guava['guava'].max()

In [None]:
# set a variable that will call whatever column we want to visualise on the map
variable = 'guava'

# set the range for the choropleth; values outside it are clipped to the
# ends of the colour scale
vmin, vmax = 0, 1.5

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; vmin/vmax are passed here as well so the polygons are coloured
# on the same scale that the colorbar below displays (previously the map was
# normalised to the data's own min/max while the colorbar showed 0..vmax)
merged_guava.plot(column=variable, cmap='YlGnBu', vmin=vmin, vmax=vmax,
                  linewidth=0.5, ax=ax, edgecolor='0.0')

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Mean Monthly Guava Consumption', \
              fontdict={'fontsize': '20',
                        'fontweight' : '3'})

# create an annotation for the  data source
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
           xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
           fontsize=10, color='#555555')

# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# public API instead of assigning the private `_A` attribute
sm.set_array([])
comma_fmt = FuncFormatter(lambda x, p: format(round(x,2), ','))
# the mappable is not attached to any Axes, so tell Matplotlib explicitly
# which Axes the colorbar should take its space from
cbar = fig.colorbar(sm, ax=ax, format=comma_fmt)
cbar.set_label('Mean Guava Consumption (kg per Month)')

# this will save the figure as a high-res png. 
fig.savefig('guava.png', dpi=600, facecolor='white', transparent=False)

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged_potato = geo_df.join(potato_df)
merged_potato = gpd.GeoDataFrame(merged_potato)
merged_potato.head()

In [None]:
merged_potato['potato'].max()

In [None]:
# set a variable that will call whatever column we want to visualise on the map
variable = 'potato'

# set the range for the choropleth; values outside it are clipped to the
# ends of the colour scale
vmin, vmax = 0, 15

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; vmin/vmax are passed here as well so the polygons are coloured
# on the same scale that the colorbar below displays (previously the map was
# normalised to the data's own min/max while the colorbar showed 0..vmax)
merged_potato.plot(column=variable, cmap='YlGnBu', vmin=vmin, vmax=vmax,
                   linewidth=0.5, ax=ax, edgecolor='0.0')

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Mean Monthly Potato Consumption', \
              fontdict={'fontsize': '20',
                        'fontweight' : '3'})

# create an annotation for the  data source
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
           xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
           fontsize=10, color='#555555')

# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# public API instead of assigning the private `_A` attribute
sm.set_array([])
comma_fmt = FuncFormatter(lambda x, p: format(round(x,2), ','))
# the mappable is not attached to any Axes, so tell Matplotlib explicitly
# which Axes the colorbar should take its space from
cbar = fig.colorbar(sm, ax=ax, format=comma_fmt)
cbar.set_label('Mean Potato Consumption (kg per Month)')

# this will save the figure as a high-res png. 
fig.savefig('potato.png', dpi=600, facecolor='white', transparent=False)

## Household Nutrition

In [None]:
# load the household nutrition table
file_path = Path('hh_nutrition.csv')
n_df = pd.read_csv(file_path)
n_df

In [None]:
# attach each row's district by looking up its 'j' value in hh_dist_map
# (presumably 'j' holds the household id — verify against the csv);
# keys not present in the map become NaN
n_df['district'] = n_df['j'].map(hh_dist_map)
# keep only the key, the 'Ascorbic Acid' column and the district
n_df = n_df[['j','Ascorbic Acid','district']]
n_df

In [None]:
# aggregate Vitamin C (the 'Ascorbic Acid' column) by district
# NOTE(review): n_df is rebound to the aggregated frame here, so the cells
# above must be re-run from the csv load before this cell can run again
n_df = n_df.groupby(['district'])['Ascorbic Acid'].mean().to_frame()
n_df.head()

In [None]:
# check that both tables have the same number of districts
len(n_df.index) == len(geo_df.index)

In [None]:
# join the geodataframe with the per-district means (index-on-index join)
merged_n = geo_df.join(n_df)
merged_n = gpd.GeoDataFrame(merged_n)
merged_n.head()

In [None]:
# scale by 30 (presumably daily -> monthly, since the map labels this value
# "mg per month" — confirm the unit of the source column)
# NOTE(review): this overwrites the column in place, so re-running this cell
# without re-running the join multiplies by 30 again
merged_n['Ascorbic Acid'] = merged_n['Ascorbic Acid'] * 30

In [None]:
# largest district value; useful for choosing the colour-scale upper bound
merged_n['Ascorbic Acid'].max()

In [None]:
# set a variable that will call whatever column we want to visualise on the map
variable = 'Ascorbic Acid'

# set the range for the choropleth; values outside it are clipped to the
# ends of the colour scale
vmin, vmax = 0, 8000

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; vmin/vmax are passed here as well so the polygons are coloured
# on the same scale that the colorbar below displays (previously the map was
# normalised to the data's own min/max while the colorbar showed 0..vmax)
merged_n.plot(column=variable, cmap='YlGnBu', vmin=vmin, vmax=vmax,
              linewidth=0.5, ax=ax, edgecolor='0.0')

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Mean Recommended Vitamin C per Household', \
              fontdict={'fontsize': '20',
                        'fontweight' : '3'})

# create an annotation for the  data source
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
           xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
           fontsize=10, color='#555555')

# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# public API instead of assigning the private `_A` attribute
sm.set_array([])
comma_fmt = FuncFormatter(lambda x, p: format(int(x), ','))
# the mappable is not attached to any Axes, so tell Matplotlib explicitly
# which Axes the colorbar should take its space from
cbar = fig.colorbar(sm, ax=ax, format=comma_fmt)
cbar.set_label('Vitamin C (mg per month)')

ax.set_facecolor('w')

# this will save the figure as a high-res png. 
fig.savefig('vitaminc_recs.png', dpi=600, facecolor='white', transparent=False)

## Actual Consumption

In [None]:
# load in new csv file
file_path = Path('actual_consumption.csv')
consumption_df = pd.read_csv(file_path)
print(consumption_df.columns.tolist())

In [None]:
consumption_df['district'] = consumption_df['j'].map(hh_dist_map)
consumption_df.rename(columns={'Vitamin C, total ascorbic acid': 'Vitamin C'}, inplace=True)
consumption_df = consumption_df[['j','Vitamin C','district']]
consumption_df

In [None]:
# aggregate apple by district
vitc_df = consumption_df.groupby(['district'])['Vitamin C'].mean().to_frame()
vitc_df

In [None]:
# check that length of data matches
len(vitc_df.index) == len(geo_df.index)

In [None]:
# join the geodataframe with the cleaned up csv dataframe
merged_vitc = geo_df.join(vitc_df)
merged_vitc = gpd.GeoDataFrame(merged_vitc)
merged_vitc.head()

In [None]:
merged_vitc['Vitamin C'].max()

In [None]:
# set a variable that will call whatever column we want to visualise on the map
variable = 'Vitamin C'

# set the range for the choropleth; values outside it are clipped to the
# ends of the colour scale
vmin, vmax = 0, 3500

# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))

# create map; vmin/vmax are passed here as well so the polygons are coloured
# on the same scale that the colorbar below displays (previously the map was
# normalised to the data's own min/max while the colorbar showed 0..vmax)
merged_vitc.plot(column=variable, cmap='YlGnBu', vmin=vmin, vmax=vmax,
                 linewidth=0.5, ax=ax, edgecolor='0.0')

# Now we can customise and add annotations

# remove the axis
ax.axis('off')

# add a title
ax.set_title('Actual Vitamin C Consumption per Household', \
              fontdict={'fontsize': '20',
                        'fontweight' : '3'})

ax.set_facecolor('w')

# create an annotation for the  data source
ax.annotate('Sources: http://www.microdata.gov.in/NADA43/index.php/catalog/126/related_materials (Statistics)\nMIT GeoWeb Data (ESRI Shapefile)',
           xy=(0.1, .08), xycoords='figure fraction',
           horizontalalignment='left', verticalalignment='top',
           fontsize=10, color='#555555')

# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='YlGnBu', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# public API instead of assigning the private `_A` attribute
sm.set_array([])
comma_fmt = FuncFormatter(lambda x, p: format(int(x), ','))
# the mappable is not attached to any Axes, so tell Matplotlib explicitly
# which Axes the colorbar should take its space from
cbar = fig.colorbar(sm, ax=ax, format=comma_fmt)
cbar.set_label('Vitamin C (mg per month)')

# this will save the figure as a high-res png. 
fig.savefig('vitaminc_actual.png', dpi=600, facecolor='white', transparent=False)