In [None]:
# This project takes two datasets for Pune city: the slum data and the public-toilet data.
# The aim is to create a visualization that helps us compare the public toilets available
# in the slum areas of Pune city.
# The question answered by this comparison is: 'Are there enough public toilets in the slum areas?'
import pandas as pd
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import seaborn as sns

# Read both source CSV files (resolved relative to the notebook's working directory).
SLUM_CSV = "Pune-D25-Slum-Housing-and-Population-data_1.csv"
TOILET_CSV = "Pune-D11-PublicToiletsData_1.csv"

slumData, toiletData = (pd.read_csv(csv_path) for csv_path in (SLUM_CSV, TOILET_CSV))

# Show the toilet table's column names.
toiletData.columns

In [None]:
# Display the slum table as loaded from the CSV to inspect its contents.
slumData

In [None]:
# Prepare the slum data: keep only the ward identifiers, the recognised-slum
# count and the slum population breakdown.
slum_columns = [
    'Ward Name',
    'Ward No.',
    'No. of Recognised Slums',
    'Slum Population - Total',
    'Slum Population - Male',
    'Slum Population - Female',
    'Slum Population - Child (0-6)',
]
slumData = slumData[slum_columns]
slumData

In [None]:
# Prepare the toilet data: keep only the ward identifiers, the household
# counts and the free public toilet counts.
toilet_columns = [
    'Ward Name',
    'Ward No.',
    'Total number of households (HH)',
    'Number of Households with toilets',
    'Number of free public toilets - Female',
    'Number of free public toilets - Male',
]
toiletData = toiletData[toilet_columns]
toiletData

In [None]:
# Combine the slum and toilet tables ward by ward (left join, so every row of
# the slum table is kept), then derive the number of households without toilets.
ward_keys = ['Ward Name', 'Ward No.']
df = pd.merge(slumData, toiletData, how="left", on=ward_keys)
indices = df.index.tolist()  # not used by the cells visible in this notebook

total_households = df['Total number of households (HH)']
households_with_toilets = df['Number of Households with toilets']
df['HH without toilets'] = total_households.sub(households_with_toilets)
df

In [None]:
# Calculate the ratio 'Toilets per Household': free public toilets available
# per household that has no toilet of its own.
#
# The total number of free public toilets is the sum of the female and male
# counts. Using the female count alone (and relabelling it as the total) would
# under-report the toilets actually available in each ward.
#
# The total is written to a new column rather than renaming an existing one,
# so this cell can be re-run without raising a KeyError.
female_toilets = df['Number of free public toilets - Female']
male_toilets = df['Number of free public toilets - Male']
df['Number of free public toilets'] = female_toilets + male_toilets

# A ward in which every household has a toilet has a zero denominator; mask it
# to NaN so the ratio is reported as missing instead of infinite.
hh_without_toilets = df['HH without toilets']
safe_denominator = hh_without_toilets.where(hh_without_toilets != 0)
df['Toilets per Household'] = df['Number of free public toilets'] / safe_denominator
df

In [None]:

import matplotlib.pyplot as plt 
import seaborn as sns
import matplotlib.gridspec as gridspec

# Figure layout: a 3x3 grid in which the left two columns hold three stacked
# panels (top, middle, bottom) and the right column holds a ward lookup table
# spanning the full height.
fig = plt.figure(dpi=80, figsize=(12,8))
gspec = gridspec.GridSpec(3,3)
top = fig.add_subplot(gspec[0,0:2])
middle = fig.add_subplot(gspec[1,0:2])
bottom = fig.add_subplot(gspec[2,0:2])
ax  = fig.add_subplot(gspec[0:,2])

# Ward lookup table: one row per ward, labelled with the same ward number used
# on the x-axis of the panels. All sizes are derived from the data so the table
# stays valid for any number of wards.
table_data = [[name] for name in df['Ward Name'].tolist()]
n_wards = len(table_data)
colwidth = [0.9]  # one width per table column; the table has a single column
rowlabel = df['Ward No.'].tolist()
colours = [['white'] for _ in range(n_wards)]
# Highlight the 8th row in red, as the original figure did.
# NOTE(review): the reason for singling out this row is not stated - confirm.
highlight_row = 7
if highlight_row < n_wards:
    colours[highlight_row] = ['red']
table = ax.table(cellText=table_data, loc='center right', colWidths=colwidth, rowLabels=rowlabel,
                 colLabels=['Ward Name'], fontsize=14, colColours=['yellow'], cellColours=colours)
table.set_fontsize(14)
table.scale(1,2)
# Alpha must lie in [0, 1]; values outside that range raise ValueError in
# current matplotlib. 1.0 keeps the table fully opaque.
table.set_alpha(1.0)
ax.axis('off')

# Top panel: total number of households compared with households that have toilets.
# Explicit labels are passed so the legend does not depend on matplotlib
# inferring them from the Series names.
top.plot(df['Ward No.'], df['Total number of households (HH)'],
         label='Total number of households (HH)')
top.plot(df['Ward No.'], df['Number of Households with toilets'], '--', color='red',
         label='Number of Households with toilets')
top.set_ylabel("Number of Households", fontsize=14)
top.legend(loc = 'lower center')

# Middle panel: free public toilets compared with households without toilets.
middle.plot(df['Ward No.'], df['Number of free public toilets'], '--', color='green',
            label='Number of free public toilets')
middle.plot(df['Ward No.'], df['HH without toilets'], color='yellow',
            label='HH without toilets')
middle.set_ylabel("Count", fontsize=14)
middle.legend(loc='lower center')

# Bottom panel: the ratio 'Toilets per Household' for every ward number.
bottom.bar(df['Ward No.'],df['Toilets per Household'], color='red')
bottom.set_xlabel("Ward No.", fontsize = 14)
bottom.set_ylabel("Toilets per Household", fontsize=15)
fig.suptitle("Toilets in Slum Areas - Pune City", fontsize=20)
# Leave room at the top of the figure for the suptitle.
fig.tight_layout(rect=[0, 0.03, 1, 0.95])

plt.show();