# Imports

In [438]:
## General
import pandas as pd
pd.set_option('display.max_columns', 500)
import numpy as np
import csv
import ruptures as rpt
import os

# Visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# Data Path & Source Links

In [2]:
# General
root_path = '../'
data_path = 'Data/'

################## Air Quality Health Impact  #########################
# [SOURCE : https://www.stateofglobalair.org]

AQHEALTH_DATAPATH = root_path+data_path+'Air_Quality_Health_Impact_Selceted_Countries.csv'
#################################################


############# Air Quality PM2.5  ###############
# [SOURCE : https://climate.nasa.gov/vital-signs/global-temperature/]

AQPM25_DATAPATH = root_path+data_path+'Air_Quality_Population_Weighted_Selceted_Countries.csv'
#################################################



################## Statewise Airquality Data on India  ###############

INDIASTATEWISE_DATAPATH = root_path+data_path+'INDIA_AIRPOLLUTION_u2015.csv'
#################################################


################## Global Deaths by risk factor  ###############
# [SOURCE : https://ourworldindata.org/air-pollution]

DEATHS_BY_RISKFACTOR_DATAPATH = root_path+data_path+'number-of-deaths-by-risk-factor.csv'
#################################################


################## Percentage share of deaths by risk factor  ###############
# [SOURCE : https://ourworldindata.org/air-pollution]

AP_DEATH_SHARE_DATAPATH = root_path+data_path+'share-deaths-air-pollution.csv'
#################################################


# Data Processing Class

In [3]:
class DataProcessing:
    def __init__(self):
        print('Initialising the Dataprocessing class...')
        
    def processDF1(self,x):
        if type(x) == str:
            return x.replace('"', '').strip('=')
        else:
            return x

        
dataprocessing_handler = DataProcessing()

Initialising the Dataprocessing class...


# Helper Fucntions

In [54]:
class HelperFunctions:
    def __init__(self):
        print('Initialising Helper Function Class...')
        
    def save(self, filepath, fig=None):
        '''Save the current image with no whitespace
        Example filepath: "myfig.png" or r"C:\myfig.pdf" 
        '''
        import matplotlib.pyplot as plt
        if not fig:
            fig = plt.gcf()

        plt.subplots_adjust(0,0,1,1,0,0)
        for ax in fig.axes:
            ax.axis('off')
            ax.margins(0,0)
            ax.xaxis.set_major_locator(plt.NullLocator())
            ax.yaxis.set_major_locator(plt.NullLocator())
        fig.savefig(filepath, pad_inches = 0, bbox_inches='tight')
        plt.close()
        
helperFunctionHandler = HelperFunctions()

Initialising Helper Function Class...


# Load & Prepare the Data

In [4]:
AQHEALTH_DATA = pd.read_csv(AQHEALTH_DATAPATH)
AQHEALTH_DATA = AQHEALTH_DATA.applymap(dataprocessing_handler.processDF1)
AQHEALTH_DATA.columns = [e.replace('"', '').strip('=') for e in AQHEALTH_DATA.columns]
AQHEALTH_DATA.drop([144,145], inplace=True)
AQHEALTH_DATA.drop(['Exposure Id',
                    'Type',
                    'Name', 
                    'Region',
                    'Measure',
                    'Metric',
                    'Sex',
                    'REI Name',
                    'Cause Name',
                    'Age Group Name',
                    'Pollutant',
                    'Pollutant Name',
                    'Region Name',
                    'Measure Name',
                    'Metric Name'],inplace=True, axis=1)

AQHEALTH_DATA[['Exposure Mean']] = AQHEALTH_DATA[['Exposure Mean']].astype(int)
AQHEALTH_DATA[['Exposure Mean Rounded']] = AQHEALTH_DATA[['Exposure Mean Rounded']].astype(int)
AQHEALTH_DATA[['Exposure Upper']] = AQHEALTH_DATA[['Exposure Upper']].astype(int)
AQHEALTH_DATA[['Exposure Lower']] = AQHEALTH_DATA[['Exposure Lower']].astype(int)
AQHEALTH_DATA.Year = pd.to_datetime(AQHEALTH_DATA.Year)
AQHEALTH_DATA.set_index('Year', inplace=True) 

In [5]:
AQPM25_DATA = pd.read_csv(AQPM25_DATAPATH)
AQPM25_DATA = AQPM25_DATA.applymap(dataprocessing_handler.processDF1)
AQPM25_DATA.columns = [e.replace('"', '').strip('=') for e in AQPM25_DATA.columns]
AQPM25_DATA = AQPM25_DATA.iloc[:-2,:]
AQPM25_DATA.drop(['Exposure Id',
                  'Type',
                  'Name', 
                  'Region',
                  'Region Name',
                  'Units',
                  'Pollutant',
                  'Pollutant Name'],inplace=True, axis=1)

AQPM25_DATA[['Exposure Mean']] = AQPM25_DATA[['Exposure Mean']].astype(float)
AQPM25_DATA[['Exposure Upper']] = AQPM25_DATA[['Exposure Upper']].astype(float)
AQPM25_DATA[['Exposure Lower']] = AQPM25_DATA[['Exposure Lower']].astype(float)
AQPM25_DATA.Year = pd.to_datetime(AQPM25_DATA.Year)
AQPM25_DATA.set_index('Year', inplace=True) 

In [365]:
INDIASTATEWISE_DATA = pd.read_csv(INDIASTATEWISE_DATAPATH)
INDIASTATEWISE_DATA = INDIASTATEWISE_DATA[['date', 'state', 'location', 'type', 'so2', 'no2', 'rspm', 'spm', 'pm2_5']]
INDIASTATEWISE_DATA.date = pd.to_datetime(INDIASTATEWISE_DATA.date)
INDIASTATEWISE_DATA.set_index('date', inplace=True)
INDIASTATEWISE_DATA['Year'] = [e.year for e in INDIASTATEWISE_DATA.index]
INDIASTATEWISE_DATA = INDIASTATEWISE_DATA.groupby(['Year', 'state'])[['so2', 'no2', 'rspm', 'spm', 'pm2_5']].mean().reset_index()
INDIASTATEWISE_DATA = INDIASTATEWISE_DATA[INDIASTATEWISE_DATA.Year >= 2011][['Year', 'state','rspm']].reset_index().drop('index', axis=1)
INDIASTATEWISE_DATA.rspm = INDIASTATEWISE_DATA.rspm /300
INDIASTATEWISE_DATA.to_csv(root_path+data_path+'mapData.csv')


Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



In [12]:
DEATHS_BY_RISKFACTOR_DATA = pd.read_csv(DEATHS_BY_RISKFACTOR_DATAPATH, index_col=2)
DEATHS_BY_RISKFACTOR_DATA.index = pd.to_datetime(DEATHS_BY_RISKFACTOR_DATA.index, format='%Y')
DEATHS_BY_RISKFACTOR_DATA = DEATHS_BY_RISKFACTOR_DATA[DEATHS_BY_RISKFACTOR_DATA.Entity == 'World']
DEATHS_BY_RISKFACTOR_DATA = DEATHS_BY_RISKFACTOR_DATA.reset_index().drop(np.arange(1,DEATHS_BY_RISKFACTOR_DATA.shape[0]-1)).set_index('Year')
DEATHS_BY_RISKFACTOR_DATA.drop(['Entity', 'Code'], axis=1, inplace=True)

In [None]:
DEATHS_BY_RISKFACTOR_DATA = pd.read_csv(AP_DEATH_SHARE_DATAPATH, index_col=2)
DEATHS_BY_RISKFACTOR_DATA.index = pd.to_datetime(DEATHS_BY_RISKFACTOR_DATA.index, format='%Y')
DEATHS_BY_RISKFACTOR_DATA = DEATHS_BY_RISKFACTOR_DATA[DEATHS_BY_RISKFACTOR_DATA.Entity == 'World']
DEATHS_BY_RISKFACTOR_DATA = DEATHS_BY_RISKFACTOR_DATA.reset_index().drop(np.arange(1,DEATHS_BY_RISKFACTOR_DATA.shape[0]-1)).set_index('Year')
DEATHS_BY_RISKFACTOR_DATA.drop(['Entity', 'Code'], axis=1, inplace=True)

In [475]:
delhiap = ['cpcb_dly_aq_delhi-2004.csv',
 'cpcb_dly_aq_delhi-2005.csv',
 'cpcb_dly_aq_delhi-2011.csv',
 'cpcb_dly_aq_delhi-2007.csv',
 'cpcb_dly_aq_delhi-2013.csv',
 'cpcb_dly_aq_delhi-2012.csv',
 'cpcb_dly_aq_delhi-2006.csv',
 'cpcb_dly_aq_delhi-2002.csv',
 'cpcb_dly_aq_delhi-2003.csv',
 'cpcb_dly_aq_delhi-2015.csv',
 'cpcb_dly_aq_delhi-2001.csv',
 'cpcb_dly_aq_delhi-2000.csv',
 'cpcb_dly_aq_delhi-2014.csv',
 'cpcb_dly_aq_delhi-2008.csv',
 'cpcb_dly_aq_delhi-2009.csv']
DELHI_AQ = pd.DataFrame()
for each in delhiap:
    y = each.split('-')[-1][:4]
    t = pd.read_csv(root_path+data_path+each, index_col=0)
    t.index = [y]*t.shape[0]
    t.index.name = 'Year'
    DELHI_AQ = pd.concat([DELHI_AQ, t])

DELHI_AQ.reset_index(inplace=True)
DELHI_AQ =  DELHI_AQ[['Year', 'NO2', 'PM 2.5', 'RSPM/PM10', 'SO2', 'SPM']]
DELHI_AQ = DELHI_AQ.groupby(['Year']).mean()[['RSPM/PM10']].dropna()


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





# Prepare Graph

## Air Quality Health Measures

In [323]:
plt.rcParams['figure.dpi'] = 180
plt.rcParams['figure.figsize'] = (50,30)
plt.rcParams['lines.marker'] = 'o'
plt.rcParams['lines.markeredgecolor'] = 'black'
plt.rcParams['lines.markeredgewidth'] = 2
plt.rcParams['lines.markersize'] = 25
ax, fig = plt.subplots()

ax=sns.lineplot(x='Year', y='Exposure Mean',
                data = AQHEALTH_DATA.reset_index(),
                hue = 'Country',
                legend = 'brief',
                linewidth = 5,
                marker = 'o',
                markeredgecolor = 'black',
                markeredgewidth = 2,
                markersize = 25)
_=plt.legend(fontsize=40, bbox_to_anchor=(1.01, 1.0))
_=plt.grid(which = 'both')
_=plt.xlabel('Year', fontsize=45)
_=plt.ylabel('Number of deaths attributable to PM2.5(x10,000)', fontsize=45)
# _=plt.title('Exposure Mean', fontsize=60)
_=plt.xticks(fontsize=35)
_=plt.yticks(fontsize=35)


plt.close()
_=ax.figure.savefig(root_path+'Material/air_health.jpg', bbox_inches="tight")

## Air Quality PM2.5 Measures

In [342]:
plt.rcParams['figure.dpi'] = 180
plt.rcParams['figure.figsize'] = (50,30)
plt.rcParams['lines.marker'] = 'o'
plt.rcParams['lines.markeredgecolor'] = 'black'
plt.rcParams['lines.markeredgewidth'] = 2
plt.rcParams['lines.markersize'] = 25
ax, fig = plt.subplots()

AQPM25_DATA = AQPM25_DATA[AQPM25_DATA.Country.isin(['Afghanistan', 'Australia', 
                                                   'Bangladesh', 'Bhutan', 
                                                   'Canada','Brazil','India',
                                                   'China', 'Finland',
                                                   'Germany', 'Greenland', 
                                                   'Greenland', 'Nepal', 
                                                   'Pakistan','Russian Federation', 
                                                   'Sri Lanka', 'United Arab Emirates', 
                                                   'United Kingdom', 'United States'])]

ax=sns.lineplot(x='Year', y='Exposure Mean',
                data = AQPM25_DATA.reset_index(),
                hue = 'Country',
                legend = 'brief',
                linewidth = 5,
                marker = 'o',
#                 palette = 'twilight',
                palette = 'dark',

                markeredgecolor = 'black',
                markeredgewidth = 2,
                markersize = 25)
# ax.set_facecolor("RdYlGn")

_=plt.legend(fontsize=40, bbox_to_anchor=(1.01, 1.0))
_=plt.grid(which = 'both')
_=plt.xlabel('Year', fontsize=45)
_=plt.ylabel('PM2.5 Measures', fontsize=45)
# _=plt.title('Exposure Mean', fontsize=60)
_=plt.xticks(fontsize=35)
_=plt.yticks(fontsize=35)


plt.close()
_=ax.figure.savefig(root_path+'Material/pm2.5.jpg', bbox_inches="tight")

## Air Pollution Deaths Share

In [504]:
plt.rcParams['figure.dpi'] = 120
plt.rcParams['figure.figsize'] = (100,4)
t = pd.read_csv(AP_DEATH_SHARE_DATAPATH)
t = t[t.Entity == 'World'].iloc[:,-2:].set_index('Year')
# ax = t.plot.bar(color='black', width=0.5, legend=False)
ax = t.plot(color='black', legend=False)


_=plt.xticks(rotation=0, fontsize=50)
_=plt.yticks(fontsize=30)
_=ax.figure.savefig(root_path+'Material/worldShareDeath1.png', bbox_inches="tight",transparent=True)
plt.close()

## Death By Risk Factor

In [386]:
plt.rcParams['figure.dpi'] = 180
plt.rcParams['figure.figsize'] = (25,25)
plt.rcParams['font.weight'] = 'bold'

usefulCols = ['Unsafe water source (deaths)',
                'Poor sanitation (deaths)',
                'Alcohol use (deaths)',
                'Unsafe sex (deaths)',
                'Obesity (deaths)',
                'Smoking (deaths)',
                'Indoor air pollution (deaths)',
                'Air pollution (outdoor & indoor) (deaths)',
                'Outdoor air pollution (deaths)']

t = DEATHS_BY_RISKFACTOR_DATA[usefulCols]
t['Others'] = DEATHS_BY_RISKFACTOR_DATA[[e for e in DEATHS_BY_RISKFACTOR_DATA.columns if e not in usefulCols]].sum(axis=1)

ax = t.T['1990-01-01'].plot('pie', fontsize=60, 
                            rotatelabels=False,
                            explode = [0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0],
                            shadow = True)
centre_circle = plt.Circle((0,0),0.40,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

_=plt.xlabel('', fontsize=45)
_=plt.ylabel('', fontsize=45)
_=fig.patch.set_visible(False)
_=ax.figure.savefig(root_path+'Material/riskfactor1990.png', bbox_inches="tight",transparent=True)


In [387]:
plt.rcParams['figure.dpi'] = 180
plt.rcParams['figure.figsize'] = (25,25)
plt.rcParams['font.weight'] = 'bold'

usefulCols = ['Unsafe water source (deaths)',
                'Poor sanitation (deaths)',
                'Alcohol use (deaths)',
                'Unsafe sex (deaths)',
                'Obesity (deaths)',
                'Smoking (deaths)',
                'Indoor air pollution (deaths)',
                'Air pollution (outdoor & indoor) (deaths)',
                'Outdoor air pollution (deaths)']

t = DEATHS_BY_RISKFACTOR_DATA[usefulCols]
t['Others'] = DEATHS_BY_RISKFACTOR_DATA[[e for e in DEATHS_BY_RISKFACTOR_DATA.columns if e not in usefulCols]].sum(axis=1)

ax = t.T['2017-01-01'].plot('pie', fontsize=60, 
                            rotatelabels=False,
                            explode = [0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0],
                            shadow = True)
centre_circle = plt.Circle((0,0),0.40,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

_=plt.xlabel('', fontsize=45)
_=plt.ylabel('', fontsize=45)
_=ax.figure.savefig(root_path+'Material/riskfactor2017.png', bbox_inches="tight",transparent=True)



## Delhi Air Quality

In [489]:
plt.rcParams['figure.dpi'] = 120
plt.rcParams['figure.figsize'] = (25,10)
ax = DELHI_AQ.plot(kind='bar', color='gray', legend=False)
_=plt.xticks(rotation=0, fontsize=20)
_=plt.yticks(fontsize=20)
_=plt.xlabel('Year', fontsize=30)
_=plt.ylabel('RSPM/PM10', fontsize=30)

_=ax.figure.savefig(root_path+'Material/DELHIAQ.png', bbox_inches="tight",transparent=True)
plt.close()