In [107]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [82]:
# Year under analysis; reused further down to keep only rows dated in this year.
year = 2020

# Read the cleaned readings for the '2020 rural' data set from the working directory.
df = pd.read_csv(filepath_or_buffer='2020_rural_cleaned.csv')

In [83]:
# Preview the first five rows to check that the CSV loaded as expected.
df.head()

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co,city
0,2020-01-01,282,136,2,14,16,15,vapi
1,2020-01-02,223,139,2,11,23,17,vapi
2,2020-01-03,230,134,2,17,40,15,vapi
3,2020-01-04,217,115,2,12,49,15,vapi
4,2020-01-05,191,161,2,10,35,16,vapi


In [84]:
# Capitalise the two label columns ('date' -> 'Date', 'city' -> 'City').
# Renaming by name instead of by position (0 and 7) keeps this correct if the
# column order ever changes, and avoids writing into `df.columns.values`,
# which is the backing array of an Index that is meant to be immutable.
# Keys that are already renamed are ignored, so the cell can be re-run safely.
df = df.rename(columns={'date': 'Date', 'city': 'City'})

# Fail loudly if the expected columns are not there (e.g. a differently shaped CSV).
missing_columns = {'Date', 'City'} - set(df.columns)
assert not missing_columns, 'expected columns missing after rename: %s' % sorted(missing_columns)

# Kept as a standalone array of the final column labels (a copy, not the Index buffer).
column_names = df.columns.to_numpy(copy=True)
df.head()

Unnamed: 0,Date,pm25,pm10,o3,no2,so2,co,City
0,2020-01-01,282,136,2,14,16,15,vapi
1,2020-01-02,223,139,2,11,23,17,vapi
2,2020-01-03,230,134,2,17,40,15,vapi
3,2020-01-04,217,115,2,12,49,15,vapi
4,2020-01-05,191,161,2,10,35,16,vapi


In [85]:
# Inspect the column dtypes before any date parsing is applied.
df.dtypes

Date    object
pm25     int64
pm10     int64
o3       int64
no2      int64
so2      int64
co       int64
City    object
dtype: object

In [96]:
# Parse the Date strings into real datetimes.
# The file stores dates as hyphen-separated ISO strings (e.g. '2020-01-01'),
# so the format must use '-' as well. A '/'-separated format only worked on
# older pandas, which treated it as a lenient ISO pattern; pandas >= 2.0
# matches the format strictly and raises ValueError on the mismatch.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# Derive both month helpers through the same `.dt` accessor.
df['month_num'] = df['Date'].dt.month            # 1..12
df['month_name'] = df['Date'].dt.month_name()    # 'January', 'February', ...
df.dtypes

Date          datetime64[ns]
pm25                   int64
pm10                   int64
o3                     int64
no2                    int64
so2                    int64
co                     int64
City                  object
month_num              int64
month_name            object
dtype: object

In [97]:
# Check the parsed Date column together with the derived month_num / month_name columns.
df.head()

Unnamed: 0,Date,pm25,pm10,o3,no2,so2,co,City,month_num,month_name
0,2020-01-01,282,136,2,14,16,15,vapi,1,January
1,2020-01-02,223,139,2,11,23,17,vapi,1,January
2,2020-01-03,230,134,2,17,40,15,vapi,1,January
3,2020-01-04,217,115,2,12,49,15,vapi,1,January
4,2020-01-05,191,161,2,10,35,16,vapi,1,January


In [98]:
# Shape before filtering, for comparison with the shape printed afterwards.
print(df.shape)

# Keep only the rows whose Date falls in the analysis year.
in_target_year = df['Date'].dt.year.eq(year)
df = df.loc[in_target_year]
print(df.shape)

(1260, 10)
(1260, 10)


In [99]:
# Preview the frame again after the year filter.
df.head()

Unnamed: 0,Date,pm25,pm10,o3,no2,so2,co,City,month_num,month_name
0,2020-01-01,282,136,2,14,16,15,vapi,1,January
1,2020-01-02,223,139,2,11,23,17,vapi,1,January
2,2020-01-03,230,134,2,17,40,15,vapi,1,January
3,2020-01-04,217,115,2,12,49,15,vapi,1,January
4,2020-01-05,191,161,2,10,35,16,vapi,1,January


In [100]:

# Cities and pollutant columns covered by this notebook.
cities = ['guwahati', 'coimbatore', 'jabalpur', 'kota', 'mussoorie', 'vapi']
pollutants = ['co', 'no2', 'o3', 'pm10', 'pm25', 'so2']

print(cities)
print(pollutants)

# WHO guideline value per pollutant.
# 24-hour means unless noted otherwise; an 8-hour mean is used as a proxy
# where no 24-hour figure is available.
# NOTE(review): 'co' and 'no2' are 0 - presumably placeholders for
# "no value entered" rather than real limits; confirm before comparing against them.
pollutant_who = {
    'co': 0,
    'no2': 0,
    'o3': 100,   # 8-hour mean
    'pm10': 50,
    'pm25': 25,
    'so2': 20,
}

# NAAQS limit per pollutant for 'industrial, residential, rural, others' areas.
# NOTE(review): the original note labels these as annual values, but
# 80 / 100 / 60 / 80 for no2 / pm10 / pm25 / so2 look like the 24-hour limits - confirm.
pollutant_naaqs = {
    'co': 2,     # 8-hour mean
    'no2': 80,
    'o3': 100,   # 8-hour mean
    'pm10': 100,
    'pm25': 60,
    'so2': 80,
}


['guwahati', 'coimbatore', 'jabalpur', 'kota', 'mussoorie', 'vapi']
['co', 'no2', 'o3', 'pm10', 'pm25', 'so2']


In [101]:
# Preview the daily frame once more before it is aggregated to monthly means.
df.head()

Unnamed: 0,Date,pm25,pm10,o3,no2,so2,co,City,month_num,month_name
0,2020-01-01,282,136,2,14,16,15,vapi,1,January
1,2020-01-02,223,139,2,11,23,17,vapi,1,January
2,2020-01-03,230,134,2,17,40,15,vapi,1,January
3,2020-01-04,217,115,2,12,49,15,vapi,1,January
4,2020-01-05,191,161,2,10,35,16,vapi,1,January


In [102]:
# df1 starts out as a second name for the same DataFrame object as df (no copy is made here).
df1 = df

In [103]:
# Monthly mean of every numeric column, per city, labelled with the month-end date.
# `month_name` is text and cannot be averaged: older pandas dropped such columns
# silently, while pandas >= 2.0 raises TypeError, so it is removed explicitly first.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
# NOTE: newer pandas (>= 2.2) spells the month-end alias 'ME'; 'M' is kept here
# to match the pandas version this notebook was run with.
df1 = (
    df1.drop(columns=['month_name'], errors='ignore')
       .groupby('City')
       .resample('M', label='right', closed='right', on='Date')
       .mean()
       .reset_index()
       .sort_values(by='Date')
)

In [104]:
# First rows of the monthly per-city means, ordered by month-end date.
df1.head()

Unnamed: 0,City,Date,pm25,pm10,o3,no2,so2,co,month_num
0,coimbatore,2020-01-31,103.806452,43.258065,26.806452,16.967742,3.774194,2.0,1.0
28,mussoorie,2020-01-31,49.419355,20.516129,0.0,10.935484,3.0,0.0,1.0
21,kota,2020-01-31,113.935484,69.290323,21.064516,9.903226,3.709677,5.322581,1.0
14,jabalpur,2020-01-31,154.451613,102.032258,22.354839,23.0,3.419355,10.354839,1.0
7,guwahati,2020-01-31,202.354839,150.580645,7.645161,5.774194,6.193548,13.225806,1.0


In [108]:
# One panel per city: monthly-mean pm25 (x) against monthly-mean pm10 (y).
# The grid and its titles are derived from `cities`, so titles and traces
# cannot drift apart and adding a city needs no further edits here.
n_cols = 3
n_rows = -(-len(cities) // n_cols)  # ceiling division

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(cities))

for position, city in enumerate(cities):
    city_monthly = df1[df1['City'] == city]
    # Fill the grid row by row; plotly rows/cols are 1-based.
    grid_row, grid_col = divmod(position, n_cols)
    fig.add_trace(
        go.Scatter(x=city_monthly['pm25'], y=city_monthly['pm10'], name=city),
        row=grid_row + 1, col=grid_col + 1,
    )

# update_layout(yaxis=...) would only touch the first panel's axis;
# update_xaxes / update_yaxes apply to every subplot, so all panels share the
# same y-range and are directly comparable.
fig.update_xaxes(title_text='pm25')
fig.update_yaxes(title_text='pm10', range=[0, 300])
fig.update_layout(showlegend=False)
fig.show()

In [106]:
# Preview of the monthly per-city means that feed the subplot figure.
df1.head()

Unnamed: 0,City,Date,pm25,pm10,o3,no2,so2,co,month_num
0,coimbatore,2020-01-31,103.806452,43.258065,26.806452,16.967742,3.774194,2.0,1.0
28,mussoorie,2020-01-31,49.419355,20.516129,0.0,10.935484,3.0,0.0,1.0
21,kota,2020-01-31,113.935484,69.290323,21.064516,9.903226,3.709677,5.322581,1.0
14,jabalpur,2020-01-31,154.451613,102.032258,22.354839,23.0,3.419355,10.354839,1.0
7,guwahati,2020-01-31,202.354839,150.580645,7.645161,5.774194,6.193548,13.225806,1.0
