In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot
import re

In [4]:
# Raw inputs: city-level daily readings, Bengaluru station-hour readings,
# and the station metadata table.
df = pd.read_csv('city_day.csv')
station_hr_blr = pd.read_csv('station_hour_blr.csv')
stations = pd.read_csv('stations.csv')

# Row labels of every record that does NOT belong to Bengaluru.
blore_index = df.loc[df['City'] != 'Bengaluru'].index

# Bengaluru-only daily frame: drop the foreign rows, then discard any day
# that has no AQI value.
blore_df = df.drop(index=blore_index).dropna(subset=['AQI'])

# Metadata rows for the stations located in Bengaluru.
station_name = stations.loc[stations['City'] == 'Bengaluru']

def get_PM25_subindex(x):
    """Convert a PM2.5 concentration into its AQI sub-index.

    The concentration axis is cut at 30, 60, 90, 120 and 250. Inside each
    band the sub-index rises linearly from the band's base value.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    if x <= 30:
        return x * 50 / 30
    if x <= 60:
        return 50 + (x - 30) * 50 / 30
    if x <= 90:
        return 100 + (x - 60) * 100 / 30
    if x <= 120:
        return 200 + (x - 90) * 100 / 30
    if x <= 250:
        return 300 + (x - 120) * 100 / 130
    if x > 250:
        return 400 + (x - 250) * 100 / 130
    # Every comparison above was false (NaN): treat as "no reading".
    return 0

def get_PM10_subindex(x):
    """Convert a PM10 concentration into its AQI sub-index.

    Up to 100 the sub-index equals the concentration itself; above that it
    is interpolated linearly inside the bands ending at 250, 350 and 430,
    and extrapolated beyond 430.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    # The 0-50 and 50-100 bands share the identity mapping.
    if x <= 100:
        return x
    if x <= 250:
        return 100 + (x - 100) * 100 / 150
    if x <= 350:
        return 200 + (x - 250)
    if x <= 430:
        return 300 + (x - 350) * 100 / 80
    if x > 430:
        return 400 + (x - 430) * 100 / 80
    # Every comparison above was false (NaN): treat as "no reading".
    return 0

## SO2 Sub-Index calculation
def get_SO2_subindex(x):
    """Convert an SO2 concentration into its AQI sub-index.

    Band edges sit at 40, 80, 380, 800 and 1600; the sub-index is linear
    inside each band and extrapolated above 1600.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    if x <= 40:
        return x * 50 / 40
    if x <= 80:
        return 50 + (x - 40) * 50 / 40
    if x <= 380:
        return 100 + (x - 80) * 100 / 300
    if x <= 800:
        return 200 + (x - 380) * 100 / 420
    if x <= 1600:
        return 300 + (x - 800) * 100 / 800
    if x > 1600:
        return 400 + (x - 1600) * 100 / 800
    # Every comparison above was false (NaN): treat as "no reading".
    return 0

## NOx Sub-Index calculation
def get_NOx_subindex(x):
    """Convert a NOx concentration into its AQI sub-index.

    Band edges sit at 40, 80, 180, 280 and 400; the sub-index is linear
    inside each band and extrapolated above 400.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    if x <= 40:
        return x * 50 / 40
    if x <= 80:
        return 50 + (x - 40) * 50 / 40
    if x <= 180:
        return 100 + (x - 80) * 100 / 100
    if x <= 280:
        return 200 + (x - 180) * 100 / 100
    if x <= 400:
        return 300 + (x - 280) * 100 / 120
    if x > 400:
        return 400 + (x - 400) * 100 / 120
    # Every comparison above was false (NaN): treat as "no reading".
    return 0

## NH3 Sub-Index calculation
def get_NH3_subindex(x):
    """Convert an NH3 concentration into its AQI sub-index.

    Band edges sit at 200, 400, 800, 1200 and 1800; the sub-index is linear
    inside each band and extrapolated above 1800.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    if x <= 200:
        return x * 50 / 200
    if x <= 400:
        return 50 + (x - 200) * 50 / 200
    if x <= 800:
        return 100 + (x - 400) * 100 / 400
    if x <= 1200:
        return 200 + (x - 800) * 100 / 400
    if x <= 1800:
        return 300 + (x - 1200) * 100 / 600
    if x > 1800:
        return 400 + (x - 1800) * 100 / 600
    # Every comparison above was false (NaN): treat as "no reading".
    return 0
        
## CO Sub-Index calculation
def get_CO_subindex(x):
    """Convert a CO concentration into its AQI sub-index.

    Band edges sit at 1, 2, 10, 17 and 34; the sub-index is linear inside
    each band and extrapolated above 34.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    if x <= 1:
        return x * 50 / 1
    if x <= 2:
        return 50 + (x - 1) * 50 / 1
    if x <= 10:
        return 100 + (x - 2) * 100 / 8
    if x <= 17:
        return 200 + (x - 10) * 100 / 7
    if x <= 34:
        return 300 + (x - 17) * 100 / 17
    if x > 34:
        return 400 + (x - 34) * 100 / 17
    # Every comparison above was false (NaN): treat as "no reading".
    return 0

def get_O3_subindex(x):
    """Convert an O3 concentration into its AQI sub-index.

    Band edges sit at 50, 100, 168, 208 and 748; the sub-index is linear
    inside each band and extrapolated above 748.

    Returns 0 when none of the comparisons hold, which is what happens for
    a NaN input.
    """
    if x <= 50:
        return x * 50 / 50
    if x <= 100:
        return 50 + (x - 50) * 50 / 50
    if x <= 168:
        return 100 + (x - 100) * 100 / 68
    if x <= 208:
        return 200 + (x - 168) * 100 / 40
    if x <= 748:
        # NOTE(review): the divisor is 539 although the band as coded spans
        # 208..748 (540 wide), so the value at 748 lands slightly above 400.
        # Left untouched here -- confirm the intended breakpoint.
        return 300 + (x - 208) * 100 / 539
    if x > 748:
        # The offset must be measured from this band's lower edge (748).
        # Subtracting 400, as before, made the sub-index jump by ~65 points
        # right above 748 and inflated every value in this band.
        return 400 + (x - 748) * 100 / 539
    # Every comparison above was false (NaN): treat as "no reading".
    return 0

def get_AQI_bucket(x):
    """Return the AQI category label for an AQI value.

    Cut-offs (inclusive upper bounds): 50 "Good", 100 "Satisfactory",
    200 "Moderate", 300 "Poor", 400 "Very Poor"; anything above 400 is
    "Severe".

    Returns NaN when none of the comparisons hold, which is what happens
    for a NaN input.
    """
    if x <= 50:
        return "Good"
    if x <= 100:
        return "Satisfactory"
    if x <= 200:
        return "Moderate"
    if x <= 300:
        return "Poor"
    if x <= 400:
        return "Very Poor"
    if x > 400:
        return "Severe"
    # `np.NaN` was removed in NumPy 2.0; `np.nan` is the same float value
    # and exists in every NumPy release.
    return np.nan


# --- Rolling inputs ---------------------------------------------------------
# Per-station trailing statistics (min_periods=1, so a window is produced as
# soon as a single reading is available): a 24-row mean for PM10, PM2.5, SO2,
# NOx and NH3, and an 8-row max for CO and O3.
#
# groupby(...).rolling(...) returns a Series indexed by
# (StationId, original row label). Dropping the StationId level lets pandas
# align on the row label when assigning. The previous `.values` assignment was
# positional and only correct when the rows were already stored contiguously
# and ordered by StationId. Alignment assumes the frame's index is unique,
# which holds for the default index produced by read_csv.
for pollutant in ["PM10", "PM2.5", "SO2", "NOx", "NH3"]:
    station_hr_blr[pollutant + "_24hr_avg"] = (
        station_hr_blr.groupby("StationId")[pollutant]
        .rolling(window=24, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)
    )

for pollutant in ["CO", "O3"]:
    station_hr_blr[pollutant + "_8hr_max"] = (
        station_hr_blr.groupby("StationId")[pollutant]
        .rolling(window=8, min_periods=1)
        .max()
        .reset_index(level=0, drop=True)
    )

# --- Sub-indices ------------------------------------------------------------
# (sub-index column, rolling input column, conversion function)
subindex_specs = [
    ("PM2.5_SubIndex", "PM2.5_24hr_avg", get_PM25_subindex),
    ("PM10_SubIndex", "PM10_24hr_avg", get_PM10_subindex),
    ("SO2_SubIndex", "SO2_24hr_avg", get_SO2_subindex),
    ("NOx_SubIndex", "NOx_24hr_avg", get_NOx_subindex),
    ("NH3_SubIndex", "NH3_24hr_avg", get_NH3_subindex),
    ("CO_SubIndex", "CO_8hr_max", get_CO_subindex),
    ("O3_SubIndex", "O3_8hr_max", get_O3_subindex),
]
for subindex_col, input_col, to_subindex in subindex_specs:
    station_hr_blr[subindex_col] = station_hr_blr[input_col].apply(to_subindex)

subindex_cols = [spec[0] for spec in subindex_specs]

# Number of pollutants with a positive sub-index for each row. The conversion
# functions return 0 for NaN input, so a missing pollutant does not count.
station_hr_blr["Checks"] = (station_hr_blr[subindex_cols] > 0).sum(axis=1)

# --- Overall AQI ------------------------------------------------------------
# AQI is the largest sub-index, rounded.
station_hr_blr["AQI_calculated"] = station_hr_blr[subindex_cols].max(axis=1).round()

# Blank out the AQI when neither particulate sub-index is positive, or when
# fewer than three pollutants contributed. `np.nan` replaces `np.NaN`, which
# was removed in NumPy 2.0.
no_particulates = (
    station_hr_blr["PM2.5_SubIndex"] + station_hr_blr["PM10_SubIndex"] <= 0
)
too_few_pollutants = station_hr_blr["Checks"] < 3
station_hr_blr.loc[no_particulates | too_few_pollutants, "AQI_calculated"] = np.nan

station_hr_blr["AQI_bucket_calculated"] = station_hr_blr["AQI_calculated"].apply(get_AQI_bucket)

# This overwrites the same file the notebook loads at the top. index=False
# keeps the row index out of the CSV; writing it added one more "Unnamed: 0"
# column to the file on every run.
station_hr_blr.to_csv('station_hour_blr.csv', index=False)

station_hr_blr








Unnamed: 0.1,Unnamed: 0,StationId,Datetime,PM2.5,PM10,NO,NO2,NOx,NH3,CO,...,PM2.5_SubIndex,PM10_SubIndex,SO2_SubIndex,NOx_SubIndex,NH3_SubIndex,CO_SubIndex,O3_SubIndex,Checks,AQI_calculated,AQI_bucket_calculated
0,1372542,KA002,2018-01-09 12:00:00,50.60,,1.96,11.09,6.12,,0.87,...,84.333333,0.000000,4.850000,7.650000,0.000000,43.50,87.82,5,88.0,Satisfactory
1,1372543,KA002,2018-01-09 13:00:00,35.34,,1.70,7.95,4.41,,1.40,...,71.616667,0.000000,8.687500,6.581250,0.000000,70.00,87.82,5,88.0,Satisfactory
2,1372544,KA002,2018-01-09 14:00:00,33.15,,2.19,16.03,8.71,,1.51,...,66.161111,0.000000,8.045833,8.016667,0.000000,75.50,87.82,5,88.0,Satisfactory
3,1372545,KA002,2018-01-09 15:00:00,37.04,,1.82,15.93,8.38,,0.68,...,65.054167,0.000000,6.728125,8.631250,0.000000,75.50,87.82,5,88.0,Satisfactory
4,1372546,KA002,2018-01-09 16:00:00,35.59,,1.90,15.01,8.00,,0.45,...,63.906667,0.000000,8.205000,8.905000,0.000000,75.50,87.82,5,88.0,Satisfactory
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287559,1660101,KA011,2020-06-30 20:00:00,14.75,82.50,,19.30,21.40,9.30,0.91,...,18.916667,85.250000,0.000000,19.861111,2.429167,45.50,43.48,6,85.0,Satisfactory
287560,1660102,KA011,2020-06-30 21:00:00,11.50,68.50,,15.38,13.15,10.00,0.53,...,18.958333,82.857143,0.000000,19.926389,2.472778,45.50,43.48,6,83.0,Satisfactory
287561,1660103,KA011,2020-06-30 22:00:00,12.75,55.25,,15.50,11.85,10.47,2.66,...,19.285714,80.321429,0.000000,20.152778,2.521111,108.25,43.48,6,108.0,Moderate
287562,1660104,KA011,2020-06-30 23:00:00,12.50,49.25,,15.43,11.55,12.45,0.61,...,21.071429,77.892857,0.000000,20.288889,2.582778,108.25,43.48,6,108.0,Moderate
