# In [None]:
## Seasonal trend analysis of AQI

## -----------------------------------------------------
import pandas as pd
import numpy as np
import plotly.graph_objects as go 

# Load the raw pollution measurements ---------------------------------------
data = pd.read_csv(r"D:\pollution_data.csv")

# Parse the 'Recorded' column as datetimes and use it as the index ----------
data = data.assign(Recorded=pd.to_datetime(data['Recorded'])).set_index('Recorded')

# Drop rows with any missing value, then rows with a negative CO or NO2 -----
data = data.dropna()
co_is_valid = data['CO'] >= 0
no2_is_valid = data['NO2'] >= 0
data = data[co_is_valid & no2_is_valid]

## Outlier removal based on the interquartile range (IQR) -------------------
Q1, Q3 = (data.quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

# A value more than 1.5 * IQR below Q1 or above Q3 is treated as an outlier
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Discard every row in which at least one column holds an outlier
too_low = data < lower_bound
too_high = data > upper_bound
data = data[~(too_low | too_high).any(axis=1)]


## Air Quality Index (AQI) breakpoint tables ---------------------------------

# AQI bands shared by every pollutant, from the cleanest band to the worst.
_AQI_BANDS = ((0, 50), (51, 100), (101, 150), (151, 200), (201, 300), (301, 400))

# Concentration ranges (low, high) per pollutant; the i-th range maps onto the
# i-th entry of _AQI_BANDS.
# NOTE(review): the 'nh3' ranges are identical to the 'CO' ranges - confirm
# that this is intended.
_CONCENTRATION_RANGES = {
    'CO': ((0, 4.4), (4.5, 9.4), (9.5, 12.4),
           (12.5, 15.4), (15.5, 30.4), (30.5, 70)),
    'NO2': ((0, 53), (54, 100), (101, 360),
            (361, 649), (650, 1249), (1250, 3000)),
    'O3': ((0.000, 0.054), (0.055, 0.070), (0.071, 0.085),
           (0.086, 0.105), (0.106, 0.200), (0.201, 0.300)),
    'SO2': ((0, 35), (36, 75), (76, 185),
            (186, 304), (305, 604), (605, 700)),
    'pm2_5': ((0, 9), (9.1, 35.4), (35.5, 55.4),
              (55.5, 125.4), (125.5, 225.4), (225.5, 300)),
    'PM10Teom': ((0, 54), (55, 154), (155, 254),
                 (255, 354), (355, 424), (425, 550)),
    'nh3': ((0, 4.4), (4.5, 9.4), (9.5, 12.4),
            (12.5, 15.4), (15.5, 30.4), (30.5, 70)),
}

# Lookup table used for the AQI calculation: pollutant name -> list of
# ((low, high), (low_aqi, high_aqi)) pairs in ascending order.
aqi_pollutants = {
    pollutant: list(zip(ranges, _AQI_BANDS))
    for pollutant, ranges in _CONCENTRATION_RANGES.items()
}



# function made for one pollutant to calculate AQI

def calculate_aqi(pollutant_name, concentration, breakpoints=None):
    """Convert one pollutant concentration into its AQI sub-index.

    The concentration is located in the pollutant's breakpoint table and the
    index is linearly interpolated between the AQI limits of that bracket.

    Consecutive brackets do not touch (one may end at 4.4 and the next start
    at 4.5). A reading that falls strictly between two brackets is given the
    top index of the lower bracket, i.e. it is treated as if it had been
    truncated down to that bracket's upper limit, instead of being silently
    ignored.

    Args:
        pollutant_name: Key of the pollutant in the breakpoint table.
        concentration: Measured concentration of the pollutant.
        breakpoints: Optional mapping of pollutant name to a list of
            ``((low, high), (low_aqi, high_aqi))`` pairs in ascending order.
            Defaults to the module-level ``aqi_pollutants`` table.

    Returns:
        The AQI sub-index as a float, or ``None`` when the concentration lies
        below the first bracket, above the last one, or is NaN.

    Raises:
        KeyError: If ``pollutant_name`` is not present in the table.
    """
    table = aqi_pollutants if breakpoints is None else breakpoints

    previous_high = None
    previous_high_aqi = None
    for (low, high), (low_aqi, high_aqi) in table[pollutant_name]:
        if low <= concentration <= high:
            return ((high_aqi - low_aqi) / (high - low)) * (concentration - low) + low_aqi
        # The reading sits in the gap between the previous bracket and this one.
        if previous_high is not None and previous_high < concentration < low:
            return float(previous_high_aqi)
        previous_high, previous_high_aqi = high, high_aqi

    # Outside the table altogether (NaN also ends up here, because every
    # comparison with NaN is False).
    return None


# calculating aqi for all pollutants

def calculate_overall_aqi(row, pollutants):
    """Return the overall AQI of one record: the highest pollutant sub-index.

    Args:
        row: Mapping-like record (for example a DataFrame row) that holds one
            concentration per pollutant name.
        pollutants: Iterable of pollutant names to take into account.

    Returns:
        The largest sub-index returned by ``calculate_aqi`` for the given
        pollutants. Pollutants for which ``calculate_aqi`` returns ``None``
        are skipped. If no pollutant yields a sub-index, NaN is returned
        instead of raising, so a single unusable record cannot abort a whole
        ``DataFrame.apply`` run.
    """
    sub_indices = (calculate_aqi(name, row[name]) for name in pollutants)
    usable = (value for value in sub_indices if value is not None)
    # max() of an empty sequence raises ValueError; fall back to NaN instead.
    return max(usable, default=float('nan'))



## Compute the overall AQI of every row from the selected pollutants
Pollutants = ['CO', 'NO2']

data['AQI'] = data.apply(lambda row: calculate_overall_aqi(row, Pollutants), axis=1)


# AQI categories as (lowest index, highest index, label), in ascending order.
aqi_categories = [
    (0, 50, 'Good'),
    (51, 100, 'Moderate'),
    (101, 150, 'Unhealthy for Sensitive Groups'),
    (151, 200, 'Unhealthy'),
    (201, 300, 'Very Unhealthy'),
    (301, 400, 'Hazardous'),
]


def categorize_aqi(aqi_value):
    """Return the category label for an AQI value.

    The category limits are whole numbers, so a fractional index such as 50.6
    lies between two categories. Such a value is classified by the whole
    number it rounds to (halves round up) rather than being left without a
    category.

    Args:
        aqi_value: Numeric AQI value.

    Returns:
        The label of the matching category, or ``None`` when the value is
        NaN, lies below the first category, or rounds to more than the upper
        limit of the last category.
    """
    # Written as "not >=" so that NaN, for which every comparison is False,
    # is rejected together with values below the scale.
    if not aqi_categories or not (aqi_value >= aqi_categories[0][0]):
        return None

    for _low, high, category in aqi_categories:
        # With ascending categories, "rounds to at most `high`" is the same
        # as "is smaller than high + 0.5".
        if aqi_value < high + 0.5:
            return category
    return None


## Attach the category label that belongs to each row's AQI value

aqi_series = data['AQI']
data['AQI_category'] = aqi_series.map(categorize_aqi)




## Analysing seasonal trend in the data----------------------------------------

# Assign season based on the index (DateTimeIndex)

# Month number -> season; every month not listed here counts as autumn.
_SEASON_BY_MONTH = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
}


def get_season_from_index(index):
    """Return the season name for a timestamp, based on its ``month``.

    December-February is 'Winter', March-May 'Spring', June-August 'Summer'
    and anything else 'Autumn'.
    """
    return _SEASON_BY_MONTH.get(index.month, 'Autumn')
    

# Label every row with the season of its timestamp in the index
season_labels = data.index.map(get_season_from_index)
data['Season'] = season_labels

# The seasonal trend is the per-season average of the AQI and its pollutants
trend_columns = ['AQI', 'CO', 'NO2']
rows_by_season = data.groupby('Season')
Seasonal_trend = rows_by_season[trend_columns].mean()


# Plot the average AQI per season as a bar chart -----------------------------

# Turn the 'Season' group labels back into an ordinary column for plotting
Seasonal_trend = Seasonal_trend.reset_index()

fig = go.Figure()

# Bind each colour to its season by name rather than by position, so a season
# that is missing from the data cannot shift the colours of the others. The
# pairing is the one the positional list produced with all four seasons in
# alphabetical order.
season_colors = {
    'Autumn': 'darkred',
    'Spring': 'olive',
    'Summer': 'darkorange',
    'Winter': 'dimgrey',
}
colors = [season_colors[season] for season in Seasonal_trend['Season']]

fig.add_trace(
    go.Bar(
        x=Seasonal_trend['Season'],
        y=Seasonal_trend['AQI'],
        marker_color=colors,
        name='AQI',
    )
)

fig.update_layout(
    title=dict(
        # The street name was mis-encoded ('GrÃ¸nnelykkevej'); restored to 'ø'.
        text='Seasonal AQI Average for Grønnelykkevej, Odense',
        # 'Arialblack' is not a font family name; the typeface is 'Arial Black'.
        font=dict(family='Arial Black', size=24, color='black'),
        x=0.5,
    ),
    xaxis=dict(title='Season'),
    yaxis=dict(title='Average AQI'),
    height=700,
    width=1000,
)

# Show the plot
fig.show()

