# CREATING 24-HOUR AND 8-HOUR ROLLING COLUMNS


In [4]:
import pandas as pd

# Load the dataset. The raw file stores the date and the start time in two
# separate columns; they are read as-is and merged into one timestamp below.
file_path = './Final_Weather_Data.csv'
df = pd.read_csv(file_path)

# Build the combined 'Date_Start Time' column explicitly instead of passing a
# nested list to `parse_dates`, a form that recent pandas releases deprecate.
# The merged column is placed first and the two source columns are dropped,
# which matches the layout the nested `parse_dates` call produced.
timestamps = pd.to_datetime(
    df['Date'].astype(str) + ' ' + df['Start Time'].astype(str)
)
df = df.drop(columns=['Date', 'Start Time'])
df.insert(0, 'Date_Start Time', timestamps)

# Sort by datetime to ensure proper rolling calculation
df.sort_values('Date_Start Time', inplace=True)

# Define function to create rolling features
def create_rolling_features(df, column, hours):
    """Add rolling mean, minimum and maximum columns for ``column`` to ``df``.

    The window covers ``hours`` consecutive rows and uses ``min_periods=1``,
    so the earliest rows are computed from however many values are available.
    The results are written into ``df`` in place under the names
    ``'<column> (<hours>HR)'``, ``'<column> (MIN)'`` and ``'<column> (MAX)'``.
    Nothing is returned.
    """
    windowed = df[column].rolling(window=hours, min_periods=1)
    mean_label = f'{column} ({hours}HR)'
    df[mean_label] = windowed.mean()
    # MIN first, then MAX, so the new columns keep their established order.
    for suffix, reducer in (('MIN', windowed.min), ('MAX', windowed.max)):
        df[f'{column} ({suffix})'] = reducer()

# Create rolling features for PM2.5, PM10, NO2, Ozone, CO, SO2, NH3.
# Each pair is (source column, window length in rows); the order here is the
# order in which the new columns are appended to the dataframe.
for pollutant, window in (
    ('PM2.5 (ug/m3)', 24),
    ('PM10 (ug/m3)', 24),
    ('NO2 (ug/m3)', 24),
    ('Ozone (ug/m3)', 8),
    ('CO (mg/m3)', 8),
    ('SO2 (ug/m3)', 24),
    ('NH3 (ug/m3)', 24),
):
    create_rolling_features(df, pollutant, window)

# Report the NaN count of every column, then discard the first 8 rows.
# NOTE(review): the rolling windows use min_periods=1, so they do not create
# NaNs themselves; the slice presumably trims warm-up rows of the 8-hour
# window. The 24-hour windows are still partial after row 8 -- confirm that 8
# is the intended count.
print(df.isna().sum())
df = df.iloc[8:]

# Write the enriched dataframe to a new CSV file (row index omitted).
output_path = './Updated_Weather_Data.csv'
df.to_csv(output_path, index=False)

output_path


  df = pd.read_csv(file_path, parse_dates=[['Date', 'Start Time']])


Date_Start Time         0
Sr.NO                   0
End Time                0
PM2.5 (ug/m3)           0
PM10 (ug/m3)            0
NO2 (ug/m3)             0
NH3 (ug/m3)             0
SO2 (ug/m3)             0
CO (mg/m3)              0
Ozone (ug/m3)           0
PM2.5 (ug/m3) (24HR)    0
PM2.5 (ug/m3) (MIN)     0
PM2.5 (ug/m3) (MAX)     0
PM10 (ug/m3) (24HR)     0
PM10 (ug/m3) (MIN)      0
PM10 (ug/m3) (MAX)      0
NO2 (ug/m3) (24HR)      0
NO2 (ug/m3) (MIN)       0
NO2 (ug/m3) (MAX)       0
Ozone (ug/m3) (8HR)     0
Ozone (ug/m3) (MIN)     0
Ozone (ug/m3) (MAX)     0
CO (mg/m3) (8HR)        0
CO (mg/m3) (MIN)        0
CO (mg/m3) (MAX)        0
SO2 (ug/m3) (24HR)      0
SO2 (ug/m3) (MIN)       0
SO2 (ug/m3) (MAX)       0
NH3 (ug/m3) (24HR)      0
NH3 (ug/m3) (MIN)       0
NH3 (ug/m3) (MAX)       0
dtype: int64


'./Updated_Weather_Data.csv'

## AQI CREATION


In [18]:
import pandas as pd

# Reload the file with the rolling-feature columns from disk.
file_path = './Updated_Weather_Data.csv'
df = pd.read_csv(file_path)

# Show the NaN count of every column before computing the AQI.
print(df.isna().sum())

# Breakpoint table used for the AQI sub-index calculation, keyed by pollutant
# column name. Every entry is a list of bands, and each band is a tuple of
# (concentration low, concentration high, index low, index high).
# The last band of each pollutant is open-ended (upper bound is infinity).
aqi_breakpoints = {
    'PM2.5 (ug/m3)': [
        (0, 30, 0, 50),
        (30, 60, 51, 100),
        (60, 90, 101, 200),
        (90, 120, 201, 300),
        (120, 250, 301, 400),
        (250, float('inf'), 401, 500),
    ],
    'PM10 (ug/m3)': [
        (0, 50, 0, 50),
        (50, 100, 51, 100),
        (100, 250, 101, 200),
        (250, 350, 201, 300),
        (350, 430, 301, 400),
        (430, float('inf'), 401, 500),
    ],
    'NO2 (ug/m3)': [
        (0, 40, 0, 50),
        (40, 80, 51, 100),
        (80, 180, 101, 200),
        (180, 280, 201, 300),
        (280, 400, 301, 400),
        (400, float('inf'), 401, 500),
    ],
    'CO (mg/m3)': [
        (0, 1, 0, 50),
        (1, 2, 51, 100),
        (2, 10, 101, 200),
        (10, 17, 201, 300),
        (17, 34, 301, 400),
        (34, float('inf'), 401, 500),
    ],
    'SO2 (ug/m3)': [
        (0, 40, 0, 50),
        (40, 80, 51, 100),
        (80, 380, 101, 200),
        (380, 800, 201, 300),
        (800, 1600, 301, 400),
        (1600, float('inf'), 401, 500),
    ],
}

# Function to calculate AQI sub-index
def calculate_sub_index(value, breakpoints):
    """Linearly interpolate ``value`` onto the index scale of its band.

    ``breakpoints`` is an iterable of ``(low, high, aqi_low, aqi_high)``
    tuples. The first tuple whose closed range ``[low, high]`` contains
    ``value`` decides the result, so a value on a shared boundary is scored
    by the earlier band. Returns ``None`` when no band contains ``value``.
    """
    for conc_lo, conc_hi, index_lo, index_hi in breakpoints:
        # Written as a negated chained comparison so that values which compare
        # false on both sides (e.g. NaN) are skipped rather than scored.
        if not (conc_lo <= value <= conc_hi):
            continue
        slope = (index_hi - index_lo) / (conc_hi - conc_lo)
        return slope * (value - conc_lo) + index_lo
    return None

# Calculate the AQI sub-index of each pollutant from its rolling average.
# Each entry is (output column, rolling-average column, aqi_breakpoints key).
# NOTE(review): the Ozone and NH3 rolling columns get no sub-index here --
# confirm that leaving them out of the AQI is intended.
for target, averaged, pollutant in (
    ('AQI_PM2.5', 'PM2.5 (ug/m3) (24HR)', 'PM2.5 (ug/m3)'),
    ('AQI_PM10', 'PM10 (ug/m3) (24HR)', 'PM10 (ug/m3)'),
    ('AQI_NO2', 'NO2 (ug/m3) (24HR)', 'NO2 (ug/m3)'),
    ('AQI_CO', 'CO (mg/m3) (8HR)', 'CO (mg/m3)'),
    ('AQI_SO2', 'SO2 (ug/m3) (24HR)', 'SO2 (ug/m3)'),
):
    df[target] = df[averaged].apply(
        calculate_sub_index, args=(aqi_breakpoints[pollutant],)
    )

# Overall AQI is the largest sub-index in each row.
sub_indices = df[['AQI_PM2.5', 'AQI_PM10', 'AQI_NO2', 'AQI_CO', 'AQI_SO2']]
df['AQI'] = sub_indices.max(axis=1)


# Write the dataframe with the AQI columns to a new CSV file (index omitted).
output_path = './AQI_Weather_Data.csv'
df.to_csv(output_path, index=False)

output_path


Date_Start Time         0
Sr.NO                   0
End Time                0
PM2.5 (ug/m3)           0
PM10 (ug/m3)            0
NO2 (ug/m3)             0
NH3 (ug/m3)             0
SO2 (ug/m3)             0
CO (mg/m3)              0
Ozone (ug/m3)           0
PM2.5 (ug/m3) (24HR)    0
PM2.5 (ug/m3) (MIN)     0
PM2.5 (ug/m3) (MAX)     0
PM10 (ug/m3) (24HR)     0
PM10 (ug/m3) (MIN)      0
PM10 (ug/m3) (MAX)      0
NO2 (ug/m3) (24HR)      0
NO2 (ug/m3) (MIN)       0
NO2 (ug/m3) (MAX)       0
Ozone (ug/m3) (8HR)     0
Ozone (ug/m3) (MIN)     0
Ozone (ug/m3) (MAX)     0
CO (mg/m3) (8HR)        0
CO (mg/m3) (MIN)        0
CO (mg/m3) (MAX)        0
SO2 (ug/m3) (24HR)      0
SO2 (ug/m3) (MIN)       0
SO2 (ug/m3) (MAX)       0
NH3 (ug/m3) (24HR)      0
NH3 (ug/m3) (MIN)       0
NH3 (ug/m3) (MAX)       0
dtype: int64


'./AQI_Weather_Data.csv'

In [19]:
df.isna().sum()

Date_Start Time         0
Sr.NO                   0
End Time                0
PM2.5 (ug/m3)           0
PM10 (ug/m3)            0
NO2 (ug/m3)             0
NH3 (ug/m3)             0
SO2 (ug/m3)             0
CO (mg/m3)              0
Ozone (ug/m3)           0
PM2.5 (ug/m3) (24HR)    0
PM2.5 (ug/m3) (MIN)     0
PM2.5 (ug/m3) (MAX)     0
PM10 (ug/m3) (24HR)     0
PM10 (ug/m3) (MIN)      0
PM10 (ug/m3) (MAX)      0
NO2 (ug/m3) (24HR)      0
NO2 (ug/m3) (MIN)       0
NO2 (ug/m3) (MAX)       0
Ozone (ug/m3) (8HR)     0
Ozone (ug/m3) (MIN)     0
Ozone (ug/m3) (MAX)     0
CO (mg/m3) (8HR)        0
CO (mg/m3) (MIN)        0
CO (mg/m3) (MAX)        0
SO2 (ug/m3) (24HR)      0
SO2 (ug/m3) (MIN)       0
SO2 (ug/m3) (MAX)       0
NH3 (ug/m3) (24HR)      0
NH3 (ug/m3) (MIN)       0
NH3 (ug/m3) (MAX)       0
AQI_PM2.5               0
AQI_PM10                0
AQI_NO2                 0
AQI_CO                  0
AQI_SO2                 0
AQI                     0
dtype: int64