In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load the analysis-ready CSV, parse 'Timestamp' as day-first dates, and order
# the rows by station and then chronologically.
# NOTE(review): the input path is absolute and machine-specific; consider a
# configurable, project-relative data directory so the notebook runs elsewhere.
df = (
    pd.read_csv(
        "C:\\Users\\Chinmaya holla\\Desktop\\Airpollution\\Data\\Processed_data\\analysis_ready_data.csv"
    )
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'], dayfirst=True))
    .sort_values(by=['Station', 'Timestamp'])
)

In [6]:
def create_features(data, target='PM2_5', lags=(1, 2, 7), rolling_window=3):
    """Build lag, rolling-mean and calendar features for each station.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Station', a datetime-typed 'Timestamp', and the
        ``target`` column. The input frame is not modified.
    target : str, default 'PM2_5'
        Column the lag and rolling features are derived from.
    lags : iterable of int, default (1, 2, 7)
        Row offsets; each one adds a ``'<target>_Lag<k>'`` column.
    rolling_window : int, default 3
        Number of *previous* rows averaged into ``'Rolling_Mean_<window>'``.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ('Station', 'Timestamp') with the engineered
        columns appended, minus every row that contains a missing value.

    Notes
    -----
    Lags and the rolling window count rows, not calendar days. They only
    correspond to "k days ago" when each station has exactly one row per
    day with no gaps -- NOTE(review): confirm this holds for the input data.
    """
    # Lags are positional, so the rows must be in chronological order within
    # each station. Sorting here (instead of trusting the caller) keeps the
    # features correct for unsorted input; already-sorted input is unchanged.
    # sort_values returns a new frame, so `data` itself is left untouched.
    df_eng = data.sort_values(['Station', 'Timestamp'])

    per_station = df_eng.groupby('Station')[target]

    # Lag Features (Past context)
    for lag in lags:
        df_eng[f'{target}_Lag{lag}'] = per_station.shift(lag)

    # Rolling Mean (Trend context). shift(1) excludes the current row so the
    # feature only uses values that precede the row it is attached to.
    df_eng[f'Rolling_Mean_{rolling_window}'] = per_station.transform(
        lambda x: x.shift(1).rolling(window=rolling_window).mean()
    )

    # Date Features (Seasonal context)
    df_eng['Month'] = df_eng['Timestamp'].dt.month
    df_eng['DayOfWeek'] = df_eng['Timestamp'].dt.dayofweek

    # Removes the leading rows of each station whose lag/rolling values are
    # NaN. Note that dropna() also discards any other row that has a missing
    # value in ANY column, not only the engineered ones.
    df_eng = df_eng.dropna()
    return df_eng

# 2. Feature engineering: add lag, rolling-mean and calendar columns per station
df_features = create_features(df)

# 3. Encoding (The "Station" Logic)
# One-hot encode 'Station' into one 0/1 indicator column per station.
# dtype=int is explicit because pandas >= 2.0 defaults to boolean dummies,
# which would be written to the CSV as True/False instead of 0/1.
print("Encoding stations...")
df_final = pd.get_dummies(df_features, columns=['Station'], dtype=int)

# 4. Save to Folder
# The file is intended as direct input for models (Random Forest, XGBoost, etc.).
# NOTE(review): the saved frame still contains the raw 'Timestamp' column;
# drop or convert it before fitting a model that accepts numeric inputs only.
output_path = '../data/Processed_data/model_ready_data.csv'
df_final.to_csv(output_path, index=False)

print("-" * 30)
print("Success! Preprocessing complete.")
print(f"Model-ready data saved to: {output_path}")
print(f"Columns created: {list(df_final.columns)}")
print("-" * 30)

Encoding stations...
------------------------------
Success! Preprocessing complete.
Model-ready data saved to: ../data/Processed_data/model_ready_data.csv
Columns created: ['Timestamp', 'PM2_5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'SO2', 'CO', 'Ozone', 'Benzene', 'Toluene', 'AT_C', 'RH_Percent', 'WD_deg', 'BP_mmHg', 'PM2_5_AQI', 'PM2_5_Lag1', 'PM2_5_Lag2', 'PM2_5_Lag7', 'Rolling_Mean_3', 'Month', 'DayOfWeek', 'Station_BapujiNagar', 'Station_Hombegowda', 'Station_Jayanagar', 'Station_Kadabasenahalli', 'Station_RVCE', 'Station_Silkboard']
------------------------------
