# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
file_path = '/content/Air Quality data.csv'  # Replace with your file's path
data = pd.read_csv(file_path)

# Data Cleaning
# Dates that cannot be parsed become NaT instead of raising.
data['Date of the record'] = pd.to_datetime(data['Date of the record'], errors='coerce')

# Coerce every measurement column to numeric; non-numeric entries become NaN.
# The AQI column is coerced together with the pollutants so that text
# placeholders in it count as missing and are removed by the dropna below,
# instead of surviving into the mean() aggregations further down.
for col in ['Air Quality Index (AQI) value', 'PM2.5 levels', 'PM10 levels.',
            'CO (Carbon Monoxide) levels', 'NOx levels']:
    data[col] = pd.to_numeric(data[col], errors='coerce')

# Drop rows with missing AQI. The explicit copy gives an independent frame, so
# later column assignments (e.g. adding 'Month') cannot be flagged by pandas
# as writes to a derived slice.
data = data.dropna(subset=['Air Quality Index (AQI) value']).copy()

# Simulate Improvements (based on recommendations)
# Assumed fractional reduction for each pollutant column:
reductions = {
    'PM2.5 levels': 0.3,  # 30% reduction
    'PM10 levels.': 0.25,  # 25% reduction
    'CO (Carbon Monoxide) levels': 0.2,  # 20% reduction
    'NOx levels': 0.15  # 15% reduction
}

# Create a copy of the dataset for "after" scenario
data_after = data.copy()

# Apply reductions
for pollutant, reduction in reductions.items():
    data_after[pollutant] = data_after[pollutant] * (1 - reduction)

# Recalculate AQI (simplified assumption: AQI is proportional to pollutants).
# The recorded AQI is scaled by the relative change in the row's mean pollutant
# level, so the "after" value stays on the same scale as the recorded "before"
# value. Assigning the raw pollutant mean as the AQI would compare two
# different quantities in the charts below.
# This can be replaced with a more complex formula if available.
pollutant_cols = list(reductions)
mean_before = data[pollutant_cols].mean(axis=1)
mean_after = data_after[pollutant_cols].mean(axis=1)
aqi_scale = mean_after / mean_before

# Where the ratio is undefined (no pollutant readings in the row, or a zero
# baseline) no improvement can be simulated, so the recorded AQI is kept.
aqi_scale = aqi_scale.where(np.isfinite(aqi_scale), 1.0)

data_after['Air Quality Index (AQI) value'] = (
    data['Air Quality Index (AQI) value'] * aqi_scale
)

# Comparison Graphs
# 1. City-wise AQI Before vs After
# Average AQI per city, computed the same way for both scenarios.
city_aqi_before, city_aqi_after = (
    frame.groupby('Name of the city')['Air Quality Index (AQI) value'].mean()
    for frame in (data, data_after)
)

# One row per city with both averages, ordered from highest to lowest
# "Before" value.
city_comparison = pd.DataFrame({'Before': city_aqi_before, 'After': city_aqi_after})
city_comparison = city_comparison.sort_values(by='Before', ascending=False)

# Grouped bar chart: two bars (Before / After) per city.
city_ax = city_comparison.plot.bar(figsize=(12, 6), width=0.8)
city_ax.set_title('City-wise AQI: Before vs After Implementation')
city_ax.set_ylabel('Average AQI')
city_ax.set_xlabel('City')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# 2. AQI Trends Over Time (Before vs After)
# Add the month number of each record's date to both frames.
data['Month'] = data['Date of the record'].dt.month
data_after['Month'] = data_after['Date of the record'].dt.month

# Average AQI per month for each scenario.
monthly_aqi_before, monthly_aqi_after = (
    frame.groupby('Month')['Air Quality Index (AQI) value'].mean()
    for frame in (data, data_after)
)

# Draw both monthly series on a single set of axes.
trend_fig, trend_ax = plt.subplots(figsize=(10, 5))
trend_ax.plot(
    monthly_aqi_before.index, monthly_aqi_before.values,
    marker='o', label='Before', color='red',
)
trend_ax.plot(
    monthly_aqi_after.index, monthly_aqi_after.values,
    marker='o', label='After', color='green',
)
trend_ax.set_title('Monthly AQI: Before vs After Implementation')
trend_ax.set_xlabel('Month')
trend_ax.set_ylabel('Average AQI')
trend_ax.legend()
trend_ax.grid(True)
plt.tight_layout()
plt.show()
