# 1. Setup and Installation
First, let's install the libraries needed for this exploration: `requests` to fetch data from the API, `pandas` for data manipulation, `plotly` for interactive visualizations, and `kaleido`, which `plotly` uses to export figures as static images.

In [None]:
!pip install requests pandas plotly kaleido

# 2. Import Libraries and Define Constants
Now, we import the libraries and define the constants we'll use throughout the notebook, including the API key and the coordinates for Islamabad.

In [1]:
import os
from datetime import datetime, timedelta

import pandas as pd
import plotly.express as px
import requests

# --- Constants ---
# The OpenWeatherMap API key is read from the environment so that it is never
# stored in (or committed with) the notebook. Set it before starting Jupyter:
#   export OPENWEATHER_API_KEY="<your key>"
# NOTE: any key that was previously hardcoded in this cell should be treated
# as leaked and rotated in the OpenWeatherMap dashboard.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
if not API_KEY:
    # Warn early instead of failing later with an opaque HTTP 401.
    print("WARNING: OPENWEATHER_API_KEY is not set; OpenWeatherMap requests will be rejected.")

# Latitude / longitude (decimal degrees) used for every Islamabad API query.
ISB_LAT = 33.7380
ISB_LON = 73.0845

# 3. Fetch Sample Data
We will fetch data from three different API endpoints:
1.  **Air Pollution (Historical):** To get past air quality data (like PM2.5).
2.  **History (Weather):** To get past weather data (like temperature, humidity).
3.  **Hourly Forecast (Weather):** To see the structure of future weather predictions.

We'll fetch data for a small, recent time window for this initial exploration.

In [2]:
# Define time range (e.g., last 48 hours)
end_dt = datetime.now()
start_dt = end_dt - timedelta(days=2)
# .timestamp() treats a naive datetime as local time, so these are true Unix epochs.
start_timestamp = int(start_dt.timestamp())
end_timestamp = int(end_dt.timestamp())

# Upper bound (seconds) on how long a single API call may block the notebook.
REQUEST_TIMEOUT_S = 30


def _get_checked(url, label):
    """Send a GET request and return the response, failing loudly on HTTP errors.

    Args:
        url: Fully built request URL (including query string).
        label: Human-readable endpoint name used in the error message.

    Returns:
        The successful ``requests.Response``.

    Raises:
        RuntimeError: If the server answers with a non-2xx/3xx status. The
            message contains the status code and the start of the response
            body, but deliberately not the URL, because the URL carries the
            API key.
        requests.RequestException: On network failures or timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    if not response.ok:
        raise RuntimeError(
            f"{label} request failed with HTTP {response.status_code}: {response.text[:200]}"
        )
    return response


# All endpoints are called over HTTPS so the API key is not sent in clear text.

# 1. Air Pollution API (historical)
pollution_url = f"https://api.openweathermap.org/data/2.5/air_pollution/history?lat={ISB_LAT}&lon={ISB_LON}&start={start_timestamp}&end={end_timestamp}&appid={API_KEY}"
pollution_response = _get_checked(pollution_url, "Air pollution history")
pollution_data = pollution_response.json()

# 2. History API (weather)
weather_history_url = f"https://history.openweathermap.org/data/2.5/history/city?lat={ISB_LAT}&lon={ISB_LON}&type=hour&start={start_timestamp}&end={end_timestamp}&appid={API_KEY}"
weather_history_response = _get_checked(weather_history_url, "Weather history")
weather_history_data = weather_history_response.json()

# 3. Hourly Forecast 4 days (weather)
weather_forecast_url = f"https://api.openweathermap.org/data/2.5/forecast/hourly?lat={ISB_LAT}&lon={ISB_LON}&appid={API_KEY}&cnt=96" # 96 hours = 4 days
weather_forecast_response = _get_checked(weather_forecast_url, "Hourly forecast")
weather_forecast_data = weather_forecast_response.json()

# Show the first record of each payload to inspect its structure.
print("--- Sample Air Pollution Data ---")
print(pollution_data['list'][0])
print("\n--- Sample Weather History Data ---")
print(weather_history_data['list'][0])
print("\n--- Sample Weather Forecast Data ---")
print(weather_forecast_data['list'][0])

--- Sample Air Pollution Data ---
{'main': {'aqi': 5}, 'components': {'co': 1112.98, 'no': 0, 'no2': 8.14, 'o3': 77.29, 'so2': 4.67, 'pm2_5': 188.25, 'pm10': 279.33, 'nh3': 8.6}, 'dt': 1762210800}

--- Sample Weather History Data ---
{'dt': 1762210800, 'main': {'temp': 290.51, 'feels_like': 290.16, 'pressure': 1008, 'humidity': 71, 'temp_min': 290.51, 'temp_max': 290.51}, 'wind': {'speed': 3.29, 'deg': 86, 'gust': 3.79}, 'clouds': {'all': 0}, 'weather': [{'id': 800, 'main': 'Clear', 'description': 'clear sky', 'icon': '01n'}]}

--- Sample Weather Forecast Data ---
{'dt': 1762383600, 'main': {'temp': 287.58, 'feels_like': 286.72, 'temp_min': 284.66, 'temp_max': 287.58, 'pressure': 1014, 'sea_level': 1014, 'grnd_level': 930, 'humidity': 63, 'temp_kf': 2.92}, 'weather': [{'id': 800, 'main': 'Clear', 'description': 'clear sky', 'icon': '01n'}], 'clouds': {'all': 0}, 'wind': {'speed': 2.47, 'deg': 3, 'gust': 2.26}, 'visibility': 10000, 'pop': 0, 'sys': {'pod': 'n'}, 'dt_txt': '2025-11-05 23

# 4. Convert to Pandas DataFrames
The raw data is in JSON format. We'll convert the historical pollution and weather data into pandas DataFrames to make them easier to work with. This involves extracting the relevant fields from the nested JSON structure.

In [3]:
def _flatten_records(records, nested_fields):
    """Turn a list of nested API records into a flat DataFrame.

    Args:
        records: List of dicts, each with a ``'dt'`` Unix timestamp (seconds)
            and nested dicts such as ``'components'`` or ``'main'``.
        nested_fields: Mapping of output column name -> ``(group, key)``,
            i.e. the value is read from ``record[group][key]``.

    Returns:
        DataFrame with columns ``dt``, one column per entry of
        ``nested_fields`` (in that order), and ``timestamp`` (``dt`` parsed
        as a datetime). Values missing from a record become NaN/None rather
        than raising.
    """
    rows = []
    for record in records:
        row = {'dt': record['dt']}
        for column, (group, key) in nested_fields.items():
            # `or {}` also covers a group that is present but null.
            row[column] = (record.get(group) or {}).get(key)
        rows.append(row)
    # Building the frame in one go (instead of slicing and then adding a
    # column) avoids pandas' SettingWithCopyWarning.
    frame = pd.DataFrame(rows, columns=['dt', *nested_fields])
    frame['timestamp'] = pd.to_datetime(frame['dt'], unit='s')
    return frame


# Pollution DataFrame
pollution_df = _flatten_records(
    pollution_data['list'],
    {
        'pm2_5': ('components', 'pm2_5'),
        'no2': ('components', 'no2'),
        'o3': ('components', 'o3'),
    },
)

# Weather DataFrame
weather_df = _flatten_records(
    weather_history_data['list'],
    {
        'temp': ('main', 'temp'),
        'humidity': ('main', 'humidity'),
        'wind_speed': ('wind', 'speed'),
    },
)

print("--- Pollution DataFrame ---")
display(pollution_df.head())
print("\n--- Weather DataFrame ---")
display(weather_df.head())

--- Pollution DataFrame ---


Unnamed: 0,dt,pm2_5,no2,o3,timestamp
0,1762210800,188.25,8.14,77.29,2025-11-03 23:00:00
1,1762214400,196.54,6.65,85.21,2025-11-04 00:00:00
2,1762218000,200.15,5.65,94.32,2025-11-04 01:00:00
3,1762221600,196.42,5.92,94.52,2025-11-04 02:00:00
4,1762225200,189.39,7.3,92.15,2025-11-04 03:00:00



--- Weather DataFrame ---


Unnamed: 0,dt,temp,humidity,wind_speed,timestamp
0,1762210800,290.51,71,3.29,2025-11-03 23:00:00
1,1762214400,289.95,73,3.06,2025-11-04 00:00:00
2,1762218000,289.4,75,2.98,2025-11-04 01:00:00
3,1762221600,289.4,74,3.25,2025-11-04 02:00:00
4,1762225200,290.51,71,3.24,2025-11-04 03:00:00


# 5. Merge DataFrames
To analyze the relationship between weather and air quality, we need to combine these two datasets. We'll merge them based on the `dt` (timestamp) column. Since the timestamps might not be perfectly aligned, `pd.merge_asof` is a good choice to find the nearest match.

In [4]:
# Sort by 'dt' before merging
pollution_df = pollution_df.sort_values('dt')
weather_df = weather_df.sort_values('dt')

# Merge the two dataframes
merged_df = pd.merge_asof(pollution_df, weather_df, on='dt', direction='nearest', suffixes=('_pol', '_wea'))

print("--- Merged DataFrame ---")
display(merged_df.head())
print("\n--- Data Quality Check (Missing Values) ---")
print(merged_df.isnull().sum())

--- Merged DataFrame ---


Unnamed: 0,dt,pm2_5,no2,o3,timestamp_pol,temp,humidity,wind_speed,timestamp_wea
0,1762210800,188.25,8.14,77.29,2025-11-03 23:00:00,290.51,71,3.29,2025-11-03 23:00:00
1,1762214400,196.54,6.65,85.21,2025-11-04 00:00:00,289.95,73,3.06,2025-11-04 00:00:00
2,1762218000,200.15,5.65,94.32,2025-11-04 01:00:00,289.4,75,2.98,2025-11-04 01:00:00
3,1762221600,196.42,5.92,94.52,2025-11-04 02:00:00,289.4,74,3.25,2025-11-04 02:00:00
4,1762225200,189.39,7.3,92.15,2025-11-04 03:00:00,290.51,71,3.24,2025-11-04 03:00:00



--- Data Quality Check (Missing Values) ---
dt               0
pm2_5            0
no2              0
o3               0
timestamp_pol    0
temp             0
humidity         0
wind_speed       0
timestamp_wea    0
dtype: int64


# 6. Basic Analysis and Visualization
Now we can perform some initial analysis. We'll focus on **PM2.5** as our primary target for prediction. A simple time-series plot will show us how it has changed over the fetched period. We can also look at the correlation between different variables.

In [5]:
# --- Figure 1: PM2.5 time series over the fetched window ---
fig = px.line(
    merged_df,
    x='timestamp_pol',
    y='pm2_5',
    title='PM2.5 Concentration in Islamabad (Last 48 Hours)',
)
fig.update_layout(xaxis_title='Timestamp', yaxis_title='PM2.5 (μg/m³)')
fig.show()

# --- Figure 2: the three pollutants on a shared axis ---
fig2 = px.line(
    merged_df,
    x='timestamp_pol',
    y=['pm2_5', 'no2', 'o3'],
    title='Air Pollutants in Islamabad',
)
fig2.update_layout(xaxis_title='Timestamp', yaxis_title='Concentration (μg/m³)')
fig2.show()

# --- Pairwise correlation between PM2.5 and the weather variables ---
correlation_matrix = merged_df.loc[:, ['pm2_5', 'temp', 'humidity', 'wind_speed']].corr()
print("\n--- Correlation Matrix ---")
display(correlation_matrix)

# --- Figure 3: the same matrix as an annotated heatmap ---
fig3 = px.imshow(
    correlation_matrix,
    text_auto=True,
    title='Correlation Between PM2.5 and Weather',
)
fig3.show()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed