In [13]:
import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

from functions import util

In [4]:
# Load the raw air-quality export for the Lahore US Embassy station.
# `parse_dates` turns the `date` column into datetimes, and
# `skipinitialspace` ignores whitespace that directly follows a delimiter.
df = pd.read_csv(
    "../../data/lahore-us-embassy-air-quality.csv",
    skipinitialspace=True,
    parse_dates=["date"],
)

In [5]:
# Keep only the date and PM2.5 reading, then tag every row with the constant
# location metadata of the monitoring station.
# `.assign` returns a brand-new DataFrame, so the metadata columns are never
# written onto a selection of `df` (the column-by-column assignment pattern
# that can raise pandas' SettingWithCopyWarning). Column order is unchanged:
# date, pm25, country, city, street, url.
air_quality_df = df[["date", "pm25"]].assign(
    country="pakistan",
    city="lahore",
    street="lahore-us-embassy",
    url="https://aqicn.org/city/pakistan/lahore/us-embassy/",
)

In [6]:
# Count the missing values in each column (one total per column).
air_quality_df.isna().sum(axis=0)

date       0
pm25       0
country    0
city       0
street     0
url        0
dtype: int64

In [7]:
# Drop every row that contains a missing value.
# The result is rebound to the same name instead of using `inplace=True`, so
# no frame derived from a selection of `df` is mutated in place; later cells
# keep reading `air_quality_df` exactly as before.
air_quality_df = air_quality_df.dropna()

In [8]:
# Show the air-quality frame (date, pm25 and the location metadata columns) as the cell output.
air_quality_df

Unnamed: 0,date,pm25,country,city,street,url
0,2024-11-01,209,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
1,2024-11-02,319,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
2,2024-11-03,396,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
3,2024-11-04,299,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
4,2024-11-05,378,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
...,...,...,...,...,...,...
1737,2019-06-26,166,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
1738,2019-06-27,135,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
1739,2019-06-28,186,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...
1740,2019-06-29,151,pakistan,lahore,lahore-us-embassy,https://aqicn.org/city/pakistan/lahore/us-emba...


The latitude and longitude used for the weather queries are 31.559945, 74.336052.

## Get weather data from the Open-Meteo Historical Weather API

In [16]:
# Build the Open-Meteo API client in three layers: a cached HTTP session
# (expire_after=-1: per the requests-cache docs, entries never expire), a retry
# wrapper around that session (5 retries, backoff factor 0.2), and the client itself.
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [20]:
# Ask the Open-Meteo archive endpoint for four hourly variables at the fixed
# coordinates and date window below. The order of the `hourly` list matters:
# the response is read back positionally when it is processed.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=31.559945,
    longitude=74.336052,
    start_date="2019-06-30",
    end_date="2024-11-03",
    hourly=["temperature_2m", "precipitation", "wind_speed_10m", "wind_direction_10m"],
)
responses = openmeteo.weather_api(url, params=params)

In [21]:
# Unpack the first location in the response and print its metadata.
# Handling several locations or weather models would need a loop over `responses`.
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Hourly variables are read back by position, so the indices below must follow
# the same order as the `hourly` list that was sent in the request.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(1).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(2).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(3).ValuesAsNumpy()

# One UTC timestamp per sample: from Time() up to (but excluding) TimeEnd(),
# stepping by Interval() seconds, alongside the four value arrays.
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "precipitation": hourly_precipitation,
    "wind_speed_10m": hourly_wind_speed_10m,
    "wind_direction_10m": hourly_wind_direction_10m,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)
# Trailing expression instead of print(): the notebook renders the frame with
# its rich table display rather than wrapped plain text.
hourly_dataframe

Coordinates 31.52899742126465°N 74.28229522705078°E
Elevation 217.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                           date  temperature_2m  precipitation  \
0     2019-06-30 00:00:00+00:00       29.778500            0.0   
1     2019-06-30 01:00:00+00:00       30.378500            0.0   
2     2019-06-30 02:00:00+00:00       31.978498            0.0   
3     2019-06-30 03:00:00+00:00       33.878502            0.0   
4     2019-06-30 04:00:00+00:00       36.028500            0.0   
...                         ...             ...            ...   
46891 2024-11-03 19:00:00+00:00       21.378500            0.0   
46892 2024-11-03 20:00:00+00:00       19.078499            0.0   
46893 2024-11-03 21:00:00+00:00       18.428499            0.0   
46894 2024-11-03 22:00:00+00:00       17.978498            0.0   
46895 2024-11-03 23:00:00+00:00       19.078499            0.0   

       wind_speed_10m  wind_direction_10m  
0            8.049845          243.43

In [28]:
# Fetch the historical weather and the weather forecast for Lahore through the
# project's `util` helpers (`util` is imported with the other dependencies in
# the notebook's import cell). The coordinates are named once so both calls
# are guaranteed to use the same location.
LAHORE_LATITUDE = 31.559945
LAHORE_LONGITUDE = 74.336052

historical_weather_df = util.get_historical_weather(
    "Lahore", "2019-06-30", "2024-11-03", LAHORE_LATITUDE, LAHORE_LONGITUDE
)
weather_forecast_df = util.get_hourly_weather_forecast(
    "Lahore", LAHORE_LATITUDE, LAHORE_LONGITUDE
)

Coordinates 31.52899742126465°N 74.28229522705078°E
Elevation 217.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 31.5°N 74.25°E
Elevation 217.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [29]:
# Show the historical weather frame returned by util.get_historical_weather as the cell output.
historical_weather_df

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,city
0,2019-06-30,35.328495,0.0,17.072504,279.071411,Lahore
1,2019-07-01,35.707664,0.0,21.897945,267.265564,Lahore
2,2019-07-02,36.649334,0.0,12.245293,229.049637,Lahore
3,2019-07-03,37.328503,0.0,12.313893,184.561737,Lahore
4,2019-07-04,35.141003,0.0,22.027763,103.664215,Lahore
...,...,...,...,...,...,...
1949,2024-10-30,25.120165,0.0,14.861722,335.210999,Lahore
1950,2024-10-31,23.665998,0.0,14.178927,295.485901,Lahore
1951,2024-11-01,23.120165,0.0,11.726277,301.739075,Lahore
1952,2024-11-02,22.820166,0.0,8.865799,122.566154,Lahore


## Creating and backfilling Feature Groups