In [75]:
from pathlib import Path

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
# Show every column when a wide DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [76]:
# Open-Meteo API client. Requests go through a cached session named '.cache'
# (expire_after=3600) wrapped in a retry layer (5 retries, backoff factor 0.2).
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=3600,
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [77]:
# Hourly variables to request. The extraction cell reads them back by position,
# so this order must stay in sync with the indices used there.
hourly_variables = [
    "temperature_2m",
    "relative_humidity_2m",
    "apparent_temperature",
    "precipitation",
    "rain",
    "showers",
    "snowfall",
    "weather_code",
    "pressure_msl",
    "surface_pressure",
    "cloud_cover",
    "wind_speed_10m",
    "wind_direction_10m",
    "wind_gusts_10m",
    "is_day",
]

# Historical-forecast request for a single location and a fixed date window.
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
params = {
    "latitude": 51.8959,
    "longitude": 0.8919,
    "start_date": "2022-01-01",
    "end_date": "2024-10-31",
    "hourly": hourly_variables,
}
responses = openmeteo.weather_api(url, params=params)

In [78]:
# Only one location was requested, so the first response is the one we want.
response = responses[0]

# Report the location metadata that came back with the response.
print(
    f"Coordinates {response.Latitude()}°N {response.Longitude()}°E",
    f"Elevation {response.Elevation()} m asl",
    f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}",
    f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s",
    sep="\n",
)

Coordinates 51.900001525878906°N 0.8999996185302734°E
Elevation 9.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [79]:
# Hourly values. Variables are addressed by position (0..14), so the names
# below must be listed in exactly the order the variables were requested.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_apparent_temperature,
    hourly_precipitation,
    hourly_rain,
    hourly_showers,
    hourly_snowfall,
    hourly_weather_code,
    hourly_pressure_msl,
    hourly_surface_pressure,
    hourly_cloud_cover,
    hourly_wind_speed_10m,
    hourly_wind_direction_10m,
    hourly_wind_gusts_10m,
    hourly_is_day,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(15))

In [80]:
# Start the column dictionary with the hourly UTC timestamps, generated from
# the response's start time, end time and step size.
hourly_data = dict(
    date=pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",  # the end timestamp itself is not part of the range
    )
)

In [81]:
# Add one column per extracted variable, in the order they should appear in
# the DataFrame.
# NOTE(review): hourly_weather_code is extracted earlier but is not added
# here, so the frame has no 'weather_code' column; confirm that is intended.
hourly_data.update({
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "apparent_temperature": hourly_apparent_temperature,
    "precipitation": hourly_precipitation,
    "rain": hourly_rain,
    "showers": hourly_showers,
    "snowfall": hourly_snowfall,
    "pressure_msl": hourly_pressure_msl,
    "surface_pressure": hourly_surface_pressure,
    "cloud_cover": hourly_cloud_cover,
    "wind_speed_10m": hourly_wind_speed_10m,
    "wind_direction_10m": hourly_wind_direction_10m,
    "wind_gusts_10m": hourly_wind_gusts_10m,
    "is_day": hourly_is_day,
})

In [82]:
# Assemble the timestamp and variable arrays into one DataFrame and preview it.
hourly_dataframe = pd.DataFrame(hourly_data)
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,showers,snowfall,pressure_msl,surface_pressure,cloud_cover,wind_speed_10m,wind_direction_10m,wind_gusts_10m,is_day
0,2022-01-01 00:00:00+00:00,11.0215,93.0,8.542295,0.0,0.0,0.0,0.0,1019.900024,1018.797241,7.0,16.981165,212.005341,51.119999,0.0
1,2022-01-01 01:00:00+00:00,10.5215,95.0,8.32338,0.0,0.0,0.0,0.0,1019.700012,1018.59552,19.0,14.707222,201.541046,43.560001,0.0
2,2022-01-01 02:00:00+00:00,10.721499,96.0,8.677092,0.0,0.0,0.0,0.0,1019.799988,1018.696106,100.0,14.345898,197.525665,39.959999,0.0
3,2022-01-01 03:00:00+00:00,11.0715,97.0,9.138726,0.0,0.0,0.0,0.0,1019.700012,1018.597839,100.0,14.587776,195.751236,42.119999,0.0
4,2022-01-01 04:00:00+00:00,11.221499,96.0,8.969191,0.0,0.0,0.0,0.0,1019.5,1018.398254,100.0,16.766108,194.931473,49.32,0.0


In [83]:
# Parse the 'date' column as datetimes once and reuse the result below.
timestamps = pd.to_datetime(hourly_dataframe['date'])

# Break each timestamp out into year, month, day and hour columns.
for part in ('year', 'month', 'day', 'hour'):
    hourly_dataframe[part] = getattr(timestamps.dt, part)

# Keep only the calendar date in 'date'; the time of day now lives in 'hour'.
hourly_dataframe['date'] = timestamps.dt.date

In [84]:
# Preview the frame again now that the year/month/day/hour columns exist.
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,showers,snowfall,pressure_msl,surface_pressure,cloud_cover,wind_speed_10m,wind_direction_10m,wind_gusts_10m,is_day,year,month,day,hour
0,2022-01-01,11.0215,93.0,8.542295,0.0,0.0,0.0,0.0,1019.900024,1018.797241,7.0,16.981165,212.005341,51.119999,0.0,2022,1,1,0
1,2022-01-01,10.5215,95.0,8.32338,0.0,0.0,0.0,0.0,1019.700012,1018.59552,19.0,14.707222,201.541046,43.560001,0.0,2022,1,1,1
2,2022-01-01,10.721499,96.0,8.677092,0.0,0.0,0.0,0.0,1019.799988,1018.696106,100.0,14.345898,197.525665,39.959999,0.0,2022,1,1,2
3,2022-01-01,11.0715,97.0,9.138726,0.0,0.0,0.0,0.0,1019.700012,1018.597839,100.0,14.587776,195.751236,42.119999,0.0,2022,1,1,3
4,2022-01-01,11.221499,96.0,8.969191,0.0,0.0,0.0,0.0,1019.5,1018.398254,100.0,16.766108,194.931473,49.32,0.0,2022,1,1,4


In [85]:
# Count the missing values in each column.
hourly_dataframe.isna().sum()

date                    0
temperature_2m          0
relative_humidity_2m    0
apparent_temperature    0
precipitation           0
rain                    0
showers                 0
snowfall                0
pressure_msl            0
surface_pressure        0
cloud_cover             0
wind_speed_10m          0
wind_direction_10m      0
wind_gusts_10m          0
is_day                  0
year                    0
month                   0
day                     0
hour                    0
dtype: int64

In [86]:
# from sklearn.impute import KNNImputer

# features_for_imputation = ['temperature_2m', 'precipitation', 'snowfall', 'cloud_cover', 'weather_code']

# # Subset the data
# subset = hourly_dataframe[features_for_imputation]

# # Apply KNN Imputation
# knn_imputer = KNNImputer(n_neighbors=5)  # You can adjust n_neighbors
# imputed_data = knn_imputer.fit_transform(subset)

# # Replace the original weather_code column with imputed values
# hourly_dataframe['weather_code'] = imputed_data[:, features_for_imputation.index('weather_code')]

In [87]:
# Persist the hourly frame as CSV. The target folder is created first so the
# export also works when 'Data/' does not exist yet (to_csv does not create
# missing parent directories and would raise instead).
raw_csv_path = Path('Data/raw.csv')
raw_csv_path.parent.mkdir(parents=True, exist_ok=True)
hourly_dataframe.to_csv(raw_csv_path, index=False)

In [None]:
# Cloud-cover cut-offs used by categorize_weather (values unchanged).
DRY_SUNNY_MAX_CLOUD_COVER = 25  # dry hours below this are 'sunny'
WET_SUNNY_MAX_CLOUD_COVER = 50  # rainy/snowy hours below this are '... and sunny'


def _liquid_precipitation(row):
    """Return rain + showers for `row`, or None if either field is absent."""
    try:
        return row['rain'] + row['showers']
    except KeyError:
        return None


def categorize_weather(row):
    """Label one hourly observation with a coarse weather category.

    Snow is checked before rain. Previously the dry/rain branches came first
    and, between them, matched every row with precipitation == 0 or
    precipitation > 0, so the three snow categories could never be returned.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Must provide 'precipitation', 'snowfall' and 'cloud_cover'.
        'rain' and 'showers' are optional; when both are present they are
        used to tell mixed rain/snow apart from snow alone.

    Returns
    -------
    str or None
        One of 'sunny', 'cloudy', 'rain and sunny', 'rain and cloudy',
        'rain and snow', 'snow and sunny', 'snow and cloudy'. None is
        returned when no rule matches (e.g. a value is NaN or negative).
    """
    precipitation = row['precipitation']
    snowfall = row['snowfall']
    cloud_cover = row['cloud_cover']

    if snowfall > 0:
        # Open-Meteo documents 'precipitation' as the total of rain, showers
        # and snow, so on its own it cannot separate rain from snow. Prefer
        # the liquid components; without them fall back to the original
        # 'precipitation > 0' test.
        liquid = _liquid_precipitation(row)
        if liquid is None:
            liquid = precipitation
        if liquid > 0:
            return 'rain and snow'
        if cloud_cover < WET_SUNNY_MAX_CLOUD_COVER:
            return 'snow and sunny'
        if cloud_cover >= WET_SUNNY_MAX_CLOUD_COVER:
            return 'snow and cloudy'
        return None  # cloud cover is not comparable (e.g. NaN)

    if precipitation > 0:
        if cloud_cover < WET_SUNNY_MAX_CLOUD_COVER:
            return 'rain and sunny'
        if cloud_cover >= WET_SUNNY_MAX_CLOUD_COVER:
            return 'rain and cloudy'
        return None

    if precipitation == 0:
        if cloud_cover < DRY_SUNNY_MAX_CLOUD_COVER:
            return 'sunny'
        if cloud_cover >= DRY_SUNNY_MAX_CLOUD_COVER:
            return 'cloudy'

    # No rule matched; make the original implicit None explicit.
    return None

# Label every hourly row by running categorize_weather across the rows.
hourly_dataframe['weather_category'] = hourly_dataframe.apply(
    categorize_weather,
    axis='columns',
)


In [125]:
# Preview the categorisation inputs next to the resulting label.
preview_columns = [
    'temperature_2m',
    'precipitation',
    'rain',
    'snowfall',
    'cloud_cover',
    'weather_category',
]
hourly_dataframe[preview_columns].head()

Unnamed: 0,temperature_2m,precipitation,rain,snowfall,cloud_cover,weather_category
0,4.5625,0.0,0.0,0.0,27.0,cloudy
1,4.7125,0.0,0.0,0.0,76.0,cloudy
2,4.7625,0.0,0.0,0.0,83.0,cloudy
3,4.9625,0.0,0.0,0.0,95.0,cloudy
4,5.3125,0.0,0.0,0.0,100.0,cloudy
