# This Jupyter notebook demonstrates how to retrieve hourly historical weather data for a specific location (Kyiv) from the Open-Meteo archive API, clean it, and export it to CSV

In [1]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests

Collecting openmeteo-requests
  Downloading openmeteo_requests-1.1.0-py3-none-any.whl (5.5 kB)
Collecting openmeteo-sdk>=1.4.0
  Downloading openmeteo_sdk-1.7.2-py3-none-any.whl (12 kB)
Installing collected packages: openmeteo-sdk, openmeteo-requests
Successfully installed openmeteo-requests-1.1.0 openmeteo-sdk-1.7.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting requests-cache
  Downloading requests_cache-1.1.1-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.3/60.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hCollecting retry-requests
  Downloading retry_requests-2.0.0-py3-none-any.whl (15 kB)
Collecting url-normalize>=1.4
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl (6.8 kB)
Coll

In [4]:
from pathlib import Path

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

# Build the Open-Meteo API client on top of an HTTP session that caches responses
# in ".cache" and retries failed requests (5 retries, backoff factor 0.2).
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition for the historical archive endpoint.
# Every weather variable that is needed must be listed under "hourly"; the position
# of each name in that list is the index used to read the variable back from the
# response, so keep the order in sync with the extraction code.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=50.4547,
    longitude=30.5238,
    start_date="2019-01-01",
    end_date="2024-01-29",
    hourly=[
        "temperature_2m",
        "relative_humidity_2m",
        "apparent_temperature",
        "precipitation",
        "rain",
        "surface_pressure",
        "cloud_cover",
        "wind_speed_100m",
        "wind_direction_100m",
    ],
)
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]

# Print the metadata of the grid cell the API resolved the request to.
# Latitude is a north/south coordinate (°N) and longitude an east/west one (°E).
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Read the hourly series out of the response. Variables are addressed by position,
# and the positions follow the order in which the variables were requested.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_apparent_temperature,
    hourly_precipitation,
    hourly_rain,
    hourly_surface_pressure,
    hourly_cloud_cover,
    hourly_wind_speed_100m,
    hourly_wind_direction_100m,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(9))

# Assemble the columns: a timestamp axis rebuilt from the response's start, end and
# interval (end exclusive), followed by one column per requested variable.
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s"),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "apparent_temperature": hourly_apparent_temperature,
    "precipitation": hourly_precipitation,
    "rain": hourly_rain,
    "surface_pressure": hourly_surface_pressure,
    "cloud_cover": hourly_cloud_cover,
    "wind_speed_100m": hourly_wind_speed_100m,
    "wind_direction_100m": hourly_wind_direction_100m,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)
print(hourly_dataframe)


Coordinates 50.43936538696289°E 30.476192474365234°N
Elevation 188.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                     date  temperature_2m  relative_humidity_2m  \
0     2019-01-01 00:00:00         -2.2425             91.135132   
1     2019-01-01 01:00:00         -2.2925             91.131615   
2     2019-01-01 02:00:00         -2.3925             91.807915   
3     2019-01-01 03:00:00         -2.0925             92.858681   
4     2019-01-01 04:00:00         -1.6925             93.226448   
...                   ...             ...                   ...   
44515 2024-01-29 19:00:00             NaN                   NaN   
44516 2024-01-29 20:00:00             NaN                   NaN   
44517 2024-01-29 21:00:00             NaN                   NaN   
44518 2024-01-29 22:00:00             NaN                   NaN   
44519 2024-01-29 23:00:00             NaN                   NaN   

       apparent_temperature  precipitation  rain  surface_pressure  

In [5]:
# Preview the first five rows of the hourly data (five is the default for head()).
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,surface_pressure,cloud_cover,wind_speed_100m,wind_direction_100m
0,2019-01-01 00:00:00,-2.2425,91.135132,-6.811514,0.1,0.0,1002.210876,100.0,24.675106,203.198608
1,2019-01-01 01:00:00,-2.2925,91.131615,-6.956277,0.0,0.0,1001.71814,100.0,25.537172,201.501495
2,2019-01-01 02:00:00,-2.3925,91.807915,-7.015568,0.0,0.0,1001.220947,100.0,25.759504,206.564987
3,2019-01-01 03:00:00,-2.0925,92.858681,-6.601203,0.1,0.0,1000.563599,100.0,24.464113,212.988525
4,2019-01-01 04:00:00,-1.6925,93.226448,-6.070883,0.1,0.0,1000.012573,100.0,23.4,216.86998


In [6]:
# Number of distinct values per column; missing values are not counted.
hourly_dataframe.nunique(axis=0, dropna=True)

date                    44520
temperature_2m           1039
relative_humidity_2m    36549
apparent_temperature    44427
precipitation              68
rain                       95
surface_pressure        37949
cloud_cover               829
wind_speed_100m          4515
wind_direction_100m     14606
dtype: int64

In [7]:
# Count the missing values in each column.
hourly_dataframe.isnull().sum()

date                     0
temperature_2m          48
relative_humidity_2m    48
apparent_temperature    48
precipitation           48
rain                    48
surface_pressure        48
cloud_cover             48
wind_speed_100m         48
wind_direction_100m     48
dtype: int64

In [11]:
# Show the rows in which the temperature is missing, to see where the gaps are.
hourly_dataframe.loc[hourly_dataframe["temperature_2m"].isna()]

Unnamed: 0,date,temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,surface_pressure,cloud_cover,wind_speed_100m,wind_direction_100m
44472,2024-01-28 00:00:00,,,,,,,,,
44473,2024-01-28 01:00:00,,,,,,,,,
44474,2024-01-28 02:00:00,,,,,,,,,
44475,2024-01-28 03:00:00,,,,,,,,,
44476,2024-01-28 04:00:00,,,,,,,,,
44477,2024-01-28 05:00:00,,,,,,,,,
44478,2024-01-28 06:00:00,,,,,,,,,
44479,2024-01-28 07:00:00,,,,,,,,,
44480,2024-01-28 08:00:00,,,,,,,,,
44481,2024-01-28 09:00:00,,,,,,,,,


In [12]:
# Keep only the rows strictly before 2024-01-28 00:00:00; the rows from that
# timestamp onwards were found above to contain no measurements.
filtered_df = hourly_dataframe.loc[
    hourly_dataframe["date"] < pd.Timestamp("2024-01-28 00:00:00")
]

In [13]:
# Confirm that the filtered frame no longer has missing values in any column.
filtered_df.isnull().sum()

date                    0
temperature_2m          0
relative_humidity_2m    0
apparent_temperature    0
precipitation           0
rain                    0
surface_pressure        0
cloud_cover             0
wind_speed_100m         0
wind_direction_100m     0
dtype: int64

In [14]:
# Summary statistics, transposed so that each variable is a row.
filtered_df.describe().transpose()

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
date,44472.0,2021-07-15 11:30:00,2019-01-01 00:00:00,2020-04-08 05:45:00,2021-07-15 11:30:00,2022-10-21 17:15:00,2024-01-27 23:00:00,
temperature_2m,44472.0,9.456667,-23.0425,1.3075,8.907499,17.3575,34.307499,9.72684
relative_humidity_2m,44472.0,73.570244,14.91567,60.913239,77.097752,88.888327,100.0,18.094131
apparent_temperature,44472.0,6.688212,-28.68,-2.988262,5.744978,16.118525,36.315945,11.499243
precipitation,44472.0,0.070822,0.0,0.0,0.0,0.0,10.5,0.3184
rain,44472.0,0.059662,0.0,0.0,0.0,0.0,10.5,0.305727
surface_pressure,44472.0,993.240417,961.857056,988.106506,993.33606,998.552612,1022.762573,8.196391
cloud_cover,44472.0,53.637917,0.0,14.1,53.100002,98.1,100.0,39.111752
wind_speed_100m,44472.0,21.672243,0.0,15.379206,21.413191,27.534704,73.929832,9.00713
wind_direction_100m,44472.0,202.341827,0.572935,123.023878,214.676819,291.644501,360.0,103.061142


In [15]:
# Persist the cleaned hourly data as CSV (without the positional index).
# The target directory is created first so the export also succeeds on a fresh
# checkout where "data/" does not exist yet; to_csv itself does not create it.
output_path = Path("data/kyiv_hourly.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
filtered_df.to_csv(output_path, index=False)