# Download weather data for Prague

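The same steps work for any other city: change `CITY` and `COUNTRY` below and choose a suitable nearby weather station.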

In [1]:
from datetime import datetime

import polars as pl
from meteostat import Hourly, Stations

In [2]:
# Target location. Both values are used as exact-match filters on the `city`
# and `country` columns of the cities dataset; CITY (lowercased) also names
# the output file.
CITY: str = "Prague"
COUNTRY: str = "Czechia"

In [3]:
# Load our cities dataset and look up the configured city's record
cities = pl.read_parquet("../data/worldcities.parquet")
city_matches = cities.filter(city=CITY, country=COUNTRY)

# Fail with a readable message (e.g. on a typo in CITY/COUNTRY) instead of the
# opaque error that `.row(0)` gives on an empty frame.
if city_matches.is_empty():
    raise ValueError(
        f"No city {CITY!r} in country {COUNTRY!r} found in the cities dataset"
    )

# If several rows match, the first one is used.
city_info = city_matches.row(0, named=True)  # dict: column name -> value
city_info


{'city': 'Prague',
 'city_ascii': 'Prague',
 'lat': 50.0875,
 'lng': 14.4214,
 'country': 'Czechia',
 'iso2': 'CZ',
 'iso3': 'CZE',
 'admin_name': 'Praha',
 'capital': 'primary',
 'population': 1357326,
 'id': 1203744823}

In [4]:
# Find weather stations around the city coordinates.
# The result comes back nearest-first, with a `distance` column.
stations = (
    Stations()
    .nearby(
        lat=city_info["lat"],
        lon=city_info["lng"],
        radius=100000,  # metres, i.e. 100 km (same unit as the `distance` column)
    )
    .fetch(10)  # limit the result to 10 stations
)
stations  # pandas DataFrame indexed by station id

Unnamed: 0_level_0,name,country,region,wmo,icao,latitude,longitude,elevation,timezone,hourly_start,hourly_end,daily_start,daily_end,monthly_start,monthly_end,distance
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
11520,Praha-Libus,CZ,PR,11520.0,,50.0167,14.45,304.0,Europe/Prague,NaT,NaT,1973-01-01,2024-06-12,1972-01-01,2021-01-01,8133.107961
11567,Praha Kbely,CZ,PR,11567.0,,50.1167,14.5333,286.0,Europe/Prague,2022-04-19,2022-04-23,2022-04-20,2022-04-22,NaT,NaT,8616.206889
11518,Praha / Ruzyne,CZ,ST,11518.0,LKPR,50.1,14.25,365.0,Europe/Prague,1929-08-02,2024-06-21,1940-01-01,2024-06-12,1940-01-01,2022-01-01,12305.608643
LKVO0,Vodochody,CZ,ST,,LKVO,50.2166,14.3958,280.0,Europe/Prague,2020-01-14,2024-06-20,NaT,NaT,NaT,NaT,14470.675695
11509,Doksany,CZ,US,11509.0,,50.4667,14.1667,158.0,Europe/Prague,NaT,NaT,NaT,NaT,NaT,NaT,45885.523266
11464,Milesovka,CZ,UK,11464.0,,50.55,13.9333,836.0,Europe/Prague,NaT,NaT,1906-05-01,2024-06-12,1906-01-01,2021-01-01,62013.961753
11624,Caslav,CZ,ST,11624.0,LKCV,49.9333,15.3833,242.0,Europe/Prague,1997-01-01,2024-06-21,1997-03-09,2022-04-25,2005-01-01,2022-01-01,70842.378076
11502,Usti Nad Labem,CZ,US,11502.0,,50.6833,14.0333,377.0,Europe/Prague,NaT,NaT,NaT,NaT,NaT,NaT,71736.776174
11628,Kramolin-Kosetice,CZ,VY,11628.0,,49.5833,15.0833,534.0,Europe/Prague,NaT,NaT,NaT,NaT,NaT,NaT,73461.700845
11487,Kocelovice,CZ,JC,11487.0,,49.4667,13.8333,519.0,Europe/Prague,2018-01-27,2024-06-21,1977-06-18,2024-06-12,1988-01-01,2022-01-01,80921.509625


In [7]:
# Use Praha / Ruzyne for Prague, selected by its Meteostat station id.
# Selecting by id rather than by position (`stations.index[2]`) matters because
# the nearest-first ordering changes with the station inventory or the chosen
# city, and a positional pick would then silently point at another station.
preferred_station_id = "11518"  # Praha / Ruzyne; change this for another city
if preferred_station_id not in stations.index:
    raise KeyError(
        f"Station {preferred_station_id!r} is not among the nearby stations; "
        f"choose one of {list(stations.index)}"
    )
station_id = preferred_station_id
station_id

'11518'

In [8]:
# Request every hourly observation the station offers: from the first hour
# listed in the station inventory (`hourly_start`) up to the current moment.
station = stations.loc[station_id]
first_hour = station["hourly_start"]
source = Hourly(station_id, start=first_hour, end=datetime.now())
source



<meteostat.interface.hourly.Hourly at 0x22419583750>

In [9]:
# Fetch the observations as pandas, move the `time` index into a regular
# column, then convert to polars (we are learning polars, aren't we?)
observations_pd = source.fetch().reset_index()
# Missing values end up as polars nulls: NaN -> null during conversion, and
# `fill_nan(None)` states that explicitly for any NaN that might remain.
df = pl.from_pandas(observations_pd, nan_to_null=True).fill_nan(None)
df

time,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco
datetime[ns],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1929-08-02 06:00:00,17.2,,,,,270.0,16.6,,1010.2,,
1929-08-02 18:00:00,17.8,,,,,270.0,16.6,,1012.4,,
1929-08-03 06:00:00,15.0,,,,,20.0,3.6,,1019.6,,
1929-08-04 06:00:00,15.0,,,,,0.0,0.0,,1018.4,,
1929-08-05 06:00:00,17.2,,,,,180.0,3.6,,1012.0,,
…,…,…,…,…,…,…,…,…,…,…,…
2024-06-22 03:00:00,17.0,15.7,92.0,0.3,,240.0,20.4,33.3,1011.1,,18.0
2024-06-22 04:00:00,16.8,15.5,92.0,0.2,,241.0,20.4,33.3,1011.3,,7.0
2024-06-22 05:00:00,16.7,15.2,91.0,1.2,,242.0,20.4,33.3,1011.7,,7.0
2024-06-22 06:00:00,17.0,15.2,89.0,3.2,,246.0,20.4,33.3,1011.9,,7.0


In [10]:
# Save the hourly data to the data directory, in a file named after the
# lowercased city
output_path = f"../data/{CITY.lower()}-meteostat.parquet"
df.write_parquet(output_path)