# Download weather data for Vienna

(Note: Eisenstadt does not have enough historical data — its hourly records only start in 2018.)

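To download data for any other city, change `CITY` and `COUNTRY` below and re-check which weather station gets selected.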

In [2]:
from datetime import datetime

import polars as pl
from meteostat import Hourly, Stations

In [5]:
# City to download weather data for. Both values are matched against the
# `city` and `country` columns of the worldcities dataset; CITY is also
# lower-cased to build the output file name.
CITY = "Vienna"
COUNTRY = "Austria"

In [6]:
# Load our cities dataset and look up the configured city's coordinates
cities = pl.read_parquet("../data/worldcities.parquet")
city_matches = cities.filter(city=CITY, country=COUNTRY)
# Fail with a readable message instead of an out-of-bounds row error when
# CITY/COUNTRY is misspelled or missing from the dataset.
if city_matches.is_empty():
    raise ValueError(f"No city {CITY!r} in country {COUNTRY!r} found in worldcities.parquet")
# If several rows match, the first one is used (same as before).
city_info = city_matches.row(0, named=True)
city_info


{'city': 'Vienna',
 'city_ascii': 'Vienna',
 'lat': 48.2083,
 'lng': 16.3725,
 'country': 'Austria',
 'iso2': 'AT',
 'iso3': 'AUT',
 'admin_name': 'Wien',
 'capital': 'primary',
 'population': 2223236,
 'id': 1040261752}

In [7]:
# Look for weather stations around the city's coordinates.
# The radius is given in meters (100_000 m = 100 km); the `distance` column
# of the result is in meters as well.
s = Stations().nearby(lat=city_info["lat"], lon=city_info["lng"], radius=100_000)
# Keep at most 10 stations; the result is a pandas DataFrame indexed by station id
stations = s.fetch(10)
stations

Unnamed: 0_level_0,name,country,region,wmo,icao,latitude,longitude,elevation,timezone,hourly_start,hourly_end,daily_start,daily_end,monthly_start,monthly_end,distance
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
11034,Wien / City,AT,WI,11034.0,,48.2,16.3667,171.0,Europe/Vienna,NaT,NaT,NaT,NaT,NaT,NaT,1018.102783
11035,Wien / Hohe Warte,AT,WI,11035.0,,48.25,16.3667,203.0,Europe/Vienna,2018-01-27,2025-04-02,1855-02-01,2025-03-25,1855-01-01,2022-01-01,4656.689109
11040,Wien / Kledering,AT,WI,11040.0,,48.1247,16.4192,201.0,Europe/Vienna,NaT,NaT,NaT,NaT,NaT,NaT,9920.133522
11036,Wien / Schwechat,AT,NO,11036.0,LOWW,48.1167,16.5667,183.0,Europe/Vienna,1952-01-01,2025-04-02,1973-01-04,2022-04-25,2003-01-01,2022-01-01,17641.114896
11082,Gumpoldskirchen,AT,NO,11082.0,,48.0333,16.2833,218.0,Europe/Vienna,NaT,NaT,NaT,NaT,NaT,NaT,20554.76013
11030,Tulln,AT,NO,11030.0,LOXT,48.3167,16.1167,175.0,Europe/Vienna,1940-03-11,2025-04-01,1946-05-01,2022-04-22,1946-01-01,2017-01-01,22446.36421
11181,Bad Voeslau Airfield,AT,NO,11181.0,LOAV,47.95,16.25,233.0,Europe/Vienna,2004-05-10,2025-04-01,NaT,NaT,NaT,NaT,30128.912537
11190,Eisenstadt,AT,BGLD,11190.0,,47.85,16.5333,184.0,Europe/Vienna,2018-01-27,2025-04-02,2018-01-28,2022-04-24,2019-01-01,2022-01-01,41596.797263
LOAN0,Wiener / Wr. / Lichtenwörth,AT,NO,,LOAN,47.8414,16.2586,271.0,Europe/Vienna,2004-05-10,2025-04-01,NaT,NaT,NaT,NaT,41667.469115
LOXN0,Wiener Neustadt West / Wr. Neustadt West / The...,AT,NO,,LOXN,47.84,16.2217,285.0,Europe/Vienna,2004-05-10,2025-03-08,NaT,NaT,NaT,NaT,42460.884211


In [10]:
# Pick the nearest station that has a long hourly record (Wien / Schwechat for
# Vienna). The closer stations either have no hourly data at all (NaT) or only
# start in 2018, so select by `hourly_start` instead of a hard-coded row position.
# The stations table is listed nearest-first, so the first match is the closest.
HOURLY_START_BEFORE = datetime(2000, 1, 1)  # tune to the amount of history needed
# NaT compares as False here, which drops stations without hourly data.
long_history = stations[stations["hourly_start"] < HOURLY_START_BEFORE]
if long_history.empty:
    raise ValueError(
        f"No station near {CITY} has hourly data starting before {HOURLY_START_BEFORE:%Y-%m-%d}"
    )
station_id = long_history.index[0]
station_id

'11036'

In [15]:
# Request the hourly series from the station's first hourly record
# through the last second of 2024
station = stations.loc[station_id]
hourly_from = station["hourly_start"]
hourly_until = datetime(2024, 12, 31, 23, 59, 59)
source = Hourly(station_id, start=hourly_from, end=hourly_until)
source

<meteostat.interface.hourly.Hourly at 0x1f8bdbcd6d0>

In [16]:
# Fetch the data as pandas and move its index into a regular column
hourly_pd = source.fetch().reset_index()
# Convert to polars (this notebook is polars practice); NaN becomes null on
# conversion, and fill_nan(None) makes that handling of missing values explicit.
df = pl.from_pandas(hourly_pd, nan_to_null=True).fill_nan(None)
df

time,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco
datetime[ns],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1952-01-01 06:00:00,2.2,-0.1,85.0,,,120.0,18.4,,1012.5,,
1952-01-01 12:00:00,3.9,1.1,82.0,,,150.0,24.1,,1011.6,,
1952-01-02 00:00:00,3.9,-2.3,64.0,,,290.0,29.5,,1015.3,,
1952-01-02 06:00:00,2.8,-2.7,67.0,,,310.0,16.6,,1016.9,,
1952-01-02 18:00:00,3.9,-2.7,62.0,,,180.0,13.0,,1015.5,,
…,…,…,…,…,…,…,…,…,…,…,…
2024-12-31 19:00:00,-3.8,-4.5,95.0,0.0,,140.0,10.8,20.5,1033.2,0.0,5.0
2024-12-31 20:00:00,-3.9,-4.4,96.0,0.0,,130.0,10.8,18.4,1033.4,0.0,5.0
2024-12-31 21:00:00,-3.8,-4.3,96.0,0.0,,150.0,14.4,22.3,1033.1,0.0,5.0
2024-12-31 22:00:00,-3.5,-4.0,96.0,0.0,,160.0,14.4,22.3,1032.9,0.0,5.0


In [17]:
# Write the hourly data to a parquet file named after the lower-cased city
output_path = f"../data/{CITY.lower()}-meteostat.parquet"
df.write_parquet(output_path)