In [23]:
import polars as pl

battery = "data/huduma_centre_battery_voltage_01May24-16May24.csv"
iqair = "data/IQAir_raw_twrgmdgz53t_15May23-16May24_hourly.csv"

df_battery = pl.read_csv(source=battery, try_parse_dates=True)
df_battery = df_battery.with_columns(pl.col("Date").str.strptime(format="%d.%m.%Y %H:%M:%S", dtype=pl.Datetime))
df_battery.head()

Date,Voltage
datetime[μs],f64
2024-05-01 00:00:00,11.61
2024-05-01 00:10:00,11.43
2024-05-01 00:20:00,11.53
2024-05-01 00:30:00,11.53
2024-05-01 00:40:00,11.53


In [33]:
df_battery = df_battery.sort("Date")
df_battery_1h = df_battery.group_by_dynamic("Date", every="1h").agg(pl.col("Voltage").mean().alias("mean_voltage"))
df_battery_1h.head()

Date,mean_voltage
datetime[μs],f64
2024-04-30 09:00:00,12.092
2024-04-30 10:00:00,12.21
2024-04-30 11:00:00,12.403333
2024-04-30 12:00:00,12.473333
2024-04-30 13:00:00,12.508333


In [37]:
df_iqair = pl.read_csv(source=iqair, try_parse_dates=True)

# shift timestamps to UTC
df_iqair = df_iqair.with_columns(pl.col("Datetime_start") - pl.duration(hours=2).alias("Date"),
                                 (pl.col("Datetime_end") - pl.duration(hours=2)),
                                 )
df_iqair = df_iqair.drop(["Timezone", "CO (ppm)", "TVOC (index)", "NOx (index)", "O3 (ug/m3)", "HCHO (ppb)", "TVOC (ppb)"])
df_iqair.head()
# df_iqair.schema

Source,Datetime_start,Datetime_end,AQI US,AQI CN,PM2.5 (ug/m3),PM10 (ug/m3),PM1 (ug/m3),CO2 (ppm),Temperature (Celsius),Temperature (Fahrenheit),Humidity (%),Outdoor PM2.5 (ug/m3),Pressure (pascal),Particle Count,Fan Speed Level,slot1.pm25,slot1.pm1,slot1.pm10,slot2.pm25,slot2.pm1,slot2.pm10,slot3.co2,slot4.pm25,slot4.pm1,slot4.pm10
str,datetime[μs],datetime[μs],i64,i64,i64,i64,i64,i64,i64,f64,i64,str,i64,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""twrgmdgz53t""",2023-07-11 16:00:00,2023-07-11 16:59:59,55,16,11,18,8,849,25,77.0,63,,96111,,,14,11,22,11,8,17,849,10,8,17
"""twrgmdgz53t""",2023-07-11 17:00:00,2023-07-11 17:59:59,56,17,12,19,8,901,26,78.8,61,,95968,,,14,11,22,11,8,17,901,12,8,20
"""twrgmdgz53t""",2023-07-11 18:00:00,2023-07-11 18:59:59,53,14,10,17,8,923,26,78.8,61,,95874,,,13,10,20,10,7,16,923,10,8,16
"""twrgmdgz53t""",2023-07-11 19:00:00,2023-07-11 19:59:59,50,13,9,15,7,899,26,78.8,61,,96153,,,12,9,19,9,6,14,899,9,6,14
"""twrgmdgz53t""",2023-07-11 20:00:00,2023-07-11 20:59:59,28,7,5,8,3,684,26,78.8,60,,96718,,,7,5,11,5,3,8,684,5,3,8


In [41]:
df = pl.concat([df_battery_1h, df_iqair], how="diagonal")
df.plot.scatter(x="Date", y=["mean_voltage", "Temperature (Celcius)"])

ModuleUpgradeRequired: hvplot>=0.9.1 is required for `.plot`