In [1]:
import polars as pl 
import requests

In [2]:
## Load the raw PV export; the path is hardcoded to this one file.
## Three leading lines are skipped and no header row is read, so the two
## columns are named explicitly. The file is ";"-separated and uses decimal
## commas, which lets the power column parse as a float during the read.
df = pl.read_csv(
    "../data/historical_Data/PV-electricity_2024_01_01.csv",
    skip_lines=3,
    has_header=False,
    separator=";",
    new_columns=["date_time", "pv_electricity(kW)"],
    decimal_comma=True,
).with_columns(
    # parse the "DD.MM.YYYY HH:MM" strings into datetimes
    pl.col("date_time").str.to_datetime("%d.%m.%Y %H:%M")
)
df.head()

date_time,pv_electricity(kW)
datetime[μs],f64
2024-01-01 00:00:00,0.0
2024-01-01 00:15:00,0.0
2024-01-01 00:30:00,0.0
2024-01-01 00:45:00,0.0
2024-01-01 01:00:00,0.0


In [3]:
## Downsample to hourly data: order the rows by timestamp, then average the
## power readings that fall into each 1-hour window.
df = (
    df.sort("date_time")
    .group_by_dynamic("date_time", every="1h")
    .agg(pl.col("pv_electricity(kW)").mean())
)

In [4]:
## Sanity check: compute the sun position for the first timestamp only
from suncalc import get_position

## Site coordinates handed to get_position (and later to the weather API).
## NOTE(review): these look like Hamburg, DE - confirm they match the PV site.
lat = 53.5511
lon = 9.9937
## Index the Series directly; converting the whole column to a list just to
## read its first element is unnecessary work.
date_time = df["date_time"][0]
## NOTE(review): date_time is a naive datetime (the column has no time zone);
## confirm which time zone suncalc assumes for naive inputs.
sun_pos = get_position(date_time, lng=lon, lat=lat)
print(sun_pos)

{'azimuth': np.float64(2.9575346136447305), 'altitude': np.float64(-1.0336211802887731)}


In [5]:
## Calculate the sun position for every timestamp.
## get_position returns a dict with "azimuth" and "altitude", so each row maps
## to a struct. The struct dtype is declared explicitly: without return_dtype
## polars has to guess the output type of the Python callback and warns.
df = df.with_columns(
    pl.col("date_time")
    .map_elements(
        lambda x: get_position(x, lng=lon, lat=lat),
        return_dtype=pl.Struct({"azimuth": pl.Float64, "altitude": pl.Float64}),
    )
    .alias("sun_position")
)


  df = df.with_columns(


In [6]:

## Expand the "sun_position" struct into one column per field. DataFrame.unnest
## puts the fields where the struct column was, so no separate drop is needed.
df = df.unnest("sun_position")
df.head()

date_time,pv_electricity(kW),azimuth,altitude
datetime[μs],f64,f64,f64
2024-01-01 00:00:00,0.0,2.957535,-1.033621
2024-01-01 01:00:00,0.0,-2.855597,-1.025577
2024-01-01 02:00:00,0.0,-2.436587,-0.951218
2024-01-01 03:00:00,0.0,-2.101757,-0.83198
2024-01-01 04:00:00,0.0,-1.833674,-0.688703


In [7]:
## Add calendar features derived from the timestamp: week number and hour of day
df = df.with_columns(
    week=pl.col("date_time").dt.week(),
    hour=pl.col("date_time").dt.hour(),
)
df

date_time,pv_electricity(kW),azimuth,altitude,week,hour
datetime[μs],f64,f64,f64,i8,i8
2024-01-01 00:00:00,0.0,2.957535,-1.033621,1,0
2024-01-01 01:00:00,0.0,-2.855597,-1.025577,1,1
2024-01-01 02:00:00,0.0,-2.436587,-0.951218,1,2
2024-01-01 03:00:00,0.0,-2.101757,-0.83198,1,3
2024-01-01 04:00:00,0.0,-1.833674,-0.688703,1,4
…,…,…,…,…,…
2025-07-07 20:00:00,3.3075,1.927138,0.227632,28,20
2025-07-07 21:00:00,0.586012,2.127166,0.088165,28,21
2025-07-07 22:00:00,0.0,2.335557,-0.034575,28,22
2025-07-07 23:00:00,0.0,2.556617,-0.134235,28,23


In [None]:
## get weather data from open-meteo
def get_houly_weather(
    lat: float,
    lon: float,
    start_date,
    end_date,
    variables: list,
    timezone="Europe/Berlin",
    timeout: float = 30.0,
):
    """Fetch hourly weather variables from the Open-Meteo archive API.

    Args:
        lat: Latitude sent as the ``latitude`` query parameter.
        lon: Longitude sent as the ``longitude`` query parameter.
        start_date: First day to request; any object with ``strftime``
            (e.g. ``date`` / ``datetime``). Only the date part is sent.
        end_date: Last day to request; same requirements as ``start_date``.
        variables: Names of the hourly variables; joined with "," for the
            ``hourly`` query parameter.
        timezone: Value of the ``timezone`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        dict: A copy of the ``"hourly"`` object of the JSON response, keyed
        by series name.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout``.
        KeyError: If the response body has no ``"hourly"`` entry.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": ",".join(variables),
        "start_date": start_date.strftime('%Y-%m-%d'),
        "end_date": end_date.strftime('%Y-%m-%d'),
        "timezone": timezone
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail here on 4xx/5xx instead of later with an opaque KeyError on "hourly".
    response.raise_for_status()
    data = response.json()
    # Shallow copy, so callers do not hold a reference into the parsed payload.
    return dict(data["hourly"])

In [9]:
## Request weather for the span covered by the PV frame: the first and last
## row of "date_time". The Series is indexed directly instead of converting
## the whole column to a Python list twice.
start_date = df["date_time"][0]
end_date = df["date_time"][-1]
variables = ["direct_radiation", "cloud_cover"]

weather = get_houly_weather(lat, lon, start_date, end_date, variables)

In [10]:
## Turn the API result into a frame and parse the "time" strings to datetimes
weather_df = pl.from_dict(weather).with_columns(
    pl.col("time").str.to_datetime()
)
weather_df

time,direct_radiation,cloud_cover
datetime[μs],f64,i64
2024-01-01 00:00:00,0.0,88
2024-01-01 01:00:00,0.0,88
2024-01-01 02:00:00,0.0,98
2024-01-01 03:00:00,0.0,96
2024-01-01 04:00:00,0.0,100
…,…,…
2025-07-08 19:00:00,,
2025-07-08 20:00:00,,
2025-07-08 21:00:00,,
2025-07-08 22:00:00,,


In [11]:
## Attach the weather columns to the PV rows by matching timestamps
## (inner join: only timestamps present in both frames are kept)
df = df.join(
    weather_df,
    left_on="date_time",
    right_on="time",
    how="inner",
)


In [12]:
## Preview the first rows of df after the weather join
df.head()

date_time,pv_electricity(kW),azimuth,altitude,week,hour,direct_radiation,cloud_cover
datetime[μs],f64,f64,f64,i8,i8,f64,i64
2024-01-01 00:00:00,0.0,2.957535,-1.033621,1,0,0.0,88
2024-01-01 01:00:00,0.0,-2.855597,-1.025577,1,1,0.0,88
2024-01-01 02:00:00,0.0,-2.436587,-0.951218,1,2,0.0,98
2024-01-01 03:00:00,0.0,-2.101757,-0.83198,1,3,0.0,96
2024-01-01 04:00:00,0.0,-1.833674,-0.688703,1,4,0.0,100


In [13]:
import plotly.express as px

## PV output plotted against direct radiation
px.scatter(df, x="direct_radiation", y="pv_electricity(kW)")

In [14]:
## PV output plotted against the week number
px.scatter(df, x="week", y="pv_electricity(kW)")

In [16]:
## PV output plotted against the sun azimuth
px.scatter(df, x="azimuth", y="pv_electricity(kW)")