# In [44]:

import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.io as pio



# Locations of the semicolon-separated input CSV files
file_path_sun_data = '../data/meteo/meteo_swiss_data.csv'
file_path_stn_data = '../data/meteo/meteo_swiss_stn_data.csv'

# Read the measurement table and the station table
sun_data = pd.read_csv(file_path_sun_data, sep=';')
stn_data = pd.read_csv(file_path_stn_data, sep=';')

# Keep only stations whose canton is not 'LI' (Liechtenstein)
is_liechtenstein = stn_data['canton'] == 'LI'
stn_data = stn_data[~is_liechtenstein]

# Inner join on the station id, so measurements without a remaining
# station row (e.g. the Liechtenstein ones) are dropped as well
data = sun_data.merge(stn_data, on='stn', how='inner')

# Parse the YYYYMM time column and derive year / month helper columns
data['time'] = pd.to_datetime(data['time'], format='%Y%m')
data['year'] = data['time'].dt.year
data['month'] = data['time'].dt.to_period('M').astype(str)

# '-' (presumably the missing-value marker of the export) becomes 0 so the
# column can be cast to float; those rows are dropped by the filter below
data['sunhours'] = data['su2000m0'].replace('-', '0').astype(float)

# Drop invalid (non-positive) values and the year 2025
# NOTE(review): 2025 is presumably excluded because it is incomplete - confirm
valid_data = data[(data['sunhours'] > 0) & (data['year'] != 2025)]

# Average over all stations per month. Aggregate the *filtered* rows (not the
# raw `data`), otherwise the placeholder zeros and the 2025 rows leak into
# the monthly means and the filter above has no effect.
monthly_data = (
    valid_data
    .groupby(valid_data['time'].dt.to_period('M'))['sunhours']
    .mean()
    .reset_index()
)
# Convert the monthly Period back to a timestamp at the first of each month
monthly_data['time'] = monthly_data['time'].astype(str)
monthly_data['time'] = pd.to_datetime(monthly_data['time'], format='%Y-%m')

df = monthly_data.copy()

# Make the series gap-free: reindex onto a complete month-start calendar
full_date_range = pd.date_range(start=df['time'].min(), end=df['time'].max(), freq='MS')
df = df.set_index('time').reindex(full_date_range).rename_axis('time').reset_index()

# Fill months without data by linear interpolation between their neighbours.
# Filling with 0 would inject artificial "no sunshine" outliers that distort
# the fitted model. The first and last month always carry data because the
# calendar spans min..max of the observed months, so no edge NaNs remain.
df['sunhours'] = df['sunhours'].interpolate(method='linear')

# Fit a seasonal ARIMA model: (1,1,1) non-seasonal terms and (1,1,1)
# seasonal terms with a 12-step (yearly, for monthly data) period
sarimax_options = dict(
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_invertibility=False,
)
model = SARIMAX(df['sunhours'], **sarimax_options)
results = model.fit()

# Forecast 72 months (6 years) beyond the end of the series
forecast = results.get_forecast(steps=72)
forecast_mean = forecast.predicted_mean

# Confidence bounds of the forecast; conf_int() yields the lower bound in
# column 0 and the upper bound in column 1
forecast_ci = forecast.conf_int()

# Create the interactive Plotly figure
fig = go.Figure()

# Historical monthly series
fig.add_trace(go.Scatter(x=df['time'], y=df['sunhours'], mode='lines', name='Historische Daten', line=dict(color='#1f77b4')))

# Time axis for the forecast. The first forecast step belongs to the month
# *after* the last observation, so build one extra month-start stamp beginning
# at the last observation and drop it. Month-start ('MS') matches the
# historical axis; starting at the last observation with month-end stamps
# would plot every forecast value one month too early. The length is taken
# from the forecast itself so the axis always matches the forecast horizon.
forecast_index = pd.date_range(
    start=df['time'].iloc[-1],
    periods=len(forecast_mean) + 1,
    freq='MS',
)[1:]
fig.add_trace(go.Scatter(x=forecast_index, y=forecast_mean, mode='lines', name='Prognose', line=dict(color='#ff7f0e')))

# Confidence interval as one closed, filled polygon (single legend entry):
# walk the upper bound forwards, then the lower bound backwards
fig.add_trace(go.Scatter(
    x=list(forecast_index) + list(forecast_index[::-1]),
    y=list(forecast_ci.iloc[:, 1]) + list(forecast_ci.iloc[:, 0][::-1]),
    fill='toself',
    fillcolor='rgba(255, 127, 14, 0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    hoverinfo='skip',
    name='Konfidenzintervall'
))

# Figure layout: title, y-axis label, theme, legend font and yearly x ticks
layout_options = {
    'title': 'Prognose der durchschnittliche Sonnenstunden mit Konfidenzintervall',
    'yaxis_title': 'Durchschnittliche Sonnenstunden',
    'template': 'plotly_white',
    'legend': {'font': {'size': 12}},
    # One tick every 12 months, labelled with the year only
    'xaxis': {'tickformat': '%Y', 'dtick': 'M12'},
}
fig.update_layout(**layout_options)

# Render the figure
fig.show()

# Save the chart as a standalone HTML file
fig.write_html('../docs/assets/diagramme/sunhours_forecast.html')