In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime, which provides
# the google.colab package). The data and output paths used by this notebook live
# under this mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os
import joblib

from joblib import Parallel, delayed, dump, load
from scipy.stats import multivariate_normal


In [None]:
# --- Input parameters -------------------------------------------------------

# Gaussian-mixture hyperparameters
nComponents = 16   # number of mixture components; determined as best via K-fold
nInit = 20         # number of initialisations; determined as best
nIter = 302        # maximum number of EM iterations; determined as best
randomState = 21   # seed handed to the mixture model

# Execution
nJobs = -1         # worker count for parallel execution

# Hold-out selection: one model is trained per list entry. An entry is a station
# name to hold out; a None entry (i.e. [None]) holds out the last year instead.
selected_Station = ['Hersfeld, Bad']

# Directory receiving the fitted scaler, the fitted mixture and the BIC table
storagePath = (
    "/content/drive/MyDrive/26-2BJXXXX_model_weather_station Data/"
    "rowWiseModelFinal/"
)


In [None]:
# Load the row-wise data set, since it is better suited for this model.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a
# trusted location.
df = pd.read_pickle("/content/drive/MyDrive/26-2BJXXXX_model_weather_station Data/rowwiseDf.pkl")

In [None]:
# Create the output directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(storagePath, exist_ok=True)

In [None]:
def run_gmm_for_station(station, df, n_comp, n_init, max_iter, random_state, output_dir):
    """Fit a scaler and a Gaussian mixture on a train split and score the hold-out.

    The hold-out split is either every row of one station or, when ``station``
    is ``None``, every row whose ``MESS_DATUM`` falls within the final year of
    the data. The fitted ``StandardScaler`` and ``GaussianMixture`` are written
    to ``output_dir`` as ``scaler_<name>.joblib`` and ``gmm_<name>.joblib``.

    Args:
        station: Value of the ``Stationsname`` column to hold out, or ``None``
            to hold out the last year of measurements instead.
        df: DataFrame containing ``MESS_DATUM``, ``Stationsname`` and the
            feature columns. It is not modified.
        n_comp: Number of mixture components.
        n_init: Number of initialisations of the mixture model.
        max_iter: Maximum number of EM iterations.
        random_state: Seed passed to ``GaussianMixture``.
        output_dir: Directory for the saved artefacts; created if missing.

    Returns:
        dict with ``'station'`` (the held-out station name, or the string
        ``"None"`` for the temporal split) and ``'bic'`` (BIC of the fitted
        mixture evaluated on the scaled hold-out rows).

    Raises:
        ValueError: If the train or the hold-out split contains no rows, e.g.
            because ``station`` does not occur in ``df``.
    """
    if station is None:
        # Temporal split: hold out the final year of measurements.
        max_date = df['MESS_DATUM'].max()
        cutoff_date = max_date - pd.DateOffset(years=1)
        df_test = df[df['MESS_DATUM'] >= cutoff_date]
        df_train = df[df['MESS_DATUM'] < cutoff_date]
        station_name = "None"
    else:
        # Station split: hold out every row of the requested station.
        df_test = df[df['Stationsname'] == station]
        df_train = df[df['Stationsname'] != station]
        station_name = station

    # Fail early with an actionable message instead of the generic
    # "0 sample(s)" error the scaler would raise further down.
    if df_test.empty:
        raise ValueError(
            f"Hold-out split for station {station_name!r} is empty; "
            "check the station name or the date range of the data."
        )
    if df_train.empty:
        raise ValueError(
            f"Training split for station {station_name!r} is empty; "
            "holding out this selection leaves no rows to fit on."
        )

    # Drop identifier, metadata and calendar columns that are not used as features.
    # errors='ignore' tolerates frames in which some of them are already absent.
    drop_cols = ['MESS_DATUM', 'STATIONS_ID', 'RS_IND', 'WRTR', '   P', '   D', 'Stationsname', 'hour', 'day', 'month']
    df_train = df_train.drop(columns=drop_cols, errors='ignore')
    df_test = df_test.drop(columns=drop_cols, errors='ignore')

    # Feature matrices
    X_train = df_train.values
    X_test = df_test.values
    # Log which columns are actually used as features.
    print(df_train.columns)

    # Standardise using statistics of the training split only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Fit the mixture model.
    gmm = GaussianMixture(n_components=n_comp, n_init=n_init, max_iter=max_iter, random_state=random_state)
    gmm.fit(X_train_scaled)

    # Persist scaler and model. Path separators inside the station name would
    # otherwise be interpreted as sub-directories, so they are replaced in the
    # file name only; names without separators are written unchanged.
    file_tag = str(station_name)
    for separator in (os.sep, os.altsep):
        if separator:
            file_tag = file_tag.replace(separator, '_')
    os.makedirs(output_dir, exist_ok=True)
    dump(scaler, os.path.join(output_dir, f'scaler_{file_tag}.joblib'))
    dump(gmm, os.path.join(output_dir, f'gmm_{file_tag}.joblib'))

    # BIC of the fitted model on the held-out rows.
    bic_value = gmm.bic(X_test_scaled)

    return {'station': station_name, 'bic': bic_value}

In [None]:
# Train one model per held-out station in parallel. The worker count comes from
# the nJobs setting in the parameter cell instead of a hard-coded value.
results = Parallel(n_jobs=nJobs)(
    delayed(run_gmm_for_station)(
        station, df, nComponents, nInit, nIter, randomState, storagePath
    )
    for station in selected_Station
)


In [None]:
# Save the per-station BIC values as CSV. os.path.join keeps the target path
# correct whether or not storagePath ends with a path separator.
results_df = pd.DataFrame(results)
results_df.to_csv(os.path.join(storagePath, 'bic_results_Stations.csv'), index=False)
