In [None]:
# Random Forest Hyperparametertuning mittels RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
from Load_Data_for_Modelling import Get_data
from Splitting_Scaling_Function import Split_Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import os
import json
import joblib

# Load the data set and build the train/test split used for the search.
data = Get_data(0,1800,0)
X_train, X_test, Y_train, Y_test = Split_Scaling(data, size=0.2, Train_Test_Split=2, Datengröße=1800, random=42)

# Number of trees in the forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features considered at every split
max_features = ['log2', 'sqrt', 10, 0.5]
# Maximum depth of the trees (None = grow until the leaves are pure)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split an internal node
min_samples_split = [2, 5, 10]
# Minimum number of samples that must remain in a leaf node
min_samples_leaf = [1, 2, 3, 4]
# Whether each tree is trained on a bootstrap sample (True) or on the full training set (False)
bootstrap = [True, False]
# Assemble the search space from the candidate lists above
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

# Base estimator. The seed makes the fitted forests reproducible; without it
# the CV scores (and therefore the selected parameters) vary between runs even
# though the search itself is seeded.
rf_op = RandomForestRegressor(random_state=42)

# Randomised search: 50 sampled parameter combinations, each validated with
# 3-fold cross-validation, using all available cores.
# NOTE(review): no `scoring` is passed, so the estimator's default score is
# used for model selection, whereas the KNR and decision-tree searches in this
# notebook optimise MAE - confirm this difference is intended.
rf_random = RandomizedSearchCV(estimator = rf_op, param_distributions = random_grid, n_iter = 50, cv = 3, verbose=2, random_state=42, n_jobs = -1)

# Fit the model to run the search
rf_random.fit(X_train, Y_train)

# Print the best hyperparameters
print("Beste Parameter:", rf_random.best_params_)

# Persist the best hyperparameters as JSON
best_params = rf_random.best_params_
Pfad = r'C:\Users\corvi\OneDrive - stud.tu-darmstadt.de\Desktop\Masterthesis\14_Modelle_Hyperparameter\Konventionell'
# Make sure the target directory exists so the result of the expensive search
# is not lost to a FileNotFoundError at save time.
os.makedirs(Pfad, exist_ok=True)
hyperparameters_pfad = os.path.join(Pfad, 'best_hyperparameters_RandomForest.json')
with open(hyperparameters_pfad, 'w') as json_file:
    json.dump(best_params, json_file)

# Persist the refitted best model
model_pfad = os.path.join(Pfad, 'best_random_forest_model.pkl')
joblib.dump(rf_random.best_estimator_, model_pfad)

In [6]:
# Validieren des besten Modells
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Show the hyperparameters selected by the random search
print("Beste Parameter:", rf_random.best_params_)

# Predict with the refitted best model: training split first, then test split
Y_pred_best_train, Y_pred_best_test = [
    rf_random.predict(features) for features in (X_train, X_test)
]

# Mean absolute error, reported separately for every target column
MAE_best_train = mean_absolute_error(
    Y_train, Y_pred_best_train, multioutput='raw_values'
)
MAE_best_test = mean_absolute_error(
    Y_test, Y_pred_best_test, multioutput='raw_values'
)

# Coefficient of determination, reported separately for every target column
r2__best_train = r2_score(
    Y_train, Y_pred_best_train, multioutput='raw_values'
)
r2_best_test = r2_score(
    Y_test, Y_pred_best_test, multioutput='raw_values'
)

# Report the MAE values (one line per split)
print(
    f'MAE für die Trainingsdaten des best fits: {MAE_best_train}',
    f'MAE für die Testdaten des best fits: {MAE_best_test}',
    sep='\n',
)

# Report the R2 scores (one line per split)
print(
    f'R2-Score für die Trainingsdaten des best fits: {r2__best_train}',
    f'R2-Score für die Testdaten des best fits: {r2_best_test}',
    sep='\n',
)


Beste Parameter: {'n_estimators': 800, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 20, 'bootstrap': False}
MAE für die Trainingsdaten des best fits: [0. 0. 0.]
MAE für die Testdaten des best fits: [0.35       1.25611607 1.5771875 ]
R2-Score für die Trainingsdaten des best fits: [1. 1. 1.]
R2-Score für die Testdaten des best fits: [0.32324344 0.57946661 0.39636909]


In [7]:
# Kneighbor Hyperparametertuning mittels RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
from Load_Data_for_Modelling import Get_data
from Splitting_Scaling_Function import Split_Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import os
import json
import joblib
from sklearn.metrics import make_scorer, mean_absolute_error

# Reload the data and build the train/test split
data = Get_data(0,1800)
X_train, X_test, Y_train, Y_test = Split_Scaling(data, size=0.2, Train_Test_Split=2, Datengröße=1800, random=42, Validation_Data=0)

# Hyperparameter search space.
# NOTE(review): with the estimator's default p=2, 'minkowski' and 'euclidean'
# describe the same distance, so those two entries are redundant candidates.
random_grid = {'n_neighbors': [3, 5, 7,9,11,13,15],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'metric' :['minkowski','euclidean','manhattan']
    }

# Initialise the k-nearest-neighbours regressor
KNR = KNeighborsRegressor()

# Scoring function: MAE is a loss, so greater_is_better=False makes the search
# maximise the negated MAE.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# Run the randomised search (50 sampled combinations, 3-fold CV, all cores).
# random_state fixes which combinations are sampled so reruns are reproducible,
# matching the seeded Random Forest search in this notebook.
KNR_opt = RandomizedSearchCV(estimator=KNR, param_distributions= random_grid, n_iter=50, cv=3, scoring=mae_scorer, verbose=2, n_jobs=-1, random_state=42)
KNR_opt.fit(X_train, Y_train)

# Show the best hyperparameters
print("Beste Parameter:", KNR_opt.best_params_)

# Persist the best hyperparameters as JSON.
# NOTE(review): the file name says "30Trials" although n_iter is 50; the name
# is kept unchanged so existing consumers of the file still find it.
best_params = KNR_opt.best_params_
Pfad = r'C:\Users\corvi\OneDrive - stud.tu-darmstadt.de\Desktop\Masterthesis\14_Modelle_Hyperparameter\Konventionell'
# Make sure the target directory exists before writing the results
os.makedirs(Pfad, exist_ok=True)
hyperparameters_pfad = os.path.join(Pfad, 'best_hyperparameters_KNR_30Trials.json')
with open(hyperparameters_pfad, 'w') as json_file:
    json.dump(best_params, json_file)

# Persist the refitted best model. The fitted estimator is dumped, not the
# parameter dict, so the .pkl file can be loaded and used for prediction.
model_pfad = os.path.join(Pfad, 'best_KNR_model.pkl')
joblib.dump(KNR_opt.best_estimator_, model_pfad)

Anzahl der Zeilen im finalen DataFrame: 255600
None
[41, 19, 83, 75, 70, 48, 39, 152, 35, 121, 21, 20, 36, 68]
25200
25200
<class 'sklearn.preprocessing._data.StandardScaler'>
Fitting 3 folds for each of 30 candidates, totalling 90 fits
Beste Parameter: {'weights': 'uniform', 'n_neighbors': 3, 'metric': 'manhattan', 'algorithm': 'kd_tree'}


In [6]:
# Validieren des besten Modells des KNRs
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Predict with the refitted best KNR model: training split first, then test split
Y_pred_best_train, Y_pred_best_test = [
    KNR_opt.predict(features) for features in (X_train, X_test)
]

# Mean absolute error of the predictions, one value per target column
MAE_best_train = mean_absolute_error(
    Y_train, Y_pred_best_train, multioutput='raw_values'
)
MAE_best_test = mean_absolute_error(
    Y_test, Y_pred_best_test, multioutput='raw_values'
)

# R2 score of the predictions, one value per target column
r2__best_train = r2_score(
    Y_train, Y_pred_best_train, multioutput='raw_values'
)
r2_best_test = r2_score(
    Y_test, Y_pred_best_test, multioutput='raw_values'
)

# Report the MAE values (one line per split)
print(
    f'MAE für die Trainingsdaten des best fits: {MAE_best_train}',
    f'MAE für die Testdaten des best fits: {MAE_best_test}',
    sep='\n',
)

# Report the R2 scores (one line per split)
print(
    f'R2-Score für die Trainingsdaten des best fits: {r2__best_train}',
    f'R2-Score für die Testdaten des best fits: {r2_best_test}',
    sep='\n',
)

MAE für die Trainingsdaten des best fits: [0. 0. 0.]
MAE für die Testdaten des best fits: [0.28571429 0.66160549 1.85424797]
R2-Score für die Trainingsdaten des best fits: [1. 1. 1.]
R2-Score für die Testdaten des best fits: [ 0.54098361  0.88411528 -0.25882954]


In [None]:
# Decision Tree Regressor Hyperparametersuche
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from Load_Data_for_Modelling import Get_data
from Splitting_Scaling_Function import Split_Scaling

# Reload the data and build the train/test split
data = Get_data(0,1800,0)
X_train, X_test, Y_train, Y_test = Split_Scaling(data, size=0.2, Train_Test_Split=2, Datengröße=1800, random=42)

# Model definition. The seed makes tie-breaking between equally good splits
# deterministic, so repeated runs select the same tree.
model = DecisionTreeRegressor(random_state=42)

# Parameter grid that is searched exhaustively
param_grid = {
    'criterion': ['absolute_error', 'friedman_mse', 'squared_error'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 10, 20],
    'min_samples_leaf': [1, 5, 10]
}

# Exhaustive grid search with 5-fold CV, using (negated) MAE as selection metric
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_absolute_error')
grid_search.fit(X_train, Y_train)

# Show the best hyperparameters
print("Beste Parameter:", grid_search.best_params_)

# Evaluate the tuned decision tree itself. Predicting with `grid_search`
# (instead of the KNR search object from another cell) makes the reported
# metrics belong to this model and lets the cell run on a fresh kernel.
Y_pred_best_train = grid_search.predict(X_train)
Y_pred_best_test = grid_search.predict(X_test)

# Mean absolute error, one value per target column
MAE_best_train = mean_absolute_error(Y_train, Y_pred_best_train, multioutput='raw_values')
MAE_best_test = mean_absolute_error(Y_test, Y_pred_best_test, multioutput='raw_values')

# R2 score, one value per target column
r2__best_train = r2_score(Y_train, Y_pred_best_train, multioutput='raw_values')
r2_best_test = r2_score(Y_test, Y_pred_best_test, multioutput='raw_values')

print(f'MAE für die Trainingsdaten des best fits: {MAE_best_train}')
print(f'MAE für die Testdaten des best fits: {MAE_best_test}')

print(f'R2-Score für die Trainingsdaten des best fits: {r2__best_train}')
print(f'R2-Score für die Testdaten des best fits: {r2_best_test}')

In [None]:
#SVR Hyperparametertuning mittels RandomizedSearchCV

from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
from Load_Data_for_Modelling import Get_data
from Splitting_Scaling_Function import Split_Scaling

# Reload the data and build the train/test split
data = Get_data(0,1800)
X_train, X_test, Y_train, Y_test = Split_Scaling(data, size=0.2, Train_Test_Split=2, Datengröße=1800, random=42)

# Candidate values for the SVR hyperparameters
c_range = np.logspace(-0, 4, 8)
gamma_range = np.logspace(-4, 0, 8)
epsilon = [0.1,0.2,0.3]
kernel= ['rbf','linear','sigmoid']

# Search space. The SVR is wrapped in a MultiOutputRegressor below, so its
# parameters are addressed through the wrapper with the `estimator__` prefix.
random_grid = {'estimator__C': c_range,
               'estimator__gamma': gamma_range,
               'estimator__kernel': kernel,
               'estimator__epsilon': epsilon
               }

print(random_grid)
print(Y_train['Y_Opt-Y_ist'])

# Initialise the base model
svr = SVR()

# SVR only supports a single target, so it is wrapped in a
# MultiOutputRegressor that fits one SVR per target column.
SVR_op = MultiOutputRegressor(svr)

# Randomised search with 3-fold cross-validation over 50 sampled parameter
# combinations, using all available cores. The wrapped estimator `SVR_op`
# must be searched (not the bare `svr`), because the fit below passes the
# full multi-column target.
SVR_random = RandomizedSearchCV(SVR_op, param_distributions = random_grid, n_iter = 50, cv = 3, verbose=2, random_state=42, n_jobs = -1)

# Fit the random search model
SVR_random.fit(X_train, Y_train)

# Validate the refitted best model on training and test data with MAE and R2
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
print(SVR_random.best_params_)

Y_pred_best_train_SVR = SVR_random.predict(X_train)
Y_pred_best_test_SVR = SVR_random.predict(X_test)

# Score the multi-output predictions against the complete target frame so
# that MAE and R2 refer to the same targets; 'raw_values' yields one value
# per target column.
MAE_best_train = mean_absolute_error(Y_train, Y_pred_best_train_SVR, multioutput='raw_values')
MAE_best_test = mean_absolute_error(Y_test, Y_pred_best_test_SVR, multioutput='raw_values')

r2__best_train = r2_score(Y_train, Y_pred_best_train_SVR, multioutput='raw_values')
r2_best_test = r2_score(Y_test, Y_pred_best_test_SVR, multioutput='raw_values')

print(f'MAE für die Trainingsdaten des best fits: {MAE_best_train}')
print(f'MAE für die Testdaten des best fits: {MAE_best_test}')

print(f'R2-Score für die Trainingsdaten des best fits: {r2__best_train}')
print(f'R2-Score für die Testdaten des best fits: {r2_best_test}')