In [52]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

In [53]:
# Load the dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv("final_df.csv")

# # Apply One-Hot Encoding for the 'binary_trainingtype' column
# df = pd.get_dummies(df, columns=['binary_trainingtype'], drop_first=False)

# # Convert boolean columns to 0 and 1
# df[df.columns[df.columns.str.startswith('binary_trainingtype')]] = df[df.columns[df.columns.str.startswith('binary_trainingtype')]].astype(int)

# # Display the updated DataFrame
# print(df.head())

In [54]:
from itertools import product

from sklearn.model_selection import train_test_split

# Smallest group size for which both 70/30 splits below leave every part
# non-empty; for smaller groups train_test_split raises ValueError because
# one of the resulting parts would be empty.
MIN_ROWS_PER_GROUP = 4

# Shuffle the rows first to avoid ordering bias.
# NOTE: this rebinds `df`, so re-running this cell without reloading the data
# shuffles an already-shuffled frame and produces a different row order.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Split every (ervaring, geslacht, gewichtsklasse) combination on its own so
# that each sufficiently large combination is represented in the training,
# validation and test sets. The pieces are collected in lists and concatenated
# once at the end instead of growing three DataFrames inside the loop.
train_parts, val_parts, test_parts = [], [], []
small_groups = 0

# product() visits the combinations in the same order as three nested loops
# (right-most column varies fastest) and evaluates each unique() only once.
for ervaring, geslacht, gewichtsklasse in product(
    df['ervaring'].unique(),
    df['binary_geslacht'].unique(),
    df['binary_gewichtsklasse'].unique(),
):
    subset = df[
        (df['ervaring'] == ervaring)
        & (df['binary_geslacht'] == geslacht)
        & (df['binary_gewichtsklasse'] == gewichtsklasse)
    ]
    if subset.empty:
        continue

    if len(subset) < MIN_ROWS_PER_GROUP:
        # Too few rows to split three ways: keep them for training rather
        # than crashing or dropping data. Reported after the loop.
        train_parts.append(subset)
        small_groups += 1
        continue

    # 70% goes to training; the remaining 30% is split 70/30 again, so each
    # group ends up roughly 70% train / 21% validation / 9% test.
    temp_train, temp_temp = train_test_split(subset, test_size=0.3, random_state=42)
    temp_val, temp_test = train_test_split(temp_temp, test_size=0.3, random_state=42)
    train_parts.append(temp_train)
    val_parts.append(temp_val)
    test_parts.append(temp_test)

# Concatenate once with a fresh 0..n-1 index; fall back to an empty frame when
# nothing was collected (pd.concat raises on an empty list).
train_data = pd.concat(train_parts, ignore_index=True) if train_parts else pd.DataFrame()
val_data = pd.concat(val_parts, ignore_index=True) if val_parts else pd.DataFrame()
test_data = pd.concat(test_parts, ignore_index=True) if test_parts else pd.DataFrame()

if small_groups:
    print(f"Let op: {small_groups} groep(en) met minder dan {MIN_ROWS_PER_GROUP} rijen volledig aan de trainingsdata toegevoegd")

# Check the resulting split sizes
print(f"Trainingsdata: {len(train_data)} rijen")
print(f"Validatiedata: {len(val_data)} rijen")
print(f"Testdata: {len(test_data)} rijen")

Trainingsdata: 3018 rijen
Validatiedata: 905 rijen
Testdata: 391 rijen


In [55]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Separate features (X) and target (y) for the training, validation and test
# sets. Every column except '2k tijd' is used as a feature.
X_train = train_data.drop(columns=['2k tijd'])
y_train = train_data['2k tijd']
X_val = val_data.drop(columns=['2k tijd'])
y_val = val_data['2k tijd']
X_test = test_data.drop(columns=['2k tijd'])
y_test = test_data['2k tijd']

# Standardize the features. The scaler is fitted on the training data only so
# that no statistics from the validation/test sets leak into preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Create and train the MLPRegressor (three hidden layers: 128, 64 and 32 units).
# random_state is fixed so the weight initialisation is reproducible.
mlp = MLPRegressor(hidden_layer_sizes=(128, 64, 32), activation='relu', solver='adam', max_iter=2000, random_state=42)
mlp.fit(X_train_scaled, y_train)

# Validate the model
y_val_pred = mlp.predict(X_val_scaled)
val_mse = mean_squared_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)

print(f"Validation MSE: {val_mse:.2f}")
print(f"Validation R^2: {val_r2:.2f}")

# Evaluate on test data. These predictions are reused for the comparison table
# below, so the model is only run once on the test set.
y_test_pred = mlp.predict(X_test_scaled)
test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"Test MSE: {test_mse:.2f}")
print(f"Test R^2: {test_r2:.2f}")

# Create a DataFrame to compare the actual vs predicted values.
# 'Difference' is actual minus predicted, so positive values mean the model
# predicted a lower value than the actual one.
results = pd.DataFrame({
    'Actual 2k tijd': y_test,
    'Predicted 2k tijd': y_test_pred,
    'Difference': y_test - y_test_pred
})

# Display a few examples
print(results.head(10))  # You can adjust the number of rows displayed


Validation MSE: 101.94
Validation R^2: 0.90
Test MSE: 99.26
Test R^2: 0.90
   Actual 2k tijd  Predicted 2k tijd  Difference
0           427.7         422.825412    4.874588
1           420.5         427.364604   -6.864604
2           418.6         419.036015   -0.436015
3           427.7         426.561294    1.138706
4           427.7         429.832566   -2.132566
5           420.5         426.882781   -6.382781
6           421.9         423.548391   -1.648391
7           449.0         425.418140   23.581860
8           431.6         427.103463    4.496537
9           431.6         425.175518    6.424482
