In [7]:
# Importare le librerie necessarie
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import KFold
import joblib
import matplotlib.pyplot as plt
import numpy as np
import os

In [8]:
# File locations used throughout the notebook
path = 'speed_notebook/data'
transformed_csv = 'speed_notebook/data/transformed_carTelemetry.csv'
model_path = 'speed_notebook/data/random_forest_model.pkl'

# Verify, in order, that every required file exists; stop at the first missing one
for required_file in (transformed_csv, model_path):
    if os.path.exists(required_file):
        print(f"File {required_file} is in the directory {path}!")
        continue
    raise FileNotFoundError(f"File not found: {required_file}")

print('All necessary files are ready for be used!')


FileNotFoundError: File not found: speed_notebook/data/transformed_carTelemetry.csv

In [None]:
# Column names to assign to the telemetry frame, in file order
telemetry_columns = [
    'm_speed', 'm_throttle', 'm_steer', 'm_brake', 'm_clutch', 'm_gear',
    'm_engineRPM', 'm_drs', 'm_revLightsPercent', 'm_revLightsBitValue',
    'm_brakesTemperature', 'm_tyresSurfaceTemperature', 'm_tyresInnerTemperature',
    'm_engineTemperature', 'm_tyresPressure', 'm_surfaceType',
]

# Read the CSV without treating any row as a header (columns are numbered 0..N-1 at this point)
car_telemetry_transformed = pd.read_csv(transformed_csv, header=None)

print('CSV Head: ', car_telemetry_transformed.head(), sep='\n')

# Set the column names directly on the loaded frame
car_telemetry_transformed.columns = telemetry_columns

print('\nColumns: ', car_telemetry_transformed.columns)

In [None]:
# Coerce every column to a numeric dtype to avoid type errors later on;
# values that cannot be parsed as numbers become NaN (errors='coerce')
car_telemetry_transformed = car_telemetry_transformed.apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

print('CSV Head after conversion: ', car_telemetry_transformed.head(), sep='\n')

In [None]:
# Keep only complete rows: any row containing at least one NaN is discarded
car_telemetry_transformed = car_telemetry_transformed.dropna(axis=0, how='any')

print('CSV Head after removing NaN values: ', car_telemetry_transformed.head(), sep='\n')

In [None]:
# Split the frame into the target column (y) and the remaining feature columns (X)
target_column = 'm_speed'
y = car_telemetry_transformed[target_column]
X = car_telemetry_transformed.drop(columns=[target_column])

print(f'X: {X}')
print(f'y: {y}')

In [None]:
# Load the previously trained model from disk.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute arbitrary
# code — only load model files that come from a trusted source.
model = joblib.load(model_path)

# Cross-validate a model and compute the error metrics for every fold
def cross_validate_model(X, y, model, n_splits=5, detail_fold=3):
    """Run shuffled K-fold cross-validation and report per-fold error metrics.

    For every fold a fresh, unfitted clone of ``model`` (same hyperparameters)
    is trained on the training split and evaluated on the held-out split.
    The estimator passed in is never refitted, so a model loaded from disk
    keeps its original training after this function returns.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally with ``.iloc``.
    y : pandas.Series
        Target values, positionally aligned with ``X``.
    model : scikit-learn compatible estimator
        Template estimator; it must be cloneable with ``sklearn.base.clone``.
    n_splits : int, default 5
        Number of folds passed to ``KFold``.
    detail_fold : int or None, default 3
        1-based number of the fold for which a sample of actual vs. predicted
        values and the outliers are printed. ``None`` disables the detailed
        report.

    Returns
    -------
    tuple of (list, list, list)
        ``(mse_scores, r2_scores, mae_scores)`` with one entry per fold.
    """
    # Local import keeps this block self-contained; scikit-learn is already
    # a dependency of this notebook.
    from sklearn.base import clone

    # Fixed random_state so the shuffled splits are reproducible between runs
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    mse_scores = []
    r2_scores = []
    mae_scores = []

    for fold_number, (train_index, test_index) in enumerate(kf.split(X), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit a clone instead of the caller's estimator: fitting `model` itself
        # would silently overwrite the trained model with one fitted on this
        # fold's training split only.
        fold_model = clone(model)
        fold_model.fit(X_train, y_train)
        y_pred = fold_model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)

        mse_scores.append(mse)
        r2_scores.append(r2)
        mae_scores.append(mae)

        print(f"Fold {fold_number} - Mean Squared Error: {mse}")
        print(f"Fold {fold_number} - R-squared: {r2}")
        print(f"Fold {fold_number} - Mean Absolute Error: {mae}")

        if detail_fold is not None and fold_number == detail_fold:
            print(f"\n\tDetailed analysis for fold {detail_fold}:")
            print("\t\tActual vs. Predicted values sample:")
            # First ten held-out samples, selected positionally
            for actual, pred in zip(y_test.iloc[:10], y_pred[:10]):
                print(f"\t\t\tActual: {actual}, Predicted: {pred}")

            # An outlier is a sample whose absolute error exceeds twice the fold's MAE
            absolute_errors = (y_test - y_pred).abs()
            outliers = y_test[absolute_errors > 2 * mae]
            print("\n\t\tOutliers:")
            for outlier in outliers:
                print(f'\t\t\t{outlier}')

        print('---------------------------------------------------')

    print(f"Mean R-squared score: {np.mean(r2_scores)}")

    return mse_scores, r2_scores, mae_scores

# Run the cross-validation on the Random Forest model and keep the per-fold
# MSE, R-squared and MAE lists for later use
mse_scores, r2_scores, mae_scores = cross_validate_model(X, y, model)

In [None]:
# Plot the feature importances of a fitted model
def plot_feature_importance(model, X):
    """Draw a bar chart of the model's feature importances, largest first.

    Parameters
    ----------
    model : fitted estimator exposing ``feature_importances_``
    X : pandas.DataFrame
        Frame whose columns provide the tick labels; ``X.shape[1]`` determines
        how many bars are drawn.
    """
    scores = model.feature_importances_
    # Indices that sort the importances in descending order
    order = np.argsort(scores)[::-1]

    positions = range(X.shape[1])
    ranked_scores = scores[order]
    ranked_names = X.columns[order]

    plt.figure(figsize=(12, 6))
    plt.title("Feature Importance")
    plt.bar(positions, ranked_scores, align="center")
    plt.xticks(positions, ranked_names, rotation=90)
    plt.tight_layout()
    plt.show()

# Plot the importances of `model`, labelling the bars with the column names of X
plot_feature_importance(model, X)

In [None]:
# Distribution of actual vs. predicted values
def plot_actual_vs_predicted(y, y_pred):
    """Plot overlaid histograms of actual/predicted values and a residual plot.

    The left panel overlays the distributions of ``y`` and ``y_pred`` using the
    same bin edges, so the bars of the two histograms are directly comparable.
    The right panel plots the residuals (``y - y_pred``) against the predictions.

    Parameters
    ----------
    y : array-like
        Actual target values.
    y_pred : array-like
        Predicted values, positionally aligned with ``y``.

    Raises
    ------
    ValueError
        If ``y`` and ``y_pred`` do not contain the same number of values.
    """
    # Work on flat positional arrays: subtracting two pandas Series would align
    # them on their index labels instead of their positions.
    actual = np.asarray(y).ravel()
    predicted = np.asarray(y_pred).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y and y_pred must have the same length, got {actual.size} and {predicted.size}"
        )
    residuals = actual - predicted

    # Shared bin edges: with independent `bins=30` each histogram would get its
    # own range and bar width, making the overlay misleading.
    shared_bins = np.histogram_bin_edges(np.concatenate([actual, predicted]), bins=30)

    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)
    plt.hist(actual, bins=shared_bins, alpha=0.5, label='Actual')
    plt.hist(predicted, bins=shared_bins, alpha=0.5, label='Predicted')
    plt.legend(loc='upper right')
    plt.title('Distribution of Actual vs. Predicted Values')
    plt.xlabel('Value')
    plt.ylabel('Count')

    plt.subplot(1, 2, 2)
    plt.scatter(predicted, residuals)
    # Zero-residual reference line
    plt.axhline(y=0, color='r', linestyle='-')
    plt.title('Residuals vs. Predicted Values')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')

    plt.tight_layout()
    plt.show()

# Example predictions for the plots: predict on the full feature matrix X,
# then compare them with the actual targets y
y_pred = model.predict(X)
plot_actual_vs_predicted(y, y_pred)