In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Convenience function to create display a progress bar.
# Source : https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
def print_progress_bar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total:
        print()

script_dir = Path.cwd() 
df = pd.read_csv(script_dir.parent / '4 - Dataset' / "regression_weld_data.csv")
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1477 entries, 0 to 1476
Data columns (total 32 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Carbon concentration (weight%)                1477 non-null   float64
 1   Silicon concentration (weight%)               1477 non-null   float64
 2   Manganese concentration (weight%)             1477 non-null   float64
 3   Sulphur concentration (weight%)               1477 non-null   float64
 4   Phosphorus concentration (weight%)            1477 non-null   float64
 5   Oxygen concentration (%)                      1477 non-null   float64
 6   Nitrogen concentration (%)                    1477 non-null   float64
 7   Current (A)                                   1477 non-null   float64
 8   Voltage (V)                                   1477 non-null   float64
 9   Heat input (kJ/mm)                            1477 non-null   f

In [2]:

df = df.sample(frac=1, random_state=42).reset_index(drop=True)
print(df.info())
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1477 entries, 0 to 1476
Data columns (total 32 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Carbon concentration (weight%)                1477 non-null   float64
 1   Silicon concentration (weight%)               1477 non-null   float64
 2   Manganese concentration (weight%)             1477 non-null   float64
 3   Sulphur concentration (weight%)               1477 non-null   float64
 4   Phosphorus concentration (weight%)            1477 non-null   float64
 5   Oxygen concentration (%)                      1477 non-null   float64
 6   Nitrogen concentration (%)                    1477 non-null   float64
 7   Current (A)                                   1477 non-null   float64
 8   Voltage (V)                                   1477 non-null   float64
 9   Heat input (kJ/mm)                            1477 non-null   f

Unnamed: 0,Carbon concentration (weight%),Silicon concentration (weight%),Manganese concentration (weight%),Sulphur concentration (weight%),Phosphorus concentration (weight%),Oxygen concentration (%),Nitrogen concentration (%),Current (A),Voltage (V),Heat input (kJ/mm),...,AC or DC,Electrode positive or negative,Type of weld,Yield strength (MPa),Ultimate tensile strength (MPa),Elongation (%),Reduction of Area (%),Charpy temperature (deg C),Charpy impact toughness (J),Hardness (kg/mm2)
0,0.047,0.42,1.0,0.015,0.016,0.0695,0.0165,300.0,28.0,2.08,...,0.0,1.0,2.0,537.0,620.0,11.0,24.0,60.0,164.0,
1,0.068,0.32,1.38,0.004,0.005,0.0376,0.0074,170.0,21.0,1.0,...,0.0,1.0,4.0,,,,,-51.0,100.0,
2,0.084,0.3,0.54,0.005,0.008,0.066,0.0066,170.0,21.0,2.4,...,0.0,1.0,1.0,530.0,653.0,21.5,69.0,,,
3,0.045,0.31,1.39,0.005,0.007,0.0422,0.0083,170.0,21.0,1.0,...,0.0,1.0,0.0,414.0,502.0,31.0,80.6,-50.0,129.0,
4,0.077,0.35,1.02,0.007,0.005,0.0399,0.0075,170.0,21.0,1.0,...,0.0,1.0,0.0,332.0,461.0,33.3,80.3,-80.0,100.0,


## Création X et y

In [3]:
L_features = [
    'Carbon concentration (weight%)', 'Silicon concentration (weight%)',
       'Manganese concentration (weight%)', 'Sulphur concentration (weight%)',
       'Phosphorus concentration (weight%)', 'Nickel concentration (weight%)',
       'Chromium concentration (weight%)',
       'Molybdenum concentration (weight%)',
       'Vanadium concentration (weight%)', 'Copper concentration (weight%)',
       'Oxygen concentration (%)', 'Titanium concentration (%)',
       'Nitrogen concentration (%)', 'Aluminium concentration (%)',
       'Boron concentration (%)', 'Niobium concentration (%)', 'Current (A)',
       'Voltage (V)', 'AC or DC', 'Electrode positive or negative',
       'Heat input (kJ/mm)', 'Interpass temperature (deg C)', 'Type of weld',
       'Post weld heat treatment temperature (deg C)',
       'Post weld heat treatment time (hours)', 'Charpy temperature (deg C)'
]


L_targets = [
    'Yield strength (MPa)', 
       'Ultimate tensile strength (MPa)', 'Elongation (%)',
       'Reduction of Area (%)', 'Charpy impact toughness (J)', 'Hardness (kg/mm2)'
]


X = df.loc[:, L_features]
ys = df.loc[:,L_targets]
print(ys.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1477 entries, 0 to 1476
Data columns (total 6 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Yield strength (MPa)             1116 non-null   float64
 1   Ultimate tensile strength (MPa)  1023 non-null   float64
 2   Elongation (%)                   971 non-null    float64
 3   Reduction of Area (%)            962 non-null    float64
 4   Charpy impact toughness (J)      879 non-null    float64
 5   Hardness (kg/mm2)                118 non-null    float64
dtypes: float64(6)
memory usage: 69.4 KB
None


In [4]:
from sklearn.model_selection import train_test_split

def trainTest(X,y) :
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV, cross_val_predict, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import BayesianRidge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# Fonction pour calculer le RMSE
def calculate_rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# Fonction pour calculer le R² ajusté
def calculate_adjusted_r2(r2, n, p):
    return 1 - (1 - r2) * (n - 1) / (n - p - 1)

# Configuration de la validation croisée (5 folds)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Fonction pour entraîner et évaluer le modèle Bayesian Ridge
def bayesian_ridge_regression(X_train, X_test, y_train, y_test):
    model = BayesianRidge()

    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(model, X_train, y_train, cv=kf)

    # Ajustement du modèle et prédictions sur l'ensemble de test
    model.fit(X_train, y_train)
    y_test_pred = model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "Bayesian Ridge",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": model.get_params()
    }

# Fonction pour entraîner et évaluer le modèle SVR avec Random Grid Search
def svr_regression(X_train, X_test, y_train, y_test):
    model = SVR()
    param_grid = {
        'kernel': ['rbf', 'poly'],
        'C': np.logspace(-3, 3, 7),
        'gamma': ['scale', 'auto'],
        'degree': [2, 3, 4]
    }
    random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=4, cv=kf, random_state=42)
    random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_

    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(best_model, X_train, y_train, cv=kf)

    # Prédictions sur l'ensemble de test
    y_test_pred = best_model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "SVR",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": best_model.get_params()
    }

# Fonction pour entraîner et évaluer le modèle de Gradient Boosting avec Random Grid Search
def gradient_boosting_regression(X_train, X_test, y_train, y_test):
    model = GradientBoostingRegressor()
    param_grid = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.1, 0.05],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 5, 10]
    }
    random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=4, cv=kf, random_state=42)
    random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_

    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(best_model, X_train, y_train, cv=kf)

    # Prédictions sur l'ensemble de test
    y_test_pred = best_model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "Gradient Boosting",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": best_model.get_params()
    }

# Fonction pour entraîner et évaluer le modèle de Forêt Aléatoire avec Random Grid Search
def random_forest_regression(X_train, X_test, y_train, y_test):
    model = RandomForestRegressor()
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }
    random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=4, cv=kf, random_state=42)
    random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_

    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(best_model, X_train, y_train, cv=kf)

    # Prédictions sur l'ensemble de test
    y_test_pred = best_model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "Random Forest",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": best_model.get_params()
    }

# Fonction pour entraîner et évaluer la régression linéaire
def linear_regression(X_train, X_test, y_train, y_test):
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(model, X_train, y_train, cv=kf)

    # Prédictions sur l'ensemble de test
    y_test_pred = model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "Linear Regression",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": model.get_params()
    }

# Fonction pour entraîner et évaluer la régression Ridge avec Random Grid Search
def ridge_regression(X_train, X_test, y_train, y_test):
    model = Ridge()
    param_grid = {
        'alpha': [0.01, 0.1, 1, 10, 100]
    }
    random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=4, cv=kf, random_state=42)
    random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_

    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(best_model, X_train, y_train, cv=kf)

    # Prédictions sur l'ensemble de test
    y_test_pred = best_model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "Ridge Regression",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": best_model.get_params()
    }

# Fonction pour entraîner et évaluer la régression Lasso avec Random Grid Search
def lasso_regression(X_train, X_test, y_train, y_test):
    model = Lasso()
    param_grid = {
        'alpha': [0.01, 0.1, 1, 10, 100]
    }
    random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=4, cv=kf, random_state=42)
    random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_

    # Validation croisée sur l'ensemble d'entraînement
    y_train_pred_cv = cross_val_predict(best_model, X_train, y_train, cv=kf)

    # Prédictions sur l'ensemble de test
    y_test_pred = best_model.predict(X_test)

    # Calcul des scores
    rmse_train = calculate_rmse(y_train, y_train_pred_cv)
    rmse_test = calculate_rmse(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred_cv)
    r2_test = r2_score(y_test, y_test_pred)

    # Nombre d'observations et de prédicteurs
    n_train, p_train = X_train.shape

    return {
        "Model": "Lasso Regression",
        "RMSE_Train": rmse_train,
        "RMSE_Test": rmse_test,
        "R2_Train": r2_train,
        "R2_Test": r2_test,
        "Adjusted_R2_Train": calculate_adjusted_r2(r2_train, n_train, p_train),
        "Adjusted_R2_Test": calculate_adjusted_r2(r2_test, len(y_test), 1),  # 1 car on a 1 seul prédicteur
        "Hyperparameters": best_model.get_params()
    }


# Fonction principale qui exécute tous les modèles et retourne les résultats dans un DataFrame
def evaluate_all_models(X_train, X_test, y_train, y_test):
    results = []

    # Appel des fonctions de régression et stockage des résultats
    results.append(bayesian_ridge_regression(X_train, X_test, y_train, y_test))
    results.append(svr_regression(X_train, X_test, y_train, y_test))
    results.append(gradient_boosting_regression(X_train, X_test, y_train, y_test))
    results.append(random_forest_regression(X_train, X_test, y_train, y_test))
    results.append(linear_regression(X_train, X_test, y_train, y_test))
    results.append(ridge_regression(X_train, X_test, y_train, y_test))
    results.append(lasso_regression(X_train, X_test, y_train, y_test))
    # Conversion des résultats en DataFrame
    return pd.DataFrame(results)


def evaluateAllTarget(df, Features, Targets):
    progress = 0
    results_dict = {}  # Dictionnaire pour stocker les DataFrames

    # Display a progress bar
    print_progress_bar(progress, int(len(Targets)), prefix='Progress:', suffix='Complete', length=50)

    for i in range(len(Targets)):
        progress += 1
        print_progress_bar(progress, int(len(Targets)), prefix='Progress:', suffix='Complete', length=50)

        col_name = Targets[i]

        # Filtrer les colonnes selon la condition
        if Targets[i] == 'Charpy impact toughness (J)':
            df_weld = df.loc[:, Features + [Targets[i]]]
        else:
            df_weld = df.loc[:, Features + [Targets[i]]]
            df_weld = df_weld.drop('Charpy temperature (deg C)', axis=1)

        df_weld = df_weld.dropna()  # Supprimer les lignes avec des valeurs manquantes
        print(f"\n\nTaille du dataset avant drop duplicates : {col_name} : {df_weld.shape}")
        
        df_weld = df_weld.drop_duplicates()  # Supprimer les doublons
        X = df_weld.drop(Targets[i], axis=1)  # Features
        y = df_weld.loc[:, Targets[i]]  # Target

        print(f"Taille du dataset avec target {col_name} : {df_weld.shape}")

        # Diviser en ensemble d'entraînement et de test
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        print(f"Résultat pour {col_name} avec pour moyenne {y.mean()}: ")

        # Évaluer tous les modèles et obtenir un DataFrame des résultats
        df_results = evaluate_all_models(X_train, X_test, y_train, y_test)

        # Stocker df_results dans le dictionnaire avec col_name comme clé
        results_dict[col_name] = df_results

        print(df_results)

    return results_dict  # Retourner le dictionnaire de DataFrames

results_dict = evaluateAllTarget(df, L_features, L_targets)

Progress: |████████------------------------------------------| 16.7% Complete

Taille du dataset avant drop duplicates : Yield strength (MPa) : (1116, 26)
Taille du dataset avec target Yield strength (MPa) : (1064, 26)
Résultat pour Yield strength (MPa) avec pour moyenne 500.44605263157894: 
               Model  RMSE_Train  RMSE_Test  R2_Train   R2_Test  \
0     Bayesian Ridge   85.659885  70.750137  0.190890  0.148333   
1                SVR   81.217264  64.174482  0.272641  0.299288   
2  Gradient Boosting   45.985937  41.451613  0.766814  0.707653   
3      Random Forest   48.899404  41.294730  0.736331  0.709862   
4  Linear Regression   78.728987  67.243349  0.316526  0.230668   
5   Ridge Regression   78.948849  66.948422  0.312704  0.237402   
6   Lasso Regression   78.737568  66.967695  0.316378  0.236963   

   Adjusted_R2_Train  Adjusted_R2_Test  \
0           0.164689          0.145107   
1           0.249086          0.296633   
2           0.759263          0.706546   
3 

In [6]:
results_dict['Yield strength (MPa)'].sort_values(by='R2_Test', ascending=False)

Unnamed: 0,Model,RMSE_Train,RMSE_Test,R2_Train,R2_Test,Adjusted_R2_Train,Adjusted_R2_Test,Hyperparameters
3,Random Forest,48.899404,41.29473,0.736331,0.709862,0.727792,0.708763,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri..."
2,Gradient Boosting,45.985937,41.451613,0.766814,0.707653,0.759263,0.706546,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ..."
1,SVR,81.217264,64.174482,0.272641,0.299288,0.249086,0.296633,"{'C': 10.0, 'cache_size': 200, 'coef0': 0.0, '..."
5,Ridge Regression,78.948849,66.948422,0.312704,0.237402,0.290447,0.234513,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
6,Lasso Regression,78.737568,66.967695,0.316378,0.236963,0.294239,0.234072,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
4,Linear Regression,78.728987,67.243349,0.316526,0.230668,0.294393,0.227754,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
0,Bayesian Ridge,85.659885,70.750137,0.19089,0.148333,0.164689,0.145107,"{'alpha_1': 1e-06, 'alpha_2': 1e-06, 'alpha_in..."


In [7]:
results_dict['Charpy impact toughness (J)'].sort_values(by='R2_Test', ascending=False)

Unnamed: 0,Model,RMSE_Train,RMSE_Test,R2_Train,R2_Test,Adjusted_R2_Train,Adjusted_R2_Test,Hyperparameters
3,Random Forest,25.441196,25.241548,0.736814,0.760525,0.725952,0.759427,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri..."
2,Gradient Boosting,25.239356,27.457379,0.740973,0.716635,0.730283,0.715336,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ..."
4,Linear Regression,33.542138,37.734147,0.542522,0.464824,0.523642,0.462369,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
6,Lasso Regression,33.829283,37.966905,0.534656,0.458202,0.515451,0.455716,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
5,Ridge Regression,34.176537,38.131062,0.525054,0.453506,0.505453,0.450999,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
1,SVR,37.431188,39.720152,0.430288,0.407008,0.406776,0.404287,"{'C': 10.0, 'cache_size': 200, 'coef0': 0.0, '..."
0,Bayesian Ridge,37.44853,40.530645,0.42976,0.38256,0.406226,0.379728,"{'alpha_1': 1e-06, 'alpha_2': 1e-06, 'alpha_in..."


In [8]:
results_dict['Ultimate tensile strength (MPa)'].sort_values(by='R2_Test', ascending=False)

Unnamed: 0,Model,RMSE_Train,RMSE_Test,R2_Train,R2_Test,Adjusted_R2_Train,Adjusted_R2_Test,Hyperparameters
2,Gradient Boosting,41.707355,41.076623,0.776012,0.794601,0.768035,0.793748,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ..."
3,Random Forest,46.084784,47.904556,0.726527,0.720641,0.716787,0.719481,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri..."
6,Lasso Regression,67.304517,70.314775,0.416705,0.39813,0.395932,0.395633,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
1,SVR,68.49774,70.40199,0.395839,0.396636,0.374324,0.394132,"{'C': 10.0, 'cache_size': 200, 'coef0': 0.0, '..."
4,Linear Regression,67.343323,70.686444,0.416032,0.39175,0.395235,0.389227,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
5,Ridge Regression,67.570171,70.914381,0.412091,0.387821,0.391154,0.385281,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
0,Bayesian Ridge,75.200008,78.954495,0.271825,0.241137,0.245893,0.237988,"{'alpha_1': 1e-06, 'alpha_2': 1e-06, 'alpha_in..."


In [9]:
results_dict['Charpy impact toughness (J)'].sort_values(by='R2_Test', ascending=False)

Unnamed: 0,Model,RMSE_Train,RMSE_Test,R2_Train,R2_Test,Adjusted_R2_Train,Adjusted_R2_Test,Hyperparameters
3,Random Forest,25.441196,25.241548,0.736814,0.760525,0.725952,0.759427,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri..."
2,Gradient Boosting,25.239356,27.457379,0.740973,0.716635,0.730283,0.715336,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ..."
4,Linear Regression,33.542138,37.734147,0.542522,0.464824,0.523642,0.462369,"{'copy_X': True, 'fit_intercept': True, 'n_job..."
6,Lasso Regression,33.829283,37.966905,0.534656,0.458202,0.515451,0.455716,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
5,Ridge Regression,34.176537,38.131062,0.525054,0.453506,0.505453,0.450999,"{'alpha': 0.01, 'copy_X': True, 'fit_intercept..."
1,SVR,37.431188,39.720152,0.430288,0.407008,0.406776,0.404287,"{'C': 10.0, 'cache_size': 200, 'coef0': 0.0, '..."
0,Bayesian Ridge,37.44853,40.530645,0.42976,0.38256,0.406226,0.379728,"{'alpha_1': 1e-06, 'alpha_2': 1e-06, 'alpha_in..."
