# Creating regression graphs

In [1]:
import glob
import json
import pandas as pd
import plotly.express as px

In [53]:
def load_repeated_metrics(path):
    """Load one model's repeated-run metrics JSON into a transposed DataFrame.

    The file is read inside a ``with`` block so the handle is closed as soon
    as parsing finishes, instead of being left open for the kernel's lifetime.

    Parameters
    ----------
    path : str
        Path to a ``repeated_metrics.json`` file.

    Returns
    -------
    pandas.DataFrame
        The parsed JSON wrapped in a DataFrame and transposed. It is indexed
        below by metric name (e.g. ``'Validation RMSE'``), so after the
        transpose the metrics are the columns.
    """
    with open(path, 'r') as metrics_file:
        return pd.DataFrame(json.load(metrics_file)).transpose()


linear_regression_path = 'linear_regression/repeated_metrics.json'
decision_tree_path = 'decision_tree_regressor/repeated_metrics.json'
random_forest_path = 'random_forest_regressor/repeated_metrics.json'
xgboost_path = 'xgboost_regressor/repeated_metrics.json'

linear_regression = load_repeated_metrics(linear_regression_path)
decision_tree = load_repeated_metrics(decision_tree_path)
random_forest = load_repeated_metrics(random_forest_path)
xgboost = load_repeated_metrics(xgboost_path)

# Put each model's 'Validation RMSE' column side by side, then relabel the
# columns with readable model names. set_axis returns a new frame; the old
# `inplace=True` form was removed in pandas 2.0 and would raise there.
validation_rmse = pd.concat([
    linear_regression['Validation RMSE'],
    decision_tree['Validation RMSE'],
    random_forest['Validation RMSE'],
    xgboost['Validation RMSE']
    ], axis=1).set_axis([
    'Linear Regression',
    'Decision Trees',
    'Random Forest',
    'XGBoost'
    ], axis=1)

validation_rmse

Unnamed: 0,Linear Regression,Decision Trees,Random Forest,XGBoost
0,91.366858,103.456757,98.106487,114.436946
1,96.182859,106.576095,117.324010,86.955726
2,86.427533,105.666016,100.179057,99.600891
3,99.988765,97.023079,121.781103,112.296572
4,102.789633,119.570974,72.940157,85.232870
...,...,...,...,...
995,106.039655,111.779691,106.350940,84.272041
996,98.730098,105.940276,115.338279,95.142276
997,95.719984,120.720522,94.545565,94.145820
998,99.216056,113.635853,84.651644,106.524122


In [76]:
# Horizontal box plot with one box per column of `validation_rmse`.
# update_layout returns the figure it was called on, so chaining it here
# yields the same object as a separate call would.
box_plot = px.box(
    validation_rmse,
    orientation='h',
    title='Validation RMSE of regression models trained over 1000 different seeds.',
    labels={'value': 'Validation set RMSE', 'variable': 'Regression model'},
).update_layout(template='plotly_dark')

# Export a static PNG (path is relative to the notebook's working directory).
box_plot.write_image(
    '../../../README-images/regression-box-plot.png',
    scale=20,
)

box_plot

In [60]:
def load_metrics(path):
    """Parse one model's ``metrics.json`` file and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed right after
    parsing rather than leaked.

    Parameters
    ----------
    path : str
        Path to a ``metrics.json`` file.

    Returns
    -------
    object
        Whatever ``json.load`` decodes. It is indexed below by the key
        ``'Validation RMSE mean'``, so a JSON object (dict) is expected.
    """
    with open(path, 'r') as metrics_file:
        return json.load(metrics_file)


linear_regression_path = 'linear_regression/metrics.json'
decision_tree_path = 'decision_tree_regressor/metrics.json'
random_forest_path = 'random_forest_regressor/metrics.json'
xgboost_path = 'xgboost_regressor/metrics.json'

# NOTE: these names are rebound here to the parsed metrics.json contents.
linear_regression = load_metrics(linear_regression_path)
decision_tree = load_metrics(decision_tree_path)
random_forest = load_metrics(random_forest_path)
xgboost = load_metrics(xgboost_path)

# Display name -> value stored under 'Validation RMSE mean' for that model.
validation_mean_dict = {
    'Linear Regression': linear_regression['Validation RMSE mean'],
    'Decision Trees': decision_tree['Validation RMSE mean'],
    'Random Forest': random_forest['Validation RMSE mean'],
    'XGBoost': xgboost['Validation RMSE mean']
}

validation_mean_dict

{'Linear Regression': 101.1059363248387,
 'Decision Trees': 106.14070415307567,
 'Random Forest': 101.27212489066564,
 'XGBoost': 101.86348739347163}

In [78]:
# Materialise the dict views as lists so plotly receives plain, indexable
# sequences. Both lists follow the dict's insertion order, so names and
# values stay aligned.
model_names = list(validation_mean_dict.keys())
mean_rmse_values = list(validation_mean_dict.values())

# Horizontal bar chart of the mean validation RMSE per model; the bars are
# also coloured by their value.
bar_chart = px.bar(
    x=mean_rmse_values,
    y=model_names,
    labels={'x': 'Mean validation set RMSE', 'y': 'Regression model'},
    title='Mean validation RMSE of regression models trained over 1000 different seeds.',
    color=mean_rmse_values,
    color_continuous_scale='solar_r',
    orientation='h'
)
# The x-axis is restricted to [94, 107], so the bars do not start at zero.
bar_chart.update_xaxes(range=[94, 107])
bar_chart.update_layout(template='plotly_dark', yaxis={'categoryorder': 'total descending'})
# Export the bar chart itself. The original call used `box_plot` here, which
# wrote the box plot into the mean-RMSE image file.
bar_chart.write_image('../../../README-images/regression-mean-rmse.png', scale=20)
bar_chart