In [1]:
import numpy as np # array manipulation
import pandas as pd # dataset handling
import matplotlib.pyplot as plt # plotting graphs
import seaborn as sns # plotting graphs
import plotly.express as px # plotting graphs (plotly)
from plotly.subplots import make_subplots # make multiple plots in one figure
import plotly.graph_objects as go # plotting graphs (plotly)
import plotly.figure_factory as ff # plotting graphs (plotly)
%matplotlib inline # show plots inline

In [2]:
# Load the results table (columns shown below include 'y', 'y_pred' and 'Set');
# the path is relative to the notebook's working directory.
results = pd.read_csv(filepath_or_buffer="data/results.csv")
results.head(n=5) # preview the first 5 rows

Unnamed: 0.1,Unnamed: 0,y,y_pred,Set
0,332,13429.0354,12715.776403,Train
1,355,24603.04837,6709.725428,Train
2,138,27322.73386,12419.012091,Train
3,381,42303.69215,49745.434234,Train
4,292,42112.2356,22995.307121,Train


In [3]:
# Residual = real value - predicted value (y - y_pred), so a positive residual means
# the prediction was lower than the real value.
# `results` is rebound (no inplace=True) and the drop uses errors='ignore' so this cell can
# be re-run safely: once 'Unnamed: 0' is gone, a second run no longer raises KeyError.
results = (
    results
    .assign(Residuals=results['y'] - results['y_pred']) # add/overwrite the 'Residuals' column
    .drop(columns=['Unnamed: 0'], errors='ignore') # drop column 'Unnamed: 0' (index) if still present
)
results.head() # show first 5 rows

Unnamed: 0,y,y_pred,Set,Residuals
0,13429.0354,12715.776403,Train,713.258997
1,24603.04837,6709.725428,Train,17893.322942
2,27322.73386,12419.012091,Train,14903.721769
3,42303.69215,49745.434234,Train,-7441.742084
4,42112.2356,22995.307121,Train,19116.928479


In [4]:
# Partition the results by the value of the 'Set' column:
# rows labelled "Train" go to `train`, rows labelled "Test" go to `test`.
train, test = (
    results.loc[results['Set'].eq(label)]
    for label in ("Train", "Test")
)

In [77]:
# Empty 3 x 2 subplot grid for the error analysis.
# Subplot titles are given in row-major order (left to right, then top to bottom), so the
# ('Train set', 'Test set') pair repeats once per row: column 1 = train, column 2 = test.
fig = make_subplots(
    rows=3,
    cols=2,
    subplot_titles=('Train set', 'Test set') * 3,
    vertical_spacing=0.10, # gap between subplot rows
    horizontal_spacing=0.10, # gap between subplot columns
)

In [78]:
# Error-analysis grid: column 1 = train set, column 2 = test set.
#   Row 1: predicted vs. real, with the line of perfect fit (y = x)
#   Row 2: residual vs. real
#   Row 3: histogram of the residuals
# Each row is drawn by one loop over the two splits instead of a copy-pasted block per panel.
TRAIN_COLOR = 'rgb(16, 154, 246)' # marker color of the train-set panels
TEST_COLOR = 'rgb(246, 52, 16)' # marker color of the test-set panels
PERFECT_FIT_END = 70000 # the perfect-fit line runs from (0, 0) to (PERFECT_FIT_END, PERFECT_FIT_END)
splits = [(train, TRAIN_COLOR), (test, TEST_COLOR)] # position in this list + 1 = subplot column

# `fig` is created in an earlier cell, so re-running this cell alone would stack duplicate
# traces and shapes onto it. Clear both first to make the cell safe to re-run.
fig.data = []
fig.layout.shapes = []

# Row 1: predicted vs. real
# add_trace accepts a graph object trace (go.Scatter, go.Histogram, ...) and adds it to the figure
for col, (subset, color) in enumerate(splits, start=1):
    fig.add_trace(go.Scatter(x=subset['y'],
                             y=subset['y_pred'],
                             mode='markers',
                             marker_color=color),
                  row=1, col=col)
    # Line of perfect fit: points on this line have predicted == real
    fig.add_shape(type="line",
                  x0=0, y0=0,
                  x1=PERFECT_FIT_END, y1=PERFECT_FIT_END,
                  row=1, col=col)
    fig.update_xaxes(title_text="Real", row=1, col=col)
    fig.update_yaxes(title_text="Predicted", row=1, col=col)

# Row 2: residual vs. real
for col, (subset, color) in enumerate(splits, start=1):
    fig.add_trace(go.Scatter(x=subset['y'],
                             y=subset['Residuals'],
                             mode='markers',
                             marker_color=color),
                  row=2, col=col)
    fig.update_xaxes(title_text="Real", row=2, col=col)
    fig.update_yaxes(title_text="Residual", row=2, col=col)

# Row 3: histogram of the residuals
for col, (subset, color) in enumerate(splits, start=1):
    fig.add_trace(go.Histogram(x=subset['Residuals'],
                               marker_color=color),
                  row=3, col=col)
    fig.update_xaxes(title_text="Residuals", row=3, col=col)
    fig.update_yaxes(title_text="Count", row=3, col=col) # histogram bars show counts by default

# The legend is hidden: the subplot titles already say which split each panel shows
fig.update_layout(title='Error analysis', showlegend=False, height=1200)

fig.show() # show figure