# WHIRLPOOL REGRESSION

## Import required packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
#import seaborn as sns

## Read the CSV file into a DataFrame

In [2]:
# Load the raw data set from the CSV file located in the working directory.
dfwhirlpool = pd.read_csv(filepath_or_buffer='whirlpool_csv.csv')

## Check what data is in the dataframes

In [3]:
# Show the first five rows to verify that the file was parsed as expected.
dfwhirlpool.head(n=5)

Unnamed: 0,Part number,Price/ Unit,Type,Region,Capacity BTU/Hr,Weight,EER*,Unit Volume
0,110-1574,34.49,Reciprocating,North America,510,5.68,3.26,81000
1,111-0810,39.48,Scroll,Europe,920,10.51,4.9,137000
2,116-7080,32.08,Reciprocating,North America,720,8.16,4.28,47000
3,116-7532,29.47,Reciprocating,Europe,285,3.31,5.03,82000
4,122-7151,26.56,Reciprocating,Europe,260,2.97,4.16,66000


## Rename columns for convenience

In [4]:
# Drop the stray space in "Price/ Unit" and the asterisk in "EER*" so the
# labels are easier to type. Labels that are absent are silently skipped
# (pandas' default errors="ignore"), so re-running this cell is harmless.
dfwhirlpool = dfwhirlpool.rename(
    {"Price/ Unit": "Price/Unit", "EER*": "EER"},
    axis="columns",
)

## Transform unit volume to log of unit volume

In [5]:
# Natural log of the unit volume, stored next to the untransformed column.
dfwhirlpool['log Unit Volume'] = dfwhirlpool['Unit Volume'].transform(np.log)

## Check if change has been made

In [6]:
# Confirm that the renamed labels and the new "log Unit Volume" column are present.
dfwhirlpool.head(n=5)

Unnamed: 0,Part number,Price/Unit,Type,Region,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume
0,110-1574,34.49,Reciprocating,North America,510,5.68,3.26,81000,11.302204
1,111-0810,39.48,Scroll,Europe,920,10.51,4.9,137000,11.827736
2,116-7080,32.08,Reciprocating,North America,720,8.16,4.28,47000,10.757903
3,116-7532,29.47,Reciprocating,Europe,285,3.31,5.03,82000,11.314475
4,122-7151,26.56,Reciprocating,Europe,260,2.97,4.16,66000,11.09741


## Summarize data

In [7]:
# Count, mean, standard deviation, min/max and quartiles of every numeric column.
dfwhirlpool.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Price/Unit,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume
count,45.0,45.0,45.0,45.0,45.0,45.0
mean,34.075333,545.333333,6.181111,4.276667,111911.111111,11.227207
std,4.047022,249.38652,2.872977,0.643414,136961.975848,0.797831
min,26.56,120.0,1.36,3.02,23000.0,10.043249
25%,31.66,345.0,3.9,3.83,45000.0,10.714418
50%,33.34,510.0,5.68,4.37,62000.0,11.03489
75%,37.07,725.0,8.3,4.68,98000.0,11.492723
max,43.78,1090.0,12.77,5.33,750000.0,13.527828


In [8]:
# One-hot encode the two categorical columns.
# dtype=int keeps the indicators as 0/1 integers on every pandas version:
# pandas >= 2.0 defaults to bool, which turns the regression design matrix
# into an object array that statsmodels refuses to fit.
dummies1 = pd.get_dummies(dfwhirlpool['Type'], dtype=int)
dummies2 = pd.get_dummies(dfwhirlpool['Region'], dtype=int)

# Remove indicator columns left over from a previous run of this cell before
# concatenating, so re-running it never produces duplicate column labels.
indicator_labels = list(dummies1.columns) + list(dummies2.columns)
dfwhirlpool = pd.concat(
    [dfwhirlpool.drop(columns=indicator_labels, errors="ignore"), dummies1, dummies2],
    axis=1,
)
dfwhirlpool.head()

Unnamed: 0,Part number,Price/Unit,Type,Region,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume,Reciprocating,Scroll,Europe,Latin America,North America
0,110-1574,34.49,Reciprocating,North America,510,5.68,3.26,81000,11.302204,1,0,0,0,1
1,111-0810,39.48,Scroll,Europe,920,10.51,4.9,137000,11.827736,0,1,1,0,0
2,116-7080,32.08,Reciprocating,North America,720,8.16,4.28,47000,10.757903,1,0,0,0,1
3,116-7532,29.47,Reciprocating,Europe,285,3.31,5.03,82000,11.314475,1,0,1,0,0
4,122-7151,26.56,Reciprocating,Europe,260,2.97,4.16,66000,11.09741,1,0,1,0,0


In [9]:
# Drop one indicator per categorical variable; the dropped levels
# ("Reciprocating" and "North America") become the baseline that the
# remaining indicator coefficients are measured against.
dfwhirlpool = dfwhirlpool.drop(
    labels=["Reciprocating", "North America"],
    axis="columns",
)
dfwhirlpool.head(n=5)

Unnamed: 0,Part number,Price/Unit,Type,Region,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume,Scroll,Europe,Latin America
0,110-1574,34.49,Reciprocating,North America,510,5.68,3.26,81000,11.302204,0,0,0
1,111-0810,39.48,Scroll,Europe,920,10.51,4.9,137000,11.827736,1,1,0
2,116-7080,32.08,Reciprocating,North America,720,8.16,4.28,47000,10.757903,0,0,0
3,116-7532,29.47,Reciprocating,Europe,285,3.31,5.03,82000,11.314475,0,1,0
4,122-7151,26.56,Reciprocating,Europe,260,2.97,4.16,66000,11.09741,0,1,0


In [10]:
# Pairwise Pearson correlations between the numeric (and indicator) columns.
# The text columns ("Part number", "Type", "Region") are excluded explicitly:
# pandas >= 2.0 no longer drops them silently and raises instead.
dfwhirlpool.select_dtypes(include=["number", "bool"]).corr()

Unnamed: 0,Price/Unit,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume,Scroll,Europe,Latin America
Price/Unit,1.0,0.323419,0.318801,0.311448,-0.351386,-0.358806,0.185002,-0.317348,0.61852
Capacity BTU/Hr,0.323419,1.0,0.997922,-0.342144,0.071716,0.038737,0.635764,-0.219836,-0.136196
Weight,0.318801,0.997922,1.0,-0.343886,0.085289,0.05318,0.640415,-0.215994,-0.132858
EER,0.311448,-0.342144,-0.343886,1.0,-0.246618,-0.191887,-0.417808,0.278593,0.558138
Unit Volume,-0.351386,0.071716,0.085289,-0.246618,1.0,0.903965,0.140645,-0.019376,-0.177255
log Unit Volume,-0.358806,0.038737,0.05318,-0.191887,0.903965,1.0,0.071281,0.045688,-0.225876
Scroll,0.185002,0.635764,0.640415,-0.417808,0.140645,0.071281,1.0,-0.082199,-0.248548
Europe,-0.317348,-0.219836,-0.215994,0.278593,-0.019376,0.045688,-0.082199,1.0,-0.377964
Latin America,0.61852,-0.136196,-0.132858,0.558138,-0.177255,-0.225876,-0.248548,-0.377964,1.0


In [22]:
# Regressor sets explored during model selection. The dictionary keys are
# only labels; change the key used for `x1_columns` to refit another variant.
X1_CANDIDATES = {
    'design': ['Capacity BTU/Hr', 'EER', 'Scroll'],  # Design drivers
    'design_no_type': ['Capacity BTU/Hr', 'EER'],  # Design drivers without type
    'design_ops': ['Capacity BTU/Hr', 'EER', 'Unit Volume'],  # Design and operations drivers
    'design_ops_region': ['Capacity BTU/Hr', 'EER', 'Unit Volume', 'Europe', 'Latin America'],  # Design, operations and region drivers
    'no_europe': ['Capacity BTU/Hr', 'EER', 'Unit Volume', 'Latin America'],  # Remove Europe
    'no_eer': ['Capacity BTU/Hr', 'Unit Volume', 'Latin America'],  # Remove EER
    'eer_for_latin_america': ['Capacity BTU/Hr', 'Unit Volume', 'EER'],  # Swap Latin America for EER
    'kitchen_sink': ['Capacity BTU/Hr', 'EER', 'Scroll', 'Unit Volume', 'Europe', 'Latin America'],  # The kitchen sink model
}
x1_columns = X1_CANDIDATES['no_eer']

# X1 is the design matrix used by the following cells: the selected regressors
# plus an intercept column named "const". The float cast guarantees a purely
# numeric matrix even when the indicator columns are stored as bool.
X1 = sm.add_constant(dfwhirlpool[x1_columns].astype(float))
y = dfwhirlpool['Price/Unit']

# Ordinary least squares of price per unit on the selected drivers.
model = sm.OLS(y, X1)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             Price/Unit   R-squared:                       0.623
Model:                            OLS   Adj. R-squared:                  0.595
Method:                 Least Squares   F-statistic:                     22.55
Date:                Wed, 19 Jan 2022   Prob (F-statistic):           8.75e-09
Time:                        10:15:49   Log-Likelihood:                -104.33
No. Observations:                  45   AIC:                             216.7
Df Residuals:                      41   BIC:                             223.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              29.8331      1.036     

In [23]:
# Fitted coefficients of the model above, indexed by regressor name
# (the intercept appears under "const").
results.params

const              29.833119
Capacity BTU/Hr     0.006953
Unit Volume        -0.000008
Latin America       6.053679
dtype: float64

In [24]:
# In-sample prediction: evaluate the fitted model on the same design matrix
# it was estimated from and keep the result next to the observed prices.
dfwhirlpool.loc[:, 'X1 Predicted Price/Unit'] = results.predict(exog=X1)
dfwhirlpool.head(n=5)

Unnamed: 0,Part number,Price/Unit,Type,Region,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume,Scroll,Europe,Latin America,X1 Predicted Price/Unit,X1 Error,X1 Abs Error
0,110-1574,34.49,Reciprocating,North America,510,5.68,3.26,81000,11.302204,0,0,0,32.731581,-3.027807,3.027807
1,111-0810,39.48,Scroll,Europe,920,10.51,4.9,137000,11.827736,1,1,0,35.13463,-1.088127,1.088127
2,116-7080,32.08,Reciprocating,North America,720,8.16,4.28,47000,10.757903,0,0,0,34.463684,3.920845,3.920845
3,116-7532,29.47,Reciprocating,Europe,285,3.31,5.03,82000,11.314475,0,1,0,31.159077,4.742532,4.742532
4,122-7151,26.56,Reciprocating,Europe,260,2.97,4.16,66000,11.09741,0,1,0,31.113194,5.36912,5.36912


In [25]:
# Signed prediction error, defined as predicted minus observed price:
# a positive value means the model over-predicts the price.
dfwhirlpool['X1 Error'] = dfwhirlpool['X1 Predicted Price/Unit'].sub(dfwhirlpool['Price/Unit'])

In [26]:
# Magnitude of the prediction error, ignoring its direction.
dfwhirlpool['X1 Abs Error'] = np.abs(dfwhirlpool['X1 Error'])

In [27]:
# Inspect the prediction and error columns for the first five parts.
dfwhirlpool.head(n=5)

Unnamed: 0,Part number,Price/Unit,Type,Region,Capacity BTU/Hr,Weight,EER,Unit Volume,log Unit Volume,Scroll,Europe,Latin America,X1 Predicted Price/Unit,X1 Error,X1 Abs Error
0,110-1574,34.49,Reciprocating,North America,510,5.68,3.26,81000,11.302204,0,0,0,32.731581,-1.758419,1.758419
1,111-0810,39.48,Scroll,Europe,920,10.51,4.9,137000,11.827736,1,1,0,35.13463,-4.34537,4.34537
2,116-7080,32.08,Reciprocating,North America,720,8.16,4.28,47000,10.757903,0,0,0,34.463684,2.383684,2.383684
3,116-7532,29.47,Reciprocating,Europe,285,3.31,5.03,82000,11.314475,0,1,0,31.159077,1.689077,1.689077
4,122-7151,26.56,Reciprocating,Europe,260,2.97,4.16,66000,11.09741,0,1,0,31.113194,4.553194,4.553194


In [28]:
# Parity plot: observed price against predicted price, coloured by region.
# Both axes share one range derived from the data (rounded outwards with a
# one-unit margin) so that no observation is clipped; a fixed [26, 40] window
# hides parts whose price lies above 40.
parity_cols = ["X1 Predicted Price/Unit", "Price/Unit"]
axis_lo = float(np.floor(dfwhirlpool[parity_cols].to_numpy().min())) - 1
axis_hi = float(np.ceil(dfwhirlpool[parity_cols].to_numpy().max())) + 1

fig = px.scatter(dfwhirlpool, x="X1 Predicted Price/Unit", y="Price/Unit", color="Region")
fig.update_xaxes(range=[axis_lo, axis_hi])
fig.update_yaxes(range=[axis_lo, axis_hi])
# Reference line y = x, anchored in data coordinates so it stays correct when
# the figure is zoomed or panned (a paper-anchored diagonal would not).
fig.update_layout(shapes = [{'type': 'line', 'yref': 'y', 'xref': 'x', 'y0': axis_lo, 'y1': axis_hi, 'x0': axis_lo, 'x1': axis_hi}])
fig.show()

In [29]:
# Average signed error over all parts. The model includes an intercept, so
# this should be zero up to floating-point noise; wrap the expression in
# round(..., 10) to hide that noise when presenting the number.
dfwhirlpool.loc[:, 'X1 Error'].mean()

-3.54631879417866e-12

In [30]:
# Residual plot: signed error against predicted price, coloured by region.
# Variants tried while exploring: add symbol="Type" to also distinguish the
# compressor type, or put "Capacity BTU/Hr" on the x axis with range [0, 1200].
fig = px.scatter(
    dfwhirlpool,
    x="X1 Predicted Price/Unit",
    y="X1 Error",
    color="Region",
)
fig.update_yaxes(range=[-6, 6])
fig.update_xaxes(range=[25, 45])
fig.show()

In [31]:
# Mean signed error per region (one column per region).
# The aggregation is given by name: passing the callable np.mean emits a
# FutureWarning on pandas >= 2.1, while "mean" works on every version.
dfwhirlpool_piv_X1 = pd.pivot_table(dfwhirlpool, values='X1 Error', columns=['Region'], aggfunc="mean")

In [32]:
# Display the mean signed error per region computed in the previous cell.
dfwhirlpool_piv_X1

Region,Europe,Latin America,North America
X1 Error,-0.052676,-4.05862e-12,0.039507


In [47]:
# Mean signed error per compressor type (one column per type).
# The aggregation is given by name: passing the callable np.mean emits a
# FutureWarning on pandas >= 2.1, while "mean" works on every version.
dfwhirlpool_piv_X1_2 = pd.pivot_table(dfwhirlpool, values='X1 Error', columns=['Type'], aggfunc="mean")

In [48]:
# Display the mean signed error per compressor type computed in the previous cell.
dfwhirlpool_piv_X1_2

Type,Reciprocating,Scroll
X1 Error,0.199043,-0.920573
