In [85]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [86]:
# Load the Mario Kart dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/mariokart.csv')

In [87]:
# Preview the first rows to check the column names and how the values are formatted.
df.head()

Unnamed: 0,id,duration,n_bids,cond,start_pr,ship_pr,total_pr,ship_sp,seller_rate,stock_photo,wheels,title
0,150377422259,3,20,new,0.99,4.0,51.55,standard,1580,yes,1,~~ Wii MARIO KART &amp; WHEEL ~ NINTENDO Wii ~...
1,260483376854,7,13,used,0.99,3.99,37.04,firstClass,365,yes,1,Mariokart Wii Nintendo with wheel - Mario Kart...
2,320432342985,3,16,new,0.99,3.5,45.5,firstClass,998,no,1,Mario Kart Wii (Wii)
3,280405224677,3,18,new,0.99,0.0,44.0,standard,7,yes,1,Brand New Mario Kart Wii Comes with Wheel. Fre...
4,170392227765,1,20,new,0.01,0.0,71.0,media,820,yes,2,BRAND NEW NINTENDO 1 WII MARIO KART WITH 2 WHE...


### Feature Selection

In [88]:
# Candidate predictors for total_pr: cond + stock_photo + duration + wheels

In [89]:
# Indicator column: 1 where the listing condition is 'new', 0 for anything else.
is_new = df['cond'] == 'new'
df['cond_new'] = is_new.astype('int64')

In [90]:
# Encode stock_photo as a 0/1 indicator (1 = 'yes', 0 = anything else).
# The column is overwritten in place, so the already-encoded value 1 is accepted
# alongside the raw 'yes'; otherwise re-running this cell would turn every row into 0.
df['stock_photo'] = df['stock_photo'].apply(lambda x: 1 if x in ('yes', 1) else 0)

In [91]:
# Show the frame after encoding: cond_new has been appended and stock_photo is now 0/1.
df

Unnamed: 0,id,duration,n_bids,cond,start_pr,ship_pr,total_pr,ship_sp,seller_rate,stock_photo,wheels,title,cond_new
0,150377422259,3,20,new,0.99,4.00,51.55,standard,1580,1,1,~~ Wii MARIO KART &amp; WHEEL ~ NINTENDO Wii ~...,1
1,260483376854,7,13,used,0.99,3.99,37.04,firstClass,365,1,1,Mariokart Wii Nintendo with wheel - Mario Kart...,0
2,320432342985,3,16,new,0.99,3.50,45.50,firstClass,998,0,1,Mario Kart Wii (Wii),1
3,280405224677,3,18,new,0.99,0.00,44.00,standard,7,1,1,Brand New Mario Kart Wii Comes with Wheel. Fre...,1
4,170392227765,1,20,new,0.01,0.00,71.00,media,820,1,2,BRAND NEW NINTENDO 1 WII MARIO KART WITH 2 WHE...,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,110441497272,1,20,used,0.01,0.00,39.51,standard,7284,1,0,Mario Kart Wii (Wii) Nintendo Wii game *--WOW ...,0
139,150376936435,7,9,used,17.99,0.00,52.00,parcel,121,0,2,Mario Kart Wii (Wii),0
140,140349730405,3,14,new,0.99,8.70,47.70,priority,251,1,1,"Wii Mario Kart game + wheel: NIB, factory sealed",1
141,300352306018,7,13,used,1.00,4.90,38.76,parcel,41,0,0,Mario Kart Wii,0


In [92]:
# Full model: total_pr regressed on all four candidate predictors.
mod = ols('total_pr ~ cond_new + stock_photo + duration + wheels', data=df)

In [93]:
# Fit the full model by least squares and print the regression summary table.
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:               total_pr   R-squared:                       0.123
Model:                            OLS   Adj. R-squared:                  0.098
Method:                 Least Squares   F-statistic:                     4.860
Date:                Sat, 22 Jan 2022   Prob (F-statistic):            0.00107
Time:                        21:49:45   Log-Likelihood:                -657.17
No. Observations:                 143   AIC:                             1324.
Df Residuals:                     138   BIC:                             1339.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept      40.9385      7.361      5.562      

### Model Selection

#### Backward elimination using the p-value approach

### Passthrough-1

In [94]:
# Passthrough-1: same formula as the full model with `duration` removed.
mod = ols('total_pr ~ cond_new + stock_photo + wheels', data=df)

In [95]:
# Refit the reduced model (this rebinds `res`) and print its summary.
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:               total_pr   R-squared:                       0.122
Model:                            OLS   Adj. R-squared:                  0.104
Method:                 Least Squares   F-statistic:                     6.465
Date:                Sat, 22 Jan 2022   Prob (F-statistic):           0.000396
Time:                        21:49:46   Log-Likelihood:                -657.25
No. Observations:                 143   AIC:                             1323.
Df Residuals:                     139   BIC:                             1334.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept      43.2039      4.746      9.103      

### Passthrough-2

In [96]:
# Passthrough-2: `cond_new` is removed as well, leaving stock_photo and wheels.
mod = ols('total_pr ~ stock_photo + wheels', data=df)

In [97]:
# Refit the two-predictor model (this rebinds `res`) and print its summary.
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:               total_pr   R-squared:                       0.121
Model:                            OLS   Adj. R-squared:                  0.109
Method:                 Least Squares   F-statistic:                     9.681
Date:                Sat, 22 Jan 2022   Prob (F-statistic):           0.000115
Time:                        21:49:46   Log-Likelihood:                -657.33
No. Observations:                 143   AIC:                             1321.
Df Residuals:                     140   BIC:                             1330.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept      42.9313      4.679      9.175      

### Passthrough-3

In [98]:
# Passthrough-3: only `wheels` remains as a predictor.
mod = ols('total_pr ~ wheels', data=df)

In [99]:
# Refit the single-predictor model (this rebinds `res`) and print its summary.
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:               total_pr   R-squared:                       0.109
Model:                            OLS   Adj. R-squared:                  0.103
Method:                 Least Squares   F-statistic:                     17.23
Date:                Sat, 22 Jan 2022   Prob (F-statistic):           5.70e-05
Time:                        21:49:46   Log-Likelihood:                -658.35
No. Observations:                 143   AIC:                             1321.
Df Residuals:                     141   BIC:                             1327.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     38.4052      3.433     11.188      0.0