# Week 08 - Variance

### Population

$\sigma^2 = \frac{1}{N}\sum_{i=1}^{N}(x_i-\mu)^2$

### Sample

$s^2 = \frac{1}{n-1}\sum_{i=1}^{n}(x_i-\bar{x})^2$

In [1]:
# get data
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('mariokart.csv', index_col=0)

# impute missing values with mean, median, and mode
# Each filled column is assigned back to df. Calling fillna(inplace=True) on
# df[col] is chained assignment: it is deprecated in pandas 2.x and does not
# update df once Copy-on-Write is enabled.
# NOTE: the fill values are computed on the full data set before the split
# below, so the test rows influence the imputation. For an honest evaluation,
# compute them on X_train only and reuse them for X_test.
for col in ['Toad', 'Koopa Troopa', 'Bowser']:
    df[col] = df[col].fillna(round(df[col].mean(), 2))
for col in ['Donkey Kong', 'Princess Peach', 'Isabelle']:
    df[col] = df[col].fillna(df[col].median())
for col in ['Wario', 'Birdo']:
    # mode() returns a Series (there can be ties); take its first entry
    df[col] = df[col].fillna(df[col].mode()[0])

# Remaining null count per column. This is not the last expression in the
# cell, so the notebook does not display it.
df.isnull().sum()

# train test split
# 80/20 split; random_state pins the shuffle so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(['Mario Kart'], axis=1),
    df['Mario Kart'],
    test_size=.2,
    random_state=42,
)


In [2]:
# get model summary
import statsmodels.api as sm

# NOTE: whatever column selection / constant column is applied to X_train here
# must also be applied to X_test before it is passed to model.predict().
# select_dtypes() is the public replacement for the private _get_numeric_data();
# 'bool' is listed so the selection stays the same if boolean columns are present.
X = X_train.select_dtypes(include=['number', 'bool']).copy()
# sm.OLS does not add an intercept on its own, so prepend an explicit column of ones
X.insert(0, 'const', 1)
model = sm.OLS(y_train, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,Mario Kart,R-squared:,0.846
Model:,OLS,Adj. R-squared:,0.824
Method:,Least Squares,F-statistic:,37.07
Date:,"Tue, 11 Oct 2022",Prob (F-statistic):,1.14e-25
Time:,19:04:35,Log-Likelihood:,-447.69
No. Observations:,86,AIC:,919.4
Df Residuals:,74,BIC:,948.8
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,10.9728,138.671,0.079,0.937,-265.336,287.282
Baby Daisy,3.4803,2.710,1.284,0.203,-1.919,8.880
Luigi,3.4803,2.710,1.284,0.203,-1.919,8.880
Toad,52.8694,6.976,7.579,0.000,38.970,66.768
Yoshi,4.8967,5.340,0.917,0.362,-5.743,15.536
Bowser,-37.6102,18.467,-2.037,0.045,-74.406,-0.814
Donkey Kong,-30.4515,18.434,-1.652,0.103,-67.181,6.278
Princess Peach,-2.3425,11.171,-0.210,0.834,-24.602,19.917
Isabelle,80.8079,5.850,13.813,0.000,69.151,92.465

0,1,2,3
Omnibus:,3.687,Durbin-Watson:,1.787
Prob(Omnibus):,0.158,Jarque-Bera (JB):,3.531
Skew:,0.21,Prob(JB):,0.171
Kurtosis:,3.899,Cond. No.,2.21e+18


In [3]:
# delete low variance and review summary
low_variance_cols = ['Luigi', 'Baby Mario', 'Birdo']
# Rebind X instead of dropping in place, and skip columns that are already
# gone, so this cell can be re-run without raising a KeyError.
X = X.drop(columns=low_variance_cols, errors='ignore')
model = sm.OLS(y_train, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,Mario Kart,R-squared:,0.846
Model:,OLS,Adj. R-squared:,0.824
Method:,Least Squares,F-statistic:,37.07
Date:,"Tue, 11 Oct 2022",Prob (F-statistic):,1.14e-25
Time:,19:04:55,Log-Likelihood:,-447.69
No. Observations:,86,AIC:,919.4
Df Residuals:,74,BIC:,948.8
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,11.0365,139.476,0.079,0.937,-266.875,288.948
Baby Daisy,6.9607,5.420,1.284,0.203,-3.839,17.760
Toad,52.8694,6.976,7.579,0.000,38.970,66.768
Yoshi,4.8967,5.340,0.917,0.362,-5.743,15.536
Bowser,-37.6102,18.467,-2.037,0.045,-74.406,-0.814
Donkey Kong,-30.4515,18.434,-1.652,0.103,-67.181,6.278
Princess Peach,-2.3425,11.171,-0.210,0.834,-24.602,19.917
Isabelle,80.8079,5.850,13.813,0.000,69.151,92.465
Koopa Troopa,39.2589,5.591,7.022,0.000,28.119,50.399

0,1,2,3
Omnibus:,3.687,Durbin-Watson:,1.787
Prob(Omnibus):,0.158,Jarque-Bera (JB):,3.531
Skew:,0.21,Prob(JB):,0.171
Kurtosis:,3.899,Cond. No.,315.0


In [4]:
# delete multicollinearity and review summary
collinear_cols = ['Mii', 'Donkey Kong']
# Rebind X instead of dropping in place, and skip columns that are already
# gone, so this cell can be re-run without raising a KeyError.
X = X.drop(columns=collinear_cols, errors='ignore')
model = sm.OLS(y_train, X).fit()
model.summary()

0,1,2,3
Dep. Variable:,Mario Kart,R-squared:,0.841
Model:,OLS,Adj. R-squared:,0.822
Method:,Least Squares,F-statistic:,44.53
Date:,"Tue, 11 Oct 2022",Prob (F-statistic):,9.79e-27
Time:,19:05:22,Log-Likelihood:,-449.28
No. Observations:,86,AIC:,918.6
Df Residuals:,76,BIC:,943.1
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,9.8551,20.051,0.491,0.624,-30.080,49.791
Baby Daisy,7.0487,5.447,1.294,0.200,-3.800,17.898
Toad,53.5013,7.001,7.641,0.000,39.557,67.446
Yoshi,4.4849,5.336,0.840,0.403,-6.143,15.113
Bowser,-11.0584,9.240,-1.197,0.235,-29.461,7.344
Princess Peach,-0.0944,11.124,-0.008,0.993,-22.251,22.062
Isabelle,80.4293,5.864,13.715,0.000,68.750,92.109
Koopa Troopa,40.0900,5.597,7.163,0.000,28.943,51.237
Mario,3.6638,5.029,0.729,0.469,-6.352,13.680

0,1,2,3
Omnibus:,3.419,Durbin-Watson:,1.771
Prob(Omnibus):,0.181,Jarque-Bera (JB):,3.098
Skew:,0.214,Prob(JB):,0.212
Kurtosis:,3.826,Cond. No.,22.8
