### Classes and Transformations

In [1]:
from sklearn.datasets import fetch_california_housing

In [2]:
cali = fetch_california_housing()

In [3]:
# Pull the feature array and the target array out of the loaded dataset.
X = cali.data
y = cali.target

In [5]:
import pandas as pd



In [6]:
# Wrap the raw feature array in a DataFrame labelled with the dataset's
# own feature names, so the columns are readable when displayed.
df = pd.DataFrame(data=X, columns=cali.feature_names)

In [7]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


### ACT vs. SAT Exams

- ACT is on a range of 1 - 36 --> $\mu = 25$ and $\sigma = 4$
- SAT is on a range of 600 - 2400 --> $\mu = 1800$ and $\sigma = 100$

- **STUDENT A**: 24 on ACT
- **STUDENT B**: 1600 on SAT

In [10]:
# z-score = (score - mean) / std. A negative value means the score is
# below the mean: Student A (ACT 24) is 0.25 standard deviations below.
(24 - 25)/4

-0.25

In [11]:
# z-score = (score - mean) / std. A negative value means the score is
# below the mean: Student B (SAT 1600) is 2 standard deviations below.
(1600 - 1800)/100

-2.0

In [19]:
class Dog:
    """Toy class: each instance stores its own name and can bark it back."""

    def __init__(self, name):
        # Per-instance state; bark() reads this attribute later.
        self.name = name

    def bark(self):
        """Return the string 'bark ' followed by this dog's name."""
        sound = f'bark {self.name}'
        return sound
    

In [20]:
# Two independent instances, each carrying its own name.
dog_1, dog_2 = Dog('steve'), Dog('emily')

In [21]:
dog_1.bark()

'bark steve'

In [22]:
dog_2.bark()

'bark emily'

In [32]:
class StandardScaler:
    """Standardize data to z-scores: ``(X - mean) / std``.

    Parameters
    ----------
    axis : int or None, default None
        Axis passed to ``np.mean`` / ``np.std`` when fitting. ``None``
        (the default) computes a single mean and standard deviation over
        every element of ``X``. ``axis=0`` computes one mean/std per
        column, i.e. per-feature scaling for a (samples, features) array.

    Attributes
    ----------
    mean_ : float or ndarray, or None before ``fit``
        Mean learned by ``fit``.
    std_ : float or ndarray, or None before ``fit``
        Standard deviation learned by ``fit``.
    """

    def __init__(self, axis=None):
        self.axis = axis
        self.mean_ = None
        self.std_ = None

    def fit(self, X):
        """Learn the mean and standard deviation of ``X``; return ``self``."""
        # asarray lets plain lists / nested lists be fitted as well.
        X = np.asarray(X)
        self.mean_ = np.mean(X, axis=self.axis)
        self.std_ = np.std(X, axis=self.axis)
        return self

    def transform(self, X):
        """Return ``X`` centered by ``mean_`` and scaled by ``std_``.

        Raises
        ------
        RuntimeError
            If called before ``fit`` (no statistics have been learned).
        """
        if self.mean_ is None or self.std_ is None:
            raise RuntimeError(
                "StandardScaler is not fitted yet; call fit() before transform()."
            )
        # Plain Python sequences do not support elementwise arithmetic.
        if isinstance(X, (list, tuple)):
            X = np.asarray(X)
        # A zero standard deviation (constant data) would produce 0/0 = NaN.
        # Divide by 1 there instead, so constant data maps to 0.
        scale = np.where(self.std_ == 0, 1.0, self.std_)
        return (X - self.mean_) / scale

    def fit_transform(self, X):
        """Fit on ``X`` and return the transformed ``X`` in one step."""
        return self.fit(X).transform(X)

In [33]:
sscaler = StandardScaler()

In [34]:
import numpy as np

In [35]:
# 80 random integers drawn from [3, 50) and arranged as 2 rows x 40 columns.
# NOTE(review): the draw is unseeded, so the values differ on every run, and
# this rebinds X (assigned from cali.data earlier in the notebook).
X = np.random.randint(3, 50, 80).reshape(2, 40)

In [36]:
X

array([[ 5, 36,  9, 11, 12, 45, 40, 35, 36, 39, 12, 42, 19, 28,  8, 16,
        18, 33, 18, 30, 14, 42, 46, 28, 15, 49, 23, 31, 10, 36, 48, 22,
        12, 13, 19, 31, 25, 36, 48, 41],
       [27, 23, 42, 23, 41, 34, 39,  8, 46, 15, 29, 13, 27, 17, 49, 16,
        39, 34, 42, 11, 20, 26, 24, 18, 30,  4, 40, 43, 18, 37,  8, 47,
         3, 16, 30, 26,  8, 37, 19, 18]])

In [37]:
sscaler.fit(X)

<__main__.StandardScaler at 0x7fc07e915280>

In [39]:
sscaler.fit_transform(X)

array([[-1.67729366,  0.72993335, -1.36668373, -1.21137876, -1.13372627,
         1.42880571,  1.04054329,  0.65228087,  0.72993335,  0.96289081,
        -1.13372627,  1.19584826, -0.59015888,  0.10871348, -1.44433621,
        -0.82311634, -0.66781137,  0.4969759 , -0.66781137,  0.26401845,
        -0.9784213 ,  1.19584826,  1.5064582 ,  0.10871348, -0.90076882,
         1.73941565, -0.27954894,  0.34167093, -1.28903124,  0.72993335,
         1.66176317, -0.35720143, -1.13372627, -1.05607379, -0.59015888,
         0.34167093, -0.12424398,  0.72993335,  1.66176317,  1.11819578],
       [ 0.03106099, -0.27954894,  1.19584826, -0.27954894,  1.11819578,
         0.57462839,  0.96289081, -1.44433621,  1.5064582 , -0.90076882,
         0.18636596, -1.05607379,  0.03106099, -0.74546385,  1.73941565,
        -0.82311634,  0.96289081,  0.57462839,  1.19584826, -1.21137876,
        -0.5125064 , -0.04659149, -0.20189646, -0.66781137,  0.26401845,
        -1.75494615,  1.04054329,  1.27350075, -0.

In [40]:
sscaler.mean_

26.6

In [41]:
sscaler.std_

12.877888025604198

### Linear Regression with Statsmodels

In [42]:
# Rebind X and y to the dataset's feature and target arrays.
X = cali.data
y = cali.target

In [43]:
import statsmodels.api as sm



In [47]:
# Add the intercept column to the design matrix; it appears as the `const`
# row at the top of the coefficient table in the model summary.
X = sm.add_constant(X, prepend=True)

In [48]:
# Ordinary least squares: the response (endog) comes first, then the design
# matrix (exog). fit() returns the results object that is stored in `model`.
model = sm.OLS(endog=y, exog=X).fit()

In [49]:
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.606
Model:,OLS,Adj. R-squared:,0.606
Method:,Least Squares,F-statistic:,3970.0
Date:,"Tue, 03 Nov 2020",Prob (F-statistic):,0.0
Time:,19:58:53,Log-Likelihood:,-22624.0
No. Observations:,20640,AIC:,45270.0
Df Residuals:,20631,BIC:,45340.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-36.9419,0.659,-56.067,0.000,-38.233,-35.650
x1,0.4367,0.004,104.054,0.000,0.428,0.445
x2,0.0094,0.000,21.143,0.000,0.009,0.010
x3,-0.1073,0.006,-18.235,0.000,-0.119,-0.096
x4,0.6451,0.028,22.928,0.000,0.590,0.700
x5,-3.976e-06,4.75e-06,-0.837,0.402,-1.33e-05,5.33e-06
x6,-0.0038,0.000,-7.769,0.000,-0.005,-0.003
x7,-0.4213,0.007,-58.541,0.000,-0.435,-0.407
x8,-0.4345,0.008,-57.682,0.000,-0.449,-0.420

0,1,2,3
Omnibus:,4393.65,Durbin-Watson:,0.885
Prob(Omnibus):,0.0,Jarque-Bera (JB):,14087.596
Skew:,1.082,Prob(JB):,0.0
Kurtosis:,6.42,Cond. No.,238000.0
