# Linear Regression with Leave-One-Out Cross-Validation
* Supervised learning


## Imports and Definitions

In [17]:
import numpy as np
from sklearn import cross_validation
from sklearn import linear_model
import os

In [18]:
def input_dir():
    """Return the current working directory followed by a trailing '/'."""
    cwd = os.getcwd()
    return '{}/'.format(cwd)

def output_dir():
    """Return the current working directory followed by a trailing '/'."""
    parts = (os.getcwd(), '/')
    return ''.join(parts)

## Get data

In [19]:
# Load the raw text lines of data.csv from the input directory.
# The file is only read, so it is opened read-only, and the `with` block
# closes the handle as soon as the lines are in memory. `rdata` remains an
# iterable of lines (each still ending in its newline) for the parsing step.
with open(input_dir() + 'data.csv', 'r') as csv_file:
    rdata = csv_file.readlines()

### Adjust data

In [20]:
# Parse every comma-separated line of `rdata` into a row of floats.
data = []
for line in rdata:
    # Drop the trailing newline (and any other trailing whitespace).
    line = line.rstrip()
    # A blank line would make float('') raise ValueError, so skip it.
    if not line:
        continue
    data.append([float(x) for x in line.split(',')])
# Bare expression: shows the parsed rows as the notebook cell output.
data

[[180.0, 8.0, 3070.0, 1300.0, 3504.0, 120.0],
 [150.0, 8.0, 3500.0, 1650.0, 3693.0, 115.0],
 [180.0, 8.0, 3180.0, 1500.0, 3436.0, 110.0],
 [160.0, 8.0, 3040.0, 1500.0, 3433.0, 120.0],
 [170.0, 8.0, 3020.0, 1400.0, 3449.0, 105.0],
 [150.0, 8.0, 4290.0, 1980.0, 4341.0, 100.0],
 [140.0, 8.0, 4540.0, 2200.0, 4354.0, 90.0],
 [140.0, 8.0, 4400.0, 2150.0, 4312.0, 85.0],
 [140.0, 8.0, 4550.0, 2250.0, 4425.0, 100.0],
 [150.0, 8.0, 3900.0, 1900.0, 3850.0, 85.0],
 [150.0, 8.0, 3830.0, 1700.0, 3563.0, 100.0],
 [140.0, 8.0, 3400.0, 1600.0, 3609.0, 80.0],
 [150.0, 8.0, 4000.0, 1500.0, 3761.0, 95.0],
 [140.0, 8.0, 4550.0, 2250.0, 3086.0, 100.0],
 [240.0, 4.0, 1130.0, 9500.0, 2372.0, 150.0],
 [220.0, 6.0, 1980.0, 9500.0, 2833.0, 155.0],
 [180.0, 6.0, 1990.0, 9700.0, 2774.0, 155.0],
 [210.0, 6.0, 2000.0, 8500.0, 2587.0, 160.0],
 [270.0, 4.0, 9700.0, 8800.0, 2130.0, 145.0],
 [260.0, 4.0, 9700.0, 4600.0, 1835.0, 205.0]]

In [21]:
# Turn the parsed rows into one 2-D float array. dtype=float performs the
# float conversion in a single step, so no per-row re-conversion is needed.
data = np.array(data, dtype=float)

# Column split: every column except the last goes to X (the inputs passed to
# the regression), the last column goes to Y (the value being predicted).
X = data[:, :-1]
Y = data[:, -1]

## Regression

In [23]:
# Leave-one-out cross validation: each sample is held out once while a fresh
# linear model is fitted on the remaining samples.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20; on
# newer versions the replacement is sklearn.model_selection.LeaveOneOut,
# iterated via LeaveOneOut().split(X) -- confirm the installed version.
n = len(X)
loo = cross_validation.LeaveOneOut(n)
ssum = 0           # sum of per-fold R^2 scores (kept for backward compatibility)
sq_err_sum = 0.0   # sum of per-fold squared prediction errors
for train, test in loo:
    X_train, X_test = X[train], X[test]
    Y_train, Y_test = Y[train], Y[test]

    regr = linear_model.LinearRegression()
    regr.fit(X_train, Y_train)

    # Evaluate the held-out sample once and reuse the results below.
    # R^2 is not meaningful on a single test sample (the recorded output shows
    # 0.0 for every fold), so the squared error is the informative number.
    fold_score = regr.score(X_test, Y_test)
    fold_sq_err = np.mean((regr.predict(X_test) - Y_test) ** 2)

    print(fold_score)
    print(regr.coef_)
    print(regr.intercept_)
    print("Residual sum of squares: %f" % fold_sq_err)
    ssum += fold_score
    sq_err_sum += fold_sq_err

print(ssum)
# Overall leave-one-out estimate: the average squared error across all folds.
if n > 0:
    print("Leave-one-out mean squared error: %f" % (sq_err_sum / n))

0.0
[ 2.10831725e-01 -4.39462230e+00 -4.98372306e-04  7.41600913e-04
 -1.75630189e-02]
170.74686547490546
Residual sum of squares: 73.395931
0.0
[ 2.89234130e-01 -2.80983974e+00 -5.99579369e-04  7.78120865e-04
 -1.67547161e-02]
142.9781820727216
Residual sum of squares: 190.583856
0.0
[ 3.16746579e-01 -2.12678670e+00 -8.41022993e-04  4.62200309e-04
 -1.73112540e-02]
138.35030375480565
Residual sum of squares: 47.441416
0.0
[ 2.65135165e-01 -3.53295754e+00 -4.93581112e-04  8.28321027e-04
 -1.60877538e-02]
149.59937590050708
Residual sum of squares: 137.591067
0.0
[ 3.02097817e-01 -2.55004002e+00 -9.14775742e-04  3.52908051e-04
 -1.76668716e-02]
145.92742362047034
Residual sum of squares: 75.379566
0.0
[ 2.48024268e-01 -2.89438123e+00 -7.81236483e-04  5.16918249e-04
 -2.02408635e-02]
164.38291511028743
Residual sum of squares: 138.346748
0.0
[ 2.73690103e-01 -3.05150983e+00 -7.64108289e-04  5.23766976e-04
 -1.78639436e-02]
153.32817869891716
Residual sum of squares: 8.200552
0.0
[ 2.6485

In [29]:
# Predict the target for one new sample described by five input values.
# NOTE(review): `regr` is the model left over from the last cross-validation
# fold, i.e. it was fitted without that fold's held-out sample -- confirm this
# is intended rather than a model fitted on all of the data.
new_sample = [[180, 8, 3000, 1200, 3500]]
y = regr.predict(new_sample)
print(y)

[112.81805447]
