In [1]:
import numpy as np
import pandas as pd
import sklearn.datasets as Datasets
import sklearn.model_selection as cv

## 1 Feature Linear Model

In [2]:
#Assuming input as numpy.ndarray objects
def fit1D(X_train,Y_train):
    numerator = (X_train*Y_train).mean() - X_train.mean()*Y_train.mean()
    denominator = (X_train**2).mean() - (X_train.mean()**2)
    m = numerator / denominator
    b = Y_train.mean() - m*X_train.mean()
    return (m,b)
# X_train = np.array([[1],[2],[3]])
# Y_train = np.array([[1],[2],[3]])
# fit1D(X_train,Y_train)

In [3]:
#Assuming input as numpy.ndarray objects
def predict1D(m,b,X_test):
    result = m*X_test + b
    return result

In [4]:
#Same for multi dimensional linear model
def score(Y_true,Y_predicted):
    nr = ((Y_true - Y_predicted)**2).sum()
    dr = ((Y_true - Y_true.mean())**2).sum()
    result = 1 - (nr/dr)
    return result

## Testing Boston Dataset with 1 Parameter

In [5]:
boston = Datasets.load_boston()
df = pd.DataFrame(boston.data)

In [6]:
X = df.values
Y = boston.target

In [7]:
X_test, X_train, Y_test, Y_train = cv.train_test_split(X,Y,test_size=0.2,random_state=0)
for i in range(X_train.shape[-1]):
    m,b = fit1D(X_train[:,i],Y_train)
    Y_predicted = predict1D(m,b,X_test[:,i])
    print("Score for i = ",i," is = ",score(Y_test,Y_predicted))

Score for i =  0  is =  0.141856856265
Score for i =  1  is =  0.100343458973
Score for i =  2  is =  0.23389658228
Score for i =  3  is =  0.0295589460117
Score for i =  4  is =  0.178574723461
Score for i =  5  is =  0.487736153052
Score for i =  6  is =  0.132826574418
Score for i =  7  is =  0.0517866310924
Score for i =  8  is =  0.128925213848
Score for i =  9  is =  0.216331015493
Score for i =  10  is =  0.23503993691
Score for i =  11  is =  0.108349870762
Score for i =  12  is =  0.56550783609


## Testing HW Dataset with 1 Feature

In [8]:
test_data = pd.read_excel(r"../data/L3_homework/test.xlsx")
train_data = pd.read_excel(r"../data/L3_homework/train.xlsx")
X_train = train_data.iloc[:,:-1].values
Y_train = train_data.iloc[:,-1].values
X_test = test_data.iloc[:,:-1].values
Y_test = test_data.iloc[:,-1].values

In [9]:
for i in range(X_train.shape[-1]):
    m,b = fit1D(X_train[:,i],Y_train)
    Y_predicted = predict1D(m,b,X_test[:,i])
    print("Score for i = ",i," is = ",score(Y_test,Y_predicted))

Score for i =  0  is =  0.898532626426
Score for i =  1  is =  0.749524003641
Score for i =  2  is =  0.279978675024
Score for i =  3  is =  0.161680537095


## 2 Feature Linear Model

In [10]:
#Assuming input as numpy.ndarray objects
def fit2D(X1_train,X2_train,Y_train):
    a = (X1_train * Y_train).mean()
    f = (X2_train * Y_train).mean()
    c = (X1_train**2).mean()
    d = (X2_train**2).mean()
    e = (X1_train * X2_train).mean()
    g = X1_train.mean()
    h = X2_train.mean()
    i = Y_train.mean()
    #Calculation m2
    nr = ((f - h*i)*(c + g*g)) - ((a - g*i)*(e + g*h))
    dr = ((d + h*h)*(c + g*g)) - ((e + g*h)**2)
    m2 = nr/dr
    
    #Caclulating m1
    m1 = ((a - g*i) - ((e + g*h)*m2)) / (c + g*g)
    
    #Calculating b
    b = i - g*m1 - h*m2
    return (m1,m2,b)

In [11]:
#Assuming input as numpy.ndarray objects
def predict2D(m1,m2,b,X1_test,X2_test):
    result = m1*X1_test + m2*X2_test + b
    return result

## Testing Boston Dataset with 2 Features

In [12]:
boston = Datasets.load_boston()
df = pd.DataFrame(boston.data)

In [13]:
X = df.values
Y = boston.target

In [14]:
X_test, X_train, Y_test, Y_train = cv.train_test_split(X,Y,test_size=0.2,random_state=0)
for i in range(0,X_train.shape[-1]):
    for j in range(i+1,X_train.shape[-1]):
        m1,m2,b = fit2D(X_train[:,i],X_train[:,j],Y_train)
        Y_predicted = predict2D(m1,m2,b,X_test[:,i],X_test[:,j])
        print("Score for i = ",i," and j = ",j," is ",score(Y_test,Y_predicted))

Score for i =  0  and j =  1  is  0.208696411117
Score for i =  0  and j =  2  is  0.0934761874176
Score for i =  0  and j =  3  is  0.150537235596
Score for i =  0  and j =  4  is  0.121913130804
Score for i =  0  and j =  5  is  0.172826341114
Score for i =  0  and j =  6  is  0.108528644254
Score for i =  0  and j =  7  is  0.151483249511
Score for i =  0  and j =  8  is  0.0890315095233
Score for i =  0  and j =  9  is  0.106413771197
Score for i =  0  and j =  10  is  0.125574354649
Score for i =  0  and j =  11  is  0.156716958886
Score for i =  0  and j =  12  is  0.142526663073
Score for i =  1  and j =  2  is  0.158494989886
Score for i =  1  and j =  3  is  0.0991002409327
Score for i =  1  and j =  4  is  0.119443430305
Score for i =  1  and j =  5  is  0.0794022694096
Score for i =  1  and j =  6  is  0.124388365042
Score for i =  1  and j =  7  is  0.0937521403313
Score for i =  1  and j =  8  is  0.158004897318
Score for i =  1  and j =  9  is  0.149734175427
Score for i 

## Testing HW Dataset with 2 Features

In [15]:
test_data = pd.read_excel(r"../data/L3_homework/test.xlsx")
train_data = pd.read_excel(r"../data/L3_homework/train.xlsx")
X_train = train_data.iloc[:,:-1].values
Y_train = train_data.iloc[:,-1].values
X_test = test_data.iloc[:,:-1].values
Y_test = test_data.iloc[:,-1].values

In [16]:
#Score will be bad but better than 1D because of very less number of features provided to learning model
for i in range(0,X_train.shape[-1]):
    for j in range(i+1,X_train.shape[-1]):
        m1,m2,b = fit2D(X_train[:,i],X_train[:,j],Y_train)
        Y_predicted = predict2D(m1,m2,b,X_test[:,i],X_test[:,j])
        print("Score for i = ",i," and j = ",j," is ",score(Y_test,Y_predicted))

Score for i =  0  and j =  1  is  0.520658169433
Score for i =  0  and j =  2  is  0.899447313958
Score for i =  0  and j =  3  is  0.724542269279
Score for i =  1  and j =  2  is  0.756768379514
Score for i =  1  and j =  3  is  0.643352806644
Score for i =  2  and j =  3  is  0.150318336103
