In [150]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

### Utils ###

# Function for Non Linear Transformation 
def non_linear_transform(input_data):
    transform = lambda x: [1, x[0], x[1], x[0] ** 2, x[1] ** 2, x[0] * x[1], np.abs(x[0] - x[1]), np.abs((x[0] + x[1]))]
    transformed_data=[transform(x) for x in input_data]
    transformed_data=np.array(transformed_data)
    return transformed_data

def LinearRegression_model(X, Y):
    Model = LinearRegression()
    Model.fit(X, Y)
    weights = Model.coef_
    return Model, weights

def Prediction(Model, X):
    Y_predicted = Model.predict(X)
    return Y_predicted

def Weight_Decay(k, N, weights):
    L = pow(10, k)
    m = N
    w = weights[0]
    sum = w[0]**2 + w[1]**2 + w[2]**2 +w[3]**2 + w[4]**2 + w[5]**2 + w[6]**2 +w[7]**2
    reg_param = np.dot((L/m),sum)
    return reg_param

def Error_aug(E, w_decay):
    return E + w_decay

def Error(Y_pred, Y_test):
    mean_squared_error = mean_squared_error(Y_pred - Y_test)
    return mean_squared_error

def w_reg(Z, Y, L):
    I = np.eye(8)
    w_reg = np.dot(np.dot(np.linalg.inv(np.dot(Z.T,Z) + np.dot(L, I)), Z.T), Y)
    w_reg = np.array(w_reg)
    return w_reg

def Y_scaling(Y):
    scaled_Y = Y
    for i in range(len(Y)):
        if(Y[i]>0):
            scaled_Y[i] = 1
        else:
            scaled_Y[i] = -1
    return scaled_Y
    

# -------------------------------------------------------



# Data Loading and separation for Training Set
training_set = pd.read_excel(r"training_data.xlsx")
X_train = training_set[['X1', 'X2']]
Y_train = training_set[['Y']]


# Data Loading and separation for Test Set
test_set = pd.read_excel(r"test_data.xlsx")
X_test = test_set[['X1', 'X2']]
Y_test = test_set[['Y']]


# Performing  Non Linear Transformation of train_data
X_train = X_train.to_numpy()
X_train = non_linear_transform(X_train)
Y_train = Y_train.to_numpy()

# Performing  Non Linear Transformation of test_data
X_test = X_test.to_numpy()
X_test = non_linear_transform(X_test)
Y_test = Y_test.to_numpy()





In [151]:
## a) In sample and Out of sample Classification errors
print ("---------------------------- PART A --------------------------")
model, weights = LinearRegression_model(X_train, Y_train)

# Finding E_in
Y_pred = Prediction(model,X_train)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_in = mean_squared_error(Y_pred, Y_train)
print("The InSample error is: ", E_in, "using scikitlearn function")
E_in = np.mean(Y_pred != Y_train)
print("The InSample error is: ", E_in, "using manual formula")


# Finding E_Out
Y_pred = Prediction(model,X_test)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_out = mean_squared_error(Y_pred, Y_test)
print("The Out of Sample error is: ", E_out, "using scikitlearn function")
E_out = np.mean(Y_pred != Y_test)
print("The Out of Sample error is: ", E_out, "using manual formula")

---------------------------- PART A --------------------------
The InSample error is:  0.11428571428571428 using scikitlearn function
The InSample error is:  0.02857142857142857 using manual formula
The Out of Sample error is:  0.336 using scikitlearn function
The Out of Sample error is:  0.084 using manual formula


In [152]:
## b) Adding weight decay to squared in sample error (k =-3) and finding E_in and E_out
print ("-----------------------------  PART B -------------------------------")
k = -3

# Method 1: E_aug = E + reg_param
print("--------Method 1----------")
model, weights = LinearRegression_model(X_train, Y_train)

# Finding E_in
Y_pred = Prediction(model,X_train)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_in = mean_squared_error(Y_pred, Y_train)
print("The InSample error is: ", E_in)

decay = Weight_Decay(k, m, weights)
print("The Weight Decay for training", decay)
E_aug_in = E_in + decay
print("The In Sample Error after adding adding weight decay", E_aug_in)


# Finding E_out
Y_pred = Prediction(model,X_test)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_out = mean_squared_error(Y_pred, Y_test)
print("The Out of Sample error is: ", E_out)


decay = Weight_Decay(k, m, weights)
print("The Weight Decay for testing", decay)
E_aug_out = E_out + decay
print("The Out of Sample Error after adding adding weight decay", E_aug_out)


# Method 2: Find w_reg
print("--------Method 2----------")
model, weights = LinearRegression_model(X_train, Y_train)
Lambda = pow(10, k)
wreg = w_reg(X_train, Y_train, Lambda)


# Finding E_in
Y_pred = np.dot(X_train, wreg)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_in = mean_squared_error(Y_pred, Y_train)
print("The Regularized InSample error is: ", E_in)


# Finding E_Out
Y_pred = np.dot(X_test, wreg)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_out = mean_squared_error(Y_pred, Y_test)
print("The Regularized Out of Sample error is: ", E_out)


-----------------------------  PART B -------------------------------
--------Method 1----------
The InSample error is:  0.11428571428571428
The Weight Decay for training 0.0008874571215826981
The In Sample Error after adding adding weight decay 0.11517317140729698
The Out of Sample error is:  0.336
The Weight Decay for testing 0.00012424399702157775
The Out of Sample Error after adding adding weight decay 0.3361242439970216
--------Method 2----------
The Regularized InSample error is:  0.11428571428571428
The Regularized Out of Sample error is:  0.32


In [153]:
## c) E_in and E_out when k = 3
print(" ------------------- PART C ------------------------")
k = 3

# Method 1: E_aug = E + reg_param
print("--------Method 1----------")
model, weights = LinearRegression_model(X_train, Y_train)

# Finding E_in
Y_pred = Prediction(model,X_train)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_in = mean_squared_error(Y_pred, Y_train)
print("The InSample error is: ", E_in)

decay = Weight_Decay(k, m, weights)
print("The Weight Decay for training", decay)
E_aug_in = E_in + decay
print("The In Sample Error after adding adding weight decay", E_aug_in)


# Finding E_out
Y_pred = Prediction(model,X_test)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_out = mean_squared_error(Y_pred, Y_test)
print("The Out of Sample error is: ", E_out)


decay = Weight_Decay(k, m, weights)
print("The Weight Decay for testing", decay)
E_aug_out = E_out + decay
print("The Out of Sample Error after adding adding weight decay", E_aug_out)


# Method 2: Find w_reg
print("--------Method 2----------")
model, weights = LinearRegression_model(X_train, Y_train)
Lambda = pow(10, k)
wreg = w_reg(X_train, Y_train, Lambda)


# Finding E_in
Y_pred = np.dot(X_train, wreg)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_in = mean_squared_error(Y_pred, Y_train)
print("The Regularized InSample error is: ", E_in)


# Finding E_Out
Y_pred = np.dot(X_test, wreg)
Y_pred = Y_scaling(Y_pred)
m,n = Y_pred.shape
E_out = mean_squared_error(Y_pred, Y_test)
print("The Regularized Out of Sample error is: ", E_out)

 ------------------- PART C ------------------------
--------Method 1----------
The InSample error is:  0.11428571428571428
The Weight Decay for training 887.4571215826982
The In Sample Error after adding adding weight decay 887.5714072969839
The Out of Sample error is:  0.336
The Weight Decay for testing 124.24399702157774
The Out of Sample Error after adding adding weight decay 124.57999702157774
--------Method 2----------
The Regularized InSample error is:  1.4857142857142858
The Regularized Out of Sample error is:  1.744


In [154]:
## d) k = [2, 1, 0, -1, -2] Give the smalled E_in and smallest E_out
print(" --------------------------  PART D --------------------------------")

k_val = [2, 1, 0, -1, -2]
for i in range(len(k_val)):
    k = k_val[i]
    print ("-------------------------------------------------")
    print (" For K Value = ", k)
    print('\v')
    # Method 1: E_aug = E + reg_param
    print("--------Method 1----------")
    model, weights = LinearRegression_model(X_train, Y_train)

    # Finding E_in
    Y_pred = Prediction(model,X_train)
    Y_pred = Y_scaling(Y_pred)
    m,n = Y_pred.shape
    E_in = mean_squared_error(Y_pred, Y_train)

    decay = Weight_Decay(k, m, weights)
    print("The Weight Decay for training", decay)
    E_aug_in = E_in + decay
    print("The In Sample Error after adding adding weight decay", E_aug_in)


    # Finding E_out
    Y_pred = Prediction(model,X_test)
    Y_pred = Y_scaling(Y_pred)
    m,n = Y_pred.shape
    E_out = mean_squared_error(Y_pred, Y_test)

    decay = Weight_Decay(k, m, weights)
    print("The Weight Decay for testing", decay)
    E_aug_out = (E_out + decay)/1000
    print("The Out of Sample Error after adding adding weight decay", E_aug_out)


    # Method 2: Find w_reg
    print("--------Method 2----------")
    model, weights = LinearRegression_model(X_train, Y_train)
    Lambda = pow(10, k)
    wreg = w_reg(X_train, Y_train, Lambda)


    # Finding E_in
    Y_pred = np.dot(X_train, wreg)
    Y_pred = Y_scaling(Y_pred)
    m,n = Y_pred.shape
    E_in = mean_squared_error(Y_pred, Y_train)
    print("The Regularized InSample error is: ", E_in)


    # Finding E_Out
    Y_pred = np.dot(X_test, wreg)
    Y_pred = Y_scaling(Y_pred)
    m,n = Y_pred.shape
    E_out = mean_squared_error(Y_pred, Y_test)
    print("The Regularized Out of Sample error is: ", E_out)
    
    

 --------------------------  PART D --------------------------------
-------------------------------------------------
 For K Value =  2

--------Method 1----------
The Weight Decay for training 88.74571215826981
The In Sample Error after adding adding weight decay 88.85999787255552
The Weight Decay for testing 12.424399702157775
The Out of Sample Error after adding adding weight decay 0.012760399702157775
--------Method 2----------
The Regularized InSample error is:  0.8
The Regularized Out of Sample error is:  0.912
-------------------------------------------------
 For K Value =  1

--------Method 1----------
The Weight Decay for training 8.87457121582698
The In Sample Error after adding adding weight decay 8.988856930112695
The Weight Decay for testing 1.2424399702157773
The Out of Sample Error after adding adding weight decay 0.0015784399702157774
--------Method 2----------
The Regularized InSample error is:  0.22857142857142856
The Regularized Out of Sample error is:  0.496
---