In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [40]:
# Load the dataset from CSV. header=0 marks the file's first row as a header
# row, and `names` replaces whatever labels that row holds with X1, X2 and Y.
data = pd.read_csv("FoDS-A1.csv", names = ["X1","X2","Y"], header=0)

# Data Preprocessing

Preprocessing the data involves:

- `Normalizing` the data:<br>
<br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; <font size="4"> $X_{norm} = \frac{X - X_{min}}{X_{max} - X_{min}}$ </font>
<br>
<br>                 
- `Shuffling` the data
<br>
<br>
- `Splitting` the data into training and test sets

In [41]:
def normalize(data):
    """Min-max scale the ``X1`` and ``X2`` columns of ``data`` in place.

    Each of the two feature columns is rescaled with its own extrema as
    ``(x - min) / (max - min)``. No other column is modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding numeric ``X1`` and ``X2`` columns. The frame itself is
        modified; nothing is returned.

    Notes
    -----
    A constant column (``max == min``) divides zero by zero and therefore
    becomes NaN.
    """
    for column in ("X1", "X2"):
        # Look the extrema up per column, by label. Indexing the result of
        # data.min() with an integer position is deprecated in pandas and
        # silently picks the wrong column when X1/X2 are not the first two.
        col_min = data[column].min()
        col_max = data[column].max()
        data[column] = (data[column] - col_min) / (col_max - col_min)

In [49]:
def split(data, fraction=0.7):
    """Shuffle ``data`` and cut it into a training part and a test part.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to divide.
    fraction : float, default 0.7
        Share of the rows placed in the training part; the row count is
        ``int(fraction * len(data))``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)``, each with a fresh 0-based index.
    """
    # frac=1 draws every row once, i.e. a permutation; the fixed seed makes
    # the shuffle repeatable from run to run.
    permuted = data.sample(frac=1, random_state=0)
    cut = int(fraction * len(data))

    head_rows = permuted[:cut]
    tail_rows = permuted[cut:]

    # Drop the shuffled original index so both parts are numbered from 0.
    return head_rows.reset_index(drop=True), tail_rows.reset_index(drop=True)

In [None]:
def generate_vectors(train_X, degree=9):
    """Expand two-feature samples into all monomials up to ``degree``.

    For each row ``(x1, x2)`` of ``train_X`` the terms ``x1**a * x2**b`` with
    ``a + b <= degree`` are generated. They are ordered by increasing power
    of ``x2`` and, within one power of ``x2``, by increasing power of ``x1``,
    so the first term is the constant 1.

    Parameters
    ----------
    train_X : array-like supporting ``.shape`` and ``train_X[i][j]``
        Samples, one per row; columns 0 and 1 are read as ``x1`` and ``x2``.
    degree : int, default 9
        Highest total degree of the generated monomials.

    Returns
    -------
    tuple
        ``(features, W)`` where ``features`` has shape
        ``(n_samples, n_terms)`` and ``W`` is a zero column vector of shape
        ``(n_terms, 1)``, with ``n_terms = (degree + 1) * (degree + 2) / 2``.
    """
    n_samples = train_X.shape[0]
    n_terms = int(((degree + 1) * (degree + 2)) / 2)
    # Filled one sample per column, then transposed on return.
    design = np.zeros((n_terms, n_samples))

    for col in range(n_samples):
        x1 = train_X[col][0]
        x2 = train_X[col][1]

        row = 0
        x2_power = 1
        for x2_exponent in range(degree + 1):
            # Powers are built by repeated multiplication, restarting x1 at
            # x1**0 for every new power of x2.
            x1_power = 1
            for _ in range(degree - x2_exponent + 1):
                design[row][col] = x1_power * x2_power
                row += 1
                x1_power = x1_power * x1
            x2_power = x2_power * x2

    initial_weights = np.zeros((n_terms, 1))
    return design.T, initial_weights

In [None]:
def calculate_cost_ridge(W, X, Y, penalty):
    """Return the ridge (L2-regularised) squared-error cost as a scalar.

    The cost is ``sum((X @ W - Y)**2) / 2 + penalty * sum(W[1:]**2) / 2``.
    The first weight ``W[0]`` is left out of the penalty term.

    Parameters
    ----------
    W : numpy.ndarray
        Weights; assumed to be a column vector ``(n_terms, 1)`` so that
        ``np.dot(X, W)`` lines up with ``Y``.
    X : numpy.ndarray
        Feature matrix, one sample per row.
    Y : numpy.ndarray
        Targets, same shape as ``np.dot(X, W)``.
    penalty : float
        Regularisation strength.

    Returns
    -------
    numpy.float64
        The total cost.
    """
    residuals = np.dot(X, W) - Y
    cost = np.sum(np.square(residuals)) / 2
    # Sum the squares instead of computing W[1:].T @ W[1:]: the matrix
    # product is a (1, 1) array, which would turn the returned cost into an
    # array that callers then have to squeeze into a scalar slot.
    cost += penalty * np.sum(np.square(W[1:])) / 2
    return cost

In [None]:
def calculate_cost_lasso(W, X, Y, penalty):
    """Return the lasso (L1-regularised) squared-error cost.

    The cost is ``sum((X @ W - Y)**2) / 2 + penalty * sum(|W[1:]|)``.
    The first weight ``W[0]`` is left out of the penalty term.

    Parameters
    ----------
    W : numpy.ndarray
        Weights, first entry unpenalised.
    X : numpy.ndarray
        Feature matrix, one sample per row.
    Y : numpy.ndarray
        Targets, same shape as ``np.dot(X, W)``.
    penalty : float
        Regularisation strength.
    """
    residuals = np.dot(X, W) - Y
    squared_error = np.sum(np.square(residuals)) / 2
    l1_term = penalty * np.sum(np.absolute(W[1:]))
    return squared_error + l1_term

In [None]:
def gradient_descent_ridge(X, Y, W, penalty, learning_rate=0.01, iterations=1000):
    """Fit ridge-regularised linear regression by batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one sample per row.
    Y : numpy.ndarray
        Targets; assumed to have the same shape as ``np.dot(X, W)``.
    W : numpy.ndarray
        Initial weights. Updated in place and also returned.
    penalty : float
        L2 regularisation strength; not applied to ``W[0]``.
    learning_rate : float, default 0.01
        Step size of each update.
    iterations : int, default 1000
        Number of full-batch updates.

    Returns
    -------
    tuple
        ``(W, cost_history)`` where ``cost_history[it]`` is the ridge cost
        measured after update ``it``.
    """
    cost_history = np.zeros(iterations)
    for it in range(iterations):
        # Gradient of the squared-error term, computed once from the weights
        # as they stood before this step and shared by both updates below.
        error_gradient = X.T.dot(np.dot(X, W) - Y)

        # W[0] takes a plain step; the remaining weights also get the
        # penalty's gradient.
        W[0] = W[0] - learning_rate * error_gradient[0]
        W[1:] = W[1:] - learning_rate * (error_gradient[1:] + penalty * W[1:])

        # The cost function requires the penalty; omitting it raises TypeError.
        cost_history[it] = calculate_cost_ridge(W, X, Y, penalty)

    return W, cost_history

In [None]:
#TBD
def gradient_descent_lasso(X, Y, W, penalty, learning_rate=0.0001, iterations=1000):
    """Fit lasso-regularised linear regression by batch (sub)gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one sample per row.
    Y : numpy.ndarray
        Targets; assumed to have the same shape as ``np.dot(X, W)``.
    W : numpy.ndarray
        Initial weights. Updated in place and also returned.
    penalty : float
        L1 regularisation strength; not applied to ``W[0]``.
    learning_rate : float, default 0.0001
        Step size of each update.
    iterations : int, default 1000
        Number of full-batch updates.

    Returns
    -------
    tuple
        ``(W, cost_history)`` where ``cost_history[it]`` is the lasso cost
        measured after update ``it``.
    """
    cost_history = np.zeros(iterations)
    for it in range(iterations):
        # Gradient of the squared-error term, computed once from the weights
        # as they stood before this step and shared by both updates below.
        error_gradient = X.T.dot(np.dot(X, W) - Y)

        # Sub-gradient of |w|. np.sign yields 0 for a zero weight, whereas
        # w / |w| evaluates 0/0 = NaN there and poisons every weight when
        # training starts from an all-zero W.
        l1_subgradient = penalty * np.sign(W[1:])

        W[0] = W[0] - learning_rate * error_gradient[0]
        W[1:] = W[1:] - learning_rate * (error_gradient[1:] + l1_subgradient)

        # The cost function requires the penalty; omitting it raises TypeError.
        cost_history[it] = calculate_cost_lasso(W, X, Y, penalty)

    return W, cost_history

In [None]:
def stocashtic_gradient_descent_ridge(X, Y, W, penalty, learning_rate=0.01, iterations=10):
    """Fit ridge-regularised linear regression by stochastic gradient descent.

    Every iteration performs ``int(len(Y) / 10)`` single-sample updates, each
    on a row drawn uniformly at random (with replacement) via
    ``np.random.randint``.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one sample per row.
    Y : numpy.ndarray
        Targets; assumed to be row-aligned with ``X`` and shaped like
        ``np.dot(X, W)``.
    W : numpy.ndarray
        Initial weights. Updated in place and also returned.
    penalty : float
        L2 regularisation strength; not applied to ``W[0]``.
    learning_rate : float, default 0.01
        Step size of each update.
    iterations : int, default 10
        Number of passes (each made of ``int(len(Y) / 10)`` updates).

    Returns
    -------
    tuple
        ``(W, cost_history)`` where ``cost_history[it]`` is the sum of the
        single-sample ridge costs recorded during pass ``it``.
    """
    n_samples = len(Y)
    updates_per_pass = int(n_samples / 10)
    cost_history = np.zeros(iterations)

    for it in range(iterations):
        cost = 0.0
        for _ in range(updates_per_pass):
            # Draw from the whole data set. Bounding the index by the number
            # of updates would only ever visit the first tenth of the rows.
            idx = np.random.randint(0, n_samples)
            X_i = X[idx:idx + 1]
            Y_i = Y[idx:idx + 1]

            # Single-sample gradient of the squared-error term, taken before
            # either weight slice is modified.
            error_gradient = X_i.T.dot(np.dot(X_i, W) - Y_i)
            W[0] = W[0] - learning_rate * error_gradient[0]
            W[1:] = W[1:] - learning_rate * (error_gradient[1:] + penalty * W[1:])

            # The cost function requires the penalty; omitting it raises TypeError.
            cost += calculate_cost_ridge(W, X_i, Y_i, penalty)
        cost_history[it] = cost

    return W, cost_history

In [None]:
def stocashtic_gradient_descent_lasso(X, Y, W, penalty, learning_rate=0.01, iterations=10):
    """Fit lasso-regularised linear regression by stochastic (sub)gradient descent.

    Every iteration performs ``int(len(Y) / 10)`` single-sample updates, each
    on a row drawn uniformly at random (with replacement) via
    ``np.random.randint``.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one sample per row.
    Y : numpy.ndarray
        Targets; assumed to be row-aligned with ``X`` and shaped like
        ``np.dot(X, W)``.
    W : numpy.ndarray
        Initial weights. Updated in place and also returned.
    penalty : float
        L1 regularisation strength; not applied to ``W[0]``.
    learning_rate : float, default 0.01
        Step size of each update.
    iterations : int, default 10
        Number of passes (each made of ``int(len(Y) / 10)`` updates).

    Returns
    -------
    tuple
        ``(W, cost_history)`` where ``cost_history[it]`` is the sum of the
        single-sample lasso costs recorded during pass ``it``.
    """
    n_samples = len(Y)
    updates_per_pass = int(n_samples / 10)
    cost_history = np.zeros(iterations)

    for it in range(iterations):
        cost = 0.0
        for _ in range(updates_per_pass):
            # Draw from the whole data set. Bounding the index by the number
            # of updates would only ever visit the first tenth of the rows.
            idx = np.random.randint(0, n_samples)
            X_i = X[idx:idx + 1]
            Y_i = Y[idx:idx + 1]

            # Single-sample gradient of the squared-error term, taken before
            # either weight slice is modified.
            error_gradient = X_i.T.dot(np.dot(X_i, W) - Y_i)

            # Sub-gradient of |w|. np.sign yields 0 for a zero weight, whereas
            # w / |w| evaluates 0/0 = NaN there and poisons every weight when
            # training starts from an all-zero W.
            l1_subgradient = penalty * np.sign(W[1:])

            W[0] = W[0] - learning_rate * error_gradient[0]
            W[1:] = W[1:] - learning_rate * (error_gradient[1:] + l1_subgradient)

            # The cost function requires the penalty; omitting it raises TypeError.
            cost += calculate_cost_lasso(W, X_i, Y_i, penalty)
        cost_history[it] = cost

    return W, cost_history

In [None]:
def errorloop(cost_history):
    """Print every 50th entry of ``cost_history`` (indices 0, 50, 100, ...)."""
    for step in range(0, len(cost_history), 50):
        print(cost_history[step])

In [53]:
# Rescale the X1 and X2 columns of `data` in place, then shuffle the frame and
# split it into train/test parts (split's default puts a 0.7 fraction in train).
normalize(data)
train_data, test_data = split(data)

Unnamed: 0,X1,X2,Y
0,0.250689,0.4,20
1,0.862259,1.0,10
2,0.710744,0.9,14
3,0.559229,0.5,16
4,0.490358,0.7,20
...,...,...,...
1150,0.672176,0.6,10
1151,0.504132,0.8,20
1152,0.415978,0.5,16
1153,0.567493,0.7,18
