In [2]:
import sys
import tqdm
import pandas as pd


import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

import numpy as np

In [3]:
# Step 2. Parse and visualize data
# parse train data: read CSV files with train features (train_x) and train targets (train_y)
x_train = pd.read_csv("D:\\Dataset\\train\\train_x.csv", header=None)
y_train = pd.read_csv("D:\\Dataset\\train\\train_y.csv", header=None)

# show first 10 samples
print(pd.concat([x_train, y_train], axis=1).head(10))

x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
print("Shape of train features:", x_train.shape)
print("Shape of train targets:", y_train.shape)

       0      1     0
0  23.98  6.459  11.8
1  21.52  6.193  11.0
2   7.74  6.750  23.7
3   4.81  7.249  35.4
4  18.06  5.454  15.2
5   5.90  6.487  24.4
6   2.94  6.998  33.4
7   6.36  7.163  31.6
8  17.44  6.749  13.4
9   4.56  6.975  34.9
Shape of train features: (354, 2)
Shape of train targets: (354, 1)


In [35]:
# Step 3. Prototypes.

# In this demo we will use linear regression to predict targets from features.
# In linear regression model with parameters thetas 
# the prediction y is calculated from features x using linear combination of x and thetas.
# For example, for the case of 2 features: 
# y = theta_0 * x_o + theta_1 * x_1

# Let's define some helper functions

def predict_fn(x, thetas):
    '''
    Predict target from features x using parameters thetas and linear regression
    
    param x: input features, shape NxM, N - number of samples to predict, M - number of features
    param thetas: vector of linear regression parameters, shape Mx1
    return y_hat: predicted scalar value for each input samples, shape Nx1
    '''    
    # TODO: calculate y_hat using linear regression
    y_hat = np.zeros((x.shape[0], 1))
    for i in range(len(x)):
        y_hat[i] = thetas[0] * x[i][0] + thetas[1] * x[i][1]
    #print(y_hat)
    return y_hat


def loss_fn(x_train, y_train, thetas):
    '''
    Calculate average loss value for train dataset (x_train, y_train).
    
    param x_train: input features, shape NxM, N - number of samples to predict, M - number of features
    param y_train: input tagrets, shape Nx1
    param thetas: vector of linear regression parameters, shape Mx1
    return loss: predicted scalar value for each input samples, shape Mx1
    '''
    y_predicted = predict_fn(x_train, thetas)    
    loss = np.mean(np.power(y_train - y_predicted, 2))   
    return loss


def gradient_fn(x_train, y_train, thetas):
    '''
    Calculate gradient value for linear regression.
    
    param x_train: input features, shape NxM, N - number of samples to predict, M - number of features
    param y_train: input tagrets, shape Nx1
    param thetas: vector of linear regression parameters, shape Mx1
    return g: predicted scalar value for each input samples, shape Mx1
    '''  
    # TODO: calculate vector gradient
    g = np.zeros_like(thetas)
    for i in range(len(x_train)):
        g[0] += -2 * x_train[i][0] * (y_train[i] - x_train[i][0] * thetas[0] - x_train[i][1] * thetas[1])
        g[1] += -2 * x_train[i][1] * (y_train[i] - x_train[i][0] * thetas[0] - x_train[i][1] * thetas[1])
    g[0] = g[0] / len(x_train)
    g[1] = g[1] / len(x_train)
    print(g)
    return g

In [57]:
# Step 4. Gradient descent.

# now let's find optimal parameters using gradient descent
MAX_ITER = 100000
thetas = np.random.randn(2, 1)
alpha = 1e-3

progress = tqdm.tqdm(range(MAX_ITER), "Training", file=sys.stdout)
loss_val = loss_fn(x_train, y_train, thetas)
progress.set_postfix(loss_val=loss_val)

for iter in progress:
    gradient = gradient_fn(x_train, y_train, thetas)
    #print(gradient)
    thetas_2 = thetas - alpha * gradient
    
    # TODO: add stop conditions
    if abs(thetas_2[0] - thetas[0]) < 0.000001:
        progress.close()
        loss_val = loss_fn(x_train, y_train, thetas)
        print("Stop condition detected")
        print("Final loss:", loss_val)
        break
    
    if iter % 100 == 0:
        loss_val = loss_fn(x_train, y_train, thetas_2)
        progress.set_postfix(loss_val=f"{loss_val:8.4f}", thetas=f"{thetas_2[0][0]:5.4f} {thetas_2[1][0]:5.4f}")
    thetas = thetas_2
    
progress.close()

Training:   0%|                                                              | 0/100000 [00:00<?, ?it/s, loss_val=1e+3][[-826.16659546]
 [-389.20010103]]
Training:   0%|                                   | 0/100000 [00:00<?, ?it/s, loss_val=365.8445, thetas=-0.5373 1.7634][[-424.66128093]
 [-232.57036652]]
[[-213.34676743]
 [-149.4506457 ]]
[[-102.2346524 ]
 [-105.07460858]]
[[-43.9121241 ]
 [-81.12435871]]
[[-13.39851516]
 [-67.94832525]]
[[  2.46783825]
 [-60.46104043]]
[[ 10.621428  ]
 [-55.98282204]]
[[ 14.71578754]
 [-53.10154919]]
Training:   0%|                           | 9/100000 [00:00<19:14, 86.59it/s, loss_val=365.8445, thetas=-0.5373 1.7634][[ 16.67578728]
 [-51.07295687]]
[[ 17.51550501]
 [-49.50466407]]
[[ 17.76914389]
 [-48.18969505]]
[[ 17.71807359]
 [-47.01876521]]
[[ 17.51045334]
 [-45.93406633]]
[[ 17.22425861]
 [-44.9049169 ]]
[[ 16.90048843]
 [-43.91493271]]
[[ 16.56066161]
 [-42.95526724]]
[[ 16.21603502]
 [-42.02105003]]
[[ 15.87246051]
 [-41.10951004]]
Training

[[ 0.4306388]
 [-1.1146787]]
[[ 0.42132541]
 [-1.09057163]]
[[ 0.41221344]
 [-1.06698592]]
Training:   0%|                          | 186/100000 [00:02<18:38, 89.25it/s, loss_val=29.1895, thetas=-0.5363 4.5758][[ 0.40329853]
 [-1.0439103 ]]
[[ 0.39457642]
 [-1.02133373]]
[[ 0.38604295]
 [-0.99924543]]
[[ 0.37769403]
 [-0.97763482]]
[[ 0.36952567]
 [-0.95649159]]
[[ 0.36153397]
 [-0.93580562]]
[[ 0.3537151 ]
 [-0.91556702]]
[[ 0.34606533]
 [-0.89576612]]
[[ 0.338581  ]
 [-0.87639346]]
Training:   0%|                          | 195/100000 [00:02<18:38, 89.23it/s, loss_val=29.1895, thetas=-0.5363 4.5758][[ 0.33125854]
 [-0.85743976]]
[[ 0.32409443]
 [-0.83889598]]
[[ 0.31708527]
 [-0.82075324]]
[[ 0.31022769]
 [-0.80300287]]
[[ 0.30351842]
 [-0.78563639]]
[[ 0.29695425]
 [-0.76864549]]
Training:   0%|                          | 195/100000 [00:02<18:38, 89.23it/s, loss_val=28.0135, thetas=-0.6425 4.8506][[ 0.29053204]
 [-0.75202206]]
[[ 0.28424873]
 [-0.73575813]]
[[ 0.2781013 ]
 [-0.71984

[[ 0.00737817]
 [-0.01909789]]
[[ 0.00721861]
 [-0.01868486]]
Training:   0%|                          | 371/100000 [00:04<18:15, 90.90it/s, loss_val=27.9986, thetas=-0.6544 4.8815][[ 0.00706249]
 [-0.01828077]]
[[ 0.00690975]
 [-0.01788541]]
[[ 0.00676031]
 [-0.0174986 ]]
[[ 0.00661411]
 [-0.01712016]]
[[ 0.00647107]
 [-0.01674991]]
[[ 0.00633112]
 [-0.01638766]]
[[ 0.00619419]
 [-0.01603324]]
[[ 0.00606023]
 [-0.01568649]]
[[ 0.00592917]
 [-0.01534724]]
[[ 0.00580094]
 [-0.01501533]]
Training:   0%|                          | 381/100000 [00:04<18:24, 90.18it/s, loss_val=27.9986, thetas=-0.6544 4.8815][[ 0.00567548]
 [-0.01469059]]
[[ 0.00555274]
 [-0.01437288]]
[[ 0.00543265]
 [-0.01406204]]
[[ 0.00531516]
 [-0.01375792]]
[[ 0.00520021]
 [-0.01346038]]
[[ 0.00508774]
 [-0.01316927]]
[[ 0.00497771]
 [-0.01288446]]
[[ 0.00487006]
 [-0.01260581]]
[[ 0.00476473]
 [-0.01233319]]
[[ 0.00466169]
 [-0.01206646]]
Training:   0%|                          | 391/100000 [00:04<18:21, 90.42it/s, l

In [58]:
for i in range(10):
    y_hat = predict_fn(x_train, thetas)
    print("Target: ", y_train[i][0], ", predicted:", y_hat[i][0])

Target:  11.8 , predicted: 15.825991165907341
Target:  11.0 , predicted: 16.139959669607084
Target:  23.7 , predicted: 27.898943989361573
Target:  35.4 , predicted: 32.25838240789613
Target:  15.2 , predicted: 14.799079627889157
Target:  24.4 , predicted: 27.820928115369096
Target:  33.4 , predicted: 32.25866557751542
Target:  31.6 , predicted: 30.82165129384511
Target:  13.4 , predicted: 21.53210871603006
Target:  34.9 , predicted: 31.083793758712453
