In [2]:
import sys
import tqdm
import pandas as pd


import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

import numpy as np

In [3]:
# Step 2. Parse and visualize data
# parse train data: read CSV files with train features (train_x) and train targets (train_y)
x_train = pd.read_csv("D:\\Dataset\\train\\train_x.csv", header=None)
y_train = pd.read_csv("D:\\Dataset\\train\\train_y.csv", header=None)

# show first 10 samples
print(pd.concat([x_train, y_train], axis=1).head(10))

x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
print("Shape of train features:", x_train.shape)
print("Shape of train targets:", y_train.shape)

       0      1     0
0  23.98  6.459  11.8
1  21.52  6.193  11.0
2   7.74  6.750  23.7
3   4.81  7.249  35.4
4  18.06  5.454  15.2
5   5.90  6.487  24.4
6   2.94  6.998  33.4
7   6.36  7.163  31.6
8  17.44  6.749  13.4
9   4.56  6.975  34.9
Shape of train features: (354, 2)
Shape of train targets: (354, 1)


In [4]:
# Step 3. Prototypes.

# In this demo we will use linear regression to predict targets from features.
# In linear regression model with parameters thetas 
# the prediction y is calculated from features x using linear combination of x and thetas.
# For example, for the case of 2 features: 
# y = theta_0 * x_o + theta_1 * x_1

# Let's define some helper functions

def predict_fn(x, thetas):
    '''
    Predict target from features x using parameters thetas and linear regression
    
    param x: input features, shape NxM, N - number of samples to predict, M - number of features
    param thetas: vector of linear regression parameters, shape Mx1
    return y_hat: predicted scalar value for each input samples, shape Nx1
    '''    
    # TODO: calculate y_hat using linear regression
    y_hat = np.zeros((x.shape[0], 1))
    for i in range(len(x)):
        y_hat[i] = thetas[0] * x[i][0] + thetas[1] * x[i][1]
    return y_hat


def loss_fn(x_train, y_train, thetas):
    '''
    Calculate average loss value for train dataset (x_train, y_train).
    
    param x_train: input features, shape NxM, N - number of samples to predict, M - number of features
    param y_train: input tagrets, shape Nx1
    param thetas: vector of linear regression parameters, shape Mx1
    return loss: predicted scalar value for each input samples, shape Mx1
    '''
    y_predicted = predict_fn(x_train, thetas)    
    loss = np.mean(np.power(y_train - y_predicted, 2))   
    return loss


def gradient_fn(x_train, y_train, thetas):
    '''
    Calculate gradient value for linear regression.
    
    param x_train: input features, shape NxM, N - number of samples to predict, M - number of features
    param y_train: input tagrets, shape Nx1
    param thetas: vector of linear regression parameters, shape Mx1
    return g: predicted scalar value for each input samples, shape Mx1
    '''  
    # TODO: calculate vector gradient
    g = np.zeros_like(thetas)
    for i in range(len(x_train)):
        g[0] += -2 * x_train[i][0] * (y_train[i] - x_train[i][0] * thetas[0] - x_train[i][1] * thetas[1])
        g[1] += -2 * x_train[i][1] * (y_train[i] - x_train[i][0] * thetas[0] - x_train[i][1] * thetas[1])
    g[0] = g[0] / len(x_train)
    g[1] = g[1] / len(x_train)
    return g

In [7]:
# Step 4. Gradient descent.

# now let's find optimal parameters using gradient descent
MAX_ITER = 1000000
thetas = np.random.randn(2, 1)
alpha = 1e-3

progress = tqdm.tqdm(range(MAX_ITER), "Training", file=sys.stdout)
loss_val = loss_fn(x_train, y_train, thetas)
progress.set_postfix(loss_val=loss_val)

for iter in progress:
    gradient = gradient_fn(x_train, y_train, thetas)
    print(gradient)
    thetas_2 = thetas - alpha * gradient
    
    # TODO: add stop conditions
    if (abs(thetas_2[0] - thetas[0]) < 0.00001) and (abs(thetas_2[1] -thetas[1]) < 0.00001):
        progress.close()
        loss_val = loss_fn(x_train, y_train, thetas)
        print("Stop condition detected")
        print("Final loss:", loss_val)
        break
    
    if iter % 100 == 0:
        loss_val = loss_fn(x_train, y_train, thetas_2)
        progress.set_postfix(loss_val=f"{loss_val:8.4f}", thetas=f"{thetas_2[0][0]:5.4f} {thetas_2[1][0]:5.4f}")
    thetas = thetas_2
    
progress.close()

Training:   0%|                                                             | 0/1000000 [00:00<?, ?it/s, loss_val=1e+3][[-646.55463344]
 [-389.33786238]]
Training:   0%|                                  | 0/1000000 [00:00<?, ?it/s, loss_val=565.1414, thetas=0.6941 -0.9062][[-319.47505032]
 [-259.95767539]]
[[-147.60204606]
 [-190.60441113]]
[[ -57.49408377]
 [-152.90366086]]
[[ -10.45663975]
 [-131.90501522]]
[[  13.89737385]
 [-119.73088825]]
[[  26.30926219]
 [-112.23022153]]
[[  32.43818317]
 [-107.21539387]]
Training:   0%|                        | 8/1000000 [00:00<3:30:17, 79.26it/s, loss_val=565.1414, thetas=0.6941 -0.9062][[  35.26562689]
 [-103.53328745]]
[[  36.36228314]
 [-100.57585259]]
[[ 36.55552609]
 [-98.02223483]]
[[ 36.28109722]
 [-95.70290712]]
[[ 35.76839932]
 [-93.52808379]]
[[ 35.13812372]
 [-91.45000322]]
[[ 34.45368784]
 [-89.44305522]]
[[ 33.74833652]
 [-87.4933103 ]]
[[ 33.03942091]
 [-85.59300275]]
Training:   0%|                       | 17/1000000 [00:00<3:23

[[ 1.0915778]
 [-2.8254735]]
[[ 1.06797033]
 [-2.7643672 ]]
[[ 1.04487342]
 [-2.70458244]]
[[ 1.02227602]
 [-2.64609064]]
[[ 1.00016734]
 [-2.58886384]]
[[ 0.97853679]
 [-2.53287468]]
[[ 0.95737405]
 [-2.4780964 ]]
[[ 0.936669 ]
 [-2.4245028]]
Training:   0%|                       | 180/1000000 [00:02<3:17:53, 84.21it/s, loss_val=32.7292, thetas=-0.4176 4.2684][[ 0.91641173]
 [-2.37206826]]
[[ 0.89659256]
 [-2.32076772]]
[[ 0.87720203]
 [-2.27057666]]
[[ 0.85823084]
 [-2.22147107]]
[[ 0.83966995]
 [-2.17342748]]
[[ 0.82151047]
 [-2.12642293]]
[[ 0.80374373]
 [-2.08043495]]
[[ 0.78636123]
 [-2.03544154]]
[[ 0.76935465]
 [-1.9914212 ]]
Training:   0%|                       | 189/1000000 [00:02<3:17:54, 84.19it/s, loss_val=32.7292, thetas=-0.4176 4.2684][[ 0.75271588]
 [-1.94835289]]
[[ 0.73643695]
 [-1.90621601]]
[[ 0.72051008]
 [-1.86499042]]
[[ 0.70492767]
 [-1.82465642]]
[[ 0.68968225]
 [-1.78519471]]
[[ 0.67476655]
 [-1.74658644]]
[[ 0.66017342]
 [-1.70881315]]
[[ 0.6458959 ]
 [-1.67

[[ 0.0187021 ]
 [-0.04840909]]
[[ 0.01829763]
 [-0.04736215]]
Training:   0%|                       | 360/1000000 [00:04<3:16:25, 84.82it/s, loss_val=27.9992, thetas=-0.6529 4.8776][[ 0.01790191]
 [-0.04633786]]
[[ 0.01751475]
 [-0.04533571]]
[[ 0.01713596]
 [-0.04435524]]
[[ 0.01676536]
 [-0.04339597]]
[[ 0.01640278]
 [-0.04245745]]
[[ 0.01604803]
 [-0.04153923]]
[[ 0.01570097]
 [-0.04064086]]
[[ 0.0153614 ]
 [-0.03976192]]
[[ 0.01502918]
 [-0.038902  ]]
Training:   0%|                       | 369/1000000 [00:04<3:16:53, 84.62it/s, loss_val=27.9992, thetas=-0.6529 4.8776][[ 0.01470415]
 [-0.03806066]]
[[ 0.01438614]
 [-0.03723753]]
[[ 0.01407501]
 [-0.0364322 ]]
[[ 0.01377061]
 [-0.03564428]]
[[ 0.0134728]
 [-0.0348734]]
[[ 0.01318142]
 [-0.0341192 ]]
[[ 0.01289635]
 [-0.03338131]]
[[ 0.01261744]
 [-0.03265937]]
[[ 0.01234456]
 [-0.03195305]]
Training:   0%|                       | 378/1000000 [00:04<3:18:21, 83.99it/s, loss_val=27.9992, thetas=-0.6529 4.8776][[ 0.01207759]
 [-0.03126

In [12]:
for i in range(10):
    y_hat = predict_fn(x_train, thetas)
    print("Target: ", y_train[i][0], ", predicted:", y_hat[i][0])

Target:  11.8 , predicted: 15.825663061797137
Target:  11.0 , predicted: 16.139711609970796
Target:  23.7 , predicted: 27.899383727052204
Target:  35.4 , predicted: 32.259012900433305
Target:  15.2 , predicted: 14.798901476444952
Target:  24.4 , predicted: 27.821420233616394
Target:  33.4 , predicted: 32.259351209667344
Target:  31.6 , predicted: 30.822201690957264
Target:  13.4 , predicted: 21.53211004080499
Target:  34.9 , predicted: 31.084403500872533
