In [313]:
import sys
import tqdm
import pandas as pd


import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

import numpy as np

In [318]:
# Step 2. Parse and visualize data
# parse train data: read CSV files with train features (train_x) and train targets (train_y)
x_train = pd.read_csv("D:\\Dataset\\train\\train_x.csv", header=None)
y_train = pd.read_csv("D:\\Dataset\\train\\train_y.csv", header=None)

# show first 10 samples
print(pd.concat([x_train, y_train], axis=1).head(10))

x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
print("Shape of train features:", x_train.shape)
print("Shape of train targets:", y_train.shape)

       0      1     0
0  23.98  6.459  11.8
1  21.52  6.193  11.0
2   7.74  6.750  23.7
3   4.81  7.249  35.4
4  18.06  5.454  15.2
5   5.90  6.487  24.4
6   2.94  6.998  33.4
7   6.36  7.163  31.6
8  17.44  6.749  13.4
9   4.56  6.975  34.9
[23.98   6.459]
Shape of train features: (354, 2)
Shape of train targets: (354, 1)


In [553]:
# Step 3. Prototypes.

# In this demo we will use linear regression to predict targets from features.
# In linear regression model with parameters thetas 
# the prediction y is calculated from features x using linear combination of x and thetas.
# For example, for the case of 2 features: 
# y = theta_0 * x_o + theta_1 * x_1

# Let's define some helper functions

def predict_fn(x, thetas):
    '''
    Predict target from features x using parameters thetas and linear regression
    
    param x: input features, shape NxM, N - number of samples to predict, M - number of features
    param thetas: vector of linear regression parameters, shape Mx1
    return y_hat: predicted scalar value for each input samples, shape Nx1
    '''    
    # TODO: calculate y_hat using linear regression
    y_hat = np.zeros((x.shape[0], 1))
    for i in range(len(x)):
        y_hat[i] = thetas[0] * x[i][0] + thetas[1] * x[i][1]
    #print(y_hat)
    return y_hat


def loss_fn(x_train, y_train, thetas):
    '''
    Calculate average loss value for train dataset (x_train, y_train).
    
    param x_train: input features, shape NxM, N - number of samples to predict, M - number of features
    param y_train: input tagrets, shape Nx1
    param thetas: vector of linear regression parameters, shape Mx1
    return loss: predicted scalar value for each input samples, shape Mx1
    '''
    y_predicted = predict_fn(x_train, thetas)    
    loss = np.mean(np.power(y_train - y_predicted, 2))   
    return loss


def gradient_fn(x_train, y_train, thetas):
    '''
    Calculate gradient value for linear regression.
    
    param x_train: input features, shape NxM, N - number of samples to predict, M - number of features
    param y_train: input tagrets, shape Nx1
    param thetas: vector of linear regression parameters, shape Mx1
    return g: predicted scalar value for each input samples, shape Mx1
    '''  
    # TODO: calculate vector gradient
    g = np.zeros_like(thetas)
    for i in range(len(x_train)):
        g[0] += (-2 * y_train[i] * x_train[i][0] + 2 * np.power(x_train[i][0], 2) * thetas[0]
        - 2 * x_train[i][0] * x_train[i][1] * thetas[1])
        g[1] +=  (2 * y_train[i] * x_train[i][1] + 2 * np.power(x_train[i][1], 2) * thetas[1] 
        - 2 * x_train[i][0] * x_train[i][1] * thetas[0])
    g[0] /= len(x_train)
    g[1] /= len(x_train)
    return g

In [609]:
# Step 4. Gradient descent.

# now let's find optimal parameters using gradient descent
MAX_ITER = 10000
thetas = np.random.randn(2, 1)
alpha = 1e-5

progress = tqdm.tqdm(range(MAX_ITER), "Training", file=sys.stdout)
loss_val = loss_fn(x_train, y_train, thetas)
progress.set_postfix(loss_val=loss_val)

for iter in progress:
    gradient = gradient_fn(x_train, y_train, thetas)
    print(gradient)
    thetas_2 = thetas - alpha * gradient
    
    # TODO: add stop conditions
    if abs(thetas_2[0] - thetas[0]) < 0.00001:
        progress.close()
        loss_val = loss_fn(x_train, y_train, thetas)
        print("Stop condition detected")
        print("Final loss:", loss_val)
        break
    
    if iter % 100 == 0:
        loss_val = loss_fn(x_train, y_train, thetas_2)
        progress.set_postfix(loss_val=f"{loss_val:8.4f}", thetas=f"{thetas_2[0][0]:5.4f} {thetas_2[1][0]:5.4f}")
    thetas = thetas_2
    
progress.close()




Training:   0%|                                                                              | 0/10000 [00:00<?, ?it/s][A[A[A


Training:   0%|                                                            | 0/10000 [00:00<?, ?it/s, loss_val=1.07e+3][A[A[A[[-899.87179718]
 [ 457.67458945]]



Training:   0%|                                   | 0/10000 [00:00<?, ?it/s, loss_val=1066.8534, thetas=-0.9169 0.3017][A[A[A[[-895.44731916]
 [ 455.94146647]]
[[-891.04381042]
 [ 454.2164498 ]]
[[-886.66117176]
 [ 452.4995011 ]]
[[-882.29930441]
 [ 450.79058222]]
[[-877.95811011]
 [ 449.08965519]]



Training:   0%|                           | 6/10000 [00:00<03:06, 53.60it/s, loss_val=1066.8534, thetas=-0.9169 0.3017][A[A[A[[-873.63749103]
 [ 447.39668223]]
[[-869.33734982]
 [ 445.71162571]]
[[-865.05758958]
 [ 444.0344482 ]]
[[-860.79811389]
 [ 442.36511245]]
[[-856.55882677]
 [ 440.70358137]]
[[-852.33963268]
 [ 439.04981806]]



Training:   0%|                          | 12/10000 [

[[-448.71335699]
 [ 280.1179541 ]]
[[-446.42835402]
 [ 279.21208881]]



Training:   1%|▎                        | 141/10000 [00:02<02:49, 58.28it/s, loss_val=722.3359, thetas=-0.2092 -0.0805][A[A[A[[-444.15419695]
 [ 278.31041866]]
[[-441.89083445]
 [ 277.41292382]]
[[-439.63821544]
 [ 276.51958455]]
[[-437.39628909]
 [ 275.63038121]]
[[-435.16500479]
 [ 274.74529426]]
[[-432.9443122 ]
 [ 273.86430423]]
[[-430.73416119]
 [ 272.98739177]]



Training:   1%|▎                        | 148/10000 [00:02<02:47, 58.75it/s, loss_val=722.3359, thetas=-0.2092 -0.0805][A[A[A[[-428.53450188]
 [ 272.11453759]]
[[-426.34528463]
 [ 271.24572253]]
[[-424.16646001]
 [ 270.38092749]]
[[-421.99797886]
 [ 269.52013347]]
[[-419.83979223]
 [ 268.66332156]]
[[-417.69185141]
 [ 267.81047293]]



Training:   2%|▍                        | 154/10000 [00:02<02:48, 58.44it/s, loss_val=722.3359, thetas=-0.2092 -0.0805][A[A[A[[-415.55410792]
 [ 266.96156887]]
[[-413.42651349]
 [ 266.11659072]]
[[-411.30902

Training:   3%|▋                         | 284/10000 [00:04<02:46, 58.26it/s, loss_val=592.3821, thetas=0.2181 -0.3522][A[A[A[[-208.85426383]
 [ 184.15568632]]
[[-207.7090899 ]
 [ 183.69088919]]
[[-206.56936801]
 [ 183.22820322]]
[[-205.43507234]
 [ 182.76761842]]
[[-204.30617723]
 [ 182.30912487]]
[[-203.1826571 ]
 [ 181.85271271]]
[[-202.06448653]
 [ 181.3983721 ]]



Training:   3%|▊                         | 291/10000 [00:04<02:44, 59.03it/s, loss_val=592.3821, thetas=0.2181 -0.3522][A[A[A[[-200.95164019]
 [ 180.94609326]]
[[-199.84409289]
 [ 180.49586645]]
[[-198.74181955]
 [ 180.04768199]]
[[-197.64479521]
 [ 179.60153023]]
[[-196.55299503]
 [ 179.15740157]]
[[-195.46639428]
 [ 178.71528646]]



Training:   3%|▊                         | 297/10000 [00:05<02:45, 58.63it/s, loss_val=592.3821, thetas=0.2181 -0.3522][A[A[A[[-194.38496837]
 [ 178.2751754 ]]
[[-193.30869279]
 [ 177.83705891]]
[[-192.23754317]
 [ 177.40092758]]
[[-191.17149525]
 [ 176.96677205]]



Training:   

[[-90.51426993]
 [135.30730126]]



Training:   4%|█                         | 427/10000 [00:07<02:43, 58.51it/s, loss_val=539.9680, thetas=0.6165 -0.7123][A[A[A[[-89.93373832]
 [135.06132175]]
[[-89.35598627]
 [134.81642078]]
[[-88.78100062]
 [134.57259325]]
[[-88.20876827]
 [134.32983411]]
[[-87.63927619]
 [134.08813833]]
[[-87.07251141]
 [133.84750088]]
[[-86.50846102]
 [133.60791678]]



Training:   4%|█▏                        | 434/10000 [00:07<02:41, 59.36it/s, loss_val=539.9680, thetas=0.6165 -0.7123][A[A[A[[-85.94711217]
 [133.36938106]]
[[-85.38845207]
 [133.13188879]]
[[-84.83246799]
 [132.89543504]]
[[-84.27914726]
 [132.66001493]]
[[-83.72847728]
 [132.42562356]]
[[-83.1804455 ]
 [132.19225611]]



Training:   4%|█▏                        | 440/10000 [00:07<02:41, 59.04it/s, loss_val=539.9680, thetas=0.6165 -0.7123][A[A[A[[-82.63503942]
 [131.95990774]]
[[-82.09224663]
 [131.72857365]]
[[-81.55205474]
 [131.49824906]]
[[-81.01445144]
 [131.26892921]]
[[-80.4794244

[[-28.34735108]
 [108.08694746]]
[[-28.06580633]
 [107.95716962]]



Training:   6%|█▌                        | 578/10000 [00:09<02:41, 58.35it/s, loss_val=548.5031, thetas=0.6946 -0.8423][A[A[A[[-27.78562551]
 [107.82792325]]
[[-27.50680216]
 [107.69920587]]
[[-27.22932986]
 [107.57101499]]
[[-26.9532022 ]
 [107.44334813]]
[[-26.67841282]
 [107.31620283]]
[[-26.40495538]
 [107.18957664]]



Training:   6%|█▌                        | 584/10000 [00:09<02:41, 58.33it/s, loss_val=548.5031, thetas=0.6946 -0.8423][A[A[A[[-26.13282356]
 [107.06346713]]
[[-25.8620111 ]
 [106.93787186]]
[[-25.59251174]
 [106.81278843]]
[[-25.32431926]
 [106.68821442]]
[[-25.05742748]
 [106.56414745]]
[[-24.79183023]
 [106.44058514]]



Training:   6%|█▌                        | 590/10000 [00:10<02:41, 58.42it/s, loss_val=548.5031, thetas=0.6946 -0.8423][A[A[A[[-24.52752137]
 [106.31752511]]
[[-24.26449482]
 [106.194965  ]]
[[-24.00274448]
 [106.07290248]]
[[-23.74226433]
 [105.95133519]]
[[-23.4830483

In [610]:
for i in range(10):
    y_hat = predict_fn(x_train, thetas)
    print("Target: ", y_train[i][0], ", predicted:", y_hat[i][0])

Target:  11.8 , predicted: 10.954133590404647
Target:  11.0 , predicted: 9.409757572654923
Target:  23.7 , predicted: -1.4123795356000173
Target:  35.4 , predicted: -4.117094123295022
Target:  15.2 , predicted: 7.624574152281781
Target:  24.4 , predicted: -2.4995998851054146
Target:  33.4 , predicted: -5.239316289135996
Target:  31.6 , predicted: -2.875037138135994
Target:  13.4 , predicted: 5.79073588207401
Target:  34.9 , predicted: -4.012104667898637
