In [564]:
import numpy as np
import random
import math
import pandas as pd

In [565]:
# Read the comma-delimited file 'x06simple.csv' into a NumPy array.
dataSet = np.genfromtxt('x06simple.csv', delimiter = ',')

In [566]:
# Drop the first row and the first column of the loaded array.
# NOTE(review): this overwrites dataSet, so re-running the cell strips one
# more row and column each time; reload the file before re-executing.
dataSet = dataSet[1:,1:]

In [567]:
# Sanity check: (rows, columns) left after the slicing step.
print(dataSet.shape)

(44, 3)


In [568]:
# The row shuffle for the train/test split uses np.random.shuffle, which draws
# from NumPy's global RNG; random.seed() alone does not affect it. Seed both
# generators so the split is reproducible on every Restart & Run All.
random.seed(0)
np.random.seed(0)

In [569]:
# Shuffle the rows of dataSet in place before splitting into train/test.
# This draws from NumPy's global random state, which is controlled by
# np.random.seed (Python's random.seed has no effect on it).
np.random.shuffle(dataSet)

In [570]:
# Size of the training split: two thirds of the rows, rounded down.
trainingSamples = (2 * len(dataSet)) // 3

In [571]:
# Show how many rows go into the training split.
print(trainingSamples)

29


In [572]:
# Training features: the first trainingSamples rows, every column but the last.
training_data_x = dataSet[:trainingSamples, :-1]

In [573]:
# Training targets: last column of the first trainingSamples rows, kept as an
# (n, 1) column vector so it lines up with the matrix products used later.
training_data_y = dataSet[:trainingSamples, -1].reshape(-1, 1)

In [574]:
# Testing features: the remaining rows, every column but the last.
testing_data_x = dataSet[trainingSamples:, :-1]

In [575]:
# Testing targets: last column of the remaining rows, as an (n, 1) column
# vector so it can be compared element-wise with the predictions.
testing_data_y = dataSet[trainingSamples:, -1].reshape(-1, 1)

In [576]:
# Standardize the training features column by column: subtract the
# per-feature mean and divide by the per-feature standard deviation.
average_training_x = training_data_x.mean(axis=0)
std_training_x = training_data_x.std(axis=0)

training_x = (training_data_x - average_training_x) / std_training_x

In [577]:
# Standardize the testing features using the TRAINING mean and standard
# deviation, so the test rows are scaled exactly like the rows the model
# was fitted on.
# The test-set statistics below are computed but not used by the scaling
# expression in this cell.
average_testing_x = testing_data_x.mean(axis=0)
std_testing_x = testing_data_x.std(axis=0)

testing_x = (testing_data_x - average_training_x) / std_training_x

In [578]:
# Sanity check: shape of the standardized training features.
print(training_x.shape)

(29, 2)


In [579]:
# Sanity check: shape of the standardized testing features.
print (testing_x.shape)

(15, 2)


In [580]:
# Bias column for the training design matrix: one 1.0 per training row,
# allocated directly as an (n, 1) array.
training_ones = np.ones((len(training_x), 1))

In [581]:
# Training design matrix: bias column followed by the standardized features.
x = np.hstack([training_ones, training_x])

In [582]:
# Bias column for the testing design matrix: one 1.0 per testing row,
# allocated directly as an (n, 1) array.
testing_ones = np.ones((len(testing_x), 1))

In [583]:
# Testing design matrix: bias column followed by the standardized features.
x_testing = np.hstack([testing_ones, testing_x])

In [584]:
# Sanity check: the testing design matrix gained one column for the bias term.
print(x_testing.shape)

(15, 3)


In [585]:
# First step of the closed-form least-squares fit: the Gram matrix X^T X
# of the training design matrix.
xtx = np.dot(x.T, x)

In [586]:
# Explicit inverse of X^T X, used by the closed-form weight computation.
# NOTE(review): np.linalg.inv raises LinAlgError if xtx is singular; solving
# the linear system directly is usually the more numerically robust route.
xtx_inv = np.linalg.inv(xtx)

In [587]:
# Inspect the inverse of X^T X.
print(xtx_inv)

[[ 3.44827586e-02  4.63858513e-19 -6.78808295e-18]
 [ 4.63858513e-19  3.54482771e-02  5.85029635e-03]
 [-6.78808295e-18  5.85029635e-03  3.54482771e-02]]


In [588]:
# Closed-form least-squares weights: solve the normal equations
#     (X^T X) w = X^T y
# directly instead of multiplying by an explicitly computed inverse. This
# gives the same solution as (X^T X)^-1 X^T y up to floating-point rounding,
# with less rounding error and without depending on the precomputed inverse.
# The result is a column vector with one weight per design-matrix column
# (bias first).
weights = np.linalg.solve(xtx, np.dot(x.T, training_data_y))

In [589]:
# Show the fitted weights (bias term first, then one weight per feature).
print(weights)

[[3235.10344828]
 [1146.23744387]
 [-322.50939164]]


In [590]:
# Inspect the full testing design matrix (bias column plus scaled features).
print(x_testing)

[[ 1.         -1.10275645 -1.51637766]
 [ 1.         -1.71731751 -1.51637766]
 [ 1.         -1.10275645 -0.60023282]
 [ 1.          1.12787261 -1.51637766]
 [ 1.         -1.39865622 -1.51637766]
 [ 1.         -0.78409515 -0.60023282]
 [ 1.          0.80921131 -1.51637766]
 [ 1.         -0.46543386 -1.51637766]
 [ 1.         -0.46543386  1.23205685]
 [ 1.         -1.10275645  0.31591201]
 [ 1.          0.49055002  0.31591201]
 [ 1.          1.4465339   1.23205685]
 [ 1.         -1.39865622  0.31591201]
 [ 1.         -0.14677257  0.31591201]
 [ 1.         -0.46543386  0.31591201]]


In [591]:
# Predictions for the held-out rows: design matrix times the fitted weights.
predicted_outputs = x_testing @ weights


In [592]:
# Show the true target values for the held-out rows.
print(testing_data_y)

[[2120.]
 [ 620.]
 [2110.]
 [4570.]
 [1315.]
 [2805.]
 [4530.]
 [3110.]
 [2710.]
 [2140.]
 [4520.]
 [3214.]
 [1305.]
 [3920.]
 [3920.]]


In [593]:
# Signed per-row error on the test set (prediction minus true value).
predicted_outputs - testing_data_y

array([[  340.12875323],
       [ 1135.69584785],
       [   54.66344008],
       [  446.95929868],
       [  805.95735434],
       [ -275.07505342],
       [  121.69779218],
       [   80.65176621],
       [ -405.74417322],
       [ -270.80187306],
       [ -824.4943406 ],
       [ 1281.82486574],
       [  225.02672805],
       [ -955.01735358],
       [-1320.27886008]])

In [594]:
# Squared error for every held-out row.
out_2 = (testing_data_y - predicted_outputs) ** 2

In [595]:
# Mean squared error over the test rows.
out_2.mean(axis=0)

array([508097.41806608])

In [596]:
# Root mean squared error over the test rows.
np.sqrt(out_2.mean(axis=0))

array([712.80952439])