In [25]:
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
from numpy.random import randn
from sklearn import preprocessing 
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the feature matrix (one sample per row) and the target prices.
X = genfromtxt('stock_prediction_data.csv', delimiter=',')
# Targets are reshaped to an (n_samples, 1) column vector.
y = np.reshape(genfromtxt('stock_price.csv', delimiter='\n'), (-1, 1))

# 80% of the samples go to training; the remaining 20% are split evenly
# into validation and test sets.
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.2, random_state=0)
X_validation, X_test, y_validation, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

# Capture the per-feature statistics from the RAW training data before it is
# standardized.  Computing them after scaling (and over the whole array)
# yields roughly mean=0 / std=1, which would leave the validation set
# effectively unscaled and inconsistent with the training features.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
# Constant features have zero spread; divide by 1 instead so they map to 0
# rather than NaN/inf (preprocessing.scale treats them the same way).
std[std == 0] = 1.0

X_train = preprocessing.scale(X_train)

# Apply the training-set transform to the validation features.
# NOTE(review): X_test is not scaled here; apply the same (mean, std) before
# evaluating on it.
X_validation = (X_validation - mean) / std

# Append a constant column so the last weight acts as the bias/intercept.
ones = np.ones((len(X_train), 1))
phi = np.hstack((X_train, ones))
d = phi.shape[1]      # number of weights (features + bias)
n = X_train.shape[0]  # number of training samples

# Solve the normal equations (phi^T phi) w = phi^T y directly rather than
# forming the explicit inverse: same solution, better numerical behaviour.
w1 = np.linalg.solve(phi.T.dot(phi), phi.T.dot(y_train))  ## closed form
print ("closed form linear \n", w1)

# Training-set mean squared error of the closed-form weights.
ŷ_closed_form = phi.dot(w1)
mse_closed_form = np.mean((ŷ_closed_form - y_train)**2)
print("MSE for closed form solution: ", mse_closed_form)
# plt.scatter(X[:, 0], y[:, 0])
# plt.show()

 ## gradient
# Starting point for gradient descent: one standard-normal draw per weight,
# shaped as a (d, 1) column so it lines up with the columns of phi.
w = np.random.randn(d, 1)
def f(w):
	"""Return the mean squared error of the linear model on the training set.

	Computes (1/n) * sum_i (phi_i . w - y_i)^2 over the module-level design
	matrix ``phi`` and targets ``y_train`` (an (n, 1) column vector).

	Args:
		w: Weight vector with one entry per column of ``phi``; a (d, 1)
			column or a flat length-d array.

	Returns:
		The mean squared error as a Python float.
	"""
	# Force a column so the subtraction below is element-wise per sample and
	# never broadcasts into an (n, n) matrix when w is passed flat.
	weights = np.reshape(w, (-1, 1))
	residuals = phi.dot(weights) - y_train
	return float(np.mean(residuals**2))
#	
def ᐁf(w):
	"""Return the gradient of the training mean squared error at ``w``.

	Computes (2/n) * phi^T (phi w - y) over the module-level design matrix
	``phi`` and targets ``y_train`` (an (n, 1) column vector).

	Args:
		w: Weight vector with one entry per column of ``phi``; a (d, 1)
			column or a flat length-d array.

	Returns:
		The gradient as a (d, 1) numpy array.
	"""
	# Force a column so the residuals stay (n, 1) even if w is passed flat.
	weights = np.reshape(w, (-1, 1))
	residuals = phi.dot(weights) - y_train
	# One matrix product replaces the per-sample accumulation loop.
	return (2 / phi.shape[0]) * phi.T.dot(residuals)
#

# Fixed-step batch gradient descent on the training MSE.
LEARNING_RATE = 0.01  # step size applied to every gradient update
N_STEPS = 200         # number of gradient updates to run

# Training MSE recorded after each update, for the convergence plot below.
mse_list = []
for _ in range(N_STEPS):
	w = w - LEARNING_RATE * ᐁf(w)
	mse_list.append(f(w))

print ("grad desc linear \n", w)
ŷ = phi.dot(w)
# Stored under its own name so the closed-form MSE is not overwritten.
mse_gradient_descent = np.mean((ŷ - y_train)**2)
print("MSE for gradient descent: ", mse_gradient_descent)

# plt.plot(mse_list)
# plt.title('MSE over GD')
# plt.xlabel('steps')
# plt.ylabel('MSE')
# plt.show() 	


# Reference fit with scikit-learn, used to cross-check the hand-written solvers.
reg = LinearRegression().fit(X_train, y_train)
# Stack coefficients and intercept into the same layout as the other weight
# vectors: feature weights first, bias last (matching the ones column in phi).
w_sklearn = np.vstack((reg.coef_.reshape(-1, 1), reg.intercept_))
print ("w_sklearn's w vector: ", w_sklearn)
ŷ_sklearn = phi.dot(w_sklearn)
# Stored under its own name so the closed-form MSE is not overwritten.
mse_sklearn = np.mean((ŷ_sklearn - y_train)**2)
print("MSE for sklearn lin reg: ", mse_sklearn)

closed form linear 
 [[-0.00847957]
 [ 4.07998362]
 [ 0.00982479]
 [-0.01575478]
 [ 0.99611963]
 [ 3.02058423]
 [ 3.92186851]
 [ 2.98131229]
 [ 0.98392866]
 [ 1.02658872]
 [ 0.95633333]]
MSE for closed form solution:  0.042947354485713754
grad desc linear 
 [[ 0.00642991]
 [ 4.00889689]
 [ 0.00875159]
 [-0.01741467]
 [ 0.99916355]
 [ 2.95447044]
 [ 3.85544192]
 [ 2.90274984]
 [ 0.94481703]
 [ 1.03352063]
 [ 0.92866493]]
MSE for gradient descent:  0.06399349023627603
w_sklearn's w vector:  [[-0.00847957]
 [ 4.07998362]
 [ 0.00982479]
 [-0.01575478]
 [ 0.99611963]
 [ 3.02058423]
 [ 3.92186851]
 [ 2.98131229]
 [ 0.98392866]
 [ 1.02658872]
 [ 0.95633333]]
MSE for sklearn lin reg:  0.04294735448571375
