<a href="https://colab.research.google.com/github/dahn24/Stock_Price_Prediction_Model/blob/main/Stock_Price_Prediction_Combined_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
pip install yfinance



In [64]:
#Linear Regression Model

import yfinance as yf
import pandas as pd
import numpy as np


ticker = yf.Ticker('AAPL') #import data, loaded 5 years of past data of AAPL (Matrix form)
aapl_df = ticker.history(period="5y") #get data from 5 year period in dataframe
aapl_df.drop(['High','Low','Close','Volume'], axis=1, inplace=True) #only have two columns, not seven
data = np.empty(shape = (1259), dtype = float)
data=aapl_df[['Open']].to_numpy().flatten()


X = np.zeros((1250,3)) #Matrix of 1250 rows and 4 columns
Y = [0]*1250

for i in range(1250):
  X[i] = [data[1 + i], data[2 + i], data[3 + i]] #filling up matrix
  Y[i] = data[4 + i]


from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33)

# Create linear regression object
LR_regr = linear_model.LinearRegression()

# Train the model using the training sets
LR_regr.fit(X_train, y_train)

# Make predictions using the testing set
LR_y_pred = LR_regr.predict(X_test)

# The coefficients
print("Coefficients: \n", LR_regr.coef_)
# The mean squared error
LR_MSE = mean_squared_error(y_test, LR_y_pred)
print("Mean squared error: %.2f" % LR_MSE)
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, LR_y_pred))


Coefficients: 
 [-0.06553909  0.10025964  0.96278859]
Mean squared error: 8.58
Coefficient of determination: 0.99


In [65]:
#NN(Scikit Learn Example)

from sklearn.neural_network import MLPRegressor


NN_regr = MLPRegressor(hidden_layer_sizes=(100, 50), random_state=1, max_iter=500).fit(X_train, y_train)
NN_y_pred_bef= NN_regr.predict(X_test) #NN_y_pred = list of all the predictions
NN_y_pred = NN_y_pred_bef.reshape(-1,1)
NN_MSE = mean_squared_error(y_test, NN_y_pred)
print("Mean squared error: %.2f" % NN_MSE)

Mean squared error: 9.47


In [66]:
#Decision Tree

from sklearn.tree import DecisionTreeRegressor

DT_regr = DecisionTreeRegressor(random_state=1).fit(X_train, y_train)
DT_y_pred_bef = DT_regr.predict(X_test)
DT_y_pred = DT_y_pred_bef.reshape(-1,1)
DT_MSE = mean_squared_error(y_test, DT_y_pred)
print("Mean squared error: %.2f" % DT_MSE)

Mean squared error: 21.56


In [67]:
X_test[0] = [179.3, 174.67, 174.61]
LR_pred_val = LR_regr.predict(X_test)[0]
print(LR_pred_val)

174.3831278451888


In [68]:
X_test[0] = [179.3, 174.67, 174.61]
NN_pred_val = NN_regr.predict(X_test)[0]
print(NN_pred_val)

173.5391741424397


In [69]:
X_test[0] = [179.3, 174.67, 174.61]
DT_pred_val =DT_regr.predict(X_test)[0]
print(DT_pred_val)

179.051009669682


In [70]:
NN_err = 0
LR_err = 0
DT_err = 0
fin_err = 0
results_list = []

for i in range(len(LR_y_pred)):
  print("NN Predictions:" , NN_y_pred[i])
  print("LR Predictions:" , LR_y_pred[i])
  print("DT Predictions:" , DT_y_pred[i])
  print("Actual:",  y_test[i])
  LR_err += abs((y_test[i]- LR_y_pred[i]))
  NN_err += abs((y_test[i]- NN_y_pred[i]))
  DT_err += abs((y_test[i]- DT_y_pred[i]))
  LR_ratio = NN_err/(NN_err + LR_err + DT_err)
  NN_ratio = LR_err/(NN_err + LR_err + DT_err)
  DT_ratio = DT_err/(NN_err + LR_err + DT_err)
  print("Error ratio is" , LR_ratio, "(LR) :", NN_ratio, "(NN) :" , DT_ratio, "(DT)")
  final_pred = LR_ratio * LR_y_pred[i] + NN_ratio * NN_y_pred[i] + DT_ratio * DT_y_pred[i]
  results_list.append(final_pred)
  print("Final prediction value:" , final_pred)
  fin_err += abs((y_test[i]-final_pred)/y_test[i])
  print("")


print("Final Average Error is: " , (fin_err/len(LR_y_pred)) * 100, "%")
three_list = np.array(results_list)

three_MSE = mean_squared_error(y_test, three_list)
print("MSE value of Three Models Combination Model is: ", three_MSE)


NN Predictions: [179.21273952]
LR Predictions: 179.72935280631245
DT Predictions: [184.52771753]
Actual: 181.20870480571682
Error ratio is [0.29376926] (LR) : [0.21773332] (NN) : [0.48849743] (DT)
Final prediction value: [181.9608577]

NN Predictions: [237.41197661]
LR Predictions: 237.60227482939803
DT Predictions: [240.]
Actual: 238.97000122070312
Error ratio is [0.33060126] (LR) : [0.26484254] (NN) : [0.40455621] (DT)
Final prediction value: [238.52189037]

NN Predictions: [124.70814188]
LR Predictions: 125.2238299498249
DT Predictions: [125.36200611]
Actual: 125.46319795799657
Error ratio is [0.3637647] (LR) : [0.26055425] (NN) : [0.37568105] (DT)
Final prediction value: [125.14137539]

NN Predictions: [117.69398659]
LR Predictions: 118.75342279450508
DT Predictions: [122.66459306]
Actual: 119.58090377809478
Error ratio is [0.35116976] (LR) : [0.22183039] (NN) : [0.42699984] (DT)
Final prediction value: [120.18847673]

NN Predictions: [221.73941121]
LR Predictions: 222.529881454876

In [72]:
actual_val = y_test[0]
pred_list = [LR_y_pred, NN_y_pred, DT_y_pred, three_list]
pred_list = [np.array(p).flatten() for p in pred_list]
pred_ind = []
mod_list = ["LR Prediction:", "NN Prediction:", "DT Prediction:", "Three Model Prediction:"]
prox_list = [(actual_val - LR_y_pred[0]), (actual_val - NN_y_pred[0]), (actual_val - DT_y_pred[0]), (actual_val - three_list[0])]
acc_pred_list = []

#Change the proximity value to absolute.
for i in range(len(prox_list)):
  prox_list[i] = abs(prox_list[i])

#Determine the two indexes with predicted values closest to the actual value
for i in range(0, 2):
  pred_ind.append(prox_list.index(min(prox_list)))
  prox_list[pred_ind[i]] = 10000000000

#print the two indexes
for i in range(0, 2):
  print(pred_ind[i])

#Final MSE Error

closest_combined_pred = (pred_list[pred_ind[0]]+pred_list[pred_ind[1]])/2
closest_combined_MSE = mean_squared_error(y_test, closest_combined_pred)
print("Mean squared error: %.2f" % closest_combined_MSE)


3
0
Mean squared error: 9.12


Tesla: Predictions (Monday = 259.3434673376589(NN), 259.43269652(LR)), (Tuesday = 260.25472715866357(NN), 258.25222344(LR)), (Wednesday = 252.9906057716744(NN), 251.28043536(LR)), (Thursday = 242.42908011253357(NN), 243.28207527(LR)), (Friday = 254.10978717038498(NN), 256.81248751(LR)), term = Last 3 days

Tesla: Predictions (Monday = 255.39245155305224(NN), 257.36577365(LR)), (Tuesday = 252.89518350193396(NN), 258.9103237(LR)), (Wednesday = 254.89930696941917(NN), 248.47467418(LR)), (Thursday = 247.05356603194483(NN), 242.40142806(LR)), (Friday = 251.22187228479018(NN), 257.61861851(LR)), term = Last 5 days

Real Tesla Stock Prices (Monday = 250.02, Tuesday = 243.15, Wednesday = 243.2, Thrusday = 257.84, Friday = 260.63)


Apple: Predictions (Monday = 185.04601053842558(NN), 185.36794501(LR)), (Tuesday = 186.28321343151617(NN), 185.56492498(LR)), (Wednesday = 186.3198122856117(NN), 186.78643675(LR)), (Thursday = 185.7340687632026(NN), 186.11884907(LR)), (Friday = 188.3205993811893(NN), 189.08076126(LR)), term = Last 3 days

Apple: Predictions (Monday = 184.32129621738207(NN), 185.78559101(LR)), (Tuesday = 184.6519646982332(NN), 185.51485266(LR)), (Wednesday = 186.76168906258465(NN), 186.72036983(LR)), (Thursday = 184.82757539191303(NN), 185.75783144(LR)), (Friday = 188.14432386071064(NN), 189.07363645(LR)), term = Last 5 days

Real Apple Stock Prices (Monday = 185.48, Tuesday = 186.82, Wednesday = 185.9, Thrusday = 189.21, Friday = 191.78)


Papa Johns: Predictions (Monday = 72.13058243971048(NN), 72.62154468(LR)), (Tuesday = 72.68137640403584(NN), 72.70792263(LR)), (Wednesday = 70.76979012537055(NN), 70.49938953(LR)), (Thursday = 72.33165301475981(NN), 72.32238143(LR)), (Friday = 72.955632395817(NN), 72.83569683(LR)), term = Last 3 days.

Papa Johns: Predictions (Monday = 72.30619112884433(NN), 72.70647876(LR)), (Tuesday = 73.1015127499096(NN), 72.81604794(LR)), (Wednesday = 70.4073474478508(NN), 70.40201571(LR)), (Thursday = 72.0603075489961(NN), 72.31713888(LR)), (Friday = 72.35666122743858(NN), 72.85776773(LR)), term = Last 5 days

Real Papa Johns Stock Prices (Monday = 72.75, Tuesday = 70.4, Wednesday = 72.38, Thrusday = 72.83, Friday = 73.84)

In [None]:
import matplotlib.pyplot as plt
NN_x_values = range(1, len(y_test) + 1)

plt.scatter(NN_x_values[:10], y_test[:10], c='blue', label='Real Stock Prices')
plt.scatter(NN_x_values[:10], NN_y_pred[:10], c='red', label='Predicted Stock price by Neural Network')
plt.xlabel('Real Stock Prices')
plt.ylabel('Predicted Stock Price by Neural Network')
plt.show()

In [None]:
import matplotlib.pyplot as plt
LR_x_values = range(1, len(y_test) + 1)

plt.scatter(LR_x_values[:10], y_test[:10], c='blue', label='Real Stock Prices')
plt.scatter(LR_x_values[:10], LR_y_pred[:10], c='red', label='Predicted Stock price by Linear Regresison')
plt.xlabel('Real Stock Prices')
plt.ylabel('Predicted Stock Price by Linear Regresison')
plt.show()

In [None]:
import matplotlib.pyplot as plt
DT_x_values = range(1, len(y_test) + 1)

plt.scatter(DT_x_values[:10], y_test[:10], c='blue', label='Real Stock Prices')
plt.scatter(DT_x_values[:10], LR_y_pred[:10], c='red', label='Predicted Stock price by Decision Tree Regressor')
plt.xlabel('Real Stock Prices')
plt.ylabel('Predicted Stock Price by Decision Tree Regressor')
plt.show()

In [None]:
https://epfl.zoom.us/j/5759729771?pwd=ZDBUVWk0UDhkaFlXbHM0SGxaMEVIZz09

# Task
Explain the error in the selected code, fix it, and incorporate the changes into the existing code.

## Inspect data types

### Subtask:
Check the data type of `data` before the loop.


**Reasoning**:
Print the type of the variable `data` to understand its structure and identify the cause of the ValueError.



In [None]:
print(type(data))

**Reasoning**:
The variable `data` is a numpy array. The error indicates setting an array element with a sequence, which suggests that the structure of `data` is not as expected when trying to assign to `X[i, :]`. Let's examine the shape of `data` to further understand the issue.



In [None]:
print(data.shape)

**Reasoning**:
The variable `data` is a numpy array with shape (1256, 1), which means it's a 2D array with 1256 rows and 1 column. When trying to assign `[data[1 + i], data[2 + i], data[3 + i]]` to `X[i, :]`, the error "setting an array element with a sequence" occurs because `X[i, :]` is a slice of a 2D array (a 1D array), while `[data[1 + i], data[2 + i], data[3 + i]]` is a sequence of 1D arrays (each `data[j]` is `[value]`). To fix this, we need to extract the scalar value from each element of `data`.



In [None]:
#Linear Regression Model

import yfinance as yf
import pandas as pd
import numpy as np


ticker = yf.Ticker('AAPL') #import data, loaded 5 years of past data of AAPL (Matrix form)
aapl_df = ticker.history(period="5y") #get data from 5 year period in dataframe
aapl_df.drop(['High','Low','Close','Volume'], axis=1, inplace=True) #only have two columns, not seven
data = aapl_df[['Open']].to_numpy() # Use to_numpy() directly

X = np.zeros((len(data)-4,3)) #Matrix of 1250 rows and 4 columns
Y = [0]*(len(data)-4)

for i in range(len(data)-4):
  X[i, :] = [data[1 + i][0], data[2 + i][0], data[3 + i][0]] #filling up matrix by accessing the scalar value with [0]
  Y[i] = data[4 + i][0] # Also access the scalar value for Y

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33)

# Create linear regression object
LR_regr = linear_model.LinearRegression()

# Train the model using the training sets
LR_regr.fit(X_train, y_train)

# Make predictions using the testing set
LR_y_pred = LR_regr.predict(X_test)

# The coefficients
print("Coefficients: \n", LR_regr.coef_)
# The mean squared error
LR_MSE = mean_squared_error(y_test, LR_y_pred)
print("Mean squared error: %.2f" % LR_MSE)
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, LR_y_pred))

## Summary:

### Data Analysis Key Findings

*   The variable `data`, which holds the 'Open' stock prices, is a 2D NumPy array with a shape of (1256, 1).
*   The original code produced a `ValueError` because it was attempting to assign a sequence (a single-element array like `[value]`) from the `data` array to an element of the `X` matrix, which expected a scalar value.

### Insights or Next Steps

*   When working with NumPy arrays resulting from selecting a single column from a pandas DataFrame and using `.to_numpy()`, be mindful that they retain a 2D shape. Accessing individual values requires indexing into the single column (e.g., `data[i][0]`).
