Multiple Linear Regression on Stock Price Data (is far from market prediction as its using current day's open, high, low, and volume)

Import libraries

In [56]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
import ta as ta
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

Import dataset

In [42]:
stock_data = yf.download("RGTI", start="2020-01-01", end="2025-04-01")

stock_data["MA_5"] = stock_data["Close"].rolling(window=5).mean()
stock_data["MA_20"] = stock_data["Close"].rolling(window=20).mean()
stock_data["Return"] = stock_data["Close"].pct_change() 
stock_data["Volatility"] = (stock_data["High"] - stock_data["Low"]) / stock_data["Open"]
stock_data["Target"] = (stock_data["Return"].shift(-1) >= 0)

#due to rolling window the first few are NaN
stock_data = stock_data.dropna()
print(stock_data)

[*********************100%***********************]  1 of 1 completed

Price      Close    High    Low  Open    Volume   MA_5    MA_20    Return  \
Ticker      RGTI    RGTI   RGTI  RGTI      RGTI                             
Date                                                                        
2021-05-19  9.80   9.850  9.800  9.85      1200  9.810  9.87900  0.000000   
2021-05-20  9.78   9.850  9.780  9.85      1600  9.796  9.87300 -0.002041   
2021-05-21  9.80  10.040  9.790  9.95      9500  9.796  9.86750  0.002045   
2021-05-24  9.80   9.800  9.800  9.80         0  9.796  9.86200  0.000000   
2021-05-25  9.80  10.040  9.770  9.81      4600  9.796  9.85650  0.000000   
...          ...     ...    ...   ...       ...    ...      ...       ...   
2025-03-25  9.82  10.020  9.470  9.56  36323100  9.513  9.06175  0.004090   
2025-03-26  9.18  10.310  9.090  9.89  41834000  9.368  9.05675 -0.065173   
2025-03-27  8.47   9.190  8.410  9.02  28869400  9.264  9.07875 -0.077342   
2025-03-28  8.15   8.419  7.782  8.40  28049000  9.080  9.06325 -0.037780   




Define Features and Target

In [65]:
X = stock_data[["Open", "Close", "High", "Low", "Return", "Volatility"]].values
y = stock_data["Target"].values

Split into Training and Testing sets

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

Scale the data (Optional)

In [67]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

Train the model

In [68]:
regressor = LinearRegression()
regressor.fit(X_train, y_train)

Make predictions and Compare

In [69]:
y_pred = (regressor.predict(X_test) > 0.5)
np.set_printoptions(precision = 2)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

Accuracy: 0.45
[[ True False]
 [False False]
 [ True  True]
 [False False]
 [ True False]
 [False  True]
 [False  True]
 [ True False]
 [False  True]
 [ True False]
 [ True  True]
 [ True False]
 [False False]
 [ True False]
 [False  True]
 [False  True]
 [False False]
 [False  True]
 [False False]
 [False  True]
 [ True False]
 [False False]
 [False False]
 [False  True]
 [False  True]
 [False False]
 [False  True]
 [False  True]
 [False  True]
 [False  True]
 [False False]
 [False False]
 [False False]
 [False  True]
 [False  True]
 [False False]
 [False False]
 [False  True]
 [False False]
 [ True False]
 [False  True]
 [ True False]
 [ True  True]
 [False  True]
 [False False]
 [False False]
 [False False]
 [False False]
 [False  True]
 [False  True]
 [ True  True]
 [ True False]
 [False False]
 [False False]
 [ True  True]
 [False False]
 [ True False]
 [ True False]
 [False  True]
 [False  True]
 [False  True]
 [False  True]
 [ True False]
 [False False]
 [False False]
 [False Fa