In [None]:
# Import used libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

In [None]:
# Read the stock dataset from disk into a DataFrame
stock = pd.read_csv(filepath_or_buffer='stock_data.csv')

# Show the leading rows as a quick sanity check of the load
first_rows = stock.head()
print(first_rows)

In [None]:
# Keep only the columns used by the models below.
# `.copy()` makes the result an independent frame, so adding columns to
# `stock` later does not raise SettingWithCopyWarning on pandas versions
# that flag column-subset selections as potential copies.
model_columns = ['Date', 'Open', 'Close', 'Volume', 'Change']
stock = stock[model_columns].copy()
print(stock.head())

In [None]:
# Planned modelling steps:
# - traditional step
# - logistic regression
# - higher dimension
# - CNN

In [None]:
# Linear regression: find theta satisfying the normal equation
# (X.T @ X) @ theta = X.T @ y.

# Target: the 'Change' value of the following row, so each row's features are
# paired with the next period's change (next week, per the original note).
# The guard keeps this cell idempotent: without it, every re-run would shift
# again and drop one more row from `stock`.
if 'Next Change' not in stock.columns:
    stock = (
        stock
        .assign(**{'Next Change': stock['Change'].shift(-1)})
        # Drop any rows with NaN values (the last row always has one after the shift)
        .dropna()
    )

# Feature matrix X and target vector y; column 0 of X is the intercept (all ones)
X = stock[['Open', 'Close', 'Volume']].to_numpy(dtype=float)
y = stock['Next Change'].to_numpy(dtype=float)
X = np.c_[np.ones((X.shape[0], 1)), X]

# Least-squares solve on X directly. The solution satisfies the normal
# equation, but avoids explicitly inverting X.T @ X, which squares the
# condition number and is fragile when feature scales differ widely.
theta_linear, *_ = np.linalg.lstsq(X, y, rcond=None)

# Short alias so code that refers to the linear weights simply as `theta` resolves
theta = theta_linear
print(theta_linear)

In [None]:
# Predictions of the linear model on the training set.
# `theta_linear` is the name the linear fit's weights are stored under.
y_pred = X @ theta_linear

# Plot actual targets against the model's predictions, sample by sample
fig, ax = plt.subplots()
ax.plot(np.arange(len(y)), y, label="Actual Values")
ax.plot(np.arange(len(y_pred)), y_pred, label="Predicted Values")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Percent Change")
ax.set_title("Actual vs Predicted Percent Change")
ax.legend()
plt.show()

In [None]:
# Mean squared error between the targets `y` and the current predictions `y_pred`
mse = mean_squared_error(y_true=y, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

In [None]:
# Create quadratic features: intercept, the raw features, and their squares.
# X already carries a leading column of ones, so it is sliced off before
# squaring. Stacking X and X**2 directly and then prepending another ones
# column would yield three identical bias columns and a rank-deficient
# design matrix.
raw_features = X[:, 1:]
X_quad = np.c_[np.ones((raw_features.shape[0], 1)), raw_features, raw_features**2]

# Least-squares fit on X_quad itself rather than pseudo-inverting
# X_quad.T @ X_quad; working on the design matrix avoids squaring its
# condition number.
theta_quad, *_ = np.linalg.lstsq(X_quad, y, rcond=None)
print(theta_quad)

In [None]:
# Evaluate the quadratic model on the training set with the current `theta_quad`
y_pred = X_quad @ theta_quad

# Draw actual and predicted series on a shared axis
fig, ax = plt.subplots()
ax.plot(np.arange(len(y)), y, label="Actual Values")
ax.plot(np.arange(len(y_pred)), y_pred, label="Predicted Values")
ax.set(
    xlabel="Sample Index",
    ylabel="Percent Change",
    title="Actual vs Predicted Percent Change",
)
ax.legend()
plt.show()

In [None]:
# Training error: mean squared difference between `y` and the latest `y_pred`
mse = mean_squared_error(y_true=y, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

In [None]:
# Add a small regularization term to the diagonal of X_quad.T @ X_quad
# before solving the normal equation.
# NOTE(review): 1e-5 may be negligible next to the squared feature
# magnitudes in X_quad.T @ X_quad - confirm the scale and adjust.
lambda_identity = 1e-5 * np.eye(X_quad.shape[1])  # Adjust lambda as needed

# Solve the regularized linear system directly; forming the explicit inverse
# and multiplying by it is slower and less accurate than a single solve.
gram_regularized = X_quad.T @ X_quad + lambda_identity
theta_quad = np.linalg.solve(gram_regularized, X_quad.T @ y)
print(theta_quad)

In [None]:
# Training-set predictions from the most recently fitted `theta_quad`
y_pred = X_quad @ theta_quad

# Overlay the predicted series on the actual one
fig, ax = plt.subplots()
actual_idx = np.arange(len(y))
predicted_idx = np.arange(len(y_pred))
ax.plot(actual_idx, y, label="Actual Values")
ax.plot(predicted_idx, y_pred, label="Predicted Values")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Percent Change")
ax.set_title("Actual vs Predicted Percent Change")
ax.legend()
plt.show()

In [None]:
# Score the current predictions: mean squared error of `y_pred` against `y`
mse = mean_squared_error(y_true=y, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

In [None]:
# Ridge Regression: solve (X_quad.T @ X_quad + lambda * I) @ theta = X_quad.T @ y.
# NOTE(review): the penalty covers every coefficient, including the intercept
# in column 0 of X_quad, and uses the same lambda (1e-5) as the diagonal
# regularization applied earlier in the notebook - confirm that a distinct,
# tuned lambda is not intended here.
lambda_identity = 1e-5 * np.eye(X_quad.shape[1])

# A direct solve of the penalized system is cheaper and more accurate than
# computing the matrix inverse and multiplying by it.
ridge_gram = X_quad.T @ X_quad + lambda_identity
theta_quad = np.linalg.solve(ridge_gram, X_quad.T @ y)
print(theta_quad)

In [None]:
# Apply the current `theta_quad` to the training design matrix
y_pred = X_quad @ theta_quad

# Compare actual and predicted values on one set of axes
fig, ax = plt.subplots()
for series, series_label in ((y, "Actual Values"), (y_pred, "Predicted Values")):
    ax.plot(np.arange(len(series)), series, label=series_label)
ax.set(
    xlabel="Sample Index",
    ylabel="Percent Change",
    title="Actual vs Predicted Percent Change",
)
ax.legend()
plt.show()

In [None]:
# Report the mean squared error of the latest predictions on the training targets
mse = mean_squared_error(y_true=y, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")