In [None]:
# Import the dependencies
import os
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

# Directory holding one CSV per stock; each file name (minus ".csv") is used as the ticker symbol
dir_path = Path("../Resources")

# One DataFrame per CSV file, concatenated after the loop
dfs = []

# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined frame reproducible from run to run
for file in sorted(os.listdir(dir_path)):
    # Only files ending in ".csv" are loaded (case-sensitive match)
    if not file.endswith(".csv"):
        continue
    # Strip only the trailing extension; str.replace would also remove any
    # ".csv" that happens to appear earlier in the file name
    ticker = file[: -len(".csv")]
    # Read the file and tag every row with the ticker it came from
    stocks_df = pd.read_csv(dir_path / file)
    stocks_df["Ticker"] = ticker
    dfs.append(stocks_df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but say which directory was empty
if not dfs:
    raise ValueError(f"No .csv files found in {dir_path.resolve()}")

# Stack all per-ticker frames into one, renumbering the row index
combined_stocks_df = pd.concat(dfs, ignore_index=True)

# Display the first rows of the combined DataFrame to verify the load
print(combined_stocks_df.head())

In [None]:
# Report the (rows, columns) dimensions of the combined frame
print(f"{combined_stocks_df.shape}")

In [None]:
# Parse the 'Date' column as datetime so it can be compared against a Timestamp
# NOTE(review): assumes the parsed dates are timezone-naive; tz-aware dates cannot be
# compared with the naive cutoff below — confirm against the CSV contents
combined_stocks_df["Date"] = pd.to_datetime(combined_stocks_df["Date"])

# Cutoff = midnight, five calendar years before today. normalize() drops the
# time of day so the cutoff does not shift with the hour the notebook is run and
# rows dated exactly five years ago are kept
five_years_ago = pd.Timestamp.today().normalize() - pd.DateOffset(years=5)

# Keep only rows on or after the cutoff; .copy() makes the result an independent
# frame rather than a slice of combined_stocks_df
stocks_data_filtered = combined_stocks_df[combined_stocks_df["Date"] >= five_years_ago].copy()

# Show both ends of the filtered data as a sanity check
print(stocks_data_filtered.head())
print(stocks_data_filtered.tail())



In [None]:
# Fraction of each ticker's rows (oldest first) used for training; the rest is the test set
TRAIN_FRACTION = 0.7
# Number of month-end dates to forecast after the last observed date
FORECAST_PERIODS = 12

# Test-set mean squared error per ticker
mse_dict = {}

# Forecast DataFrame (indexed by "Date", one "Close" column) per ticker
predictions_dict = {}

# Group the data by ticker symbol
stocks_grouped = stocks_data_filtered.groupby("Ticker")

# Fit, evaluate and forecast one linear model per ticker
for ticker, group in stocks_grouped:
    # Work on a date-indexed copy sorted oldest -> newest. shift()/diff() and the
    # chronological split below are only meaningful on date-ordered rows, and the
    # CSV row order is not guaranteed. mergesort keeps duplicate dates in file order.
    group_copy = group.copy().set_index("Date").sort_index(kind="mergesort")

    # Feature: the close of the previous row
    group_copy["Previous Day Close"] = group_copy["Close"].shift(1)

    # Feature: change in volume relative to the previous row
    group_copy["Volume Difference"] = group_copy["Volume"].diff()

    # Drop rows where the target or a feature is missing (at least the first row,
    # which has no predecessor). Restricting to these columns avoids discarding
    # rows because of NaNs in columns the model never uses.
    group_copy = group_copy.dropna(subset=["Close", "Previous Day Close", "Volume Difference"])

    # Features (X) and target (y); y is a column vector so predict() returns shape (n, 1)
    X = group_copy[["Previous Day Close", "Volume Difference"]]
    y = group_copy["Close"].values.reshape(-1, 1)

    # Chronological split: the oldest TRAIN_FRACTION of rows train, the rest test
    split = int(TRAIN_FRACTION * len(X))
    if split < 1:
        # Fewer than two usable rows: an empty training set would make fit() raise
        print(f"Skipping {ticker}: only {len(X)} usable row(s), not enough to train and test a model")
        continue
    X_train = X.iloc[:split]
    X_test = X.iloc[split:]
    y_train = y[:split]
    y_test = y[split:]

    # Train the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate on the held-out (most recent) rows
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_dict[ticker] = mse

    # Build the forecast index: FORECAST_PERIODS month-end dates strictly after the
    # last observed date. Starting one day later prevents the first forecast date
    # from coinciding with the last observed date when that date is a month-end.
    # An offset object is used instead of the 'M' alias, which newer pandas deprecates.
    last_date = group_copy.index[-1]
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(days=1),
        periods=FORECAST_PERIODS,
        freq=pd.offsets.MonthEnd(),
    )
    future_dates_df = pd.DataFrame(future_dates, columns=["Date"]).set_index("Date")

    # Seed the recursion with the last observed close; volume change is held at zero
    previous_close = group_copy["Close"].iloc[-1]
    volume_difference = 0

    # Roll the model forward: each prediction becomes the next step's "Previous Day Close".
    # NOTE(review): the model is fitted on consecutive rows (presumably daily), yet each
    # recursive step is labelled with a month-end date — confirm this is intended.
    future_closes = []
    for _step in range(len(future_dates_df)):
        # Single-row frame with the same column names the model was trained on
        input_data = pd.DataFrame({"Previous Day Close": [previous_close], "Volume Difference": [volume_difference]})
        # predict() returns shape (1, 1) because y was 2-D
        predicted_close = model.predict(input_data)[0][0]
        future_closes.append(predicted_close)
        previous_close = predicted_close

    # Attach the forecasts and store them under this ticker
    future_dates_df["Close"] = future_closes
    predictions_dict[ticker] = future_dates_df

In [None]:
# Report the test-set mean squared error recorded for every ticker
for symbol in mse_dict:
    error = mse_dict[symbol]
    print(f"Stock Ticker: {symbol}, Mean Squared Error: {error}")

In [None]:
# Print each ticker's forecast table under a heading line
for symbol, forecast in predictions_dict.items():
    print(f"Predictions for {symbol}:", forecast, sep="\n")


In [None]:
# Base path = parent of the current working directory
# NOTE(review): presumably the folder that contains "Notebooks" and "Resources" — this
# holds only when the notebook is run from inside one of its sub-folders
base_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Create the "One Year Predictions" folder under the base path.
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of a separate os.path.exists() test.
predictions_folder = os.path.join(base_path, "One Year Predictions")
os.makedirs(predictions_folder, exist_ok=True)

# Write one CSV per ticker: predictions_<ticker>.csv, with the Date index as first column
for ticker, future_dates_df in predictions_dict.items():
    prediction_file_path = os.path.join(predictions_folder, f"predictions_{ticker}.csv")
    future_dates_df.to_csv(prediction_file_path)

print(f"Prediction results saved to the folder: {predictions_folder}")

# Create the "MSE Output" folder under the base path
mse_folder = os.path.join(base_path, "MSE Output")
os.makedirs(mse_folder, exist_ok=True)

# Path of the single file that holds every ticker's MSE
mse_file_path = os.path.join(mse_folder, "mse_results.csv")

# Write the MSE results as a two-column table, one row per ticker
mse_df = pd.DataFrame(list(mse_dict.items()), columns=["Stock Ticker", "Mean Squared Error"])
mse_df.to_csv(mse_file_path, index=False)

print(f"MSE results saved to: {mse_file_path}")
