In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and model paths used later live under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Load the stock CSV from the mounted Drive folder into a DataFrame.
df = pd.read_csv("/content/drive/MyDrive/AIProj/Dataset/stock_details_5_years.csv")

# Bare expression: shows the frame as this cell's output.
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Company
0,2018-11-29 00:00:00-05:00,43.829761,43.863354,42.639594,43.083508,167080000,0.00,0.0,AAPL
1,2018-11-29 00:00:00-05:00,104.769074,105.519257,103.534595,104.636131,28123200,0.00,0.0,MSFT
2,2018-11-29 00:00:00-05:00,54.176498,55.007500,54.099998,54.729000,31004000,0.00,0.0,GOOGL
3,2018-11-29 00:00:00-05:00,83.749496,84.499496,82.616501,83.678497,132264000,0.00,0.0,AMZN
4,2018-11-29 00:00:00-05:00,39.692784,40.064904,38.735195,39.037853,54917200,0.04,0.0,NVDA
...,...,...,...,...,...,...,...,...,...
602957,2023-11-29 00:00:00-05:00,26.360001,26.397499,26.120001,26.150000,1729147,0.00,0.0,PPL
602958,2023-11-29 00:00:00-05:00,27.680000,28.535000,27.680000,28.350000,1940066,0.00,0.0,FITB
602959,2023-11-29 00:00:00-05:00,75.940002,76.555000,75.257500,75.610001,298699,0.00,0.0,IFF
602960,2023-11-29 00:00:00-05:00,45.230000,45.259998,44.040001,44.209999,2217579,0.00,0.0,CCJ


In [None]:
# no need to run this
# Quick look at the range of closing prices over the whole frame.
min_value = df['Close'].min()
max_value = df['Close'].max()

print(f"Maximum value: {max_value}, Minimum value: {min_value}")


Maximum value: 6509.35009765625, Minimum value: 1.03488409519196


In [None]:
datestrings = df['Date']

# Parse the complete timestamp, UTC offset included. Splitting the string on
# '-' and keeping the first piece left only the year, so every row of a given
# year collapsed onto the same 1 January timestamp. utc=True converts each
# value to UTC, which also keeps the result a single datetime dtype should
# rows carry different offsets.
parsed_dates = pd.to_datetime(datestrings, utc=True)

# Convert to Unix timestamps (seconds since the epoch, as floats)
unix_epoch = pd.Timestamp("1970-01-01", tz="UTC")
unix_timestamps = (parsed_dates - unix_epoch) / pd.Timedelta(seconds=1)

# Whole seconds as integers
date_ints = unix_timestamps.astype(int)

# Store the integer timestamps in a new column; the original 'Date' column is kept
df['Date_ints'] = date_ints

# Reset the index of data
df.reset_index(drop=True, inplace=True)

In [None]:
df

In [None]:
# Standardise the closing prices; the scaler expects a 2-D (n_rows, 1) column.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down, so test-set statistics influence the scaling.
close_column = df['Close'].values.reshape(-1, 1)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(close_column)

In [None]:
# Inspect the container type of the scaled prices. (`data` is not defined at
# this point on a fresh kernel; `scaled_data` is the array produced above.)
type(scaled_data)

numpy.ndarray

In [None]:

# Create sequences and labels
def create_sequences(data, seq_length):
    """Cut `data` into sliding windows paired with their next-step targets.

    Window ``k`` is ``data[k:k + seq_length]`` and its label is the element
    immediately after it, ``data[k + seq_length]``. ``len(data) - seq_length``
    pairs are produced.

    Returns:
        ``(sequences, labels)`` as torch tensors built from NumPy arrays.
    """
    starts = range(len(data) - seq_length)

    # Collect everything into one NumPy array per output before handing it to torch
    sequences = np.array([data[start:start + seq_length] for start in starts])
    labels = np.array([data[start + seq_length] for start in starts])

    return torch.tensor(sequences), torch.tensor(labels)

# Window length: each sample is 10 consecutive rows of scaled_data.
seq_length = 10
# NOTE(review): windows are cut from consecutive rows of the whole frame. In the
# preview printed earlier, neighbouring rows belong to different companies on
# the same date, so a window presumably mixes tickers instead of following one
# stock through time — TODO confirm this is intended.
X, y = create_sequences(scaled_data, seq_length)

In [None]:
# ignore for now
# NOTE(review): experimental variant. Running this cell redefines
# create_sequences (shadowing the earlier definition) and rebinds X and y.

def create_sequences(data, seq_length):
    # As written: a run of 'Date_ints' values is the input sequence and a
    # 'Close' value is the label.
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        # NOTE(review): the slice stops at i + seq_length - 1 while the label is
        # read at i + seq_length, so row i + seq_length - 1 is used by neither.
        # NOTE(review): `data[rows, 'column']` is NumPy-style indexing; presumably
        # a DataFrame accessor (.loc / .iloc) was intended — TODO confirm.
        seq = data[i:i + seq_length - 1, 'Date_ints'].values
        label = data[i + seq_length, 'Close']
        sequences.append(seq)
        labels.append(label)

    # Convert the list of sequences and labels to a single NumPy array
    sequences = np.array(sequences)
    labels = np.array(labels)

    return torch.tensor(sequences), torch.tensor(labels)

seq_length = 10
# NOTE(review): no earlier cell shown in this notebook defines `data` — TODO confirm its source.
X, y = create_sequences(data, seq_length)


In [None]:
# Hold-out split without shuffling: the first 80% of the windows are used for
# training, the remainder for testing.
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [None]:
class RNN(nn.Module):
    """Stacked vanilla RNN followed by a linear layer applied to the last time step.

    Inputs are batch-first, ``(batch, seq_len, input_size)``; the output has
    shape ``(batch, output_size)``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, hx=None):
        """Run the RNN from an all-zero hidden state and project the final step.

        ``hx`` is accepted for call compatibility but is not used: the initial
        hidden state is always zeros, created on the same device as ``x``.
        """
        batch = x.size(0)
        zero_state = torch.zeros(self.num_layers, batch, self.hidden_size, device=x.device)
        hidden_seq, _ = self.rnn(x, zero_state)
        last_step = hidden_seq[:, -1, :]
        return self.fc(last_step)

# Define hyperparameters
input_size = 1      # features per time step (one scaled Close value)
hidden_size = 100   # width of each recurrent layer
num_layers = 3      # number of stacked recurrent layers
output_size = 1     # values predicted per sequence
num_epochs = 100    # iterations of the training loop
learning_rate = 0.001
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Create the model
# Built from the hyperparameters defined earlier, then moved to `device`.
model = RNN(input_size, hidden_size, num_layers, output_size).to(device)

In [None]:
# Loss and optimizer
# Mean-squared error between predictions and targets.
criterion = nn.MSELoss()
# Adam over all model parameters, using learning_rate from the hyperparameter cell.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Free up GPU memory
# (Resolved?)
# Ask PyTorch's CUDA caching allocator to release its unused cached memory.
torch.cuda.empty_cache()

# NOTE(review): this only binds a Python variable named PYTORCH_CUDA_ALLOC_CONF;
# it does not set the environment variable of the same name. Presumably an
# allocator setting was intended — TODO confirm, and either set it through
# os.environ before CUDA is first used or drop the line.
PYTORCH_CUDA_ALLOC_CONF=0.5

In [None]:
# Train the model (full batch: each epoch is a single optimizer step over all of X_train)

# The dtype cast and host-to-device copy are identical for every epoch, so do
# them once here instead of repeating them on each iteration.
train_inputs = X_train.float().to(device)
train_targets = y_train.float().to(device)

model.train()
for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before the new backward pass
    optimizer.zero_grad()
    outputs = model(train_inputs)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()

    # Report the training loss every fifth epoch
    if (epoch + 1) % 5 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

Epoch [5/100], Loss: 0.8597584366798401
Epoch [10/100], Loss: 0.8511343002319336
Epoch [15/100], Loss: 0.8454227447509766
Epoch [20/100], Loss: 0.8432894349098206
Epoch [25/100], Loss: 0.8429824709892273
Epoch [30/100], Loss: 0.8412080407142639
Epoch [35/100], Loss: 0.8395564556121826
Epoch [40/100], Loss: 0.8380711078643799
Epoch [45/100], Loss: 0.835728108882904
Epoch [50/100], Loss: 0.8326269388198853
Epoch [55/100], Loss: 0.8288779258728027
Epoch [60/100], Loss: 0.8239613175392151
Epoch [65/100], Loss: 0.8165515661239624
Epoch [70/100], Loss: 0.8051486611366272
Epoch [75/100], Loss: 0.7910171747207642
Epoch [80/100], Loss: 0.7781903147697449
Epoch [85/100], Loss: 0.7608463764190674
Epoch [90/100], Loss: 0.7404759526252747
Epoch [95/100], Loss: 0.7200472354888916
Epoch [100/100], Loss: 0.6974985003471375


In [None]:
# Define the local path for saving the model
# NOTE(review): the string passed to input() is only the prompt text; the path
# actually used is whatever is typed in response (the recorded run entered
# /content/drive/MyDrive/AIProj/Models/rnn4). This makes the cell interactive —
# consider a constant path instead.
local_path = input("/content/drive/MyDrive/AIProj/Models")

# Save the trained model
# Only the state_dict (the parameter tensors) is written, not the model object.
torch.save(model.state_dict(), local_path)

/content/drive/MyDrive/AIProj/Models/rnn4


Evaluation

In [None]:
#ignore

# Select the top 4 companies
top_companies = ['AAPL', 'MSFT', 'AMZN', 'GOOGL']
# NOTE(review): df_top is not used again in this cell; the steps below operate on `df`.
df_top = df[df['Company'].isin(top_companies)].copy()

# Preprocess the data
# NOTE(review): rebinding `df` makes this cell non-repeatable. Once 'Date_ints'
# has become the index (or has been dropped by a later reset_index(drop=True)),
# running the cell again raises the KeyError recorded in its output.
df = df.set_index('Date_ints')
df = df.sort_index()

# Use the 'close' column as the target variable
data = df[['Close']]

KeyError: "None of ['Date_ints'] are in the columns"

In [None]:
# Define a function to get predictions for a given company
def get_predictions(company_name, model, scaler, frame=None, run_device=None):
    """Predict one closing price for `company_name` from its full Close history.

    The company's Close values are scaled with `scaler`, fed to `model` as a
    single batch-of-one sequence, and the model output is mapped back to price
    units with `scaler.inverse_transform`.

    Args:
        company_name: value to match in the 'Company' column.
        model: network called with a (1, seq_len, 1) float32 tensor.
        scaler: fitted object exposing `transform` and `inverse_transform`.
        frame: DataFrame with 'Company' and 'Close' columns; defaults to the
            notebook-level `df`.
        run_device: torch device used for inference; defaults to the
            notebook-level `device`.

    Returns:
        The inverse-transformed model output flattened to one dimension.

    Raises:
        ValueError: if `frame` has no rows for `company_name`.
    """
    if frame is None:
        frame = df
    if run_device is None:
        run_device = device

    # Filter data for the given company
    company_data = frame[frame['Company'] == company_name]['Close'].values.reshape(-1, 1)
    if company_data.shape[0] == 0:
        raise ValueError(f"No rows found for company {company_name!r}")
    company_data = scaler.transform(company_data).reshape(1, -1, 1)

    # Convert to PyTorch tensor and move to device
    company_data = torch.tensor(company_data, dtype=torch.float32).to(run_device)

    # No initial hidden state is passed. The RNN class in this notebook always
    # starts from its own zero state, and the tensor built here previously had a
    # leading dimension of 1, which does not match the 3-layer model.
    with torch.no_grad():
        prediction = model(company_data)

    # Inverse transform predictions
    prediction = scaler.inverse_transform(prediction.cpu().numpy()).reshape(-1)

    return prediction

# Get predictions for the top 4 companies, keyed by ticker
top_companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN']
predictions = {
    company: get_predictions(company, model, scaler)
    for company in top_companies
}

# Display predictions (last element of each returned array)
for company, prediction in predictions.items():
    print(f"Predicted closing price for {company}: {prediction[-1]}")


Predicted closing price for AAPL: 116.3813247680664
Predicted closing price for MSFT: 409.63818359375
Predicted closing price for GOOGL: 72.87936401367188
Predicted closing price for AMZN: 81.9839859008789


In [None]:
# Define a function to get predictions for a given company
def get_predictions(company_name, model, scaler, frame=None, run_device=None):
    """Predict one closing price for `company_name` from its full Close history.

    The company's Close values are scaled with `scaler`, fed to `model` as a
    single batch-of-one sequence, and the model output is mapped back to price
    units with `scaler.inverse_transform`.

    Args:
        company_name: value to match in the 'Company' column.
        model: network called with a (1, seq_len, 1) float32 tensor.
        scaler: fitted object exposing `transform` and `inverse_transform`.
        frame: DataFrame with 'Company' and 'Close' columns; defaults to the
            notebook-level `df`.
        run_device: torch device used for inference; defaults to the
            notebook-level `device`.

    Returns:
        The inverse-transformed model output flattened to one dimension.

    Raises:
        ValueError: if `frame` has no rows for `company_name`.
    """
    if frame is None:
        frame = df
    if run_device is None:
        run_device = device

    # Filter data for the given company
    company_data = frame[frame['Company'] == company_name]['Close'].values.reshape(-1, 1)
    if company_data.shape[0] == 0:
        raise ValueError(f"No rows found for company {company_name!r}")
    company_data = scaler.transform(company_data).reshape(1, -1, 1)

    # Convert to PyTorch tensor and move to device
    company_data = torch.tensor(company_data, dtype=torch.float32).to(run_device)

    # No initial hidden state is passed. The RNN class in this notebook always
    # starts from its own zero state, and the tensor built here previously had a
    # leading dimension of 1, which does not match the 3-layer model.
    with torch.no_grad():
        prediction = model(company_data)

    # Inverse transform predictions
    prediction = scaler.inverse_transform(prediction.cpu().numpy()).reshape(-1)

    return prediction

# Get predictions for the top 4 companies, keyed by ticker
top_companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN']
predictions = {
    company: get_predictions(company, model, scaler)
    for company in top_companies
}

# Display predictions (last element of each returned array)
for company, prediction in predictions.items():
    print(f"Predicted closing price for {company}: {prediction[-1]}")


Predicted closing price for AAPL: 116.3813247680664
Predicted closing price for MSFT: 409.63818359375
Predicted closing price for GOOGL: 72.87936401367188
Predicted closing price for AMZN: 81.9839859008789


In [None]:
# Define a function to get predictions for a given company
def get_predictions(company_name, model, scaler, frame=None, run_device=None):
    """Predict one closing price for `company_name` from its full Close history.

    The company's Close values are scaled with `scaler`, fed to `model` as a
    single batch-of-one sequence, and the model output is mapped back to price
    units with `scaler.inverse_transform`.

    Args:
        company_name: value to match in the 'Company' column.
        model: network called with a (1, seq_len, 1) float32 tensor.
        scaler: fitted object exposing `transform` and `inverse_transform`.
        frame: DataFrame with 'Company' and 'Close' columns; defaults to the
            notebook-level `df`.
        run_device: torch device used for inference; defaults to the
            notebook-level `device`.

    Returns:
        The inverse-transformed model output flattened to one dimension.

    Raises:
        ValueError: if `frame` has no rows for `company_name`.
    """
    if frame is None:
        frame = df
    if run_device is None:
        run_device = device

    # Filter data for the given company
    company_data = frame[frame['Company'] == company_name]['Close'].values.reshape(-1, 1)
    if company_data.shape[0] == 0:
        raise ValueError(f"No rows found for company {company_name!r}")
    company_data = scaler.transform(company_data).reshape(1, -1, 1)

    # Convert to PyTorch tensor and move to device
    company_data = torch.tensor(company_data, dtype=torch.float32).to(run_device)

    # Get predictions. The model is called without an initial hidden state, so
    # the previously unused batch_size / h0 locals are no longer built.
    with torch.no_grad():
        prediction = model(company_data)

    # Inverse transform predictions
    prediction = scaler.inverse_transform(prediction.cpu().numpy()).reshape(-1)

    return prediction

# Get predictions for the top 4 companies, keyed by ticker
top_companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN']
predictions = {
    company: get_predictions(company, model, scaler)
    for company in top_companies
}

# Display predictions (last element of each returned array)
for company, prediction in predictions.items():
    print(f"Predicted closing price for {company}: {prediction[-1]}")




Predicted closing price for AAPL: 143.85731506347656
Predicted closing price for MSFT: 137.5020294189453
Predicted closing price for GOOGL: 145.65074157714844
Predicted closing price for AMZN: 145.32960510253906


In [None]:
# Row labels must be positional (0..n-1) so they can be used to index X.
df.reset_index(drop=True, inplace=True)

# Predictions for the top 4 companies, then for all companies
for company_group in (top_companies, df['Company'].unique()):
    for company in company_group:
        company_indices = df[df['Company'] == company].index

        # Only the output for the company's first window is reported, so run the
        # model on that window alone. Feeding every row index of the company
        # through X did the same work for all windows, and fails once an index
        # lies in the last seq_length rows, which have no window in X.
        first_row = int(company_indices[0])
        company_data = X[first_row:first_row + 1, -seq_length:, :]

        # Inference only: no gradients, and the input must live on the model's device
        with torch.no_grad():
            prediction = model(company_data.float().to(device)).cpu().numpy()[0][0]

        # Back from standardised units to price units
        prediction = scaler.inverse_transform([[prediction]])[0][0]
        print(f"Predicted closing price for {company}: {prediction}")



Predicted closing price for AAPL: 175.75054527946457
Predicted closing price for MSFT: 250.74593979432632
Predicted closing price for GOOGL: 130.7617462710512
Predicted closing price for AMZN: 153.3140644010212
Predicted closing price for AAPL: 175.75054527946457
Predicted closing price for MSFT: 250.74593979432632
Predicted closing price for GOOGL: 130.7617462710512
Predicted closing price for AMZN: 153.3140644010212
Predicted closing price for NVDA: 46.455651989584766
Predicted closing price for META: 125.76636075189542
Predicted closing price for TSLA: 132.43455374852647
Predicted closing price for LLY: 102.86290107572768
Predicted closing price for V: 125.90473411422971
Predicted closing price for TSM: 123.40064791337988
Predicted closing price for UNH: 127.6023384972353
Predicted closing price for AVGO: 231.31698808063283
Predicted closing price for NVO: 197.0459877704945
Predicted closing price for JPM: 95.80656975098965
Predicted closing price for WMT: 260.3884068249822
Predicte

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Row labels must be positional (0..n-1) so they can be used to index X.
df.reset_index(drop=True, inplace=True)

# First the top 4 companies, then (after a separator line) every company
company_groups = [
    (None, top_companies),
    ("---------------------For all companies------------------------", df['Company'].unique()),
]

for header, company_group in company_groups:
    if header is not None:
        print(header)

    for company in company_group:
        company_indices = df[df['Company'] == company].index

        # Only the output for the company's first window is reported, so run the
        # model on that window alone. Feeding every row index of the company
        # through X did the same work for all windows, and fails once an index
        # lies in the last seq_length rows, which have no window in X.
        first_row = int(company_indices[0])
        company_data = X[first_row:first_row + 1, -seq_length:, :]

        # Inference only: no gradients, and the input must live on the model's device
        with torch.no_grad():
            prediction = model(company_data.float().to(device)).cpu().numpy()[0][0]

        # Back from standardised units to price units
        prediction = scaler.inverse_transform([[prediction]])[0][0]

        # Squared error of this single prediction against the row that follows
        # the window (printed under the label "MSE")
        true_values = df.loc[first_row + seq_length, 'Close']
        mse = (mean_squared_error([true_values], [prediction]))

        # Print results
        print(f"Company: {company}")
        print(f"Predicted closing price: {prediction}")
        print(f"MSE: {mse}\n")


Company: AAPL
Predicted closing price: 175.75054527946457
MSE: 7484.341542777006

Company: MSFT
Predicted closing price: 250.74593979432632
MSE: 2720.1915045307655

Company: GOOGL
Predicted closing price: 130.7617462710512
MSE: 12361.45833578801

Company: AMZN
Predicted closing price: 153.3140644010212
MSE: 3450.645532693223

---------------------For all companies------------------------
Company: AAPL
Predicted closing price: 175.75054527946457
MSE: 7484.341542777006

Company: MSFT
Predicted closing price: 250.74593979432632
MSE: 2720.1915045307655

Company: GOOGL
Predicted closing price: 130.7617462710512
MSE: 12361.45833578801

Company: AMZN
Predicted closing price: 153.3140644010212
MSE: 3450.645532693223

Company: NVDA
Predicted closing price: 46.455651989584766
MSE: 1842.087461007709

Company: META
Predicted closing price: 125.76636075189542
MSE: 4166.502887969588

Company: TSLA
Predicted closing price: 132.43455374852647
MSE: 3462.646328289613

Company: LLY
Predicted closing pric

KeyboardInterrupt: 

In [None]:
# Display the Company column; the recorded output shows Date_ints as the index at this point.
df["Company"]

Date_ints
1514764800     AAPL
1514764800     MSFT
1514764800    GOOGL
1514764800     AMZN
1514764800     AAPL
              ...  
1672531200     AMZN
1672531200     AAPL
1672531200     MSFT
1672531200    GOOGL
1672531200     AMZN
Name: Company, Length: 5032, dtype: object

In [None]:
# Distinct tickers in order of first appearance. The identity .apply() that used
# to precede .unique() returned the column unchanged and only added a
# Python-level pass over every row.
unique_companies = df["Company"].unique()

unique_companies

array(['AAPL', 'MSFT', 'GOOGL', 'AMZN'], dtype=object)