Predict Stock Prices 5 Days Ahead

In [None]:
import yfinance as yf

data = yf.download('PLTR', period='1mo')

close_price = data['Close'].reset_index()

print(close_price)



YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['PLTR']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Empty DataFrame
Columns: [Date, PLTR]
Index: []


In [35]:
from alpha_vantage.timeseries import TimeSeries
import os
from dotenv import load_dotenv

load_dotenv()
API_key = os.getenv("STOCKAPI_KEY")

ts = TimeSeries(key = API_key, output_format='pandas')

data, meta = ts.get_daily('PLTR')


close = data[['4. close']]
close_df = close.rename(columns={'4. close': 'Close'})
close_df = close_df.reset_index()
close_df

Unnamed: 0,date,Close
0,2025-06-06,127.72
1,2025-06-05,119.91
2,2025-06-04,130.01
3,2025-06-03,133.17
4,2025-06-02,132.04
...,...,...
95,2025-01-21,73.07
96,2025-01-17,71.77
97,2025-01-16,69.24
98,2025-01-15,68.14


In [36]:
import pandas as pd


sentiment_df = pd.read_csv('daily_sentiment_scores.csv')

#df = pd.DataFrame(close_price)
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
#df['Date'] = pd.to_datetime(df['Date'])

dataset_merged = pd.merge(close_df, sentiment_df, on='date', how='inner')

print(dataset_merged)

         date   Close  sentiment_score
0  2025-06-06  127.72        -0.065419
1  2025-06-05  119.91        -0.106249
2  2025-06-04  130.01        -0.098330
3  2025-06-03  133.17        -0.001489
4  2025-06-02  132.04        -0.099764
5  2025-05-30  131.78         0.040010
6  2025-05-29  122.32         0.198328
7  2025-05-28  123.76         0.353887
8  2025-05-27  123.39         0.301800
9  2025-05-23  123.31         0.191797
10 2025-05-22  122.29         0.092537
11 2025-05-21  120.58         0.022187
12 2025-05-20  125.59         0.193016
13 2025-05-19  126.33         0.054354
14 2025-05-16  129.52        -0.067974
15 2025-05-15  128.12         0.020153
16 2025-05-14  130.18         0.084889
17 2025-05-13  128.10         0.095310
18 2025-05-12  118.46         0.039492
19 2025-05-09  117.30         0.066564


In [None]:
import matplotlib.pyplot as plt

plt.plot(close_price)
plt.show()

In [41]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

data = dataset_merged[['Close', 'sentiment_score']].to_numpy()

scaler = MinMaxScaler()
scaled_dataset = scaler.fit_transform(data)


train_size = int(len(scaled_dataset) * 0.70)
train_set = scaled_dataset[:train_size]
test_set = scaled_dataset[train_size:]


def create_sequence(data, seq_length, n_steps_ahead):
    x = []
    y = []
    for i in range(seq_length, len(data) - n_steps_ahead + 1):
        x.append(data[i-seq_length:i, :]) # All Features
        y.append(data[i:i+n_steps_ahead, 0]) #  Close Column
    
    return np.array(x), np.array(y)

x_train, y_train = create_sequence(train_set, 5, 1)
x_test, y_test = create_sequence(test_set, 5, 1)

# (samples, time_steps, features)
#x_train = x_train.reshape((x_train.shape[0],x_train.shape[1],2))
#x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 2))

print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
x_test.shape

x_train shape: (9, 5, 2)
y_train shape: (9, 1)


(1, 5, 2)

In [43]:
import torch 
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import Adam 

x_train_tensor = torch.from_numpy(x_train).float()
y_train_tensor = torch.from_numpy(y_train).float()

dataset = TensorDataset(x_train_tensor, y_train_tensor)
dataloader = DataLoader(dataset, shuffle=True)

class LSTM(nn.Module):
    
    def __init__(self, input_size=2, hidden_size=64, num_layers=2):
        
        super().__init__()
        
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, input):
        h0 = torch.zeros(self.num_layers, input.size(0), self.hidden_size).to(input.device)
        c0 = torch.zeros(self.num_layers, input.size(0), self.hidden_size).to(input.device)
        
        out, _ = self.lstm(input, (h0, c0))
        
        out = self.fc(out[:, -1, :])
        
        return out

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTM().to(device)
loss_func = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.01)
        


max_epochs = 100
for epoch in range(max_epochs):
    model.train()
    epoch_loss = 0 
    
    for x_batch, y_batch in dataloader:
        
        optimizer.zero_grad()
        
        output = model(x_batch)
        
        loss = loss_func(output, y_batch)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {epoch_loss:.4f}")
    

x_test_tensor = torch.from_numpy(x_test).float()
y_test_tensor = torch.from_numpy(y_test).float()


model.eval()

with torch.no_grad():
    prediction = model(x_test_tensor)
    predicted_price = prediction.detach().numpy()
    #predicted = scaler.inverse_transform(predicted_price)
    
    dummy_sentiment = np.zeros((predicted_price.shape[0], 1))
    reconstructed_input = np.hstack([predicted_price, dummy_sentiment])  # shape (n, 2)

# Inverse transform both columns
    inversed = scaler.inverse_transform(reconstructed_input)

# Extract only the Close price (first column)
    actual_predicted_close = inversed[:, 0]

print(actual_predicted_close[-1])


#print(predicted[-1])
    

Epoch 0, Loss: 0.6821
Epoch 10, Loss: 0.4174
Epoch 20, Loss: 0.1004
Epoch 30, Loss: 0.0974
Epoch 40, Loss: 0.0427
Epoch 50, Loss: 0.0973
Epoch 60, Loss: 0.0117
Epoch 70, Loss: 0.0090
Epoch 80, Loss: 0.0189
Epoch 90, Loss: 0.0034
124.07203115969897
