Cell 1: Install dependencies

In [None]:
!pip install pandas scikit-learn matplotlib torch

Cell 2: Load and prep data

In [None]:
import pandas as pd

# Location of the merged sentiment/price CSV; kept in one place so it is easy to repoint.
DATA_PATH = '/content/merged_sentiment_price.csv'

# Columns fed to the model. Defined up front so the NaN filter below can use them.
feature_cols = ['sentiment', 'sentiment_rolling_mean', 'price_change', 'day_of_week']

df = pd.read_csv(DATA_PATH)
df['date'] = pd.to_datetime(df['date'])
# shift / pct_change / rolling below all operate on row order, so sort chronologically first.
df = df.sort_values('date')

# Label: 1 if next close > current
df['next_close'] = df['close'].shift(-1)
df['target'] = (df['next_close'] > df['close']).astype(int)

# Features
df['price_change'] = df['close'].pct_change()
df['sentiment_rolling_mean'] = df['sentiment'].rolling(window=3).mean()
df['day_of_week'] = df['date'].dt.dayofweek

# Drop only rows that lack a value the model actually needs. A blanket dropna()
# would also discard rows because of NaNs in unrelated CSV columns, punching
# holes in the time series. 'next_close' must stay in the subset: on the final
# row it is NaN, the comparison above yields False, and the resulting target of
# 0 is not a real label.
required_cols = feature_cols + ['close', 'next_close']
df = df.dropna(subset=required_cols).reset_index(drop=True)


Cell 3: Create sequences for LSTM

In [None]:
import numpy as np

# Window length used when slicing df into model inputs: each sequence spans
# SEQ_LEN consecutive rows (presumably one row per day — TODO confirm).
SEQ_LEN = 10  # Number of days in each sequence

def create_sequences(data, labels, seq_len):
    """Slice `data` into overlapping fixed-length windows with one label each.

    Window ``i`` is ``data[i:i + seq_len]`` and is paired with
    ``labels[i + seq_len]`` (the label of the row immediately after the
    window). ``len(data) - seq_len`` windows are produced.

    Args:
        data: sequence/array indexed along its first axis (rows = time steps).
        labels: sequence/array with at least ``len(data)`` entries.
        seq_len: number of consecutive rows per window; must be >= 1.

    Returns:
        ``(X, y)`` as numpy arrays. ``X`` has shape
        ``(n_windows, seq_len, *data.shape[1:])`` and ``y`` has
        ``n_windows`` entries. When no window fits, ``X`` is still returned
        with the window/feature axes intact and ``n_windows == 0``.

    Raises:
        ValueError: if ``seq_len < 1`` or ``labels`` is shorter than ``data``.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")
    if len(labels) < len(data):
        raise ValueError(
            f"labels ({len(labels)}) must have at least as many entries as data ({len(data)})"
        )

    n_windows = max(len(data) - seq_len, 0)
    X = np.array([data[i:i + seq_len] for i in range(n_windows)])
    y = np.array([labels[i + seq_len] for i in range(n_windows)])

    if n_windows == 0:
        # np.array([]) is 1-D; restore the window/feature axes so callers can
        # still rely on X.shape[1:] even when the input is too short.
        feature_shape = np.array(data).shape[1:]
        X = X.reshape((0, seq_len) + feature_shape)
    return X, y

X_raw = df[feature_cols].values
y_raw = df['target'].values

# create_sequences pairs the window data[i:i+seq_len] with labels[i+seq_len],
# i.e. the label of the row *after* the window. df['target'] at row t already
# answers "is close[t+1] > close[t]?", so passing both arrays unshifted would
# make each window predict the move that starts one day after its last row,
# without ever seeing that day. Offsetting the features by one row relative to
# the labels pairs every window with the target of its own final row, which is
# the next-day move the label comment describes.
X, y = create_sequences(X_raw[1:], y_raw[:-1], SEQ_LEN)

print("Shape:", X.shape, y.shape)


Cell 4: Train/test split and conversion to tensors

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Ordered hold-out split: the first 80% of the windows are used for training,
# the remaining windows for testing (no shuffling before the split).
train_size = int(0.8 * len(X))

X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Features are converted to float32 tensors, labels to int64 (torch.long) tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Training batches are reshuffled on every pass over the loader.
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

# Test batches keep dataset order.
test_ds = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)


Cell 5: Define the LSTM model

In [None]:
import torch.nn as nn

class LSTMModel(nn.Module):
    """LSTM encoder followed by a linear layer applied to the final hidden state.

    Args:
        input_dim: feature size of each time step (the LSTM's input size).
        hidden_dim: LSTM hidden size, also the linear layer's input width.
        output_dim: width of the linear layer's output (default 2).
    """

    def __init__(self, input_dim, hidden_dim, output_dim=2):
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they stay as-is.
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Run `x` through the LSTM and map its last hidden state through `fc`.

        The LSTM is built with batch_first=True, so a batched `x` is laid out
        as (batch, time, feature).
        """
        _, (final_hidden, _) = self.lstm(x)
        # final_hidden is indexed by layer; [-1] selects the last one.
        last_layer_state = final_hidden[-1]
        return self.fc(last_layer_state)

# Seed torch's global RNG before the model is built so that weight
# initialisation (and later draws from the same global RNG) repeat on a
# fresh Restart & Run All instead of changing from run to run.
SEED = 42
torch.manual_seed(SEED)

# One input feature per entry in feature_cols; 64 hidden units; 2 output logits (class default).
model = LSTMModel(input_dim=len(feature_cols), hidden_dim=64)
# CrossEntropyLoss takes raw logits plus integer class labels.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


Cell 6: Train the model

In [None]:
EPOCHS = 10
for epoch in range(EPOCHS):
    model.train()
    # Accumulate a sample-weighted sum so the printed value is the mean loss
    # over the whole epoch, not just whatever the last mini-batch produced.
    running_loss = 0.0
    n_samples = 0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = loss_fn(preds, yb)
        loss.backward()
        optimizer.step()

        # loss_fn averages over the batch (default reduction), so scale it
        # back up by the batch size before summing.
        running_loss += loss.item() * len(yb)
        n_samples += len(yb)

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    epoch_loss = running_loss / max(n_samples, 1)
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {epoch_loss:.4f}")


Cell 7: Evaluate on test set

In [None]:
from sklearn.metrics import classification_report

# Switch to inference mode and gather predictions and labels across all test
# batches so a single report can be printed at the end.
model.eval()
all_preds, all_targets = [], []

with torch.no_grad():  # no gradients are needed for evaluation
    for xb, yb in test_loader:
        preds = model(xb)
        # Predicted class = index of the largest logit in each row.
        pred_labels = torch.argmax(preds, dim=1)
        all_targets.extend(yb.cpu().numpy())
        all_preds.extend(pred_labels.cpu().numpy())

print(classification_report(all_targets, all_preds))


Cell 8: Save the trained model

In [None]:
# Persist the model's state_dict (not the module object) to a file in the
# current working directory.
torch.save(obj=model.state_dict(), f="lstm_model.pt")
print("✅ Model saved as lstm_model.pt")


Cell 9: Download


In [None]:
# google.colab only exists inside a Colab runtime. Guard the import so that
# running the notebook elsewhere ends with a message instead of a
# ModuleNotFoundError on the last cell.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available - skipping download; lstm_model.pt remains in the working directory.")
else:
    # Hands the saved weights file to the browser as a download.
    files.download("lstm_model.pt")