Cell 1: Install dependencies

In [None]:
!pip install pandas scikit-learn xgboost matplotlib joblib


Cell 2: Load the merged dataset

In [None]:
import pandas as pd

# Read the merged sentiment/price CSV, convert the `date` column to datetimes,
# and order the rows by date so later positional operations follow time order.
df = (
    pd.read_csv('/content/merged_sentiment_price.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
)

# Preview the first few rows.
df.head()


Cell 3: Feature engineering

In [None]:
# Build the label and the predictor columns in a single pass, then discard
# every row that contains a NaN (this includes the rows left incomplete by the
# shift, the percentage change and the 3-row rolling window).
df = df.assign(
    # Close of the following row.
    next_close=lambda frame: frame['close'].shift(-1),
    # Binary target: 1 when the next close is strictly above the current close.
    target=lambda frame: frame['next_close'].gt(frame['close']).astype(int),
    # Fractional change of the close relative to the previous row.
    price_change=lambda frame: frame['close'].pct_change(),
    # Mean sentiment over the current row and the two rows before it.
    sentiment_rolling_mean=lambda frame: frame['sentiment'].rolling(window=3).mean(),
    # Monday=0 ... Sunday=6.
    day_of_week=lambda frame: frame['date'].dt.dayofweek,
).dropna()

# Model inputs (X) and label (y).
feature_cols = ['sentiment', 'sentiment_rolling_mean', 'price_change', 'day_of_week']
X, y = df[feature_cols], df['target']

X.head()


Cell 4: Train/test split and model training

In [None]:
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Hold out the last 20% of rows as the test set. shuffle=False keeps the
# original row order, so the split is positional rather than random.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# `use_label_encoder` is intentionally not passed: it is a deprecated argument
# that recent XGBoost releases ignore with a "Parameters ... are not used"
# warning, and the 0/1 integer target needs no label encoding anyway.
# random_state is set explicitly so the fit is reproducible across runs.
model = XGBClassifier(eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows: per-class precision / recall / F1.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


Cell 5: Save model as .pkl

In [None]:
import joblib

# Serialize the classifier to a file in the current working directory.
pickle_path = 'xgb_model.pkl'
joblib.dump(model, pickle_path)
print("✅ Saved as xgb_model.pkl")


Cell 6: Download the model

In [None]:
from google.colab import files
# Ask Colab to send the saved model file to the browser as a download.
model_filename = 'xgb_model.pkl'
files.download(model_filename)
