In [None]:
import yfinance as yf
from imblearn.over_sampling import SMOTE
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, precision_score
import numpy as np
import pandas as pd

# Step 1: Load data from Yahoo Finance
ticker = 'AAPL'  # Replace with desired ticker symbol, also tried ^GSPC and AAPL and SPY. AAPL gets 0.68, ^GSPC gets 0.58, and SPY gets 0.56
start_date = '2020-01-01'
end_date = '2023-01-01'

data = yf.download(ticker, start=start_date, end=end_date)
data['Return'] = data['Close'].pct_change()  # Calculate daily returns
data['Target'] = (data['Return'] > 0).astype(int)  # 1 if positive return, 0 otherwise

# Step 2: Feature engineering
features = ['Open', 'High', 'Low', 'Close', 'Volume']  # Basic features

data = data.dropna()  # Drop any rows with NaN values
X = data[features]
y = data['Target']

# Step 3: Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Apply SMOTE for oversampling the minority class in the training set
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Step 5: Initialize and train the BalancedRandomForestClassifier
brf = BalancedRandomForestClassifier(n_estimators=100, max_depth=20,
                                     min_samples_split=100, random_state=42)
brf.fit(X_train_res, y_train_res)

# Step 6: Make predictions and evaluate the model
y_pred = brf.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

# Step 7: Cross-validation for stability check
cv_scores = cross_val_score(brf, X_train_res, y_train_res, cv=5, scoring='precision')
print(f"Cross-Validation Precision Scores: {cv_scores}")
print(f"Mean CV Precision Score: {np.mean(cv_scores)}")


[*********************100%***********************]  1 of 1 completed
  warn(
  warn(
  warn(


Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.62      0.65        73
           1       0.67      0.73      0.70        78

    accuracy                           0.68       151
   macro avg       0.68      0.67      0.67       151
weighted avg       0.68      0.68      0.67       151



  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Cross-Validation Precision Scores: [0.50704225 0.57352941 0.61666667 0.57575758 0.52857143]
Mean CV Precision Score: 0.5603134672563007
