## Loading

In [58]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [59]:
import os
import warnings
from abc import ABC, abstractmethod
from collections import defaultdict

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import LabelEncoder
from statsmodels.tsa.stattools import adfuller, kpss
from tqdm import tqdm

import helper as hp
from models import *


# Current working directory; the data-loading cell builds its CSV paths from it.
current_path = os.getcwd()
random_state = hp.RANDOM_STATE

# Ticker list and date range are defined once in the helper module and
# re-exported here under short notebook-level names.
STOCKS = hp.STOCKS
START_DATE = hp.START_DATE
END_DATE = hp.END_DATE

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [53]:
# Data fetching: read one CSV per ticker from ./data, then let the helper
# module preprocess the whole collection in a single call.
data_dir = os.path.join(current_path, "data")

stock_data = {}
for stock in STOCKS:
    # Files are named "<ticker>_<start>_<end>.csv".
    data_path = os.path.join(data_dir, f"{stock}_{START_DATE}_{END_DATE}.csv")
    data = pd.read_csv(data_path)
    stock_data[stock] = data

stock_data = hp.preprocess_stock_data(stock_data, STOCKS)

Data fetched for RIVN
Data fetched for BB
Data fetched for SOFI
Data fetched for GME
Data fetched for AMC
Data fetched for PLTR
Data fetched for TSLA
Data fetched for AAPL
Data fetched for MSFT
Data fetched for AMZN
Data fetched for GOOG
Data fetched for AMD
Data fetched for NVDA
Data fetched for QQQ
Data fetched for SPY
Data fetched for DIA
Data fetched for ^IRX


In [54]:
# Display the ticker list taken from the helper module (hp.STOCKS);
# a bare last expression is rendered as the cell output.
STOCKS

['RIVN',
 'BB',
 'SOFI',
 'GME',
 'AMC',
 'PLTR',
 'TSLA',
 'AAPL',
 'MSFT',
 'AMZN',
 'GOOG',
 'AMD',
 'NVDA',
 'QQQ',
 'SPY',
 'DIA',
 '^IRX']

Get the predictions for all stocks

In [75]:
# Purpose: for every ticker that is not excluded below, fit the XGBoost-based
# StockPredictor, run the trading strategy on its predictions, and write the
# price columns plus buy/sell signals to data_with_signal/<ticker>.csv.

# --- Output locations ---------------------------------------------------------
folder_name = 'data_with_signal'
image_folder_name = "gallery"  # only referenced by the optional plotting lines at the end of the loop
os.makedirs(folder_name, exist_ok=True)

# --- Run configuration --------------------------------------------------------
lag = 5
window_size = 40
starting_funds = 50000
stationary = False
# Tickers that are loaded with the rest of the data but get no signal file.
excluded_tickers = ['QQQ', 'SPY', 'DIA', '^IRX']
# When False (default) any ticker that already has a signal file is skipped,
# which is what this cell has always done. Set to True to re-predict tickers
# whose file ends before END_DATE.
refresh_stale_files = False
# Number of rows before the last saved date from which a refresh restarts.
refresh_lookback_rows = 250
columns_to_return = ['Date'] + hp.ORIGINAL_PRICE_FEATURES
loss_fn = 'reg:squarederror' #'reg:squaredlogerror'
params = {'n_estimators': 323,
          'max_depth': 7,
          'min_child_weight': 2,
          'gamma': 0.10020565066030232,
          'learning_rate': 0.08013623310286376,
          'subsample': 0.9541002999199182,
          'colsample_bytree': 0.659370350154071,
          'reg_alpha': 0.029553047788818548,
          'reg_lambda': 0.00021589152386430175
          }

for stock in STOCKS:
    if stock in excluded_tickers:
        continue
    print(stock)

    fromDate = START_DATE
    toDate = END_DATE
    file_path = os.path.join(folder_name, f'{stock}.csv')

    if os.path.exists(file_path):
        if not refresh_stale_files:
            # Default: keep the existing file untouched.
            continue

        df_old = pd.read_csv(file_path)
        df_old['Date'] = pd.to_datetime(df_old['Date'])
        latest_date = df_old['Date'].max()
        if latest_date >= pd.to_datetime(toDate):
            # File already reaches the requested end date; nothing to do.
            continue

        # Restart a fixed number of rows before the last saved date.
        # NOTE(review): fromDate becomes a pandas Timestamp here instead of
        # START_DATE's type - confirm hp.prepare_data accepts it.
        # NOTE(review): the file is rewritten from the full price frame below, so
        # rows older than the re-predicted tail end up with empty buys/sells;
        # merge with df_old if the signal history must be preserved.
        latest_date_index = df_old[df_old['Date'] == latest_date].index[0]
        new_start_index = max(latest_date_index - refresh_lookback_rows, 0)
        fromDate = df_old.loc[new_start_index, 'Date']

    X, y, df_stock = hp.prepare_data(stock_data, stock, fromDate, toDate, lag, stationary=stationary)

    xgboost_model = XGBoost(loss_fn, params)
    xgboost_stock_predictor = StockPredictor(xgboost_model, window_size=window_size, stationary=stationary)
    xgboost_stock_predictor.fit_predict(X, y, df_stock)

    true_returns = xgboost_stock_predictor.true_returns
    predicted_returns = xgboost_stock_predictor.predicted_returns

    buys, sells, portfolio_value, portfolio_growth_percentage, dates, stock_prices = hp.trading_strategy(
        df_stock=df_stock,
        window_size=window_size,
        true_returns=true_returns,
        predicted_returns=predicted_returns,
        starting_funds=starting_funds,  # use the configured value instead of a repeated literal
    )

    # Select the columns first, then copy, so the new signal columns are added
    # to an independent frame rather than to a slice of another frame.
    df_res = stock_data[stock][columns_to_return].copy()
    df_res['buys'] = None
    df_res['sells'] = None
    # Signals are aligned to the last len(...) rows of the price frame
    # (presumably the leading rows are consumed as lag/window warm-up - confirm
    # against hp.trading_strategy). The length guard matters because a slice
    # starting at -0 would select every row instead of none.
    if len(buys) > 0:
        df_res.loc[df_res.index[-len(buys):], 'buys'] = buys
    if len(sells) > 0:
        df_res.loc[df_res.index[-len(sells):], 'sells'] = sells

    df_res.to_csv(file_path, index=False)

    # Optional: save the signal/returns figure for this ticker.
    # fig, fig2 = hp.plot_signal_returns(buys, sells, portfolio_value, portfolio_growth_percentage, dates, stock_prices, display=False)
    # image_path = os.path.join(image_folder_name, f'{stock}.png')
    # fig2.savefig(image_path)

RIVN
BB
SOFI
GME
AMC
PLTR
TSLA
AAPL
MSFT
AMZN
GOOG
AMD
NVDA
