In [1]:
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np
import yfinance as yf
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [2]:
class StockPrediction():
    def __init__(self,stock_symbol="SPY",interval='1d'):
        self.stock_symbol = stock_symbol
        self.interval = interval
        self.df = self.download_ticker(self.stock_symbol,self.interval)
        self.train = None
        self.test = None
        self.d= 0

        # baseline model
        self.baseline_history = []
        self.baseline_pred = []
        self.baseline_RMSE = None
        
        # model
        self.model = None
        self.pred = []
        self.history = []
        
               
    def download_ticker(self, ticker, interval):
        """params: 
            ticker: default is SPY
            returned_cols: (list) default is Adj Close
            interval: one of 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo, default is 1d
            returns: df with ticker data index is timeseries"""
        return yf.download(ticker, interval=interval)
    
    def train_test_split(self, col="Adj Close",diff=0, pct_change=False, train_size=.99):
        """Splits dataframe into train and test according to train_size"""
        self.d = diff
        self.original_price = col
        ind_split=int(np.floor(len(self.df.index)*train_size))
        # Train
        self.train= self.df.iloc[:ind_split][col] if self.d==0 else \
            (self.df.iloc[:ind_split][col].diff(self.d).dropna() if not pct_change else \
             self.df.iloc[:ind_split][col].pct_change(self.d).dropna()*100)
        # test
        self.test = self.df.iloc[ind_split:][col] if self.d==0 else \
            (self.df.iloc[ind_split:][col].diff(self.d).dropna() if not pct_change else \
             self.df.iloc[ind_split:][col].pct_change(self.d).dropna()*100)
        print("train, test split success")
            
    def baseline_predict_all(self,p=1,q=1,diff=False):
        self.baseline_pred = []
        self.baseline_history = self.train.to_list()
        
        for t in tqdm(range(len(self.test))):
            bsl_model = ARIMA(self.baseline_history, order = (p, q if not diff else 0 ,q))
            bsl_fit = bsl_model.fit()
            self.baseline_pred.append(bsl_fit.forecast()[0])
            self.baseline_history.append(self.test[t])
        self.baseline_RMSE = np.sqrt(((self.test.to_numpy() - np.array(self.baseline_pred))**2).sum()/len(self.test.index))
        
    def baseline_predict(self, scope=1, p=4, q=1):
        baseline_model = ARIMA(pd.concat([self.train,self.test]), order=(p, q if not diff else 0, q))
        bsl_fit = baseline_model.fit()
        return pd.Series(bsl_fit.forecast(steps = scope))
    
    def plot_baseline_performance(self):
        plt.figure(figsize=(12,8))
        ax = self.test.plot(label="y_true")
        pd.Series(self.baseline_pred,index=self.test.index).plot(label="y_pred_baseline", ax = ax)
        ax.grid(True, which="both")
        ax.legend()
        ax.set_title(f"RMSE_Baseline: {self.baseline_RMSE:.4f}")
    
    def profiling(self,cols=["Adj Close"]):
        """Automatic EDA"""
        # head
        print("The head of the DataFrame:")
        display(self.df.head())
        print("\n")
        
        # shape
        print("The shape of the DataFrame:")
        display(self.df.shape)
        print("\n")
        
        # index
        print("The time stamp for each record:")
        display(self.df.index)
        print("\n")
        
        # plot the distribution of the day
        print("The distribution of the weekday recorded:")
        sns.countplot(x=self.df.index.strftime('%A'))
        plt.show()
        print("\n")
        
        # gap
        print("The start date and the current end date for the data:")
        display(self.df.index[0],self.df.index[-1])
        print("\n")
        
        # nan check
        print("If there is missing values in the data:")
        display(self.df.isna().sum())
        print("\n")
        
        # describe
        print("Describe the time series:")
        display(self.df.describe())
        print("\n")
        
        for col in cols:
            # plot time series values
            print("Plot the time series:")
            plt.figure(figsize=(12,8))
            self.plot(col)
            plt.title = f"{col} Price"
            plt.show()
            
            # plot diff one day
            print("Plot the change in the time series:")
            plt.figure(figsize=(12,8))
            self.df[col].diff().plot()
            plt.title = f"{col} Price Change"
            plt.show()
            # plot diff acf
            print(f"Plot the Auto-Correlation Function of {col} Price Change:")
            self.plot_acf(col)
            plt.show()
            # plot diff pacf
            print(f"Plot the Partial Auto-Correlation Function of {col} Price Change:")
            self.plot_pacf(col)
            plt.show()
            
    def plot(self,col="Adj Close"):
        self.df[col].plot()
        
    def plot_acf(self,col="Adj Close"):
        plot_acf(self.df[col].diff().dropna())
        
    def plot_pacf(self,col="Adj Close"):
        plot_pacf(self.df[col].diff().dropna())
    

In [3]:
class InvestMoney2():
    def __init__(self,date_price,predictions,prediction_type = 0):
        self.table = None
        self.date_price = date_price
        self.predictions = predictions
        self.original_price = "Adj Close"
        self.original_diffs = "Original diffs"
        self.original_persent = "Original % change"
        self.prediction_price = "Prediction (price)"
        self.prediction_diffs = "Prediction (diffs)"
        self.prediction_persent = "Prediction (% change)"
        self.prediction_recommendations = "Prediction recommendations"
        self.money_at_end_day = "money_at_end_day"
        self.money_cash = "money_cash"
        self.number_stocks = "number_stocks"
        self.money_day_benefits = "money_day_benefits"
        self.type_predictions_list = ["type_price", "type_diffs", "type_persents"]
        self.prediction_type = self.type_predictions_list[prediction_type]
        self.start_capital = 1000
        
    def init_table(self):
        date_price = self.date_price
        predictions = self.predictions
        prediction_type = self.prediction_type
        df = date_price.copy()
        df[self.original_diffs] = df[self.original_price].diff().copy()
        df[self.original_persent] = 0.0
        df[self.prediction_price] = 0.0
        df[self.prediction_diffs] = 0.0
        df[self.prediction_persent] = 0.0
        df[self.prediction_recommendations] = 0.0

        df[self.money_at_end_day] = 0.0
        df[self.money_cash] = 0.0
        df[self.number_stocks] = 0
        df[self.money_day_benefits] = 0.0
        self.table = df
        self._init_table_predictions()
        return self.table
    
    def _init_table_predictions(self):
        df = self.table
        predictions = self.predictions
        prediction_type = self.prediction_type
    
        if prediction_type == self.type_predictions_list[0]:
            df[self.prediction_price] = predictions.copy()
            df[self.prediction_diffs] = df[self.prediction_price].diff()
            self.table = self._calculate_persent_by_price(df)
            

        elif prediction_type == self.type_predictions_list[1]:
            df[self.prediction_price] = 0.0
            df[self.prediction_diffs] = predictions.copy()
            self.table=self._calculate_persent_by_price(df)

        elif prediction_type == self.type_predictions_list[2]:
            df[self.prediction_price] = 0.0
            df = self._calculate_diff_by_persent(df)
            df[self.prediction_persent] = predictions.copy()
            self.table = df
        
        self.table = self._calculate_recommendation(self.table)
        self.table = self._calculate_money_end_day(self.table)
        return self.table

    def _calculate_persent_by_price(self,df):
        for i in range (len(df)):
            df[self.prediction_persent][i] = df[self.prediction_diffs][i]/df[self.original_price][i]* 100
        return df

    def _calculate_diff_by_persent(self,df):
        for i in range (len(df)):
            df[self.prediction_diffs][i] = df[self.prediction_persent][i]*df[self.original_price][i]/ 100
        return df

    def _calculate_recommendation(self,df):
        df[self.prediction_recommendations] = df[self.prediction_diffs] > 0   
        return df

    def _calculate_money_end_day(self,df):
        df[self.money_at_end_day][0] = self.start_capital    
        for i in range (len(df) - 1):
            df[self.original_persent][i+1] = df[self.original_diffs][i+1]/df[self.original_price][i]* 100
            if (df[self.prediction_diffs][i+1] > 0):
                n_stocks = df[self.money_at_end_day][i]//df[self.original_price][i]
                cash = df[self.money_at_end_day][i] - n_stocks * df[self.original_price][i]
                df[self.money_at_end_day][i+1] = n_stocks * df[self.original_price][i+1] + cash
                print("Buy!", df[self.money_at_end_day][i], df[self.money_at_end_day][i+1])
                df[self.number_stocks][i] = n_stocks
                df[self.money_cash][i] = cash

            else:
                df[self.number_stocks][i] = df[self.number_stocks][i-1]
                df [self.money_cash][i] = df [self.money_cash][i-1]
                df[self.money_at_end_day][i+1] = df[self.money_at_end_day][i]
                print("Do not buy!", df[self.money_at_end_day][i], df[self.money_at_end_day][i+1])

            df[self.money_day_benefits][i+1] = df[self.money_at_end_day][i+1] - df[self.money_at_end_day][i]
        return df


In [4]:
sp = StockPrediction()
sp.train_test_split(diff=0,pct_change=False)
sp.baseline_predict_all()

[*********************100%***********************]  1 of 1 completed
train, test split success


  0%|          | 0/75 [00:00<?, ?it/s]

In [5]:
ivm = InvestMoney2(pd.DataFrame(sp.test),sp.baseline_pred)
ivm.init_table()

Do not buy! 1000.0 1000.0
Do not buy! 1000.0 1000.0
Do not buy! 1000.0 1000.0
Buy! 1000.0 996.1685180664062
Do not buy! 996.1685180664062 996.1685180664062
Do not buy! 996.1685180664062 996.1685180664062
Do not buy! 996.1685180664062 996.1685180664062
Buy! 996.1685180664062 1014.9288940429688
Buy! 1014.9288940429688 1025.7484741210938
Buy! 1025.7484741210938 1035.3389892578125
Buy! 1035.3389892578125 1035.0801391601562
Buy! 1035.0801391601562 1045.4353637695312
Buy! 1045.4353637695312 1033.9051513671875
Do not buy! 1033.9051513671875 1033.9051513671875
Buy! 1033.9051513671875 1038.2862548828125
Buy! 1038.2862548828125 1044.6985473632812
Buy! 1044.6985473632812 1055.9299926757812
Buy! 1055.9299926757812 1050.2545776367188
Do not buy! 1050.2545776367188 1050.2545776367188
Do not buy! 1050.2545776367188 1050.2545776367188
Buy! 1050.2545776367188 1057.9811401367188
Buy! 1057.9811401367188 1046.4907836914062
Do not buy! 1046.4907836914062 1046.4907836914062
Do not buy! 1046.4907836914062 10

Unnamed: 0_level_0,Adj Close,Original diffs,Original % change,Prediction (price),Prediction (diffs),Prediction (% change),Prediction recommendations,money_at_end_day,money_cash,number_stocks,money_day_benefits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-04,428.978516,,0.000000,433.011119,,,False,1000.000000,0.000000,0,0.000000
2022-03-07,416.332581,-12.645935,-2.947918,429.265946,-3.745172,-0.899563,False,1000.000000,0.000000,0,0.000000
2022-03-08,413.176086,-3.156494,-0.758166,417.713552,-11.552394,-2.795998,False,1000.000000,0.000000,0,0.000000
2022-03-09,424.253662,11.077576,2.681079,413.182324,-4.531228,-1.068047,False,1000.000000,151.492676,2,0.000000
2022-03-10,422.337921,-1.915741,-0.451556,422.991028,9.808704,2.322478,True,996.168518,151.492676,2,-3.831482
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-14,372.261932,-1.125153,-0.301337,374.616220,-14.519138,-3.900248,False,938.696350,119.793762,2,0.000000
2022-06-15,377.569031,5.307098,1.425636,372.025762,-2.590458,-0.686089,False,938.696350,183.558289,2,0.000000
2022-06-16,365.072998,-12.496033,-3.309602,377.101477,5.075714,1.390329,True,913.704285,183.558289,2,-24.992065
2022-06-17,365.859985,0.786987,0.215570,366.486066,-10.615411,-2.901495,False,913.704285,183.558289,2,0.000000


In [6]:
sp = StockPrediction()
sp.train_test_split(diff=1,pct_change=False)
sp.baseline_predict_all()

[*********************100%***********************]  1 of 1 completed
train, test split success


  0%|          | 0/74 [00:00<?, ?it/s]

In [7]:
ivm = InvestMoney2(pd.DataFrame(sp.test),sp.baseline_pred,prediction_type=1)
ivm.init_table()

Buy! 1000.0 240.8447265625
Buy! 240.8447265625 -855.1786499023438
Do not buy! -855.1786499023438 -855.1786499023438
Buy! -855.1786499023438 -2395.80322265625
Buy! -2395.80322265625 -1359.8685913085938
Buy! -1359.8685913085938 4058.8653564453125
Do not buy! 4058.8653564453125 4058.8653564453125
Do not buy! 4058.8653564453125 4058.8653564453125
Do not buy! 4058.8653564453125 4058.8653564453125
Do not buy! 4058.8653564453125 4058.8653564453125
Buy! 4058.8653564453125 -162375.1330871582
Do not buy! -162375.1330871582 -162375.1330871582
Buy! -162375.1330871582 187612.46731567383
Do not buy! 187612.46731567383 187612.46731567383
Do not buy! 187612.46731567383 187612.46731567383
Do not buy! 187612.46731567383 187612.46731567383
Do not buy! 187612.46731567383 187612.46731567383
Buy! 187612.46731567383 464761.5919189453
Buy! 464761.5919189453 -84266.50408935547
Buy! -84266.50408935547 -255431.17150878906
Do not buy! -255431.17150878906 -255431.17150878906
Buy! -255431.17150878906 -199652.649780

Unnamed: 0_level_0,Adj Close,Original diffs,Original % change,Prediction (price),Prediction (diffs),Prediction (% change),Prediction recommendations,money_at_end_day,money_cash,number_stocks,money_day_benefits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-07,-12.645935,,0.000000,0.0,0.424870,-3.359738,True,1000.000000,-11.674805,-80,0.000000e+00
2022-03-08,-3.156494,9.489441,-75.039456,0.0,1.643493,-52.067048,True,240.844727,-2.205322,-77,-7.591553e+02
2022-03-09,11.077576,14.234070,-450.945549,0.0,0.506698,4.574083,True,-855.178650,-2.205322,-77,-1.096023e+03
2022-03-10,-1.915741,-12.993317,-117.293865,0.0,-1.141591,59.590059,False,-855.178650,-0.758179,446,0.000000e+00
2022-03-11,-5.370056,-3.454315,180.312226,0.0,0.379114,-7.059781,True,-2395.803223,-0.758179,446,-1.540625e+03
...,...,...,...,...,...,...,...,...,...,...,...
2022-06-14,-1.125153,13.611176,-92.364770,0.0,1.617171,-143.728996,True,-128386.041351,-0.505310,114105,1.553103e+06
2022-06-15,5.307098,6.432251,-571.678104,0.0,0.201029,3.787935,True,605565.956329,-0.505310,114105,7.339520e+05
2022-06-16,-12.496033,-17.803131,-335.458848,0.0,-0.453946,3.632724,False,605565.956329,-4.285065,-48461,0.000000e+00
2022-06-17,0.786987,13.283020,-106.297897,0.0,1.391333,176.792332,True,-38142.476837,-4.285065,-48461,-6.437084e+05
