In [5]:
import pandas as pd
from os.path import exists
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import datetime as dt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pandas_datareader.data as web
from datetime import date
import warnings
import tensorflow as tf 
from ta import add_all_ta_features
from sklearn.feature_selection import SelectKBest, f_regression
import json
warnings.filterwarnings('ignore')
plt.style.use('ggplot')

In [6]:
# Names of the technical-indicator columns fed to the model.
# The order is significant: StockPredictor selects feature columns with this
# list, so it determines the column order of the model's input matrix.
indicators = [
    'momentum_tsi',
    'volatility_bbw',
    'trend_aroon_down',
    'trend_aroon_ind',
    'momentum_ppo',
    'volatility_dcw',
    'momentum_stoch_signal',
    'volume_em',
    'momentum_stoch_rsi_k',
    'volume_cmf',
    'trend_psar_down_indicator',
    'trend_psar_up_indicator',
    'volume_mfi',
    'trend_vortex_ind_pos',
    'momentum_ppo_hist',
    'trend_vortex_ind_neg',
    'trend_vortex_ind_diff',
    'trend_adx_neg',
    'volume_fi',
    'volatility_bbli',
    'volatility_bbhi',
    'trend_adx_pos',
    'momentum_roc',
    'momentum_rsi',
    'volatility_kchi',
    'volatility_kcli',
    'volatility_dcp',
    'others_dr',
    'momentum_wr',
    'momentum_stoch',
    'volatility_bbp',
    'trend_cci',
    'momentum_stoch_rsi',
    'volatility_kcp',
    'volume_vpt',
    'others_dlr',
]

In [50]:
class StockPredictor:
    """Per-ticker pipeline that predicts the next close from technical indicators.

    For one ticker the pipeline downloads the price history, derives the
    indicator columns listed in ``self.indicators``, trains a dense Keras
    regressor (or reloads previously saved weights) and turns the predicted
    relative change into a price estimate.

    The regression target for day ``t`` is ``1 - adj_close[t] / adj_close[t+1]``.

    NOTE: indicators are computed on chronologically ordered rows. Weights
    saved by a version that computed them on newest-first rows were fitted to
    different feature values; retrain with ``predict_tomorrow(..., override=True)``.
    """

    def __init__(self):
        """Set the network layout, the download window and the feature list."""
        # Units per Dense layer; the last entry is the linear output layer.
        self.layer_sizes = [128, 64, 32, 1]
        self.start = dt.datetime(1990, 1, 1)
        self.end = date.today()
        self.ticker = None
        # Private copy so changing the instance list never alters the
        # module-level ``indicators`` list.
        self.indicators = list(indicators)
        # Fitted by prepare_test_train(); reused by get_new_observ().
        self.scaler = None

    def create_model(self):
        """Build and compile the dense regression network.

        The input width is taken from ``self.X_train``, so
        ``prepare_test_train()`` must have been called first.

        Returns:
            A compiled ``tf.keras`` Sequential model.
        """
        model = tf.keras.models.Sequential()
        model.add(tf.keras.Input(shape=(self.X_train.shape[1],)))
        # Hidden layers: every size except the last one, with ReLU.
        for layer_size in self.layer_sizes[:-1]:
            model.add(tf.keras.layers.Dense(layer_size, activation="relu"))
        # Output layer: no activation (linear).
        for layer_size in self.layer_sizes[-1:]:
            model.add(tf.keras.layers.Dense(layer_size))

        # NOTE(review): 'accuracy' is not informative for a regression target;
        # it is kept so the compile configuration stays as it was.
        model.compile(loss='mean_squared_error',
                      optimizer='adam',
                      metrics=['mae', 'mse', 'accuracy'])
        return model

    def load_csv(self):
        """Read ``<ticker>.csv`` from the working directory into a DataFrame."""
        return pd.read_csv(f'{self.ticker}.csv')

    def prepare_test_train(self):
        """Split ``self.final_df`` 80/20 and standardise the features.

        The scaler is fitted on the training split only and kept on
        ``self.scaler`` so that new observations can later be transformed with
        the same statistics.
        """
        self.X = self.final_df.drop('prediction', axis=1)
        self.y = self.final_df['prediction']
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=77)
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

    def get_data(self):
        """Download the price history of ``self.ticker`` into ``self.df``.

        The frame ends up indexed by a ``DatetimeIndex`` named ``Datetime``
        built from the downloaded ``Date`` column.
        """
        data = web.DataReader(self.ticker, 'yahoo', self.start, self.end)
        # Only consulted when 'Date' holds strings rather than datetimes.
        date_format = '%d/%m/%Y'
        data = data.reset_index()
        data['Datetime'] = pd.to_datetime(data['Date'], format=date_format)
        data = data.set_index(pd.DatetimeIndex(data['Datetime']))
        self.df = data.drop(['Datetime', 'Date'], axis=1)

    def get_indicatore(self, data):
        """Return ``data`` with the ``ta`` library's indicator columns added.

        Args:
            data: DataFrame with ``Open``, ``High``, ``Low``, ``Close`` and
                ``Volume`` columns.
        """
        data = add_all_ta_features(data, open="Open",
                                   high="High",
                                   low='Low',
                                   close='Close',
                                   volume='Volume',
                                   fillna=True)
        return data

    def get_all_indicators(self):
        """Compute indicator columns for ``self.df`` into ``self.indicators_data``.

        Rows are sorted oldest-first before the indicators are derived so that
        each row's values are built from the rows that precede it in time.
        The raw price/volume columns are removed from the result.
        """
        # sort_index returns a new frame, so self.df is left untouched.
        chronological = self.df.sort_index(ascending=True)
        enriched = self.get_indicatore(chronological)
        self.indicators_data = enriched.drop(
            ['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], axis=1)

    def prepare_prediction(self):
        """Build ``self.processed_data`` with the regression target column.

        ``prediction`` for day ``t`` is ``1 - adj_close[t] / adj_close[t+1]``.
        The most recent day has no following day, so its target is NaN and the
        row is dropped together with any other row containing NaN.
        """
        processed = self.df.sort_index(ascending=True)
        adj_close = processed['Adj Close']
        # shift(-1) on oldest-first rows aligns each day with the next day's price.
        processed['prediction'] = 1 - adj_close / adj_close.shift(-1)
        self.processed_data = processed.dropna()

    def get_new_observ(self):
        """Return the scaled feature row of the most recent trading day.

        The row is transformed with the scaler fitted on the training split.
        Fitting a fresh scaler on this single row would map every feature to 0.

        Returns:
            Array of shape ``(1, len(self.indicators))``.

        Raises:
            RuntimeError: if ``prepare_test_train()`` has not been run yet.
        """
        scaler = getattr(self, 'scaler', None)
        if scaler is None:
            raise RuntimeError(
                "No fitted scaler available; call prepare_test_train() first")
        self.get_all_indicators()
        # indicators_data is ordered oldest-first, so the last row is the latest day.
        latest_row = self.indicators_data.iloc[[-1]][self.indicators].values
        return scaler.transform(latest_row)

    def get_most_important_features(self, data):
        """Rank features by univariate ``f_regression`` score.

        Args:
            data: DataFrame holding the feature columns plus ``prediction``.

        Returns:
            Array of feature names with a positive score, ordered by ascending
            score, with the 50 lowest-scoring ones left out.
        """
        X = data.drop('prediction', axis=1)
        y = data['prediction']
        names = pd.DataFrame(X.columns)
        selector = SelectKBest(score_func=f_regression, k=4)
        results = selector.fit(X, y)
        scores = pd.DataFrame(results.scores_)
        scored = pd.concat([names, scores], axis=1)
        scored.columns = ["Feature", "Score"]
        positive = scored[scored.Score > 0]
        ranked = positive.sort_values(by=['Score'], ascending=True)
        return ranked.iloc[50:]['Feature'].values

    def prepare_data_and_indicators(self):
        """Download data and assemble ``self.final_df`` (features + target).

        ``self.indicators`` is a fixed column list on purpose: a changing
        feature set would not fit previously saved weights.
        """
        self.get_data()
        self.prepare_prediction()
        self.get_all_indicators()
        # concat aligns on the date index; the latest day has no target yet and
        # is removed by dropna.
        self.final_df = pd.concat(
            [self.indicators_data[self.indicators],
             self.processed_data['prediction']], axis=1)
        self.final_df.dropna(inplace=True)

    def create_new_weights(self):
        """Build the model, fit it on the training split and save its weights."""
        self.model = self.create_model()
        self.model.fit(self.X_train,
                       self.y_train,
                       validation_data=(self.X_test, self.y_test),
                       batch_size=50,
                       epochs=50)
        self.save_model_weights()

    def accuracy_result(self):
        """Return the R^2 score of the model on the held-out test split."""
        y_pred = self.model.predict(self.X_test)
        return r2_score(self.y_test, y_pred)

    def predict_tomorrow(self, ticker, override=False):
        """Predict the next close for ``ticker``.

        Args:
            ticker: Symbol to download and model.
            override: When True, retrain even if saved weights exist.

        Returns:
            Tuple ``(day_month, y_price_tomorrow, new_prediction, accuracy)``
            where ``day_month`` is ``(day, month)`` of the latest downloaded
            row, ``new_prediction`` is the model output for that day,
            ``y_price_tomorrow`` is ``close / (1 - new_prediction)`` and
            ``accuracy`` is the R^2 score on the test split.
        """
        self.ticker = ticker
        self.prepare_data_and_indicators()
        self.prepare_test_train()
        if not exists(f'weights/{self.ticker}_ANN_Model_weights.index') or override:
            print(f"Creating new weights for {self.ticker} Model")
            self.create_new_weights()
        else:
            print(f"Loading an exsisting weights for {self.ticker} Model")
            self.model = self.create_model()
            self.load_model_weights()
        print(f"Making a prediction for {ticker}")
        new_observ = self.get_new_observ()
        new_prediction = self.model.predict(new_observ)
        accuracy = self.accuracy_result()
        # Use the instance's own frame (not a notebook-level global) and take
        # the latest row explicitly.
        chronological = self.df.sort_index()
        # today_price / (1 - prediction) = tomorrow_price
        y_price_tomorrow = chronological['Close'].iloc[-1:].values / (1 - new_prediction)
        latest_day = chronological.index[-1]
        day_month = (latest_day.day, latest_day.month)
        return day_month, y_price_tomorrow, new_prediction, accuracy

    def save_model_config_weights(self):
        """Save the full model (configuration + weights) under ``models/``."""
        self.model.save(f'models/{self.model.__class__.__name__}_config_weights')

    def save_model_weights(self):
        """Save the model weights under ``weights/`` for the current ticker."""
        self.model.save_weights(f'weights/{self.ticker}_ANN_Model_weights')

    def save_model_json(self):
        """Write the model architecture as JSON under ``models/``."""
        model_json = self.model.to_json()
        with open(f'models/{self.model.__class__.__name__}_config.json', 'w') as outfile:
            outfile.write(model_json)

    def load_model_config_weights(self, path):
        """Load and return a full model (configuration + weights) from ``path``."""
        return tf.keras.models.load_model(path)

    def load_model_weights(self):
        """Load the saved weights of the current ticker into ``self.model``."""
        self.model.load_weights(f'weights/{self.ticker}_ANN_Model_weights')

    def load_model_json(self, path):
        """Rebuild and return a model from the architecture JSON file at ``path``."""
        with open(path, 'r') as infile:
            model_json = json.load(infile)
            return tf.keras.models.model_from_json(json.dumps(model_json))



In [51]:
if __name__ == '__main__':
    # Run the predictor for a fixed watch-list and store one CSV per run date.
    from pathlib import Path

    stock_predictor = StockPredictor()
    stocks = ['QQQ', 'AAPL', 'NVDA', 'NIO', 'TSLA', 'MSFT', 'AMD', 'QCOM', 'INTC', 'IBM']

    # Raw per-ticker outputs, keyed by ticker symbol.
    result = {}
    for stock in stocks:
        day_m, price, change, accuracy = stock_predictor.predict_tomorrow(stock, override=False)
        result[stock] = {'stock': stock, 'date': day_m,
                         'price': price, 'change': change,
                         'accuracy': accuracy}

    # price/change come back as (1, 1) arrays; [0][0] extracts the scalar.
    rows = [
        [stock, entry['date'], entry['price'][0][0], entry['change'][0][0], entry['accuracy']]
        for stock, entry in result.items()
    ]

    print("Saving data to file .csv")
    # Build the frame straight from the rows: they mix str, tuple and float,
    # which cannot be packed into a regular NumPy array.
    score_df = pd.DataFrame(rows, columns=['Stock', 'Date', 'Price', 'Change', 'Accuracy'])
    # Create the output folder up front so the run does not fail at the final write.
    results_dir = Path('results')
    results_dir.mkdir(parents=True, exist_ok=True)
    score_df.to_csv(results_dir / f'{date.today()}.csv')
    print("Finished running programm")

Loading an exsisting weights for QQQ Model
Making a prediction for QQQ
(13, 9)
Loading an exsisting weights for AAPL Model
Making a prediction for AAPL
(13, 9)
Loading an exsisting weights for NVDA Model
Making a prediction for NVDA
(13, 9)
Loading an exsisting weights for NIO Model
Making a prediction for NIO
(13, 9)
Loading an exsisting weights for TSLA Model
Making a prediction for TSLA
(13, 9)
Loading an exsisting weights for MSFT Model
Making a prediction for MSFT
(13, 9)
Loading an exsisting weights for AMD Model
Making a prediction for AMD
(13, 9)
Loading an exsisting weights for QCOM Model
Making a prediction for QCOM
(13, 9)
Loading an exsisting weights for INTC Model
Making a prediction for INTC
(13, 9)
Loading an exsisting weights for IBM Model
Making a prediction for IBM
(13, 9)
Saving data to file .csv
Finished running programm
