In [356]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.layers import LSTM
from keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from keras.layers import Dense
import yfinance as yf


In [None]:
class ModelLstm:

    def __init__(self):
        pass

    # import data
    def get_data(self):
        df = pd.read_csv('../../long_short_local/raw_data/cleaned_data_2y.csv')
        return df


    # split data into train and validation
    def split_train_val(self, df):
        length_data = len(df)
        split_ratio = 0.7           # %70 train + %30 validation
        length_train = round(length_data * split_ratio)
        length_validation = length_data - length_train
        #print("Data length :", length_data)
        #print("Train data length:", length_train)
        #print("Validation data lenth:", length_validation)

        train_data = df[:length_train].iloc[:,:2]
        train_data['Date'] = pd.to_datetime(train_data['Date'])  # converting to date time object

        validation_data = df[length_train:].iloc[:,:2]
        validation_data['Date'] = pd.to_datetime(validation_data['Date'])  # converting to date time object

        return train_data, validation_data, length_train, length_validation


    # create train dataset from train split
    def train_split(self, train_data):
        dataset_train = train_data.iloc[:, 1].values
        # Change 1d array to 2d array
        # Changing shape from (1692,) to (1692,1)
        dataset_train = np.reshape(dataset_train, (-1,1))
        #dataset_train.shape
        dataset_train_scaled = dataset_train

        return dataset_train_scaled


    # create X_train and y_train from train data
    def create_x_y_train(self, df, length_train):
        X_train = []
        y_train = []

        time_step = 20 #change that?

        for i in range(time_step, length_train):
            X_train.append(df[i-time_step:i,0:1])
            y_train.append(df[i,0:1])

        # convert list to array
        X_train, y_train = np.array(X_train), np.array(y_train)

        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1],1))
        y_train = np.reshape(y_train, (y_train.shape[0],1))

        X_train = X_train[:int(X_train.shape[0]*0.95)]
        X_val = X_train[int(X_train.shape[0]*0.95):]
        y_train = y_train[:int(y_train.shape[0]*0.95)]
        y_val = y_train[int(y_train.shape[0]*0.95):]

        return X_train, X_val, y_train, y_val, time_step


    # create test dataset from validation data
    # R
    #Converting array and scaling
    def create_x_y_test(self, validation_data, length_validation, time_step):
        dataset_validation = validation_data.iloc[:,1].values  # getting "Ratio" column and converting to array
        dataset_validation = np.reshape(dataset_validation, (-1,1))  # converting 1D to 2D array
        #scaled_dataset_validation =  scaler.fit_transform(dataset_validation)  # scaling  values to between 0 and 1
        scaled_dataset_validation = dataset_validation

        X_test = []
        y_test = []

        for i in range(time_step, length_validation):
            X_test.append(scaled_dataset_validation[i-time_step:i,0])
            y_test.append(scaled_dataset_validation[i,0])

        # Converting to array
        X_test, y_test = np.array(X_test), np.array(y_test)
        X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))  # reshape to 3D array
        y_test = np.reshape(y_test, (-1,1))  # reshape to 2D array

        return X_test, y_test


    # create LSTM model
    def generate_model(self, X_train, y_train, X_val, y_val, X_test, y_test):
        es = EarlyStopping(patience=20, restore_best_weights=True)
        model_lstm = Sequential()

        model_lstm.add(LSTM(20, return_sequences=False, input_shape=(X_train.shape[1],1))) #64 lstm neuron block
        model_lstm.add(Dense(32))
        model_lstm.add(Dense(1))

        model_lstm.compile(loss = "mape", optimizer = "rmsprop", metrics = ["mae", "mape"])
        history2 = model_lstm.fit(X_train, y_train, epochs = 400, batch_size = 64,validation_data = (X_val, y_val),callbacks=[es])
        mape = model_lstm.evaluate(X_test, y_test)
        predictions = model.predict(X_test)
        return mape[2]


    # implemenation refactored
    def run_model(self, df):

        mape_dict = {}

        for ratio in df:
        # split into train/test
            if ratio == 'Date':
                continue
            else:
                one_ratio_df = pd.DataFrame(df[['Date', ratio]])
                train_data, validation_data, length_train, length_validation = self.split_train_val(one_ratio_df)
                # call train_split
                dataset_train_scaled = self.train_split(train_data)
                # create X_train, y_train
                X_train, X_val, y_train, y_val, time_step = self.create_x_y_train(dataset_train_scaled, length_train)
                # create X_test, y_test
                X_test, y_test = self.create_x_y_test(validation_data, length_validation, time_step)
                # run LSTM model
                mape = self.generate_model(X_train, y_train, X_val, y_val, X_test, y_test)
                mape_dict[ratio] = round(mape, 3)

        mape_lstm = pd.DataFrame(mape_dict.items(), columns=['ratio', 'MAPE'])
        return mape_lstm


In [418]:
df = pd.read_csv('../raw_data/Arima_preds_mape.csv',index=False)
df

TypeError: read_csv() got an unexpected keyword argument 'index'

In [364]:
ratios_df = pd.read_excel('../raw_data/ratios.xlsx')

In [369]:
ratios_df.tail(30)

Unnamed: 0,Date,CMG_ETN,ROST_MMM,RE_ZTS,MCK_GLW,D_WBD,DVA_OMC,CARR_GOOGL,PGR_CTSH,RJF_MSFT,PKG_FIS
475,2022-07-28,10.578916,0.583795,1.408397,9.432495,5.173568,1.197559,0.345649,1.715386,0.3458,1.388685
476,2022-07-29,10.598762,0.571114,1.423062,9.351867,5.420702,1.20504,0.348435,1.699715,0.351496,1.37637
477,2022-08-01,10.616089,0.577939,1.395593,9.470187,5.315972,1.210068,0.353213,1.681538,0.355633,1.344177
478,2022-08-02,10.644866,0.566961,1.385576,9.339323,5.093067,1.25,0.350995,1.681742,0.35739,1.351502
479,2022-08-03,10.717376,0.593412,1.397085,9.019222,4.938394,1.206087,0.34519,1.676145,0.355162,1.33631
480,2022-08-04,10.711922,0.56929,1.435932,9.317085,4.703267,1.213513,0.353752,1.705549,0.354461,1.426323
481,2022-08-05,10.827513,0.575242,1.455381,9.746298,5.61314,1.240301,0.356517,1.727959,0.362367,1.388723
482,2022-08-08,11.02315,0.578419,1.472624,9.738492,5.757871,1.259718,0.357545,1.728511,0.364499,1.406709
483,2022-08-09,11.256256,0.575135,1.529066,10.091842,6.237195,1.268182,0.358055,1.763776,0.367906,1.41761
484,2022-08-10,11.164681,0.580797,1.509377,9.834795,6.219024,1.269488,0.358062,1.757903,0.367565,1.413496


In [375]:
def mean_absolute_percentage_error(y_true, predictions):
    '''Calculates MAPE for predictions'''
    y_true, y_pred = np.array(y_true), np.array(predictions)
    return np.mean(np.abs((y_true - predictions) / y_true)) * 100

In [392]:
columns = df.columns.drop('Date')
columns

Index(['CMG_ETN', 'ROST_MMM', 'RE_ZTS', 'MCK_GLW', 'D_WBD', 'DVA_OMC',
       'CARR_GOOGL', 'PGR_CTSH', 'RJF_MSFT', 'PKG_FIS'],
      dtype='object')

In [387]:
mape1 = {}
for columns in df:
    mape=mean_absolute_percentage_error(ratios_df[columns].iloc[-30],df[columns])
    print(columns,mape)
    mape1[columns] = mape
mape2 = pd.DataFrame(mape1,index=mape1.keys)

CMG_ETN 5.240811749764081
ROST_MMM 10.359166782031394
RE_ZTS 12.196108050451494
MCK_GLW 8.858523686338483
D_WBD 15.699334004093746
DVA_OMC 4.90748541902987
CARR_GOOGL 3.2891057727770123
PGR_CTSH 21.20570623610908
RJF_MSFT 3.263261190017688
PKG_FIS 6.625927646486324


TypeError: 'builtin_function_or_method' object is not iterable

In [413]:
def make_mape(self, preds_df, ratios_df):

        def mean_absolute_percentage_error(y_true, predictions):
            '''Calculates MAPE for predictions'''
            y_true, predictions = np.array(y_true), np.array(predictions)
            return np.mean(np.abs((y_true - predictions) / y_true)) * 100

        if 'Date' in preds_df.columns:
            columns = preds_df.columns.drop('Date')
        else:
            columns = preds_df.columns
        mape_dict = {}
        for columns in columns:
            mape = mean_absolute_percentage_error(ratios_df[columns].iloc[-30],
                                                  preds_df[columns])
            mape_dict[columns] = mape

        mape = pd.DataFrame(mape_dict.items(), columns=['ratio', 'MAPE'])
        return mape

In [414]:
mapes = make_mape(df,ratios_df)

In [415]:
mapes

Unnamed: 0,ratio,MAPE
0,CMG_ETN,5.240812
1,ROST_MMM,10.359167
2,RE_ZTS,12.196108
3,MCK_GLW,8.858524
4,D_WBD,15.699334
5,DVA_OMC,4.907485
6,CARR_GOOGL,3.289106
7,PGR_CTSH,21.205706
8,RJF_MSFT,3.263261
9,PKG_FIS,6.625928


In [403]:
mape = pd.DataFrame(mapes.items(), columns=['ratio', 'MAPE'])
mape

Unnamed: 0,ratio,MAPE
0,CMG_ETN,5.240812
1,ROST_MMM,10.359167
2,RE_ZTS,12.196108
3,MCK_GLW,8.858524
4,D_WBD,15.699334
5,DVA_OMC,4.907485
6,CARR_GOOGL,3.289106
7,PGR_CTSH,21.205706
8,RJF_MSFT,3.263261
9,PKG_FIS,6.625928


In [416]:
ratios_df

Unnamed: 0,Date,CMG_ETN,ROST_MMM,RE_ZTS,MCK_GLW,D_WBD,DVA_OMC,CARR_GOOGL,PGR_CTSH,RJF_MSFT,PKG_FIS
0,2020-09-08,13.506638,0.602499,1.329053,4.895305,3.251052,1.708033,0.378947,1.340740,0.238529,0.687761
1,2020-09-09,13.449365,0.592539,1.296271,4.776018,3.230514,1.795396,0.380226,1.365644,0.229566,0.681509
2,2020-09-10,13.527456,0.584031,1.266311,4.902900,3.140488,1.829054,0.372837,1.378674,0.234225,0.685586
3,2020-09-11,13.214654,0.570319,1.254798,4.882002,3.263059,1.820385,0.376656,1.334546,0.237897,0.700702
4,2020-09-14,12.823698,0.584702,1.257235,4.779497,3.176914,1.764097,0.377221,1.330864,0.243906,0.705164
...,...,...,...,...,...,...,...,...,...,...,...
500,2022-09-01,11.624631,0.702062,1.708488,10.741954,6.244125,1.304472,0.362584,1.949874,0.401536,1.505903
501,2022-09-02,11.773634,0.718372,1.745509,10.974962,6.265697,1.325083,0.366991,1.963902,0.404202,1.512838
502,2022-09-06,11.805430,0.743739,1.749554,11.003354,6.452890,1.338287,0.377305,1.968499,0.408292,1.523266
503,2022-09-07,11.897501,0.764330,1.754489,11.123370,6.576498,1.343383,0.379169,1.997801,0.409663,1.531353


In [455]:
df = pd.read_csv('../raw_data/Arima_actual_predictions.csv')

df


Unnamed: 0,Date,CMG_ETN,ROST_MMM,RE_ZTS,MCK_GLW,D_WBD,DVA_OMC,CARR_GOOGL,PGR_CTSH,RJF_MSFT,PKG_FIS
0,2022-09-09,11.962261,0.77331,1.809165,11.188054,6.627177,1.34177,0.388191,2.011887,0.416957,1.556247
1,2022-09-10,11.959198,0.773647,1.810111,11.20051,6.633858,1.339859,0.388205,2.013297,0.417305,1.557962
2,2022-09-11,11.956135,0.773985,1.811056,11.212966,6.640538,1.338436,0.388218,2.014618,0.417653,1.559677
3,2022-09-12,11.953072,0.774323,1.812002,11.225422,6.647218,1.337292,0.388231,2.015945,0.418002,1.561393
4,2022-09-13,11.950008,0.774661,1.812948,11.237878,6.653899,1.336306,0.388244,2.017272,0.41835,1.563108
5,2022-09-14,11.946945,0.774999,1.813893,11.250334,6.660579,1.335409,0.388258,2.018599,0.418699,1.564823
6,2022-09-15,11.943882,0.775336,1.814839,11.26279,6.66726,1.334564,0.388271,2.019926,0.419047,1.566539
7,2022-09-16,11.940819,0.775674,1.815785,11.275246,6.67394,1.333748,0.388284,2.021253,0.419395,1.568254
8,2022-09-17,11.937756,0.776012,1.816731,11.287702,6.68062,1.332949,0.388298,2.02258,0.419744,1.569969
9,2022-09-18,11.934693,0.77635,1.817676,11.300158,6.687301,1.332159,0.388311,2.023907,0.420092,1.571685


In [443]:
ratios_df['Date'].iloc[-1]

Timestamp('2022-09-08 00:00:00')

In [446]:
len(pd.date_range(ratios_df['Date'].iloc[-30],ratios_df['Date'].iloc[-1]))

43

In [None]:
last_value = df['City'].iat[-1]