# FBProphet without sentiment analysis

In [2]:
#Load modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import math
import prophet as fbp

In [3]:
#Load datasets with compound value
# Directory holding the per-coin CSV files; change it here only when the
# repository is checked out somewhere else.
DATA_DIR = 'C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets'
df_shib = pd.read_csv(f'{DATA_DIR}/SHIB_data.csv')
df_doge = pd.read_csv(f'{DATA_DIR}/DOGE_data.csv')
df_mona = pd.read_csv(f'{DATA_DIR}/MONA_data.csv')

In [3]:
# DOGE, price columns only (the compound value 'com' is left out).
# Each coin-selection cell rebinds df, dates and input_feature, so the cell
# executed last decides which coin the rest of the notebook works on.
df = df_doge
dates = df[['Date']]
input_feature = df[['Open', 'High', 'Close', 'Low', 'Adj Close']].to_numpy()

In [4]:
# DOGE, price columns plus the compound value 'com'.
# Each coin-selection cell rebinds df, dates and input_feature, so the cell
# executed last decides which coin the rest of the notebook works on.
df = df_doge
dates = df[['Date']]
input_feature = df[['Open', 'High', 'Close', 'Low', 'Adj Close', 'com']].to_numpy()

In [43]:
# SHIB, price columns only (ignores the compound value 'com').
# Each coin-selection cell rebinds df, dates and input_feature, so the cell
# executed last decides which coin the rest of the notebook works on.
df = df_shib
dates = df[['Date']]
input_feature = df[['Open', 'High', 'Close', 'Low', 'Adj Close']].to_numpy()

In [17]:
# SHIB, price columns plus the compound value 'com'.
# Each coin-selection cell rebinds df, dates and input_feature, so the cell
# executed last decides which coin the rest of the notebook works on.
df = df_shib
dates = df[['Date']]
input_feature = df[['Open', 'High', 'Close', 'Low', 'Adj Close', 'com']].to_numpy()

In [60]:
# MONA, price columns only (ignores the compound value 'com').
# Each coin-selection cell rebinds df, dates and input_feature, so the cell
# executed last decides which coin the rest of the notebook works on.
df = df_mona
dates = df[['Date']]
input_feature = df[['Open', 'High', 'Close', 'Low', 'Adj Close']].to_numpy()

In [31]:
# MONA, price columns plus the compound value 'com'.
# Each coin-selection cell rebinds df, dates and input_feature, so the cell
# executed last decides which coin the rest of the notebook works on.
df = df_mona
dates = df[['Date']]
input_feature = df[['Open', 'High', 'Close', 'Low', 'Adj Close', 'com']].to_numpy()

In [32]:
#Function for creating the price percentage changes
def create_perc_change(data, steps, col=2):
    """Return the relative change of one column over ``steps`` rows.

    For every row ``i >= steps`` the result is
    ``(data[i, col] - data[i - steps, col]) / data[i - steps, col]``.
    The first ``steps`` rows have no earlier reference row and are NaN.
    A reference value of exactly 0 yields a change of 0 instead of a
    division by zero.

    Parameters
    ----------
    data : 2-D array-like of numbers, one row per time step.
    steps : int
        Non-negative number of rows to look back.
    col : int, default 2
        Column index the change is computed on. The default keeps the
        behaviour of reading column 2.

    Returns
    -------
    numpy.ndarray
        Float array with exactly one entry per row of ``data``, so it can be
        assigned as a column of the originating frame even when ``steps``
        exceeds the number of rows (all NaN in that case).

    Raises
    ------
    ValueError
        If ``steps`` is negative.
    """
    if steps < 0:
        raise ValueError("steps must be non-negative")

    values = np.asarray(data, dtype=float)[:, col]
    n_rows = values.shape[0]
    y_perc_change = np.full(n_rows, np.nan)
    if steps >= n_rows:
        # Not a single row has a reference `steps` rows earlier.
        return y_perc_change

    current = values[steps:]
    reference = values[:n_rows - steps]
    # Pre-fill with 0 so positions with a zero reference keep that value;
    # the division is only evaluated where the reference is non-zero.
    change = np.zeros_like(current)
    np.divide(current - reference, reference, out=change, where=reference != 0)
    y_perc_change[steps:] = change
    return y_perc_change

In [33]:
# Percentage-change targets for the 1, 7, 14 and 30 day horizons, computed
# from the feature matrix of the currently selected coin.
y_1, y_7, y_14, y_30 = (
    create_perc_change(input_feature, horizon) for horizon in (1, 7, 14, 30)
)

# df is an alias of the loaded coin frame, so these columns are added to it.
df['1_day'] = y_1
df['7_day'] = y_7
df['14_day'] = y_14
df['30_day'] = y_30

In [34]:
# One frame per horizon: the date column becomes 'ds' and that horizon's
# target becomes 'y'. The first n rows are dropped because the n-day target
# is NaN there.
df_1 = df.rename(columns={"Date": "ds", "1_day": "y"}).iloc[1:]
df_7 = df.rename(columns={"Date": "ds", "7_day": "y"}).iloc[7:]
df_14 = df.rename(columns={"Date": "ds", "14_day": "y"}).iloc[14:]
df_30 = df.rename(columns={"Date": "ds", "30_day": "y"}).iloc[30:]


In [35]:
#Splitting test and train sets
# Share of the most recent rows held out for testing.
TEST_FRACTION = 0.15


def _split_train_test(frame, test_fraction=TEST_FRACTION):
    """Split ``frame`` chronologically into (train, test).

    The last ``int(test_fraction * len(frame))`` rows form the test set and
    everything before them the training set. The split uses an explicit
    position so that a test size of 0 yields an empty test set; slicing with
    ``[:-0]`` / ``[-0:]`` would instead give an empty training set and put
    every row into the test set.
    """
    n_test = int(test_fraction * len(frame))
    split_at = len(frame) - n_test
    return frame.iloc[:split_at], frame.iloc[split_at:]


train_1, test_1 = _split_train_test(df_1)
train_7, test_7 = _split_train_test(df_7)
train_14, test_14 = _split_train_test(df_14)
train_30, test_30 = _split_train_test(df_30)

In [36]:
#defining the FBProphet model with added regressors
def fit_prophet(train):
    """Create a Prophet model with extra regressors and fit it on ``train``.

    The regressors are registered without standardisation, in the order
    listed below. ``train`` therefore has to provide all of these columns in
    addition to whatever Prophet itself requires.

    Returns the fitted model.
    """
    regressor_columns = (
        'High',
        'Open',
        'Low',
        'Close',
        'Adj Close',
        'com',  # compound value; only meaningful when the sentiment score is used
    )
    forecaster = fbp.Prophet()
    for column in regressor_columns:
        forecaster.add_regressor(column, standardize=False)
    forecaster.fit(train)
    return forecaster


In [37]:
# Fit one Prophet model per forecast horizon, each on its own training split.
model_1 = fit_prophet(train_1)
model_7 = fit_prophet(train_7)
model_14 = fit_prophet(train_14)
model_30 = fit_prophet(train_30)

00:58:18 - cmdstanpy - INFO - Chain [1] start processing
00:58:18 - cmdstanpy - INFO - Chain [1] done processing
00:58:18 - cmdstanpy - INFO - Chain [1] start processing
00:58:18 - cmdstanpy - INFO - Chain [1] done processing
00:58:19 - cmdstanpy - INFO - Chain [1] start processing
00:58:19 - cmdstanpy - INFO - Chain [1] done processing
00:58:19 - cmdstanpy - INFO - Chain [1] start processing
00:58:19 - cmdstanpy - INFO - Chain [1] done processing


In [38]:
#Define a function for predicting 1, 7, 14, 30 days with the use of the fitted models
def predict_model(test_set, n_days_dataset, dates):
    """Predict over consecutive windows of a test frame and collect the results.

    Parameters
    ----------
    test_set : pair
        ``test_set[0]`` is the frame that is sliced into windows and passed to
        ``predict`` (it must have a 'ds' column); ``test_set[1]`` is the
        object whose ``predict`` method is called.
    n_days_dataset : pair
        ``n_days_dataset[0]`` is the window length / step size in rows;
        ``n_days_dataset[1]`` is a frame whose 'y' column supplies the values
        stored as ``trues``.
    dates :
        Not used inside this function.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date', 'pred_yhat_low', 'pred_yhat_upp', 'pred_yhat' and
        'trues', one row per processed window.
    """
    # Window start offsets, counted from the end of the test frame (all negative).
    ranges = [x for x in range(-len(test_set[0]),0 , n_days_dataset[0])]
    preds = pd.DataFrame({})
    pred_yhat_low, pred_yhat_upp, pred_yhat = [], [], []
    trues = []
    dates_preds = []
    for pred in range(len(ranges)):
        # The last start offset is never predicted: the loop stops once it is reached.
        if ranges[pred] == ranges[-1]:
            break

        else:
            # Predict the whole window, then keep only its last row. `[-1:]` is
            # a one-row slice, so each list element is a Series, not a scalar.
            predict = test_set[1].predict(test_set[0][ranges[pred]:ranges[pred]+n_days_dataset[0]])
            pred_yhat_low.append(predict['yhat_lower'][-1:])
            pred_yhat_upp.append(predict['yhat_upper'][-1:])
            pred_yhat.append(predict['yhat'][-1:])
            # One-row slice of 'y' at the offset right after the window, taken
            # from n_days_dataset[1] rather than from test_set[0].
            # NOTE(review): when that offset is -1 the slice end evaluates to 0,
            # and `[-1:0]` is empty - presumably the cause of the warning shown
            # in the recorded output for `preds['trues']`; confirm.
            t = n_days_dataset[1]['y'][ranges[pred]+n_days_dataset[0]:ranges[pred]+n_days_dataset[0]+1]
            trues.append(t)
        # Only reached when the branch above did not break; same offset as `t`,
        # but read from the 'ds' column of the test frame.
        dates_preds.append(test_set[0]['ds'][ranges[pred]+n_days_dataset[0]:ranges[pred]+n_days_dataset[0]+1])

    preds['Date'] = dates_preds
    # NOTE(review): dates_preds holds one-row slices, not plain strings; verify
    # what these two `.str[...]` slices actually produce for the 'Date' column.
    preds['Date'] = preds['Date'].str[:16]
    preds['Date'] = preds['Date'].str[-6:]
    
    # The lists hold one-row slices; np.array() decides the resulting layout.
    preds['pred_yhat_low'] = np.array(pred_yhat_low)
    preds['pred_yhat_upp'] = np.array(pred_yhat_upp)
    preds['pred_yhat'] = np.array(pred_yhat)
    preds['trues'] = np.array(trues)

    return preds

In [39]:
#Implement a parameter grid for more automatic process
# Two parallel columns with one row per horizon (1, 7, 14, 30 days):
#   'test_set'        -> (test frame, fitted model) tuples
#   'n_days_datasets' -> [horizon in days, full frame for that horizon] lists
param_grid = pd.DataFrame({
    'test_set': list(zip(
        (test_1, test_7, test_14, test_30),
        (model_1, model_7, model_14, model_30),
    )),
    'n_days_datasets': [
        list(pair) for pair in zip((1, 7, 14, 30), (df_1, df_7, df_14, df_30))
    ],
})



In [40]:
#Function that runs the predictions, add them to a DataFrame, and export it as a csv file
def run_predictions(param_grid, dates,
                    output_dir='C:/Users/deann/Documents/University/Thesis/Thesis_git/Code thesis/data_temp',
                    file_prefix='fbp_mona_with'):
    """Run ``predict_model`` for every horizon/test-set pair and save each result.

    Every entry of ``param_grid['n_days_datasets']`` is combined with every
    entry of ``param_grid['test_set']`` (a full cross product, so a model is
    also evaluated with window sizes of the other horizons). Each resulting
    frame is written to
    ``{output_dir}/{file_prefix}_{n_days}_daysfuture_model{index}.csv``.

    Parameters
    ----------
    param_grid : pandas.DataFrame
        Must provide the columns 'n_days_datasets' and 'test_set'.
    dates :
        Forwarded unchanged to ``predict_model``.
    output_dir : str
        Directory the CSV files are written to. The default is the directory
        that used to be hard-coded here.
    file_prefix : str
        Leading part of each file name. The default keeps the previous
        'fbp_mona_with' name; pass another prefix when a different coin or
        feature set is selected so earlier results are not overwritten.

    Returns
    -------
    dict
        Maps ``(n_days, test_set_index)`` to the prediction frame that was
        written, so results can be used without re-reading the CSV files.
    """
    results = {}
    for n_days_dataset in param_grid['n_days_datasets']:
        n_days = n_days_dataset[0]
        for test_sets in range(len(param_grid['test_set'])):
            print('N_days = ', n_days, 'test_set = ', test_sets)
            df_pred = predict_model(param_grid['test_set'][test_sets], n_days_dataset, dates)
            df_pred.to_csv(f'{output_dir}/{file_prefix}_{n_days}_daysfuture_model{test_sets}.csv')
            results[(n_days, test_sets)] = df_pred
    return results

In [41]:
#Run the predictions
# run_predictions evaluates every horizon/test-set combination in param_grid
# and writes one CSV per combination; the RMSE cell further down reads those
# files back from disk. `predictions` holds whatever run_predictions returns.
predictions = run_predictions(param_grid, dates)

N_days =  1 test_set =  0


  preds['trues'] = np.array(trues)


N_days =  1 test_set =  1


  preds['trues'] = np.array(trues)


N_days =  1 test_set =  2


  preds['trues'] = np.array(trues)


N_days =  1 test_set =  3


  preds['trues'] = np.array(trues)


N_days =  7 test_set =  0
N_days =  7 test_set =  1
N_days =  7 test_set =  2
N_days =  7 test_set =  3


  preds['trues'] = np.array(trues)


N_days =  14 test_set =  0
N_days =  14 test_set =  1
N_days =  14 test_set =  2
N_days =  14 test_set =  3
N_days =  30 test_set =  0
N_days =  30 test_set =  1
N_days =  30 test_set =  2
N_days =  30 test_set =  3


In [42]:
#Function to calculate the RMSE score
def RMSE(trues, preds, name_model):
    """Return the root mean squared error between ``trues`` and ``preds``.

    ``name_model`` is accepted so callers can pass a label along, but it does
    not influence the result.
    """
    squared_error = mean_squared_error(trues, preds)
    return np.sqrt(squared_error)
    

In [43]:
#Defining a function that loads the dataset obtained by the run_predictions() function
# and calculates the RMSE value
import os
def func(value):
    """Return ``value`` with all line breaks removed, joined into one line."""
    segments = value.splitlines()
    return "".join(segments)

def get_files_RMSE():
    """Read the saved prediction CSVs and tabulate their RMSE values.

    For each horizon in ``days`` and each model index 0..3 the matching
    ``fbp_mona_with_{horizon}_daysfuture_model{index}.csv`` file is read, the
    'trues' column is cleaned up where necessary, and the RMSE between
    'trues' and 'pred_yhat' is computed.

    Returns a DataFrame with one column per horizon (``'{x} day(s)'``) and one
    row per model index; the same frame is also printed.
    """
    df_rmse = pd.DataFrame({})
    days = [1,7,14,30]
    for x in days:
        list_rmse = []
        # y is the model index used in the file name (0..3), not a horizon.
        for y in range(len(days)):
            path_name = f'C:/Users/deann/Documents/University/Thesis/Thesis_git/Code thesis/data_temp/fbp_mona_with_{x}_daysfuture_model{y}.csv'
            df = pd.read_csv(path_name)
            # 76 is the length of the hard-coded directory prefix above, so
            # `var` ends up as the file name without its extension.
            var = os.path.splitext(path_name)[0]
            var = var[76:]
            # Keep characters 7..16 of the stored 'Date' text.
            # NOTE(review): presumably the date inside a longer saved string; confirm.
            df['Date'] = df['Date'].apply(lambda x: x[7:17])
            #print(len(str(df['trues'][0])) > 23)
            # For the 1-day horizon, or when the first 'trues' entry is longer
            # than 24 characters, the column is treated as text to be parsed.
            # NOTE(review): the threshold presumably separates a plain number
            # from a multi-line Series representation - verify.
            if x == 1 or len(str(df['trues'][0])) > 24:
                # Drop the last row, then per entry: remove line breaks, keep
                # the text before the first 'N', take its last space-separated
                # token and convert it to float.
                df = df[:-1]
                df['trues'] = df['trues'].apply(lambda x: func(x).split('N')[0].split(' ')[-1])
                df['trues']= df['trues'].astype(float)
            trues = df['trues'].to_list()
            preds = df['pred_yhat'].to_list()  

            rmse = RMSE(trues, preds, var)
            list_rmse.append(rmse)
            

            #plot(df, trues, preds)
        #print('-----------------------------------------------------------------')
        # One column per horizon; row i holds the RMSE for model index i.
        df_rmse[f'{x} day(s)'] = np.array(list_rmse)
    print(df_rmse)
    return  df_rmse

# Build the RMSE table (rows: model index, columns: horizon) from the saved CSV files.
rmse = get_files_RMSE()  


   1 day(s)  7 day(s)  14 day(s)  30 day(s)
0  0.021880  0.063001   0.083776   0.126791
1  0.042461  0.068087   0.088814   0.116353
2  0.065005  0.074796   0.093556   0.117866
3  0.129764  0.124093   0.107053   0.048796


### Results from personal predictions