## Importing Libraries

In [8]:
# !pip install yfinance
# %pip install localpip
# !localpip install fbprophet

In [9]:
import fbprophet
import prophet
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from dateutil.parser import parse
from fbprophet.plot import plot_cross_validation_metric
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet import Prophet

import time
import pandas as pd
import yfinance as yahooFinance
from datetime import datetime, timedelta, date

import pickle
import json
from prophet.serialize import model_to_json, model_from_json
# from stock_download_pipeline.ipynb import *

ModuleNotFoundError: No module named 'fbprophet'

## Defining Functions

In [7]:
# Function to extract the data from yahoo finance

def YahooFinanceHistory(company, previous_days, path_save_as_csv = False):

    '''
    Download the daily price history of a ticker and a one-row snapshot of its valuation measures.

    Parameters:

    company: string, ticker symbol that is placed in the yahoo finance URL (e.g. 'RELIANCE.NS')
    previous_days: int, number of days before now at which the price history starts
    path_save_as_csv: False (default) to skip saving; otherwise the directory in which
        '{company}_prices.csv' and '{company}_valuation.csv' are written

    Returns:

    company_prices: dataframe, parsed from the CSV served by the download URL
    company_valuation: dataframe, a single row holding 'date', 'company' and one column per
        valuation measure; a measure that is absent from the ticker info is stored as None
    '''

    # take "now" once so both ends of the window are derived from the same instant
    now = datetime.now()
    today = int(time.mktime(now.timetuple()))
    past = int(time.mktime((now - timedelta(previous_days)).timetuple()))

    interval = '1d'

    # defining the query to get historical stock data
    query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{company}?period1={past}&period2={today}&interval={interval}&events=history&includeAdjustedClose=true'

    company_prices = pd.read_csv(query_string)

    # extracting info from yahoo finance for the said company
    company_data = yahooFinance.Ticker(f"{company}")

    # valuation measures which we need
    valuation_measures = ['marketCap', 'enterpriseValue', 'trailingPE', 'forwardPE', 'pegRatio', 'priceToSalesTrailing12Months', 'priceToBook', 'enterpriseToRevenue', 'enterpriseToEbitda']

    # read the info mapping once; .get() turns a measure that is not reported into None
    # instead of aborting the whole download with a KeyError
    company_info = company_data.info or {}
    values = [company_info.get(measure) for measure in valuation_measures]

    # make df to store company valuation data
    company_valuation = pd.DataFrame([values], columns = valuation_measures)
    company_valuation.insert(0, column = 'date', value = date.today())
    company_valuation.insert(1, column = 'company', value = company)

    # only write files when a directory was actually supplied (False, None and '' all skip saving)
    if path_save_as_csv:
        company_prices.to_csv(f'{path_save_as_csv}/{company}_prices.csv')
        company_valuation.to_csv(f'{path_save_as_csv}/{company}_valuation.csv')

    return company_prices, company_valuation

In [8]:
def get_mape(y_true, y_pred):
    """
    Mean absolute percentage error (MAPE) of the predictions, expressed in percent.

    Both inputs are converted to numpy arrays; the error of each element is taken
    relative to the corresponding value in y_true.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

In [9]:
def read_data(company, previous_days, holidays_list_path = 0):
    """
    Fetch the closing prices of a company and, optionally, a holiday calendar.

    Parameters:

    company: string, ticker symbol passed on to YahooFinanceHistory
    previous_days: int, number of days of history passed on to YahooFinanceHistory
    holidays_list_path: path to a .csv, .xls or .xlsx file that has 'Holiday' and 'Day'
        columns; leave at the default (0) to skip the holiday calendar

    Returns:

    company_prices: dataframe with columns 'ds' (datetime) and 'y' (the 'Close' price)
    holidays_list: dataframe with columns 'holiday' and 'ds', or None when no path was given

    Raises:

    ValueError: if holidays_list_path has an extension other than .csv, .xls or .xlsx
    """
    raw_prices, _ = YahooFinanceHistory(company, previous_days)

    # build a fresh frame instead of assigning into a slice of the downloaded one
    company_prices = (
        raw_prices[['Date', 'Close']]
        .rename(columns = {'Date': 'ds', 'Close': 'y'})
        .assign(ds = lambda frame: pd.to_datetime(frame['ds']))
    )

    # the default (0) means "no holiday file"; None lets the caller fall back to a plain model
    if not holidays_list_path:
        return company_prices, None

    return company_prices, _load_holidays(holidays_list_path)


def _load_holidays(holidays_list_path):
    """Read the holiday file and return it with the columns 'holiday' and 'ds' (datetime)."""
    extension_source = str(holidays_list_path).lower()

    if extension_source.endswith('.csv'):
        holidays = pd.read_csv(holidays_list_path)
    elif extension_source.endswith(('.xls', '.xlsx')):
        holidays = pd.read_excel(holidays_list_path)
    else:
        # fail here with a clear message rather than later on an undefined variable
        raise ValueError(f'File type not supported: {holidays_list_path}')

    # parse text dates with dateutil; values that are already dates are passed through untouched
    days = holidays['Day'].map(lambda day: parse(day) if isinstance(day, str) else day)

    return (
        holidays
        .assign(Day = pd.to_datetime(days))
        [['Holiday', 'Day']]
        .rename({'Day': 'ds', 'Holiday': 'holiday'}, axis = 1)
    )

In [1]:
def model_building_prophet(company, company_prices, holidays_list, h, train_size, eliminate_weekends, save_model = False, model_dir = '/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/models'):
    '''
    Fit a Prophet model on the first train_size rows and forecast h periods ahead.

    Parameters:

    company: string, used as the file name of the saved model ('{company}.json')
    company_prices: dataframe passed to Prophet.fit (rows 0..train_size-1 are used)
    holidays_list: dataframe of holidays handed to Prophet, or None to fit a default
        linear-growth model without holidays and without the custom seasonalities
    h: int, number of periods appended by make_future_dataframe
    train_size: int, number of leading rows of company_prices used for fitting
    eliminate_weekends: truthy to keep only Monday-Friday in the dates that are predicted
    save_model: truthy to write the fitted model as JSON into model_dir
    model_dir: string, directory for the saved model (defaults to the original location)

    Hyperparameters used when holidays are supplied:

    changepoint_prior_scale: float, scale of the changepoint prior
    seasonality_prior_scale: float, scale of the seasonality prior
    holidays_prior_scale: float, scale of the holiday prior
    daily/weekly/yearly_seasonality: the built-in seasonalities are switched off and
        replaced by the explicit add_seasonality calls below
    fourier_order: int, order of the fourier series of each added seasonality

    Returns:

    model: the fitted Prophet model
    prediction: dataframe returned by model.predict for future_dates
    future_dates: dataframe of the dates that were predicted (has an extra 'day'
        column when weekends were eliminated)
    '''

    if holidays_list is not None:
        m = Prophet(growth="linear",
            holidays= holidays_list,
            seasonality_mode="multiplicative",
            changepoint_prior_scale=30,
            seasonality_prior_scale=35,
            holidays_prior_scale=20,
            daily_seasonality=False,
            weekly_seasonality=False,
            yearly_seasonality=False,
            ).add_seasonality(
                name='monthly',
                period=30.5,
                fourier_order=55
            ).add_seasonality(
                name="daily",
                period=1,
                fourier_order=15
            ).add_seasonality(
                name="weekly",
                period=7,
                fourier_order=20
            ).add_seasonality(
                name="yearly",
                period=365.25,
                fourier_order=20
            ).add_seasonality(
                name="quarterly",
                period = 365.25/4,
                fourier_order=5,
                prior_scale = 15)
    else:
        m = Prophet(growth = 'linear')

    model = m.fit(company_prices[0:train_size])

    future_dates = model.make_future_dataframe(periods = h)

    # honour the flag's value: False must keep weekends (the old "is not None" check dropped them)
    if eliminate_weekends:
        # weekday is 0 for Monday ... 6 for Sunday, so <= 4 keeps Monday-Friday
        future_dates['day'] = future_dates['ds'].dt.weekday
        future_dates = future_dates[future_dates['day'] <= 4]

    if save_model:
        with open(f'{model_dir}/{company}.json', 'w') as fout:
            json.dump(model_to_json(model), fout)  # Save model

    prediction = model.predict(future_dates)

    return model, prediction, future_dates

## Defining Hyperparameters

In [12]:
# --- what to model ---
company = 'RELIANCE.NS'
holidays_list_path = '/content/2017-2022_Holidays_NSE_BSE_EQ_EQD.csv' # path to the holidays list

# --- forecasting set-up ---
h = 5                                   # number of days to forecast
eliminate_weekends = True               # predict for Monday-Friday only

# --- data split (in rows of the price history) ---
train_size = 247*4                      # rows used to fit the model (4 blocks of 247 rows)
val_size = 246                          # rows kept aside for validation
train_val_size = train_size + val_size

# download 5 years (365*5 days) of prices and load the holiday calendar
company_prices, holidays_list = read_data(
    company,
    previous_days = 365*5,
    holidays_list_path = holidays_list_path,
)

# fit on the training rows, forecast h days ahead and save the fitted model
model, prediction, future_dates = model_building_prophet(
    company,
    company_prices,
    holidays_list,
    h,
    train_size,
    eliminate_weekends,
    save_model = True,
)

In [8]:
# model = m.fit(company_prices[0:train_size])

In [9]:
# future_dates = model.make_future_dataframe(periods = h)

In [10]:
# if eliminate_weekends is not None:
#     future_dates['day'] = future_dates['ds'].dt.weekday
#     future_dates = future_dates[future_dates['day']<=4]
# else:
#     pass

In [11]:
# prediction = model.predict(future_dates)

In [12]:
# pred = model.predict(company_prices[(train_size+1):])

In [13]:
# print('The MAPE for Test values (Last 1 year) and Predicted values(Last 1 year): %d%%' %get_mape(company_prices[(train_size+1):]['y'], pred['yhat']))

The MAPE for Test values (Last 1 year) and Predicted values(Last 1 year): 8%


In [1]:
# model.plot(prediction)

In [15]:
# with open('RELIANCE_NS.json', 'w') as fout:
#     json.dump(model_to_json(model), fout)  # Save model

In [16]:
# df_cv = cross_validation(model, initial = f'{train_size} days', period = f'{round(h/2)} days', horizon = f'{h} days')

In [19]:
# df_p = performance_metrics(df_cv)
# df_p

In [18]:
# fig = plot_cross_validation_metric(df_cv, metric = 'mape')

In [13]:
# read the serialized model back from disk, then rebuild the Prophet object from it
with open('/content/RELIANCE_NS.json', 'r') as fin:
    serialized_model = json.load(fin)

saved_model = model_from_json(serialized_model)  # Load model

In [14]:
# the model is fitted on rows [0:train_size], so the hold-out begins at row train_size
# (starting at train_size+1 silently skipped the first hold-out row)
saved_model.predict(company_prices[train_size:])