# Inferencing Pipeline

## Importing Libraries

In [None]:
# %pip install prophet

In [5]:
import pickle
import json
from prophet.serialize import model_to_json, model_from_json

import pandas as pd
import numpy as np

import time
from datetime import datetime, timedelta, date
# remove warnings
import warnings
warnings.filterwarnings("ignore")


ModuleNotFoundError: No module named 'prophet'

## Defining Functions

In [5]:
# Loading Model
def load_model(model_path):
    with open(model_path, 'r') as fin:
        saved_model = model_from_json(json.load(fin))  # Load model
    return saved_model

In [6]:
# for next day prediction
def next_day_prediction(model_path, missing_dates, missing_dates_df = 0):

    saved_model = load_model(model_path)

    if missing_dates == False:
        next_day = date.today() + timedelta(days=1)
        future_date = pd.DataFrame(pd.date_range(start = next_day, end = next_day, freq ='D'), columns = ['ds'])

        predicted = saved_model.predict(future_date)
        print('Date %s:' %str(predicted['ds'][0].strftime('%d-%b-%Y').upper()))
        print('Predicted "Close" price: \u20B9%d'%predicted['yhat'][0])
        print('Predicted "Close" Minimum and "Close" Maximum: \u20B9%d - \u20B9%d'%(predicted['yhat_lower'][0], predicted['yhat_upper'][0]))

        return (predicted[['ds','yhat', 'yhat_upper', 'yhat_lower']])

    else:
        missing_dates_df.rename(columns={'Date':'ds'}, inplace=True)
        predicted = saved_model.predict(missing_dates_df)
        return (predicted[['ds','yhat', 'yhat_upper', 'yhat_lower']])

## Executing Code

In [None]:
predicted = next_day_prediction('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/models/RELIANCE.NS_01_Jul.json', False)

In [None]:
predicted

# Error Tracking Pipeline

## Loading Libraries

In [None]:
# !pip install prophet

In [None]:
import pandas as pd

## Defining Functions

In [160]:
def real_stock_price(company, predicted):

    now = datetime.now()
    weekday_weekend = datetime.strptime(str(predicted['ds'][0]), '%Y-%m-%d %H:%M:%S')
    
    if weekday_weekend.weekday() <= 5 and weekday_weekend.weekday() != 0:
        days = 1
    elif weekday_weekend.weekday() == 6:
         days = 2
    elif weekday_weekend.weekday() == 0:
         days = 3

    past = datetime.strptime(str(predicted['ds'][0]), '%Y-%m-%d %H:%M:%S') - timedelta(days)
    past = past.replace(hour = now.hour, minute = now.minute, second = now.second, microsecond = now.second)
    past = int(time.mktime(past.timetuple()))
    

    interval = '1d'

    # defining the query to get historical stock data
    query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{company}?period1={past}&period2={past}&interval={interval}&events=history&includeAdjustedClose=true'
    try:
        company_stock_price = pd.read_csv(query_string)
        company_stock_price = company_stock_price[['Date', 'Close']]
        return company_stock_price
    except:
        company_stock_price = pd.DataFrame(np.nan, index = [0], columns=['Date'])
        return company_stock_price

In [None]:
def real_stock_price_missing_date(company, predicted):
    now = datetime.now()
    predicted['Close'] = None
    for i in range(len(predicted['ds'])):
        past = datetime.strptime(str(predicted['ds'][i]), '%Y-%m-%d %H:%M:%S')
        past = past.replace(hour = now.hour, minute = now.minute, second = now.second, microsecond = now.second)
        past = int(time.mktime(past.timetuple()))
        interval = '1d'
        query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{company}?period1={past}&period2={past}&interval={interval}&events=history&includeAdjustedClose=true'
        company_stock_price = pd.read_csv(query_string)
        company_stock_price = company_stock_price[['Date', 'Close']]
        predicted['Close'][i] = company_stock_price['Close'].values[0]
    return predicted

In [55]:
def pred_vs_real_comparision(real_stock_price, predicted, combined_df, company):

    df = pd.DataFrame([[np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN]], columns=combined_df.columns)
    combined_df = pd.concat([combined_df, df], ignore_index =True)

    combined_df['Date'].iloc[-1] = str(predicted['ds'].iloc[-1].strftime('%Y-%m-%d'))
    combined_df['Date'] = pd.to_datetime(combined_df['Date'])
    combined_df = combined_df.set_index('Date')

    combined_df['Predicted_Close'].loc[predicted['ds']] = predicted['yhat'].iloc[-1]
    combined_df['Predicted_Close_Minimum'].loc[predicted['ds']] = predicted['yhat_lower'].iloc[-1]
    combined_df['Predicted_Close_Maximum'].loc[predicted['ds']] = predicted['yhat_upper'].iloc[-1]
    
    if pd.isna(real_stock_price['Date'])[0] == False:
        if predicted['ds'].iloc[-1].weekday() == 0:
            days = 3 #default days = 1
        elif predicted['ds'].iloc[-1].weekday() == 6:
            days = 2
        else:
            days = 1
            
        combined_df['Actual_Close'].loc[predicted['ds']-timedelta(days)] = real_stock_price['Close'].iloc[-1]
        percent_change = ((combined_df['Actual_Close'].loc[predicted['ds']-timedelta(days)] - combined_df['Predicted_Close'].loc[predicted['ds']-timedelta(days)])/combined_df['Actual_Close'].loc[predicted['ds']-timedelta(days)]*100)
        combined_df['Percent_Change_from_Close'].loc[predicted['ds']-timedelta(days)] = percent_change

        up_or_down_original = combined_df['Actual_Close'].loc[predicted['ds']][0]-combined_df['Actual_Close'].loc[predicted['ds']-timedelta(days)][0]

        if up_or_down_original > 0:
            # combined_df['Actual_Up_Down_Value'].iloc[-2] = up_or_down_original
            combined_df['Actual_Up_Down'].loc[predicted['ds']] = 'Up'

        elif up_or_down_original == 0:
            # combined_df['Actual_Up_Down_Value'].iloc[-2] = up_or_down_original
            combined_df['Actual_Up_Down'].loc[predicted['ds']] = 'Same'

        else:
            # combined_df['Actual_Up_Down_Value'].loc[-2] = up_or_down_original
            combined_df['Actual_Up_Down'].loc[predicted['ds']] = 'Down'

        # making a predicted_up_down columns
        up_or_down_predicted = combined_df['Predicted_Close'].loc[predicted['ds']][0]-combined_df['Predicted_Close'].loc[predicted['ds']-timedelta(days)][0]

        if up_or_down_predicted > 0:
            # combined_df['Actual_Up_Down_Value'].iloc[-2] = up_or_down_original
            combined_df['Predicted_Up_Down'].loc[predicted['ds']] = 'Up'

        elif up_or_down_predicted == 0:
            # combined_df['Actual_Up_Down_Value'].iloc[-2] = up_or_down_original
            combined_df['Predicted_Up_Down'].loc[predicted['ds']] = 'Same'

        else:
            # combined_df['Actual_Up_Down_Value'].iloc[-2] = up_or_down_original
            combined_df['Predicted_Up_Down'].loc[predicted['ds']] = 'Down'
        
        # combined_df['Date'] = combined_df.index
        combined_df.insert(0, 'Date', combined_df.index)
        combined_df = combined_df[~combined_df.index.duplicated(keep='first')]

        # add compnay name to the dataframe
        combined_df['Company'] = company

    else:
        pass

    return combined_df
        

## Tracking the error

In [7]:
error_df = pred_vs_real_comparision(real_stock_price('RELIANCE.NS', predicted), predicted, pd.read_csv('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/error_df.csv'), 'RELIANCE.NS')

NameError: name 'pred_vs_real_comparision' is not defined

In [None]:
error_df.head(10)

In [None]:
error_df.to_csv('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/error_df.csv',index = False)

In [25]:
pd.read_csv('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/training_data.csv')

Unnamed: 0,Date,Close,Company,Open,High,Low,Adj Close,Volume
0,2017-07-20,757.271790,RELIANCE.NS,,,,,
1,2017-07-21,785.058411,RELIANCE.NS,,,,,
2,2017-07-24,800.041382,RELIANCE.NS,,,,,
3,2017-07-25,793.552917,RELIANCE.NS,,,,,
4,2017-07-26,803.756165,RELIANCE.NS,794.568298,807.297607,793.032837,788.920837,10131916.0
...,...,...,...,...,...,...,...,...
2471,2022-07-19,294.600006,ITC.NS,295.000000,295.600006,292.700012,294.600006,9188419.0
2472,2022-07-20,298.200012,ITC.NS,294.899994,299.500000,293.600006,298.200012,15789281.0
2473,2022-07-21,299.549988,ITC.NS,299.000000,302.200012,298.000000,299.549988,16042840.0
2474,2022-07-22,300.549988,ITC.NS,300.000000,302.500000,298.100006,300.549988,9967545.0


In [15]:
import pandas as pd
from datetime import datetime, timedelta, date

data_path = '/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/error_df1RELIANCE.NS.csv'
error_df = pd.read_csv(data_path)
# remove last 3 rows
# error_df = error_df.iloc[:-5]
for i in range(len(error_df)):
    error_df['Date'].iloc[i] = str(datetime.strptime(error_df['Date'].iloc[i], "%d/%m/%y").strftime('%Y-%m-%d'))
# # # # error = error.drop(['Predicted_Up_Down_Value', 'Predicted_Up_Down', 'Actual_Up_Down_Value'], axis = 1)
# # # error = error.iloc[0:3]
error_df.to_csv(data_path,index = False)
error_df.tail(20)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,Date,Actual_Close,Predicted_Close,Predicted_Close_Minimum,Predicted_Close_Maximum,Percent_Change_from_Close,Actual_Up_Down,Predicted_Up_Down,Company
22,2022-08-03,2606.350098,2585.320928,2505.244196,2669.897806,0.806844,Down,Down,RELIANCE.NS
23,2022-08-04,2354.100098,2464.732623,2380.501347,2541.464091,-4.699568,Down,Up,RELIANCE.NS
24,2022-08-05,2392.25,2441.160422,2362.468133,2516.02606,-2.044536,Down,Up,RELIANCE.NS
25,2022-08-08,2399.149902,2420.860307,2341.624416,2502.221633,-0.904921,Down,Up,RELIANCE.NS
26,2022-08-10,2418.850098,2454.950655,2375.221259,2530.576585,-1.492468,Up,Up,RELIANCE.NS
27,2022-08-11,2363.449951,2443.958408,2364.007963,2525.946137,-3.406396,Down,Up,RELIANCE.NS
28,2022-08-12,2633.0,2642.406058,2569.560093,2719.358162,-0.357237,Down,Down,RELIANCE.NS
29,2022-08-16,2651.300049,2703.196771,2618.901728,2780.01842,-1.957407,Down,Down,RELIANCE.NS
30,2022-08-17,2665.149902,2688.765715,2602.545472,2769.231539,-0.886097,Down,Down,RELIANCE.NS
31,2022-08-18,,2649.67217,2571.017061,2733.707133,,Down,Down,RELIANCE.NS


# Error DF

In [10]:
from datetime import datetime, timedelta, date

def filling_missing_dates(error_df, company):

    Date = date.today()
    # check if the last date matches today
    if error_df.iloc[-1]['Date'] != Date:
        date_range = pd.date_range(start = error_df.iloc[-1]['Date'], end = Date, freq ='B')
        # make empty date_range dataframe with same columns as error_df
        date_range_df = pd.DataFrame(columns = error_df.columns)
        date_range_df['Date'] = date_range
        date_range_df['Date'] = date_range_df['Date'].dt.date
        missing_dates_df = next_day_prediction(f'/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/models/{company}.json',True, date_range_df)
        missing_dates_df = real_stock_price_missing_date(company, missing_dates_df)
        date_range_df['Actual_Close'] = missing_dates_df['Close']
        date_range_df['Predicted_Close'] = missing_dates_df['yhat']
        date_range_df['Predicted_Close_Minimum'] = missing_dates_df['yhat_lower']
        date_range_df['Predicted_Close_Maximum'] = missing_dates_df['yhat_upper']
        date_range_df['Percent_Change_from_Close'] = ((date_range_df['Actual_Close'] - date_range_df['Predicted_Close'])/date_range_df['Actual_Close'])*100

        date_range_df['Actual_Up_Down'] = np.where((date_range_df['Actual_Close'] > date_range_df['Actual_Close'].shift(-1)), 'Up', 'Down')
        date_range_df['Predicted_Up_Down'] = np.where((date_range_df['Predicted_Close'] > date_range_df['Actual_Close'].shift(-1)), 'Up', 'Down')

        date_range_df = date_range_df.rename(columns = {'ds':'Date'})
        error_df = error_df.append(date_range_df, ignore_index= True)
        error_df = error_df.drop_duplicates(subset = 'Date', keep = 'last')
        error_df['Company'] = company

    else:
        error_df = pred_vs_real_comparision(real_stock_price(company, next_day_prediction(f'/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/models/{company}.json')), next_day_prediction(f'/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/models/{company}.json'), pd.read_csv('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/error_df1.csv'), company)
    return error_df
    # rename ds to Date

In [21]:
import pandas as pd
old_company_list = pd.read_csv('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/training_data.csv')["Company"].unique()
with open("/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/config/process/companies_config copy.txt", "r") as f:
    new_companies_list=[i for line in f for i in line.split(',')]
# get the new company from the two lists
new = list(set(new_companies_list) - set(old_company_list))
old = list(set(old_company_list) - set(new_companies_list))
# covert list to string
new = ','.join(new)
old = ','.join(old)

In [112]:
# holidays_list['Day'][1].date()
# holidays_list.tail(15)
# ignore warnings
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime, timedelta, date

today = datetime.now() - timedelta(days = 1)
today = today.date()
def is_holiday(today):
    holidays_list = pd.read_csv('/Users/advait_t/Desktop/Jio/Stock_Prediction/Stock_Prediction/data/final/2017-2022_Holidays_NSE_BSE_EQ_EQD.csv')
    for i in range(len(holidays_list['Day'])):
        holidays_list['Day'][i] = pd.to_datetime(parse(holidays_list['Day'][i]))
    for i in range(len(holidays_list['Day'])):
        if holidays_list['Day'][i].date() == today:
            return True
    return False

# (holidays_list['Day'][holidays_list['Day'] == today])