In [11]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    with open(path, 'rb') as f:
        return pickle.load(f)
    
def save_obj(obj, path ):
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

In [3]:
def read_ftd_file(file_path):
    df = pd.read_table(
    Path(file_path),
    sep="|",
    index_col=None
    )
    
    df = df.iloc[:-2 , :] ## Drop last two rows as they are useless to keep
    df.drop(['DESCRIPTION','PRICE'], axis=1,inplace=True)
    df.rename(columns={'SETTLEMENT DATE':'Date','QUANTITY (FAILS)':'QUANTITY_FAILS'},
             inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values(["SYMBOL", "Date"], ascending = (True, True),inplace=True)
    df.set_index('Date',inplace=True)
    
    return df

In [16]:
path = Path('../SEC_Test_Nov2021/cnsfails202111a.txt')
ftd_df = read_ftd_file(path)
ftd_df.dropna(inplace=True)
ftd_df.sort_values(["SYMBOL", "Date"], ascending = (True, True),inplace=True)
ftd_df.reset_index(inplace=True)

In [20]:
symbol_df = ftd_df[['SYMBOL','CUSIP']]
symbol_df = symbol_df.drop_duplicates()
symbol_df = symbol_df.sort_values('SYMBOL')
symbol_df = symbol_df.reset_index(drop=True)
#cusip_symbol_df

In [21]:
# ## Export DFs
# ftd_df.to_csv('../Resources/ftd_all_data_nov2021.csv')
# cusip_symbol_df.to_csv('../Resources/symbol_all_list_nov2021.csv')
# save_obj(ftd_df, '../Resources/ftd_all_data_nov2021.pkl')
# save_obj(cusip_symbol_df, '../Resources/symbol_all_list_nov2021.pkl')

In [22]:
## Get API data 

In [28]:
## Constants 

## Set start date variable - dataframes will be created starting from this date
start_date = '2021-11-01'
end_date = '2021-11-12'
#default_date_range = '71m' ## Default Range for IEX functions - don't need more at the moment

In [37]:
## QUANDL/NASDAQ 
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

# iex_api_key = os.getenv("IEX_API_KEY")
# iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

# ## Redundant Assignment but improves Readability throughout code 
# real_token = iex_api_key
# test_token = iex_test_api_key

# base_url_iex = 'https://cloud.iexapis.com/stable/'
# sandbox_url = 'https://sandbox.iexapis.com/stable/'

# ## IEX Status Test 
# test_resp = requests.get(base_url_iex + 'status')
# test_resp

In [38]:
## Get Short "Interest" Data from Quandl 
def get_short_data_QUANDL(symbol):
    string_nsdq = "FINRA/FNSQ_"+symbol
    string_nyse = "FINRA/FNYX_"+symbol
    
    df1 = quandl.get(string_nsdq,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## Nasdaq
    df2 = quandl.get(string_nyse,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## NYSE

    df1 = df1.rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
    #df1 = df1.drop(columns={'ShortExemptVolume'})
    df1 = df1.rename(columns={'ShortExemptVolume':'ShortExemptVolumeNSDQ'})

    df2 = df2.rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
    #df2 = df2.drop(columns={'ShortExemptVolume'})
    df2 = df2.rename(columns={'ShortExemptVolume':'ShortExemptVolumeNYSE'})

    df3 = pd.merge(df1,df2,on='Date',how='outer')
    #df3 = df3.fillna(0)
    
    return df3


## Return FTD Data from SEC FTD files using a Stock's CUSIP number to sort 
def return_ftd_data_cusip(cusip_number):
    df = ftd_df.copy()
    df.set_index("CUSIP",inplace=True)
    df = df.loc[cusip_number]
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    return df

## Return the CUSIP symbol from the symbol_df symbol list 
def return_CUSIP_from_symbol(symbol):
    df = symbol_df.copy()
    df.set_index('SYMBOL',inplace=True)
    cusip_variable = df.loc[symbol]
    cusip_variable = cusip_variable['CUSIP']
    return cusip_variable

def return_ftd_data_symbol(symbol):
    cusip_number = return_CUSIP_from_symbol(symbol)
    df = return_ftd_data_cusip(cusip_number)
    return df

In [39]:
## FMP Constants 
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
api_key = os.getenv("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    url_hist_price = fmpbase_urlv3+'historical-price-full/'
    url_hist_query_with_date = url_hist_price+symbol+'?from='+startDate+'&to='+endDate+'&apikey='+apiKey
    resp_data = requests.get(url_hist_query_with_date)
    json_ = resp_data.json()
    data = json_['historical']
    df = pd.DataFrame(data)
    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.reindex(index=df.index[::-1]) ## Reverse the DataFrame 
    df.set_index('Date',inplace=True)
    df.drop(columns='label',inplace=True)
    return df

api_key = os.getenv("FMP_CLOUD_API_KEY")
def get_float_data_FMP(symbol):
    url_float_shares = fmpbase_urlv4+'shares_float?symbol='
    url_query_float_data = url_float_shares+symbol+'&apikey='+api_key
    resp_data = requests.get(url_query_float_data)
    #df = pd.DataFrame(resp_data.json())
    json_ = resp_data.json()
    return json_[0]

def get_company_profile_FMP_json(symbol):
    ## https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'
    url_company_profile_url = fmpbase_urlv3+'profile/'+symbol+'?apikey='+api_key
    resp_data = requests.get(url_company_profile_url)
    json_response = resp_data.json()
    return json_response[0]

def save_and_export_raw_df_csv(data, symbol):
    path = ('../FilesExportIndividualStockDFs_Big/'+symbol+'_combined_df.csv')
    data.to_csv(path)

In [61]:
def get_time_series_data(symbol):
    ftd_data = return_ftd_data_symbol(symbol)
    ftd_data = ftd_data.drop(columns={'SYMBOL'})

    fmp_data = get_FMP_historical_data(symbol)
    df1 = pd.merge(fmp_data,ftd_data, on='Date',how='outer')
    df1['QUANTITY_FAILS'] = df1['QUANTITY_FAILS'].fillna(0)
    df1['volume'] = df1['volume'].fillna(0)
    df1['unadjustedVolume'] = df1['unadjustedVolume'].fillna(0)
    df1['vwap'] = df1['vwap'].fillna(0)
    df2 = get_short_data_QUANDL(symbol)
    df = pd.merge(df1,df2,on='Date',how='outer')
    df = df.fillna(0)
    return df

In [76]:
test_symbol = 'AMC'
symbol = test_symbol

In [77]:
test_df = return_ftd_data_symbol('AMC')
test_df

Unnamed: 0_level_0,SYMBOL,QUANTITY_FAILS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-01,AMC,101002.0
2021-11-02,AMC,57149.0
2021-11-03,AMC,87164.0
2021-11-04,AMC,301031.0
2021-11-05,AMC,367116.0
2021-11-08,AMC,159925.0
2021-11-09,AMC,307849.0
2021-11-10,AMC,104067.0
2021-11-12,AMC,23463.0


In [89]:
test_df = get_time_series_data(symbol)
#test_df

path = Path('../FilesExport_DFs_with_TI_pkl/'+symbol+'_data_dict_with_technicals.pkl')
data_import = load_obj(path)

import_df = data_import[symbol].copy()
import_df.drop(columns={'bollinger_signal','dema_signal','adl_signal','rsi_signal'},inplace=True)
#import_df

df1 = test_df.copy()
df2 = import_df.copy()
df3 = pd.concat([df2,df1])
df3.tail(30)

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,QUANTITY_FAILS,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-10-04,38.9,39.03,36.33,36.77,36.77,40057221.0,40057221.0,-2.13,-5.476,37.37667,-0.05476,192871.0,9757016.0,155573.0,16249587.0,3360859.0,8177.0,6637403.0
2021-10-05,36.86,38.1,36.19,37.06,37.06,37473123.0,37473123.0,0.2,0.543,37.11667,0.00543,614917.0,9974105.0,133078.0,16602395.0,2042428.0,9375.0,6598458.0
2021-10-06,36.36,37.65,35.64,36.83,36.83,35443740.0,35443740.0,0.47,1.293,36.70667,0.01293,229005.0,9206012.0,119397.0,15493032.0,2571883.0,7981.0,6215871.0
2021-10-07,36.78,38.55,36.6,38.14,38.14,37731373.0,37731373.0,1.36,3.698,37.76333,0.03698,79831.0,9199722.0,122211.0,15941717.0,2053551.0,13081.0,6475796.0
2021-10-08,37.92,38.78,37.06,37.19,37.19,29191323.0,29191323.0,-0.73,-1.925,37.67667,-0.01925,40344.0,7250691.0,99050.0,12396724.0,1609060.0,14686.0,5070244.0
2021-10-11,36.85,38.66,36.2982,37.25,37.25,37365800.0,37365800.0,0.4,1.085,37.40273,0.01085,0.0,9252378.0,113932.0,15067520.0,3196026.0,20050.0,6412699.0
2021-10-12,37.25,37.57,36.22,36.82,36.82,25158315.0,25158315.0,-0.43,-1.154,36.87,-0.01154,118229.0,5813981.0,104215.0,9686080.0,2226333.0,5285.0,4496844.0
2021-10-13,36.72,38.15,36.12,37.91,37.91,33462948.0,33462948.0,1.19,3.241,37.39333,0.03241,113277.0,7967385.0,132216.0,13352824.0,2397898.0,38456.0,5374688.0
2021-10-14,37.8,41.1,37.75,40.07,40.07,67820197.0,67820197.0,2.27,6.005,39.64,0.06005,78931.0,15695625.0,464562.0,27504200.0,3056523.0,24688.0,10551864.0
2021-10-15,40.2,41.79,39.78,40.74,40.74,45650933.0,45650933.0,0.54,1.343,40.77,0.01343,115789.0,10789615.0,215706.0,18941072.0,2135710.0,11147.0,7482834.0


In [81]:
# import pandas as pd
# import numpy as np
# import requests
# import json
import hvplot.pandas
#from dotenv import load_dotenv
from datetime import date
# import os
from scipy import stats
from numpy.lib.stride_tricks import as_strided
from numpy.lib import pad
#import pad
# import matplotlib.pyplot as plt
# %matplotlib inline

# from pathlib import Path

In [82]:
def rolling_spearman(seqa, seqb, window):
    stridea = seqa.values.strides[0]
    ssa = as_strided(seqa, shape=[len(seqa) - window + 1, window], strides=[stridea, stridea])
    strideb = seqa.values.strides[0]
    ssb = as_strided(seqb, shape=[len(seqb) - window + 1, window], strides =[strideb, strideb])
    ar = pd.DataFrame(ssa)
    br = pd.DataFrame(ssb)
    ar = ar.rank(1)
    br = br.rank(1)
    corrs = ar.corrwith(br, 1)
    return pad(corrs, (window - 1, 0), 'constant', constant_values=np.nan)

# def create_price_df(self,ticker, period='daily'):

#     if period =='hourly':
#         api_url = 'https://fmpcloud.io/api/v3/historical-chart/1hour'
#     else:
#         api_url = 'https://fmpcloud.io/api/v3/historical-price-full'

#     ticker_df = json.loads(requests.get(f"{api_url}/{ticker}?apikey={fmp_api}").content)['historical']
#     data = pd.DataFrame(ticker_df).set_index('date')[::-1]
#     data['Date'] = data.index
#     data.index = data.index.astype('datetime64[ns]')

#     return data

def use_csvs(ticker):

    #data = pd.read_csv("../FilesExportIndividualStockDFs_Big/"+ticker+"_combined_df.csv", index_col='Date', parse_dates=True)
#     path = Path('../FilesExport_Finished/'+ticker+'_finished_df.pkl')
#     data_import = load_obj(path)
#     data = data_import['dataFrame'].copy()

    test_df = get_time_series_data(ticker)
    #test_df

    path = Path('../FilesExport_DFs_with_TI_pkl/'+ticker+'_data_dict_with_technicals.pkl')
    data_import = load_obj(path)

    import_df = data_import[symbol].copy()
    import_df.drop(columns={'bollinger_signal','dema_signal','adl_signal','rsi_signal'},inplace=True)
    #import_df

    df1 = test_df.copy()
    df2 = import_df.copy()
    data = pd.concat([df2,df1])

    return data



def bollinger_bands(dataframe,period=20):
    data = dataframe.copy()
    data['middle_band'] = data[['adjClose']].rolling(window=period).mean()
    data[str(period)+'_day_stdev'] = data[['adjClose']].rolling(window=period).std()
    data['upper_band'] = data['middle_band']+2*data[str(period)+'_day_stdev']
    data['lower_band'] = data['middle_band'] - 2*data[str(period)+'_day_stdev']
    data['spread'] = data['upper_band'] + data['lower_band']
    data['change_in_spread'] = data['spread']/data['spread'].shift(1)-1
    data[str(period)+"_return"] = data['adjClose']/data['adjClose'].shift(period)-1
    data['bollinger_signal'] = data['change_in_spread'].rank(ascending=False, pct=True)
    data.dropna()

    return data

def dema(dataframe, period1=10, period2=20):
    data = dataframe.copy()
    data[str(period1)+'ema1'] = dataframe[['adjClose']].ewm(span=period1, adjust=False).mean()
    data[str(period1)+'ema2'] = data[str(period1)+'ema1'].ewm(span=period1, adjust=False).mean()
    data[str(period1)+'dema'] = 2*data[str(period1)+'ema1'] - data[str(period1)+'ema2']
    data[str(period2)+'ema1'] = data[['adjClose']].ewm(span=period2, adjust=False).mean()
    data[str(period2)+'ema2'] = data[str(period2)+'ema1'].ewm(span=period2, adjust=False).mean()
    data[str(period2)+'dema'] = 2*data[str(period2)+'ema1'] - data[str(period2)+'ema2']
    data[str(period1)+"_return"] = data['adjClose']/data['adjClose'].shift(period1)-1
    data['spread'] = data[str(period1)+'dema'] - data[str(period2)+'dema']
    data['dema_signal'] = data['spread'].rank(ascending=True, pct=True)
    data = data.dropna()
    return data

def price_momentum(dataframe, smoothing1=0.0571, smoothing2=0.1, periods1=15, periods2=10):
    data = dataframe.copy()
    data['smoothing_factor'] = smoothing1
    data[str(periods1)+"average"] = data['changeOverTime'].rolling(window=periods1).mean()
    smoothing_factor_list = [data.iloc[periods1][str(periods1)+"average"]]
    data = data.dropna()
    i=1
    j=0
    while i < len(data[str(periods1)+"average"]):
        smoothing_factor = data.iloc[i]['changeOverTime']*data.iloc[i]['smoothing_factor'] + smoothing_factor_list[j]*(1-data.iloc[i]['smoothing_factor'])
        smoothing_factor_list.append(smoothing_factor)
        j+=1
        i+=1
    data['35d_custom_smoothing'] = smoothing_factor_list
    data['35d_custom_10'] = data['35d_custom_smoothing']*10
    data['smoothing_factor2'] = smoothing2
    data[str(periods2)+"average"] = data['35d_custom_10'].rolling(window=periods2).mean()
    data = data.dropna()
    smoothing_factor_list2 = [data.iloc[0][str(periods2)+"average"]]
    i=1
    j=0
    while i < len(data[str(periods2)+"average"]):
        smoothing_factor = (data.iloc[i]['35d_custom_10'] - smoothing_factor_list2[j])*data.iloc[i]['smoothing_factor2'] + smoothing_factor_list2[j]
        smoothing_factor_list2.append(smoothing_factor)
        j+=1
        i+=1
    data[str(periods2)+'d_custom_smoothing'] = smoothing_factor_list2
    data[str(periods2)+"_return"] = data['adjClose']/data['adjClose'].shift(periods2)-1
#         data['signal'] = np.where(data[str(periods2)+'d_custom_smoothing'] > data[str(periods2)+'d_custom_smoothing'].shift(1), 1.0, 0.0)
#         data = data.rename(columns={'signal':'price_mo'})
    return data

def get_ichimoku_cloud(dataframe, period1=4, period2=8, period3=15):

    #TODO generate signal, ichimoku works better in current market regime with shorter periods, being able to respond faster to events than a traditional version
    # The conversion crossing the base would be the signal

    data = dataframe.copy()
    data['conversion_line'] = data[['adjClose']].rolling(window=period1).mean()
    data['base_line'] = data[['adjClose']].rolling(window=period2).mean()
    data['senkou_spanA_line'] = (data['conversion_line']+data['base_line'])/2
    data['senkou_spanB_line'] = data[['adjClose']].rolling(window=period3).mean()
    data['lagging_span'] = data['adjClose'].shift(period2)
    data = data.dropna()

    return data

def accumulation_distribution_line(dataframe):

    ##TODO define periodicity and pass as arguments, use the mean as the signal generator, -1 is buy and and 1 is sell
    ##TODO need to add ability to ignore a -1 during a range of 1s

    data = dataframe.copy()
    data['money_flow_mult'] = round(((data['adjClose'] - data['low']) - (data['high'] - data['adjClose']))/(data['high'] - data['low']),2)
    data = data.dropna()
    data['money_flow_volume'] = data['money_flow_mult']*data['volume']
    money_flow_multiplier_list = list(data['money_flow_volume'].values)
    adl = [money_flow_multiplier_list[0]]
    i = 1
    while i < len(money_flow_multiplier_list):
        a_d_indicator = adl[i-1]+money_flow_multiplier_list[i]
        adl.append(a_d_indicator)
        i+=1
    data['adl'] = adl
    data['adl_change'] = data['adl']/data['adl'].shift(1)-1
    negative_change_count = [0]*9
    i = 0
    counter = 0
    while i < len(data)-9:
        j=0
        while j < 9:
            if data.iloc[j+i]['adl_change'] <0:
                counter+=1
            if j %19 == 0:
                negative_change_count.append(counter)
                counter = 0
            j+=1
        i+=1
    data['negative_change_counter'] = negative_change_count
    data['9_day_return'] = data['adjClose']/data['adjClose'].shift(9)-1
    data['adl_signal'] = rolling_spearman(data['adl'], data['9_day_return'], 9)

    return data

def rsi(dataframe, periods=14):
    data = dataframe.copy()
    data['gains'] = np.where(data['changeOverTime']>0, data['changeOverTime'], 0)
    data['losses'] = np.where(data['changeOverTime']<0, np.absolute(data['changeOverTime']), 0)
    data['average_gain'] = data['gains'].rolling(window=periods).mean()
    data['average_loss'] = data['losses'].rolling(window=periods).mean()
    data['rs'] = data['average_gain']/data['average_loss']
    data['rsi'] = (100 - 100/(1+data['rs']))
    data['rsi_signal'] = data['rsi'].rank(ascending=True, pct=True)

    return data


# def get_ratings(self):

#     ratings = json.loads(requests.get(f"https://fmpcloud.io/api/v3/historical-rating/{self.ticker}?limit=100&apikey={fmp_api}").content)
#     ratings_df = pd.DataFrame(ratings)
#     ratings_df['average_rating'] = (ratings_df['ratingScore']+ratings_df['ratingDetailsDCFScore']+ratings_df['ratingDetailsROEScore']+ratings_df['ratingDetailsROAScore'] \
#                                     +ratings_df['ratingDetailsDEScore'] + ratings_df['ratingDetailsPEScore']+ ratings_df['ratingDetailsPBScore'])/7

#     return ratings_df

# def get_stock_market_performances(self, dataframe):
#     limit = len(dataframe)
#     data = json.loads(requests.get(f"https://fmpcloud.io/api/v3/historical-sectors-performance?limit=525&apikey={fmp_api}").content)
#     sector_df = pd.DataFrame(data).set_index('date')
#     sector_df_clean = sector_df[::-1]
#     s_p500 = json.loads(requests.get(f"https://fmpcloud.io/api/v3/historical-price-full/^SP500TR?from="+sector_df_clean.index[0]+"&to="+sector_df_clean.index[-1]+"&apikey="+fmp_api).content)['historical']
#     sp_df = pd.DataFrame(s_p500)
#     sp_df_clean = sp_df[::-1]

#     return sp_df_clean

def get_all_indicators(ticker):
    dataframe = use_csvs(ticker)
    bb = bollinger_bands(dataframe)
    DEMA = dema(dataframe)
    ADL = accumulation_distribution_line(dataframe)
    RSI = rsi(dataframe)

    dataframe['bollinger_signal'] = bb['bollinger_signal']
    dataframe['dema_signal'] = DEMA['dema_signal']
    dataframe['adl_signal'] = ADL['adl_signal']
    dataframe['rsi_signal'] = RSI['rsi_signal']
    dataframe.dropna(inplace=True)
    return dataframe

# def merge_data(self):
#     ticker_list = self.stock_list
#     for stock in ticker_list:
#         stock_csv = self.use_csvs(stock)
#         stock_csv['Date'] = stock_csv.index.astype("string")
#         stock_csv = stock_csv.iloc[:-1]
#         stock_csv.index.names =[""]
#         stock_indicators = self.get_all_indicators(stock)
#         stock_signals = stock_indicators[['Date','bollinger_signal','dema_signal', 'adl_signal', 'rsi_signal']]
#         stock_signals.loc[:,'Date'] = stock_signals['Date'].astype('string')
#         stock_signals.index.names =[""]
#         merged_data = pd.merge(left = stock_csv, right=stock_signals, on=['Date']).set_index('Date')
#         merged_data = merged_data.dropna()
#         writer = pd.ExcelWriter("NewCsvs/"+stock+"_all_indicator_dfs.xlsx", engine="xlsxwriter")

#         merged_data.to_excel(writer, sheet_name=stock+"_data")

#         writer.save()

#     return print("All files successfully saved")

In [85]:
test_df = get_all_indicators(symbol)
test_df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,...,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE,bollinger_signal,dema_signal,adl_signal,rsi_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-03-02,24.940001,25.389999,24.620001,25.350000,19.225668,177300.0,177300.0,0.41,1.644,25.12000,...,24261.0,0.0,39418.0,1860.0,0.0,4100.0,0.128651,0.932099,-0.533333,0.905190
2016-03-03,25.240000,25.780001,25.129999,25.700001,19.646111,264500.0,264500.0,0.46,1.823,25.53667,...,35886.0,2336.0,69522.0,8558.0,0.0,11805.0,0.118220,0.944444,-0.633333,0.883045
2016-03-04,26.400000,27.240000,25.400000,27.020000,20.655172,1264100.0,1264100.0,0.62,2.348,26.55333,...,136925.0,1887.0,380932.0,37132.0,2700.0,146211.0,0.102921,0.951303,-0.783333,0.942561
2016-03-07,26.990000,28.740000,26.840000,28.680000,21.924139,433500.0,433500.0,1.69,6.262,28.08667,...,50718.0,800.0,129688.0,7091.0,0.0,24014.0,0.059805,0.963649,-0.883333,0.921107
2016-03-08,28.459999,28.840000,28.299999,28.389999,21.702448,486000.0,486000.0,-0.07,-0.246,28.51000,...,40113.0,0.0,106944.0,7116.0,0.0,146998.0,0.068150,0.965706,-0.933333,0.982699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-08,42.480000,45.950000,41.770000,45.060000,45.060000,65445727.0,65445727.0,2.58,6.073,44.26000,...,11094447.0,295820.0,28447766.0,2175155.0,8686.0,10719336.0,0.134214,0.975995,0.933333,0.736332
2021-11-09,42.430000,42.600000,39.250000,39.930000,39.930000,37592583.0,37592583.0,-2.50,-5.892,40.59333,...,5596961.0,225335.0,14728993.0,1466846.0,55282.0,6266508.0,0.282337,0.972565,0.933333,0.532180
2021-11-10,38.790000,40.869100,38.050000,38.290000,38.290000,31759945.0,31759945.0,-0.50,-1.289,39.06970,...,4285659.0,181892.0,12950810.0,1852950.0,84915.0,4712083.0,0.429068,0.956104,0.850000,0.619377
2021-11-11,38.000000,40.200000,37.500000,39.460000,39.460000,30209754.0,30209754.0,1.46,3.842,39.05333,...,4000107.0,106216.0,12310735.0,1700013.0,4617.0,4303696.0,0.507650,0.938272,0.833333,0.775779


In [73]:
## Load machine learning model and test with sliced data
# Initial imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [86]:
symbol = test_symbol
symbol

'AMC'

In [88]:
## Load Data

# path = Path('../FilesExport_DFs_with_TI_pkl/'+symbol+'_data_dict_with_technicals.pkl')
# data_import = load_obj(path)

df = test_df #data_import[symbol].copy()

## Set X and y data 
X = df.drop(columns={'close','adjClose'}).values
y = df['close'].values

scaler = StandardScaler().fit(X)
X = scaler.transform(X)

# load json and create model
file_path = Path('../Model_Data_low_acc/'+symbol+'_model_data.json')
with open(file_path, "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)

# load weights into new model
file_path = Path('../Model_Data_low_acc/'+symbol+'_model_weights.h5')
loaded_model.load_weights(file_path)

## Compile loaded model and print mse score 
loaded_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])
score = loaded_model.evaluate(X, y, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
print()

## take dataframe copy and compare predicted values to actual values 
df2 = df.copy()
df2['predicted'] = loaded_model.predict(X)
df_pred = df2[['close','predicted']]
df_pred

mse: 1745.58%



Unnamed: 0_level_0,close,predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-03-02,25.350000,25.149000
2016-03-03,25.700001,25.526295
2016-03-04,27.020000,26.521856
2016-03-07,28.680000,28.454992
2016-03-08,28.389999,28.258223
...,...,...
2021-11-08,45.060000,39.597195
2021-11-09,39.930000,33.559387
2021-11-10,38.290000,32.037735
2021-11-11,39.460000,37.023670


In [90]:
df_pred.tail(15)

Unnamed: 0_level_0,close,predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-10-25,36.83,33.816566
2021-10-26,36.05,33.218834
2021-10-27,34.76,31.983517
2021-10-28,35.23,32.835415
2021-10-29,35.37,33.063847
2021-11-01,37.07,33.836494
2021-11-02,38.79,35.043381
2021-11-03,40.79,34.749115
2021-11-04,40.05,37.376785
2021-11-05,41.7,38.166954
