In [1]:
import pandas as pd
import numpy as np

import glob

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Read one pickled object from ``path`` and return it.

    Caution: unpickling can execute arbitrary code, so only use this on
    files that this project wrote itself.
    """
    with open(path, 'rb') as source:
        return pickle.Unpickler(source).load()
    
def save_obj(obj, path):
    """Pickle ``obj`` to ``path`` (overwriting it) with the highest protocol."""
    with open(path, 'wb') as sink:
        pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL).dump(obj)

In [3]:
def read_ftd_file(file_path):
    """Load one pipe-delimited fails-to-deliver file into a tidy DataFrame.

    Parameters
    ----------
    file_path : str or Path
        Location of the ``|``-separated text file.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` (datetime), sorted by ``SYMBOL`` then ``Date``,
        with the remaining columns of the file minus ``DESCRIPTION`` and
        ``PRICE``; ``QUANTITY (FAILS)`` is renamed to ``QUANTITY_FAILS``.
    """
    raw = pd.read_table(
        Path(file_path),
        sep="|",
        index_col=None,
        # The last two lines of the file are free-text trailer records, so
        # this column mixes numbers and text.  Reading it as str keeps every
        # date a string regardless of how the parser chunks the file.
        dtype={'SETTLEMENT DATE': str},
    )

    # Build a new frame instead of mutating the iloc slice in place: in-place
    # edits on a slice trigger SettingWithCopyWarning and are unreliable.
    df = (
        raw.iloc[:-2]  ## Drop last two rows as they are useless to keep
        .drop(columns=['DESCRIPTION', 'PRICE'])
        .rename(columns={'SETTLEMENT DATE': 'Date',
                         'QUANTITY (FAILS)': 'QUANTITY_FAILS'})
    )
    df['Date'] = pd.to_datetime(df['Date'])

    return df.sort_values(["SYMBOL", "Date"], ascending=(True, True)).set_index('Date')

In [4]:
# Load the first-half-of-November fails file, discard incomplete rows and
# expose Date as a regular column again (ordered by SYMBOL, then Date).
path = Path('../SEC_Test_Nov2021/cnsfails202111a.txt')
ftd_df = (
    read_ftd_file(path)
    .dropna()
    .sort_values(["SYMBOL", "Date"], ascending=(True, True))
    .reset_index()
)

In [5]:
# Lookup table of the distinct (SYMBOL, CUSIP) pairs, ordered by SYMBOL.
symbol_df = (
    ftd_df[['SYMBOL', 'CUSIP']]
    .drop_duplicates()
    .sort_values('SYMBOL')
    .reset_index(drop=True)
)
#cusip_symbol_df

In [6]:
# ## Export DFs (disabled; the symbol lookup frame is named `symbol_df`)
# ftd_df.to_csv('../Resources/ftd_all_data_nov2021.csv')
# symbol_df.to_csv('../Resources/symbol_all_list_nov2021.csv')
# save_obj(ftd_df, '../Resources/ftd_all_data_nov2021.pkl')
# save_obj(symbol_df, '../Resources/symbol_all_list_nov2021.pkl')

In [7]:
## Get API data 

In [8]:
## Constants 

## Set start date variable - dataframes will be created starting from this date
# Inclusive date window used as the default for the API helpers below.
start_date, end_date = '2021-11-01', '2021-11-12'
#default_date_range = '71m' ## Default Range for IEX functions - don't need more at the moment

In [9]:
## QUANDL/NASDAQ 
# Key is read from the environment (populated by load_dotenv in the first cell);
# it is None when NASDAQ_API_KEY is not set.
nsdq_api_key = os.getenv('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

# iex_api_key = os.getenv("IEX_API_KEY")
# iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

# ## Redundant Assignment but improves Readability throughout code 
# real_token = iex_api_key
# test_token = iex_test_api_key

# base_url_iex = 'https://cloud.iexapis.com/stable/'
# sandbox_url = 'https://sandbox.iexapis.com/stable/'

# ## IEX Status Test 
# test_resp = requests.get(base_url_iex + 'status')
# test_resp

In [10]:
## Get Short "Interest" Data from Quandl 
def get_short_data_QUANDL(symbol):
    """Fetch the FINRA short-volume datasets for ``symbol`` and join them.

    Two datasets are requested through ``quandl.get`` for the module-level
    ``start_date``/``end_date`` window: ``FINRA/FNSQ_<symbol>`` and
    ``FINRA/FNYX_<symbol>``.  Their ``ShortVolume``, ``ShortExemptVolume`` and
    ``TotalVolume`` columns get an ``NSDQ`` / ``NYSE`` suffix respectively and
    the two frames are outer-merged on ``Date``.
    """
    venue_frames = []
    for dataset_prefix, venue in (("FINRA/FNSQ_", "NSDQ"), ("FINRA/FNYX_", "NYSE")):
        venue_frame = quandl.get(
            dataset_prefix + symbol,
            start_date=start_date,
            end_date=end_date,
            authtoken=nsdq_api_key,
        )
        venue_frames.append(
            venue_frame.rename(columns={
                'ShortVolume': 'ShortVolume' + venue,
                'TotalVolume': 'TotalVolume' + venue,
                'ShortExemptVolume': 'ShortExemptVolume' + venue,
            })
        )

    nasdaq_frame, nyse_frame = venue_frames
    # Missing values are intentionally left as NaN here; callers decide how to fill.
    return pd.merge(nasdaq_frame, nyse_frame, on='Date', how='outer')


## Return FTD Data from SEC FTD files using a Stock's CUSIP number to sort 
def return_ftd_data_cusip(cusip_number):
    """Return the rows of the module-level ``ftd_df`` for the given CUSIP(s).

    Parameters
    ----------
    cusip_number : str or list-like of str
        One CUSIP, or several (for example the Series that
        ``return_CUSIP_from_symbol`` yields when a symbol maps to more than
        one CUSIP).

    Returns
    -------
    pandas.DataFrame
        Matching rows indexed by ``Date`` (datetime), without the ``CUSIP``
        column.  Always a DataFrame, even when only one row matches.

    Raises
    ------
    KeyError
        If a requested CUSIP is not present in ``ftd_df``.
    """
    # Selecting with a list keeps the result two-dimensional.  A bare scalar
    # label returns a Series when exactly one row matches, which used to break
    # the set_index call below.
    if pd.api.types.is_list_like(cusip_number):
        wanted = list(cusip_number)
    else:
        wanted = [cusip_number]

    # set_index already returns a new frame, so ftd_df itself is never mutated.
    df = ftd_df.set_index("CUSIP").loc[wanted].copy()
    df['Date'] = pd.to_datetime(df['Date'])
    return df.set_index('Date')

## Return the CUSIP symbol from the symbol_df symbol list 
def return_CUSIP_from_symbol(symbol):
    """Look up the CUSIP recorded for ``symbol`` in the module-level ``symbol_df``.

    Returns the CUSIP value when the symbol appears once; when it appears on
    several rows the label lookup yields all of them as a Series.  Raises
    ``KeyError`` for an unknown symbol.
    """
    by_symbol = symbol_df.set_index('SYMBOL')
    return by_symbol.loc[symbol, 'CUSIP']

def return_ftd_data_symbol(symbol):
    """Return the fails-to-deliver rows for a ticker by resolving its CUSIP first."""
    return return_ftd_data_cusip(return_CUSIP_from_symbol(symbol))

In [11]:
## FMP Constants 
fmpbase_urlv3, fmpbase_urlv4 = 'https://fmpcloud.io/api/v3/', 'https://fmpcloud.io/api/v4/'
# None when FMP_CLOUD_API_KEY is not present in the environment.
api_key = os.environ.get("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    """Download daily historical prices for ``symbol`` from FMP Cloud.

    Parameters
    ----------
    symbol : str
        Ticker to query.
    startDate, endDate : str
        Date window sent as the ``from`` / ``to`` query parameters.  The
        defaults are bound when this function is defined, so later changes to
        the module-level ``start_date`` / ``end_date`` do not affect them.
    apiKey : str
        FMP Cloud API key (default bound at definition time as well).

    Returns
    -------
    pandas.DataFrame
        The ``historical`` records in reversed order, indexed by ``Date``
        (datetime), without the ``label`` column.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    KeyError
        If the response carries no ``historical`` entry (e.g. unknown symbol).
    """
    # Let requests build and encode the query string; plain concatenation
    # raised TypeError when the key was None and never escaped anything.
    resp_data = requests.get(
        fmpbase_urlv3 + 'historical-price-full/' + symbol,
        params={'from': startDate, 'to': endDate, 'apikey': apiKey},
        timeout=30,  # without a timeout a stalled connection blocks the kernel forever
    )
    resp_data.raise_for_status()

    json_ = resp_data.json()
    if not isinstance(json_, dict) or 'historical' not in json_:
        raise KeyError("FMP response for %r has no 'historical' data" % (symbol,))

    df = pd.DataFrame(json_['historical']).rename(columns={'date': 'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.iloc[::-1]  ## Reverse the DataFrame
    df = df.set_index('Date')
    # 'label' is dropped when present; tolerate payloads that omit it.
    return df.drop(columns='label', errors='ignore')

api_key = os.getenv("FMP_CLOUD_API_KEY")
def get_float_data_FMP(symbol):
    """Return the first share-float record FMP Cloud reports for ``symbol``.

    Uses the module-level ``api_key``.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    IndexError
        If the service returns an empty list for the symbol.
    """
    resp_data = requests.get(
        fmpbase_urlv4 + 'shares_float',
        params={'symbol': symbol, 'apikey': api_key},  # encoded by requests
        timeout=30,  # avoid hanging the kernel on a stalled connection
    )
    resp_data.raise_for_status()

    json_ = resp_data.json()
    if isinstance(json_, list) and not json_:
        raise IndexError("FMP returned no share-float record for %r" % (symbol,))
    return json_[0]

def get_company_profile_FMP_json(symbol):
    """Return the first company-profile record FMP Cloud reports for ``symbol``.

    Requests ``<fmpbase_urlv3>profile/<symbol>`` with the module-level
    ``api_key``.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    IndexError
        If the service returns an empty list for the symbol.
    """
    ## https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'
    resp_data = requests.get(
        fmpbase_urlv3 + 'profile/' + symbol,
        params={'apikey': api_key},  # encoded by requests
        timeout=30,  # avoid hanging the kernel on a stalled connection
    )
    resp_data.raise_for_status()

    json_response = resp_data.json()
    if isinstance(json_response, list) and not json_response:
        raise IndexError("FMP returned no company profile for %r" % (symbol,))
    return json_response[0]

def save_and_export_raw_df_csv(data, symbol):
    """Write ``data`` to ``../FilesExportIndividualStockDFs_Big/<symbol>_combined_df.csv``."""
    target = ''.join(['../FilesExportIndividualStockDFs_Big/', symbol, '_combined_df.csv'])
    data.to_csv(target)

In [12]:
def get_time_series_data(symbol):
    """Assemble one Date-keyed frame of prices, fails and short volume.

    The frame is an outer merge on ``Date`` of the FMP price history, the
    fails-to-deliver rows (without ``SYMBOL``) and the FINRA short-volume
    data.  Every missing value in the result is replaced by 0.

    NOTE(review): because of the outer merges, a date present in only one
    source ends up with 0 in the other sources' columns (prices included);
    confirm that this is what downstream consumers expect.
    """
    fails = return_ftd_data_symbol(symbol).drop(columns=['SYMBOL'])
    prices = get_FMP_historical_data(symbol)

    combined = pd.merge(prices, fails, on='Date', how='outer')
    for column in ('QUANTITY_FAILS', 'volume', 'unadjustedVolume', 'vwap'):
        combined[column] = combined[column].fillna(0)

    short_volume = get_short_data_QUANDL(symbol)
    return pd.merge(combined, short_volume, on='Date', how='outer').fillna(0)

In [26]:
# Ticker used by every cell below.
symbol = test_symbol = 'EURN'

In [27]:
# Sanity check: fetch the fails-to-deliver rows for the selected symbol and display them.
test_df = return_ftd_data_symbol(symbol)
test_df

Unnamed: 0_level_0,SYMBOL,QUANTITY_FAILS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-02,EURN,218.0
2021-11-03,EURN,22839.0
2021-11-04,EURN,8802.0
2021-11-08,EURN,11629.0
2021-11-10,EURN,5162.0
2021-11-12,EURN,2431.0


In [28]:
# Freshly downloaded window for the selected symbol.
test_df = get_time_series_data(symbol)

# Previously exported history (a dict keyed by symbol) without its signal columns.
path = Path('../FilesExport_DFs_with_TI_pkl/'+symbol+'_data_dict_with_technicals.pkl')
data_import = load_obj(path)
import_df = data_import[symbol].drop(
    columns=['bollinger_signal', 'dema_signal', 'adl_signal', 'rsi_signal']
)

# Stored history first, new window appended after it.
df1, df2 = test_df.copy(), import_df.copy()
df3 = pd.concat([df2, df1])
df3.tail(30)

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,QUANTITY_FAILS,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-10-04,9.76,9.9,9.71,9.87,9.87,3473227.0,3473227.0,0.11,1.127,9.82667,0.01127,183214.0,654856.0,100.0,1018604.0,9611.0,0.0,44574.0
2021-10-05,9.87,10.16,9.82,10.09,10.09,5960480.0,5960480.0,0.22,2.229,10.02333,0.02229,69306.0,1200230.0,608.0,2287748.0,25201.0,0.0,67805.0
2021-10-06,9.99,10.21,9.88,10.2,10.2,4157849.0,4157849.0,0.21,2.102,10.09667,0.02102,326493.0,658342.0,2520.0,2406455.0,17323.0,0.0,23493.0
2021-10-07,10.05,10.37,10.04,10.35,10.35,3184249.0,3184249.0,0.3,2.985,10.25333,0.02985,151234.0,859370.0,1835.0,1068318.0,14142.0,0.0,32616.0
2021-10-08,10.32,10.42,10.2,10.22,10.22,2842820.0,2842820.0,-0.1,-0.969,10.28,-0.00969,1144068.0,564089.0,1395.0,764442.0,10953.0,0.0,25228.0
2021-10-11,10.65,10.755,10.52,10.56,10.56,2261126.0,2261126.0,-0.09,-0.845,10.61167,-0.00845,0.0,237980.0,0.0,469524.0,13887.0,0.0,39337.0
2021-10-12,10.42,10.46,10.125,10.13,10.13,2063590.0,2063590.0,-0.29,-2.783,10.23833,-0.02783,423066.0,265702.0,255.0,466513.0,8223.0,0.0,34350.0
2021-10-13,10.11,10.185,10.03,10.16,10.16,1124282.0,1124282.0,0.05,0.495,10.125,0.00495,172870.0,130048.0,0.0,219210.0,4007.0,200.0,10697.0
2021-10-14,10.28,10.39,10.215,10.3,10.3,1886973.0,1886973.0,0.02,0.195,10.30167,0.00195,37403.0,247265.0,1500.0,389335.0,16774.0,2500.0,28465.0
2021-10-15,10.7,11.01,10.68,10.87,10.87,2551501.0,2551501.0,0.17,1.589,10.85333,0.01589,6943.0,216339.0,0.0,607604.0,10597.0,0.0,36018.0


In [29]:
# import pandas as pd
# import numpy as np
# import requests
# import json
import hvplot.pandas
#from dotenv import load_dotenv
from datetime import date
# import os
from scipy import stats
from numpy.lib.stride_tricks import as_strided
from numpy.lib import pad
#import pad
# import matplotlib.pyplot as plt
# %matplotlib inline

# from pathlib import Path

In [30]:
def rolling_spearman(seqa, seqb, window):
    """Rolling Spearman rank correlation between two equally long sequences.

    Parameters
    ----------
    seqa, seqb : pandas.Series or 1-D array-like
        The two sequences to compare.
    window : int
        Number of consecutive observations per correlation.

    Returns
    -------
    numpy.ndarray
        One value per input position.  The first ``window - 1`` entries are
        NaN; entry ``k`` is the correlation of the ranks of the ``window``
        observations ending at position ``k``.
    """
    values_a = np.asarray(seqa)
    values_b = np.asarray(seqb)

    # Each input must be strided with its OWN byte stride.  Re-using seqa's
    # stride for seqb reads the wrong memory as soon as the two differ in
    # dtype width or memory layout.
    stride_a = values_a.strides[0]
    stride_b = values_b.strides[0]

    # Overlapping windows as rows: row r holds elements r .. r + window - 1.
    windows_a = as_strided(values_a, shape=[len(values_a) - window + 1, window],
                           strides=[stride_a, stride_a])
    windows_b = as_strided(values_b, shape=[len(values_b) - window + 1, window],
                           strides=[stride_b, stride_b])

    # Spearman = Pearson correlation of the within-window ranks.
    ranks_a = pd.DataFrame(windows_a).rank(axis=1)
    ranks_b = pd.DataFrame(windows_b).rank(axis=1)
    corrs = ranks_a.corrwith(ranks_b, axis=1)

    # Left-pad so the output lines up with the original positions.
    return np.pad(corrs, (window - 1, 0), 'constant', constant_values=np.nan)

def use_csvs(ticker):
    """Build the full time series for ``ticker``: stored history plus fresh data.

    The stored history is loaded from
    ``../FilesExport_DFs_with_TI_pkl/<ticker>_data_dict_with_technicals.pkl``
    (a dict keyed by ticker), stripped of its four ``*_signal`` columns, and
    the freshly downloaded window from ``get_time_series_data`` is appended.

    Returns
    -------
    pandas.DataFrame
        The concatenated frame without its last row.
    """
    fresh = get_time_series_data(ticker)

    # NOTE: load_obj unpickles the file; only point this at trusted exports.
    path = Path('../FilesExport_DFs_with_TI_pkl/'+ticker+'_data_dict_with_technicals.pkl')
    data_import = load_obj(path)

    # Index with the function argument.  The previous code used the notebook
    # global `symbol`, so any ticker other than the currently selected one
    # read the wrong entry (or raised KeyError).
    history = data_import[ticker].drop(
        columns=['bollinger_signal', 'dema_signal', 'adl_signal', 'rsi_signal']
    )

    data = pd.concat([history, fresh])

    return data.iloc[0:-1]



def bollinger_bands(dataframe,period=20):
    """Add Bollinger-band columns and a percentile signal to a copy of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain an ``adjClose`` column.  It is not modified.
    period : int
        Rolling window length for the mean and standard deviation.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with ``middle_band``, ``<period>_day_stdev``,
        ``upper_band``, ``lower_band``, ``spread``, ``change_in_spread``,
        ``<period>_return`` and ``bollinger_signal`` added, and every row
        containing a NaN removed.
    """
    data = dataframe.copy()
    stdev_col = str(period)+'_day_stdev'
    rolling_close = data['adjClose'].rolling(window=period)

    data['middle_band'] = rolling_close.mean()
    data[stdev_col] = rolling_close.std()
    data['upper_band'] = data['middle_band'] + 2*data[stdev_col]
    data['lower_band'] = data['middle_band'] - 2*data[stdev_col]
    # NOTE(review): upper + lower equals twice the middle band; a band
    # "spread" is normally upper - lower.  Left unchanged because previously
    # exported signals / trained models may depend on this definition.
    data['spread'] = data['upper_band'] + data['lower_band']
    data['change_in_spread'] = data['spread']/data['spread'].shift(1)-1
    data[str(period)+"_return"] = data['adjClose']/data['adjClose'].shift(period)-1
    # Percentile rank, largest change ranked first.
    data['bollinger_signal'] = data['change_in_spread'].rank(ascending=False, pct=True)
    # dropna returns a new frame; the result used to be discarded, so the
    # warm-up rows were never actually removed (unlike in dema()).
    data = data.dropna()

    return data

def dema(dataframe, period1=10, period2=20):
    """Add two double-EMA lines and a percentile signal to a copy of ``dataframe``.

    For each of ``period1`` and ``period2`` the columns ``<p>ema1`` (EMA of
    ``adjClose``), ``<p>ema2`` (EMA of that EMA) and ``<p>dema``
    (``2*ema1 - ema2``) are added, followed by ``<period1>_return``,
    ``spread`` (fast DEMA minus slow DEMA) and ``dema_signal`` (ascending
    percentile rank of the spread).  Rows containing NaN are dropped.
    """
    out = dataframe.copy()
    close = out['adjClose']

    for span in (period1, period2):
        prefix = str(span)
        first_pass = close.ewm(span=span, adjust=False).mean()
        out[prefix+'ema1'] = first_pass
        out[prefix+'ema2'] = out[prefix+'ema1'].ewm(span=span, adjust=False).mean()
        out[prefix+'dema'] = 2*out[prefix+'ema1'] - out[prefix+'ema2']

    out[str(period1)+"_return"] = out['adjClose']/out['adjClose'].shift(period1)-1
    out['spread'] = out[str(period1)+'dema'] - out[str(period2)+'dema']
    out['dema_signal'] = out['spread'].rank(ascending=True, pct=True)
    return out.dropna()

def price_momentum(dataframe, smoothing1=0.0571, smoothing2=0.1, periods1=15, periods2=10):
    """Two-stage exponentially smoothed momentum of ``changeOverTime``.

    Stage 1 smooths ``changeOverTime`` with factor ``smoothing1``, seeded with
    the first available ``periods1``-row rolling mean.  Stage 2 smooths ten
    times the stage-1 result with factor ``smoothing2``, seeded with its first
    available ``periods2``-row rolling mean.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``changeOverTime`` and ``adjClose``.  Not modified.
    smoothing1, smoothing2 : float
        Smoothing factors of the two stages.
    periods1, periods2 : int
        Rolling-mean windows used for the two seeds.

    Returns
    -------
    pandas.DataFrame
        Copy of the input restricted to the rows where both rolling means
        exist, with ``smoothing_factor``, ``<periods1>average``,
        ``35d_custom_smoothing``, ``35d_custom_10``, ``smoothing_factor2``,
        ``<periods2>average``, ``<periods2>d_custom_smoothing`` and
        ``<periods2>_return`` added.  (The ``35d_*`` names are fixed and do
        not follow ``periods1``.)

    Raises
    ------
    IndexError
        If too few rows remain to seed either stage.
    """
    data = dataframe.copy()
    avg1_col = str(periods1)+"average"
    avg2_col = str(periods2)+"average"

    data['smoothing_factor'] = smoothing1
    data[avg1_col] = data['changeOverTime'].rolling(window=periods1).mean()
    data = data.dropna()

    # Seed with the FIRST surviving row's average.  The seed used to be read
    # at iloc[periods1] before dropna, i.e. the second valid average, so the
    # first output row looked one row ahead and was inconsistent with the
    # second stage below.
    stage1 = [data[avg1_col].iloc[0]]
    for change in data['changeOverTime'].to_numpy()[1:]:
        stage1.append(change*smoothing1 + stage1[-1]*(1-smoothing1))
    data['35d_custom_smoothing'] = stage1
    data['35d_custom_10'] = data['35d_custom_smoothing']*10

    data['smoothing_factor2'] = smoothing2
    data[avg2_col] = data['35d_custom_10'].rolling(window=periods2).mean()
    data = data.dropna()

    stage2 = [data[avg2_col].iloc[0]]
    for scaled in data['35d_custom_10'].to_numpy()[1:]:
        stage2.append((scaled - stage2[-1])*smoothing2 + stage2[-1])
    data[str(periods2)+'d_custom_smoothing'] = stage2

    # Added after the last dropna, so its first periods2 rows stay NaN.
    data[str(periods2)+"_return"] = data['adjClose']/data['adjClose'].shift(periods2)-1
    return data

def get_ichimoku_cloud(dataframe, period1=4, period2=8, period3=15):
    """Add Ichimoku-style lines, built from rolling means of ``adjClose``.

    Columns added to a copy of ``dataframe``: ``conversion_line``
    (``period1`` mean), ``base_line`` (``period2`` mean),
    ``senkou_spanA_line`` (average of the two), ``senkou_spanB_line``
    (``period3`` mean) and ``lagging_span`` (``adjClose`` shifted by
    ``period2`` rows).  Rows containing NaN are dropped.
    """
    # TODO generate signal, ichimoku works better in current market regime with
    # shorter periods, being able to respond faster to events than a traditional version
    # The conversion crossing the base would be the signal

    out = dataframe.copy()
    close = out['adjClose']

    def _rolling_mean(length):
        return close.rolling(window=length).mean()

    out['conversion_line'] = _rolling_mean(period1)
    out['base_line'] = _rolling_mean(period2)
    out['senkou_spanA_line'] = (out['conversion_line']+out['base_line'])/2
    out['senkou_spanB_line'] = _rolling_mean(period3)
    out['lagging_span'] = close.shift(period2)

    return out.dropna()

def accumulation_distribution_line(dataframe):
    """Add accumulation/distribution-line columns and a rank-correlation signal.

    Works on a copy of ``dataframe`` (needs ``adjClose``, ``low``, ``high``
    and ``volume``) and returns it with these columns added:
    ``money_flow_mult``, ``money_flow_volume``, ``adl`` (running sum of the
    money-flow volume), ``adl_change``, ``negative_change_counter``,
    ``9_day_return`` and ``adl_signal`` (9-row rolling Spearman correlation
    between ``adl`` and ``9_day_return``).

    Rows that contain a NaN after the multiplier is computed are dropped.

    NOTE(review): the counter list starts with 9 zeros and gains one entry per
    outer-loop pass, so the frame appears to need at least 9 rows after the
    dropna for the column assignment to match in length; confirm.
    """

    ##TODO define periodicity and pass as arguments, use the mean as the signal generator, -1 is buy and and 1 is sell
    ##TODO need to add ability to ignore a -1 during a range of 1s

    data = dataframe.copy()
    # ((close - low) - (high - close)) / (high - low), rounded to 2 decimals.
    data['money_flow_mult'] = round(((data['adjClose'] - data['low']) - (data['high'] - data['adjClose']))/(data['high'] - data['low']),2)
    data = data.dropna()
    data['money_flow_volume'] = data['money_flow_mult']*data['volume']
    money_flow_multiplier_list = list(data['money_flow_volume'].values)
    # Running total of the money-flow volume.
    adl = [money_flow_multiplier_list[0]]
    i = 1
    while i < len(money_flow_multiplier_list):
        a_d_indicator = adl[i-1]+money_flow_multiplier_list[i]
        adl.append(a_d_indicator)
        i+=1
    data['adl'] = adl
    data['adl_change'] = data['adl']/data['adl'].shift(1)-1
    # Nine leading zeros, then one count per 9-row window start.
    negative_change_count = [0]*9
    i = 0
    counter = 0
    while i < len(data)-9:
        j=0
        while j < 9:
            if data.iloc[j+i]['adl_change'] <0:
                counter+=1
            # NOTE(review): with j in 0..8 this is true only for j == 0, so the
            # value appended is the count carried over from positions 1..8 of
            # the previous window plus position 0 of this one.  `% 19` looks
            # unintended - confirm the intended window before relying on
            # negative_change_counter.
            if j %19 == 0:
                negative_change_count.append(counter)
                counter = 0
            j+=1
        i+=1
    data['negative_change_counter'] = negative_change_count
    data['9_day_return'] = data['adjClose']/data['adjClose'].shift(9)-1
    data['adl_signal'] = rolling_spearman(data['adl'], data['9_day_return'], 9)

    return data

def rsi(dataframe, periods=14):
    """Add a relative-strength index built from simple rolling means.

    From ``changeOverTime`` the columns ``gains``, ``losses``,
    ``average_gain``, ``average_loss`` (``periods``-row rolling means),
    ``rs``, ``rsi`` (``100 - 100 / (1 + rs)``) and ``rsi_signal`` (ascending
    percentile rank of ``rsi``) are added to a copy of ``dataframe``.
    Warm-up rows are kept and hold NaN.
    """
    out = dataframe.copy()
    change = out['changeOverTime']

    out['gains'] = np.where(change > 0, change, 0)
    out['losses'] = np.where(change < 0, np.absolute(change), 0)

    for source, target in (('gains', 'average_gain'), ('losses', 'average_loss')):
        out[target] = out[source].rolling(window=periods).mean()

    out['rs'] = out['average_gain']/out['average_loss']
    out['rsi'] = (100 - 100/(1+out['rs']))
    out['rsi_signal'] = out['rsi'].rank(ascending=True, pct=True)

    return out

def get_all_indicators(ticker):
    """Return the combined series for ``ticker`` with the four signal columns.

    Each indicator is computed independently on the frame from ``use_csvs``;
    only its ``*_signal`` column is attached (aligned on the index) and rows
    with any NaN are dropped afterwards.
    """
    base = use_csvs(ticker)

    # Evaluated in this order: Bollinger, DEMA, ADL, RSI.
    signals = {
        'bollinger_signal': bollinger_bands(base)['bollinger_signal'],
        'dema_signal': dema(base)['dema_signal'],
        'adl_signal': accumulation_distribution_line(base)['adl_signal'],
        'rsi_signal': rsi(base)['rsi_signal'],
    }

    return base.assign(**signals).dropna()

In [31]:
def shift_dataset(dataframe, period=15):
    """Re-label features and close ``period`` rows earlier.

    After dropping NaN rows, every column except ``close``/``adjClose`` is
    shifted up by ``period`` rows and the trailing all-NaN rows are removed;
    ``close`` is then filled with the closes from row ``period`` onward.

    NOTE(review): the features and the ``close`` placed on a given row both
    originate from the same source row (``period`` rows later), so only the
    index labels move.  Confirm this is the intended feature/target alignment.
    """
    clean = dataframe.copy().dropna()

    shifted = clean.drop(['close', 'adjClose'], axis=1).shift(-period).dropna()
    later_closes = clean[['close']].iloc[period:]
    shifted.loc[:, 'close'] = later_closes.values

    return shifted

In [32]:
def do_shifted_indicators(symbol):
    """Compute all indicator signals for ``symbol`` and pass them through ``shift_dataset``."""
    return shift_dataset(get_all_indicators(symbol))

In [33]:
# ## 


# def rolling_spearman(seqa, seqb, window):
#     stridea = seqa.values.strides[0]
#     ssa = as_strided(seqa, shape=[len(seqa) - window + 1, window], strides=[stridea, stridea])
#     strideb = seqa.values.strides[0]
#     ssb = as_strided(seqb, shape=[len(seqb) - window + 1, window], strides =[strideb, strideb])
#     ar = pd.DataFrame(ssa)
#     br = pd.DataFrame(ssb)
#     ar = ar.rank(1)
#     br = br.rank(1)
#     corrs = ar.corrwith(br, 1)
#     return pad(corrs, (window - 1, 0), 'constant', constant_values=np.nan)

# # def create_price_df(self,ticker, period='daily'):

# #     if period =='hourly':
# #         api_url = 'https://fmpcloud.io/api/v3/historical-chart/1hour'
# #     else:
# #         api_url = 'https://fmpcloud.io/api/v3/historical-price-full'

# #     ticker_df = json.loads(requests.get(f"{api_url}/{ticker}?apikey={fmp_api}").content)['historical']
# #     data = pd.DataFrame(ticker_df).set_index('date')[::-1]
# #     data['Date'] = data.index
# #     data.index = data.index.astype('datetime64[ns]')

# #     return data

# def use_csvs(ticker):

#     #data = pd.read_csv("../FilesExportIndividualStockDFs_Big/"+ticker+"_combined_df.csv", index_col='Date', parse_dates=True)
# #     path = Path('../FilesExport_Finished/'+ticker+'_finished_df.pkl')
# #     data_import = load_obj(path)
# #     data = data_import['dataFrame'].copy()

#     test_df = get_time_series_data(ticker)
#     #test_df

#     path = Path('../FilesExport_DFs_with_TI_pkl/'+ticker+'_data_dict_with_technicals.pkl')
#     data_import = load_obj(path)

#     import_df = data_import[symbol].copy()
#     import_df.drop(columns={'bollinger_signal','dema_signal','adl_signal','rsi_signal'},inplace=True)
#     #import_df

#     df1 = test_df.copy()
#     df2 = import_df.copy()
#     data = pd.concat([df2,df1])

#     return data



# def bollinger_bands(dataframe,period=20):
#     data = dataframe.copy()
#     data['middle_band'] = data[['adjClose']].rolling(window=period).mean()
#     data[str(period)+'_day_stdev'] = data[['adjClose']].rolling(window=period).std()
#     data['upper_band'] = data['middle_band']+2*data[str(period)+'_day_stdev']
#     data['lower_band'] = data['middle_band'] - 2*data[str(period)+'_day_stdev']
#     data['spread'] = data['upper_band'] + data['lower_band']
#     data['change_in_spread'] = data['spread']/data['spread'].shift(1)-1
#     data[str(period)+"_return"] = data['adjClose']/data['adjClose'].shift(period)-1
#     data['bollinger_signal'] = data['change_in_spread'].rank(ascending=False, pct=True)
#     data.dropna()

#     return data

# def dema(dataframe, period1=10, period2=20):
#     data = dataframe.copy()
#     data[str(period1)+'ema1'] = dataframe[['adjClose']].ewm(span=period1, adjust=False).mean()
#     data[str(period1)+'ema2'] = data[str(period1)+'ema1'].ewm(span=period1, adjust=False).mean()
#     data[str(period1)+'dema'] = 2*data[str(period1)+'ema1'] - data[str(period1)+'ema2']
#     data[str(period2)+'ema1'] = data[['adjClose']].ewm(span=period2, adjust=False).mean()
#     data[str(period2)+'ema2'] = data[str(period2)+'ema1'].ewm(span=period2, adjust=False).mean()
#     data[str(period2)+'dema'] = 2*data[str(period2)+'ema1'] - data[str(period2)+'ema2']
#     data[str(period1)+"_return"] = data['adjClose']/data['adjClose'].shift(period1)-1
#     data['spread'] = data[str(period1)+'dema'] - data[str(period2)+'dema']
#     data['dema_signal'] = data['spread'].rank(ascending=True, pct=True)
#     data = data.dropna()
#     return data

# def price_momentum(dataframe, smoothing1=0.0571, smoothing2=0.1, periods1=15, periods2=10):
#     data = dataframe.copy()
#     data['smoothing_factor'] = smoothing1
#     data[str(periods1)+"average"] = data['changeOverTime'].rolling(window=periods1).mean()
#     smoothing_factor_list = [data.iloc[periods1][str(periods1)+"average"]]
#     data = data.dropna()
#     i=1
#     j=0
#     while i < len(data[str(periods1)+"average"]):
#         smoothing_factor = data.iloc[i]['changeOverTime']*data.iloc[i]['smoothing_factor'] + smoothing_factor_list[j]*(1-data.iloc[i]['smoothing_factor'])
#         smoothing_factor_list.append(smoothing_factor)
#         j+=1
#         i+=1
#     data['35d_custom_smoothing'] = smoothing_factor_list
#     data['35d_custom_10'] = data['35d_custom_smoothing']*10
#     data['smoothing_factor2'] = smoothing2
#     data[str(periods2)+"average"] = data['35d_custom_10'].rolling(window=periods2).mean()
#     data = data.dropna()
#     smoothing_factor_list2 = [data.iloc[0][str(periods2)+"average"]]
#     i=1
#     j=0
#     while i < len(data[str(periods2)+"average"]):
#         smoothing_factor = (data.iloc[i]['35d_custom_10'] - smoothing_factor_list2[j])*data.iloc[i]['smoothing_factor2'] + smoothing_factor_list2[j]
#         smoothing_factor_list2.append(smoothing_factor)
#         j+=1
#         i+=1
#     data[str(periods2)+'d_custom_smoothing'] = smoothing_factor_list2
#     data[str(periods2)+"_return"] = data['adjClose']/data['adjClose'].shift(periods2)-1
# #         data['signal'] = np.where(data[str(periods2)+'d_custom_smoothing'] > data[str(periods2)+'d_custom_smoothing'].shift(1), 1.0, 0.0)
# #         data = data.rename(columns={'signal':'price_mo'})
#     return data

# def get_ichimoku_cloud(dataframe, period1=4, period2=8, period3=15):

#     #TODO generate signal, ichimoku works better in current market regime with shorter periods, being able to respond faster to events than a traditional version
#     # The conversion crossing the base would be the signal

#     data = dataframe.copy()
#     data['conversion_line'] = data[['adjClose']].rolling(window=period1).mean()
#     data['base_line'] = data[['adjClose']].rolling(window=period2).mean()
#     data['senkou_spanA_line'] = (data['conversion_line']+data['base_line'])/2
#     data['senkou_spanB_line'] = data[['adjClose']].rolling(window=period3).mean()
#     data['lagging_span'] = data['adjClose'].shift(period2)
#     data = data.dropna()

#     return data

# def accumulation_distribution_line(dataframe):

#     ##TODO define periodicity and pass as arguments, use the mean as the signal generator, -1 is buy and and 1 is sell
#     ##TODO need to add ability to ignore a -1 during a range of 1s

#     data = dataframe.copy()
#     data['money_flow_mult'] = round(((data['adjClose'] - data['low']) - (data['high'] - data['adjClose']))/(data['high'] - data['low']),2)
#     data = data.dropna()
#     data['money_flow_volume'] = data['money_flow_mult']*data['volume']
#     money_flow_multiplier_list = list(data['money_flow_volume'].values)
#     adl = [money_flow_multiplier_list[0]]
#     i = 1
#     while i < len(money_flow_multiplier_list):
#         a_d_indicator = adl[i-1]+money_flow_multiplier_list[i]
#         adl.append(a_d_indicator)
#         i+=1
#     data['adl'] = adl
#     data['adl_change'] = data['adl']/data['adl'].shift(1)-1
#     negative_change_count = [0]*9
#     i = 0
#     counter = 0
#     while i < len(data)-9:
#         j=0
#         while j < 9:
#             if data.iloc[j+i]['adl_change'] <0:
#                 counter+=1
#             if j %19 == 0:
#                 negative_change_count.append(counter)
#                 counter = 0
#             j+=1
#         i+=1
#     data['negative_change_counter'] = negative_change_count
#     data['9_day_return'] = data['adjClose']/data['adjClose'].shift(9)-1
#     data['adl_signal'] = rolling_spearman(data['adl'], data['9_day_return'], 9)

#     return data

# def rsi(dataframe, periods=14):
#     data = dataframe.copy()
#     data['gains'] = np.where(data['changeOverTime']>0, data['changeOverTime'], 0)
#     data['losses'] = np.where(data['changeOverTime']<0, np.absolute(data['changeOverTime']), 0)
#     data['average_gain'] = data['gains'].rolling(window=periods).mean()
#     data['average_loss'] = data['losses'].rolling(window=periods).mean()
#     data['rs'] = data['average_gain']/data['average_loss']
#     data['rsi'] = (100 - 100/(1+data['rs']))
#     data['rsi_signal'] = data['rsi'].rank(ascending=True, pct=True)

#     return data


# # def get_ratings(self):

# #     ratings = json.loads(requests.get(f"https://fmpcloud.io/api/v3/historical-rating/{self.ticker}?limit=100&apikey={fmp_api}").content)
# #     ratings_df = pd.DataFrame(ratings)
# #     ratings_df['average_rating'] = (ratings_df['ratingScore']+ratings_df['ratingDetailsDCFScore']+ratings_df['ratingDetailsROEScore']+ratings_df['ratingDetailsROAScore'] \
# #                                     +ratings_df['ratingDetailsDEScore'] + ratings_df['ratingDetailsPEScore']+ ratings_df['ratingDetailsPBScore'])/7

# #     return ratings_df

# # def get_stock_market_performances(self, dataframe):
# #     limit = len(dataframe)
# #     data = json.loads(requests.get(f"https://fmpcloud.io/api/v3/historical-sectors-performance?limit=525&apikey={fmp_api}").content)
# #     sector_df = pd.DataFrame(data).set_index('date')
# #     sector_df_clean = sector_df[::-1]
# #     s_p500 = json.loads(requests.get(f"https://fmpcloud.io/api/v3/historical-price-full/^SP500TR?from="+sector_df_clean.index[0]+"&to="+sector_df_clean.index[-1]+"&apikey="+fmp_api).content)['historical']
# #     sp_df = pd.DataFrame(s_p500)
# #     sp_df_clean = sp_df[::-1]

# #     return sp_df_clean

# def get_all_indicators(ticker):
#     dataframe = use_csvs(ticker)
#     bb = bollinger_bands(dataframe)
#     DEMA = dema(dataframe)
#     ADL = accumulation_distribution_line(dataframe)
#     RSI = rsi(dataframe)

#     dataframe['bollinger_signal'] = bb['bollinger_signal']
#     dataframe['dema_signal'] = DEMA['dema_signal']
#     dataframe['adl_signal'] = ADL['adl_signal']
#     dataframe['rsi_signal'] = RSI['rsi_signal']
#     dataframe.dropna(inplace=True)
#     return dataframe

# # def merge_data(self):
# #     ticker_list = self.stock_list
# #     for stock in ticker_list:
# #         stock_csv = self.use_csvs(stock)
# #         stock_csv['Date'] = stock_csv.index.astype("string")
# #         stock_csv = stock_csv.iloc[:-1]
# #         stock_csv.index.names =[""]
# #         stock_indicators = self.get_all_indicators(stock)
# #         stock_signals = stock_indicators[['Date','bollinger_signal','dema_signal', 'adl_signal', 'rsi_signal']]
# #         stock_signals.loc[:,'Date'] = stock_signals['Date'].astype('string')
# #         stock_signals.index.names =[""]
# #         merged_data = pd.merge(left = stock_csv, right=stock_signals, on=['Date']).set_index('Date')
# #         merged_data = merged_data.dropna()
# #         writer = pd.ExcelWriter("NewCsvs/"+stock+"_all_indicator_dfs.xlsx", engine="xlsxwriter")

# #         merged_data.to_excel(writer, sheet_name=stock+"_data")

# #         writer.save()

# #     return print("All files successfully saved")

In [34]:
# Feature frame for the selected symbol (indicator signals + shifted rows); reused by the model cell below.
test_df = do_shifted_indicators(symbol)
test_df

Unnamed: 0_level_0,open,high,low,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,QUANTITY_FAILS,...,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE,bollinger_signal,dema_signal,adl_signal,rsi_signal,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-03-02,10.14,10.1600,9.875,848900.0,848900.0,-0.14,-1.381,10.01167,-0.01381,59.0,...,100.0,188530.0,5454.0,0.0,11053.0,0.791232,0.658202,0.000000,0.270083,10.00
2016-03-03,9.90,10.0800,9.820,612000.0,612000.0,0.16,1.616,9.98667,0.01616,0.0,...,0.0,209426.0,10497.0,0.0,29942.0,0.668058,0.602608,0.466667,0.379501,10.06
2016-03-04,10.03,10.1000,9.860,491400.0,491400.0,-0.08,-0.798,9.97000,-0.00798,420.0,...,0.0,191678.0,1900.0,0.0,7600.0,0.329854,0.512011,0.816667,0.405817,9.95
2016-03-07,9.81,10.0400,9.770,800200.0,800200.0,0.21,2.141,9.94333,0.02141,1300.0,...,0.0,292340.0,6500.0,0.0,56900.0,0.518789,0.490048,0.966667,0.657202,10.02
2016-03-08,10.18,10.5100,10.150,1346700.0,1346700.0,0.15,1.473,10.33000,0.01473,604.0,...,0.0,472299.0,14720.0,0.0,38909.0,0.530271,0.592313,0.916667,0.740305,10.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-15,10.10,10.1300,9.640,2139108.0,2139108.0,-0.21,-2.079,9.88667,-0.02079,0.0,...,1100.0,428503.0,19521.0,803.0,35399.0,0.664579,0.014413,0.250000,0.104571,9.89
2021-10-18,9.96,10.1700,9.930,1702870.0,1702870.0,0.02,0.201,10.02667,0.00201,11629.0,...,987.0,509347.0,14239.0,0.0,26249.0,0.759221,0.008922,0.166667,0.087258,9.98
2021-10-19,10.15,10.1800,9.985,1197025.0,1197025.0,0.01,0.099,10.10833,0.00099,0.0,...,0.0,333253.0,5537.0,0.0,14449.0,0.501044,0.011668,0.400000,0.097645,10.16
2021-10-20,10.20,10.2300,10.060,1462895.0,1462895.0,-0.09,-0.882,10.13333,-0.00882,5162.0,...,2000.0,394940.0,17243.0,0.0,34638.0,0.541406,0.015100,0.300000,0.105263,10.11


In [35]:
## Load machine learning model and test with sliced data
# Initial imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [36]:
# Re-select the test ticker before loading its saved model, then display it.
symbol = test_symbol
symbol

'EURN'

In [38]:
## Load Data
# Evaluate the saved per-symbol model on the feature frame built above.
df = test_df

## Set X and y data
X = df.drop(columns=['close']).values
y = df['close'].values

# NOTE(review): the scaler is fitted on the data being evaluated.  If the model
# was trained with a different fitted scaler, that one should be persisted and
# reused here so that inputs are scaled identically - confirm.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

# load json and create model
file_path = Path('../Model_Data_shift_low_loss/'+symbol+'_model_data.json')
with open(file_path, "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)

# load weights into new model
file_path = Path('../Model_Data_shift_low_loss/'+symbol+'_model_weights.h5')
loaded_model.load_weights(file_path)

## Compile loaded model and print mse score
loaded_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])
score = loaded_model.evaluate(X, y, verbose=0)
# MSE is in squared target units, not a percentage: report the raw value
# instead of multiplying by 100 and appending '%'.
print("%s: %.4f" % (loaded_model.metrics_names[1], score[1]))
print()

## take dataframe copy and compare predicted values to actual values
df2 = df.copy()
df2['predicted'] = loaded_model.predict(X)
df_pred = df2[['close','predicted']]
df_pred

mse: 1.64%



Unnamed: 0_level_0,close,predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-03-02,10.00,10.010026
2016-03-03,10.06,10.063305
2016-03-04,9.95,9.966858
2016-03-07,10.02,10.017332
2016-03-08,10.33,10.346226
...,...,...
2021-10-15,9.89,9.942487
2021-10-18,9.98,10.007347
2021-10-19,10.16,10.172587
2021-10-20,10.11,10.129862


In [39]:
df_pred.tail(15)

Unnamed: 0_level_0,close,predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-10-01,10.99,10.992914
2021-10-04,10.85,10.868191
2021-10-05,10.64,10.663757
2021-10-06,10.74,10.744451
2021-10-07,10.69,10.710335
2021-10-08,10.55,10.579581
2021-10-11,10.6,10.612307
2021-10-12,10.53,10.555122
2021-10-13,10.64,10.652887
2021-10-14,10.34,10.352922
