In [1]:
## Try shifting close price data, then run neural networks in a way that can predict future price movements

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import glob

In [3]:
# Machine-learning specific imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [4]:
## API Imports 
import os
import requests
import json

from dotenv import load_dotenv
load_dotenv()

True

In [5]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Read the file at ``path`` in binary mode and return the unpickled object."""
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
    
def save_obj(obj, path ):
    """Pickle ``obj`` into the file at ``path`` using the highest available protocol."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
## FMP Constants 
# Base endpoints of the FMP Cloud REST API (v3 and v4).
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'

# Default date window used when requesting historical prices.
start_date, end_date = '2022-01-01', '2022-01-31'

# API key comes from the environment; it is None when the variable is not set.
api_key = os.environ.get("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    """Download daily historical prices for ``symbol`` from the FMP Cloud v3 API.

    Parameters
    ----------
    symbol : str
        Ticker appended to the ``historical-price-full/`` endpoint, e.g. ``'GME'``.
    startDate, endDate : str
        Sent as the ``from`` / ``to`` query parameters. They default to the
        module-level ``start_date`` / ``end_date``.
    apiKey : str
        Sent as the ``apikey`` query parameter. Defaults to the module-level
        ``api_key`` read from the environment.

    Returns
    -------
    pandas.DataFrame
        The records under the response's ``historical`` key, in the reverse of
        the order the API returned them, indexed by a datetime ``Date`` column,
        with the ``label`` column removed when present.

    Raises
    ------
    ValueError
        If ``apiKey`` is empty or None (e.g. the environment variable is unset).
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    KeyError
        If the response carries no ``historical`` records for ``symbol``.
    """
    if not apiKey:
        # Fail early with a readable message instead of a TypeError from str + None.
        raise ValueError('FMP API key is missing; set FMP_CLOUD_API_KEY or pass apiKey')

    url_hist_price = fmpbase_urlv3 + 'historical-price-full/' + symbol
    query = {'from': startDate, 'to': endDate, 'apikey': apiKey}

    # Let requests build/encode the query string; the timeout prevents the
    # notebook from hanging forever on a stalled connection.
    resp_data = requests.get(url_hist_price, params=query, timeout=30)
    resp_data.raise_for_status()
    json_ = resp_data.json()

    records = json_.get('historical') if isinstance(json_, dict) else None
    if not records:
        raise KeyError(f"no 'historical' data returned for {symbol!r}: {json_!r}")

    df = pd.DataFrame(records).rename(columns={'date': 'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.iloc[::-1]  ## Reverse the DataFrame
    df = df.set_index('Date')
    # errors='ignore': a payload without a 'label' column is still usable.
    df = df.drop(columns='label', errors='ignore')
    return df

In [7]:
# ## Import symbol list 
# path = Path('../Resources/06_01_ML_symbol_success_list.pkl')
# symbol_list = load_obj(path)
# len(symbol_list)

In [8]:
def prepare_data(symbol, n_days):
    """Build the feature frame and the ``n_days``-shifted close-price target for ``symbol``.

    The pickled object for the symbol is loaded and indexed by ``symbol`` to get
    the source frame. The target is that frame's ``close`` column with the first
    ``n_days`` rows dropped, followed by the first ``n_days`` closes returned by
    ``get_FMP_historical_data(symbol)`` (called with its default date window).
    For the historical part, row *i* of the target therefore holds the close
    found ``n_days`` rows after row *i* of the features.

    Parameters
    ----------
    symbol : str
        Ticker used both for the pickle file name and for the API request.
    n_days : int
        Number of rows to shift the close prices by.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``features_df`` — the source frame with the index reset and the
        ``close`` / ``adjClose`` columns removed;
        ``new_close_df`` — columns ``Close_Date`` and ``close`` with a fresh
        0..n-1 index.

    Notes
    -----
    If the API returns fewer than ``n_days`` rows the two frames will not have
    the same length; callers should check this before training.
    """
    path = Path('../FilesExport_Complete_DFs_TI_noShift/'+symbol+'_TI_DF_no_shift.pkl')
    df = load_obj(path)[symbol]

    features_df = df.reset_index().drop(columns=['close','adjClose'])

    # Historical closes with the first n_days rows removed.
    df_close = df[['close']].reset_index().rename(columns={"Date": "Close_Date"})
    shifted_close = df_close.iloc[n_days: , :].reset_index(drop=True)

    # First n_days closes from the API, used to pad the end of the target.
    api_df = get_FMP_historical_data(symbol)
    new_data = (
        api_df[['close']]
        .reset_index()
        .rename(columns={"Date": "Close_Date"})
        .iloc[0:n_days]
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    new_close_df = pd.concat([shifted_close, new_data], ignore_index=True)

    return features_df, new_close_df

In [9]:
## Smoke-test prepare_data on a single ticker
symbol = 'GME'

X, y = prepare_data(symbol, n_days=5)

In [11]:
# Display the target frame (second value returned by prepare_data) for a visual check.
y

Unnamed: 0,Close_Date,close
0,2016-02-09,27.120001
1,2016-02-10,26.650000
2,2016-02-11,26.670000
3,2016-02-12,26.980000
4,2016-02-16,28.000000
...,...,...
1486,2022-01-03,152.840000
1487,2022-01-04,148.910000
1488,2022-01-05,129.370000
1489,2022-01-06,131.030000


In [6]:
# ## Test import code for the dataframe 

symbol = 'GME'

# Locate and load the pickled object for this symbol; it is indexed by ticker below.
path = Path('../FilesExport_Complete_DFs_TI_noShift/') / (symbol+'_TI_DF_no_shift.pkl')
data = load_obj(path)

# Work on a copy so the imported frame itself is never modified.
import_df = data[symbol]
df = import_df.copy()
df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,...,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE,bollinger_signal,dema_signal,adl_signal,rsi_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-02-02,26.629999,26.709999,25.590000,25.920000,20.004274,1840600.0,1840600.0,-0.71,-2.666,26.07333,...,117857.0,152.0,277309.0,11326.0,0.0,34626.0,0.692824,0.607545,-0.933333,0.180908
2016-02-03,26.110001,26.280001,24.750000,26.170000,20.197214,2223400.0,2223400.0,0.06,0.230,25.73333,...,212359.0,14.0,523222.0,9575.0,0.0,26571.0,0.714956,0.657842,-0.933333,0.251669
2016-02-04,26.059999,27.190001,25.379999,27.010000,20.845501,2000200.0,2000200.0,0.95,3.645,26.52667,...,156570.0,0.0,325811.0,17331.0,0.0,35015.0,0.591549,0.767042,-0.950000,0.666889
2016-02-05,27.010000,27.180000,26.430000,26.830000,20.706583,1545800.0,1545800.0,-0.18,-0.666,26.81333,...,115038.0,0.0,215572.0,6611.0,0.0,14960.0,0.617706,0.796823,-0.950000,0.532710
2016-02-08,26.549999,27.959999,26.500000,27.889999,21.524656,2944800.0,2944800.0,1.34,5.047,27.45000,...,257398.0,30.0,496626.0,27972.0,0.0,66574.0,0.491616,0.857710,-0.933333,0.795060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,152.000000,152.620000,140.000000,148.310000,148.310000,1613729.0,1613729.0,-3.69,-2.428,146.97667,...,227591.0,593.0,539793.0,56501.0,139.0,159382.0,0.951040,0.119788,0.716667,0.362483
2021-12-28,147.500000,157.410000,146.410000,146.460000,146.460000,1320374.0,1320374.0,-1.04,-0.705,150.09333,...,185176.0,0.0,424993.0,50426.0,0.0,126633.0,0.963783,0.165453,0.566667,0.321095
2021-12-29,147.850000,155.490000,142.140000,153.930000,153.930000,2029352.0,2029352.0,6.08,4.112,150.52000,...,280068.0,2383.0,627874.0,112580.0,798.0,295581.0,0.931590,0.930510,0.566667,0.421896
2021-12-30,151.000000,160.000000,150.000000,155.330000,155.330000,1556076.0,1556076.0,4.33,2.868,155.11000,...,172058.0,329.0,463841.0,41010.0,0.0,161071.0,0.840376,0.944408,0.533333,0.593458


In [41]:
# Close prices only, with the Date index turned into a "Close_Date" column.
df_close = (
    df[['close']]
    .reset_index()
    .rename(columns={"Date": "Close_Date"})
)
df_close

Unnamed: 0,Close_Date,close
0,2016-02-02,25.920000
1,2016-02-03,26.170000
2,2016-02-04,27.010000
3,2016-02-05,26.830000
4,2016-02-08,27.889999
...,...,...
1486,2021-12-27,148.310000
1487,2021-12-28,146.460000
1488,2021-12-29,153.930000
1489,2021-12-30,155.330000


In [42]:
# Features are every column except the two close-price columns.
features_df = df.copy().drop(columns=['close','adjClose'])
# The reset_index() result is only displayed; features_df itself keeps its Date index.
features_df.reset_index()

Unnamed: 0,Date,open,high,low,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,...,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE,bollinger_signal,dema_signal,adl_signal,rsi_signal
0,2016-02-02,26.629999,26.709999,25.590000,1840600.0,1840600.0,-0.71,-2.666,26.07333,-0.02666,...,117857.0,152.0,277309.0,11326.0,0.0,34626.0,0.692824,0.607545,-0.933333,0.180908
1,2016-02-03,26.110001,26.280001,24.750000,2223400.0,2223400.0,0.06,0.230,25.73333,0.00230,...,212359.0,14.0,523222.0,9575.0,0.0,26571.0,0.714956,0.657842,-0.933333,0.251669
2,2016-02-04,26.059999,27.190001,25.379999,2000200.0,2000200.0,0.95,3.645,26.52667,0.03645,...,156570.0,0.0,325811.0,17331.0,0.0,35015.0,0.591549,0.767042,-0.950000,0.666889
3,2016-02-05,27.010000,27.180000,26.430000,1545800.0,1545800.0,-0.18,-0.666,26.81333,-0.00666,...,115038.0,0.0,215572.0,6611.0,0.0,14960.0,0.617706,0.796823,-0.950000,0.532710
4,2016-02-08,26.549999,27.959999,26.500000,2944800.0,2944800.0,1.34,5.047,27.45000,0.05047,...,257398.0,30.0,496626.0,27972.0,0.0,66574.0,0.491616,0.857710,-0.933333,0.795060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1486,2021-12-27,152.000000,152.620000,140.000000,1613729.0,1613729.0,-3.69,-2.428,146.97667,-0.02428,...,227591.0,593.0,539793.0,56501.0,139.0,159382.0,0.951040,0.119788,0.716667,0.362483
1487,2021-12-28,147.500000,157.410000,146.410000,1320374.0,1320374.0,-1.04,-0.705,150.09333,-0.00705,...,185176.0,0.0,424993.0,50426.0,0.0,126633.0,0.963783,0.165453,0.566667,0.321095
1488,2021-12-29,147.850000,155.490000,142.140000,2029352.0,2029352.0,6.08,4.112,150.52000,0.04112,...,280068.0,2383.0,627874.0,112580.0,798.0,295581.0,0.931590,0.930510,0.566667,0.421896
1489,2021-12-30,151.000000,160.000000,150.000000,1556076.0,1556076.0,4.33,2.868,155.11000,0.02868,...,172058.0,329.0,463841.0,41010.0,0.0,161071.0,0.840376,0.944408,0.533333,0.593458


In [43]:
# df.to_csv('../Resources/dataframe.csv')

In [48]:
## Drop the first n rows of df_close, where n is the number of days ahead we are trying to predict.
## The same number of newer close prices is then added to the end so the entire feature set can still be used.
def days_change(df,n_days):
    """Return ``df`` without its first ``n_days`` rows, re-indexed from 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to trim; it is not modified.
    n_days : int
        Number of leading rows to drop. Must be >= 0.

    Returns
    -------
    pandas.DataFrame
        The remaining rows with a fresh 0..n-1 index (empty when ``n_days`` is
        at least ``len(df)``).

    Raises
    ------
    ValueError
        If ``n_days`` is negative. ``iloc[-k:]`` would keep only the last *k*
        rows, which is the opposite of dropping leading rows.
    """
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")

    return df.iloc[n_days:, :].reset_index(drop=True)

def d_c(df, n):
    """Return the rows of ``df`` from position ``n`` onward, keeping the original index labels."""
    return df.iloc[n:, :]

In [49]:
# Number of leading close-price rows to drop.
n_days = 5

new_close_df = days_change(df=df_close, n_days=n_days)
new_close_df

Unnamed: 0,Close_Date,close
0,2016-02-09,27.120001
1,2016-02-10,26.650000
2,2016-02-11,26.670000
3,2016-02-12,26.980000
4,2016-02-16,28.000000
...,...,...
1481,2021-12-27,148.310000
1482,2021-12-28,146.460000
1483,2021-12-29,153.930000
1484,2021-12-30,155.330000


In [32]:
## Now add new close prices from days removed from data (this dataset ends at Dec 31)

## FMP Constants 
# NOTE: these values repeat the FMP constants cell near the top of the notebook.
# Base endpoints of the FMP Cloud REST API (v3 and v4).
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'

# Default date window used when requesting historical prices.
start_date, end_date = '2022-01-01', '2022-01-31'

# API key comes from the environment; it is None when the variable is not set.
api_key = os.environ.get("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    """Download daily historical prices for ``symbol`` from the FMP Cloud v3 API.

    NOTE: a function with this name is also defined in an earlier cell of this
    notebook; whichever cell runs last is the definition in effect.

    Parameters
    ----------
    symbol : str
        Ticker appended to the ``historical-price-full/`` endpoint, e.g. ``'GME'``.
    startDate, endDate : str
        Sent as the ``from`` / ``to`` query parameters. They default to the
        module-level ``start_date`` / ``end_date``.
    apiKey : str
        Sent as the ``apikey`` query parameter. Defaults to the module-level
        ``api_key`` read from the environment.

    Returns
    -------
    pandas.DataFrame
        The records under the response's ``historical`` key, in the reverse of
        the order the API returned them, indexed by a datetime ``Date`` column,
        with the ``label`` column removed when present.

    Raises
    ------
    ValueError
        If ``apiKey`` is empty or None (e.g. the environment variable is unset).
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    KeyError
        If the response carries no ``historical`` records for ``symbol``.
    """
    if not apiKey:
        # Fail early with a readable message instead of a TypeError from str + None.
        raise ValueError('FMP API key is missing; set FMP_CLOUD_API_KEY or pass apiKey')

    url_hist_price = fmpbase_urlv3 + 'historical-price-full/' + symbol
    query = {'from': startDate, 'to': endDate, 'apikey': apiKey}

    # Let requests build/encode the query string; the timeout prevents the
    # notebook from hanging forever on a stalled connection.
    resp_data = requests.get(url_hist_price, params=query, timeout=30)
    resp_data.raise_for_status()
    json_ = resp_data.json()

    records = json_.get('historical') if isinstance(json_, dict) else None
    if not records:
        raise KeyError(f"no 'historical' data returned for {symbol!r}: {json_!r}")

    df = pd.DataFrame(records).rename(columns={'date': 'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.iloc[::-1]  ## Reverse the DataFrame
    df = df.set_index('Date')
    # errors='ignore': a payload without a 'label' column is still usable.
    df = df.drop(columns='label', errors='ignore')
    return df

In [50]:
ml_close_df = new_close_df.copy()

# Fetch closes from the API and keep only the first n_days rows,
# shaped like new_close_df (columns "Close_Date" and "close").
api_df = get_FMP_historical_data(symbol)
new_data = (
    api_df[['close']]
    .reset_index()
    .rename(columns={"Date": "Close_Date"})
    .iloc[0:n_days]
)
new_data

Unnamed: 0,Close_Date,close
0,2022-01-03,152.84
1,2022-01-04,148.91
2,2022-01-05,129.37
3,2022-01-06,131.03
4,2022-01-07,140.62


In [51]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
# Concatenating from new_close_df (which ml_close_df was copied from) instead of from
# ml_close_df itself keeps this cell idempotent: re-running it no longer appends
# new_data a second time.
ml_close_df = pd.concat([new_close_df, new_data], ignore_index=True)
ml_close_df ## Success

Unnamed: 0,Close_Date,close
0,2016-02-09,27.120001
1,2016-02-10,26.650000
2,2016-02-11,26.670000
3,2016-02-12,26.980000
4,2016-02-16,28.000000
...,...,...
1486,2022-01-03,152.840000
1487,2022-01-04,148.910000
1488,2022-01-05,129.370000
1489,2022-01-06,131.030000


In [None]:
def update_close_data(df,n_days):
    df = days_change(df,n_days):
    