In [1]:
import pandas as pd

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# import sys
# !{sys.executable} -m pip install quandl

In [3]:
## Constants

## Date window shared by every download below. Only rows inside the window are
## requested; they are meant to be added to the previously exported dataframes.
start_date = "2022-01-01"   ## first date requested from the data APIs
end_date = "2022-01-31"     ## last date requested - hard-coded, so bump it (e.g. to today's date) on each refresh

#default_date_range = '71m' ## Default Range for IEX functions - don't need more at the moment

In [4]:
## QUANDL/NASDAQ
## The key is read from the environment and is None when NASDAQ_API_KEY is unset.
nsdq_api_key = os.getenv('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

In [5]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    """Open the pickle file at ``path`` and return the object stored in it.

    NOTE: unpickling can execute arbitrary code, so only load files that this
    project produced itself.
    """
    with open(path, mode='rb') as pickle_file:
        restored = pickle.load(pickle_file)
    return restored
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` with the highest available protocol.

    The file is opened in ``'wb'`` mode, so existing content is replaced.
    """
    with open(path, mode='wb') as pickle_file:
        pickle.dump(obj, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
## Get Short "Interest" Data from Quandl
## QUANDL/NASDAQ
## NOTE(review): the two assignments below repeat the earlier QUANDL/NASDAQ cell verbatim.
nsdq_api_key = os.getenv('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

def get_short_data_QUANDL(symbol):
    """Fetch the FINRA short-volume datasets for ``symbol`` and join them on Date.

    Two Quandl datasets are requested, ``FINRA/FNSQ_<symbol>`` (Nasdaq) and
    ``FINRA/FNYX_<symbol>`` (NYSE), both limited to the module-level
    ``start_date``/``end_date`` window and authenticated with ``nsdq_api_key``.
    The ShortVolume, ShortExemptVolume and TotalVolume columns of each frame get
    an ``NSDQ`` / ``NYSE`` suffix so they stay distinct after the merge.

    Returns the outer merge of the two frames on ``Date``. Missing values are
    not filled here.
    """
    ## Same window and credentials for both requests
    query_args = dict(start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)

    nasdaq_short = quandl.get("FINRA/FNSQ_" + symbol, **query_args)   ## Nasdaq
    nyse_short = quandl.get("FINRA/FNYX_" + symbol, **query_args)     ## NYSE

    nasdaq_short = nasdaq_short.rename(columns={
        'ShortVolume': 'ShortVolumeNSDQ',
        'TotalVolume': 'TotalVolumeNSDQ',
        'ShortExemptVolume': 'ShortExemptVolumeNSDQ',
    })
    nyse_short = nyse_short.rename(columns={
        'ShortVolume': 'ShortVolumeNYSE',
        'TotalVolume': 'TotalVolumeNYSE',
        'ShortExemptVolume': 'ShortExemptVolumeNYSE',
    })

    return pd.merge(nasdaq_short, nyse_short, on='Date', how='outer')


## Return FTD Data from SEC FTD files using a Stock's CUSIP number to sort
def return_ftd_data_cusip(cusip_number):
    """Return the rows of the module-level ``ftd_df`` for one CUSIP, indexed by date.

    Parameters
    ----------
    cusip_number
        Value to look up in the ``CUSIP`` column of ``ftd_df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows with ``Date`` parsed by ``pd.to_datetime`` and used as
        the index. The ``CUSIP`` column is not part of the result. A DataFrame
        is returned even when only a single row matches.

    Raises
    ------
    KeyError
        If no row of ``ftd_df`` carries ``cusip_number``.
    """
    ## set_index returns a new frame, so the global ftd_df is left untouched
    by_cusip = ftd_df.set_index("CUSIP")
    ## A list selector always yields a DataFrame. A scalar selector yields a Series
    ## when exactly one row matches, which has no set_index and broke the code below.
    ## The explicit copy makes the Date assignment operate on an independent frame.
    matches = by_cusip.loc[[cusip_number]].copy()
    matches['Date'] = pd.to_datetime(matches['Date'])
    return matches.set_index('Date')

## Look up the CUSIP recorded for a ticker in the symbol_df symbol list
def return_CUSIP_from_symbol(symbol):
    """Return the ``CUSIP`` entry of the module-level ``symbol_df`` for ``symbol``.

    ``symbol_df`` itself is not modified. The lookup runs on a re-indexed frame
    keyed by the ``SYMBOL`` column. Raises ``KeyError`` when ``symbol`` is not
    present in that column.
    """
    by_symbol = symbol_df.set_index('SYMBOL')
    symbol_row = by_symbol.loc[symbol]
    return symbol_row['CUSIP']

def return_ftd_data_symbol(symbol):
    """Return the FTD rows for ``symbol`` by resolving its CUSIP first.

    Thin wrapper: ``return_CUSIP_from_symbol`` supplies the CUSIP, which is then
    handed to ``return_ftd_data_cusip``.
    """
    return return_ftd_data_cusip(return_CUSIP_from_symbol(symbol))

In [7]:
## FMP Constants
## Base URLs for the v3 and v4 FMP Cloud endpoints. The key is read from the
## environment and is None when FMP_CLOUD_API_KEY is unset.
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
api_key = os.environ.get("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    """Download the historical price rows for ``symbol`` from FMP Cloud.

    Parameters
    ----------
    symbol : str
        Ticker appended to the ``historical-price-full/`` endpoint.
    startDate, endDate : str
        Sent as the ``from`` / ``to`` query parameters. The defaults are the
        values ``start_date`` / ``end_date`` held when this function was defined.
    apiKey : str
        Sent as the ``apikey`` query parameter.

    Returns
    -------
    pandas.DataFrame
        The rows of the response's ``historical`` list in reverse of the order
        delivered, indexed by ``Date`` (parsed with ``pd.to_datetime``), without
        the ``label`` column.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the response carries no ``historical`` rows.
    """
    ## Let requests build and escape the query string instead of concatenating it
    resp_data = requests.get(
        fmpbase_urlv3 + 'historical-price-full/' + symbol,
        params={'from': startDate, 'to': endDate, 'apikey': apiKey},
        timeout=30,   ## without a timeout a stalled connection blocks the notebook indefinitely
    )
    resp_data.raise_for_status()

    json_ = resp_data.json()
    data = json_.get('historical') if isinstance(json_, dict) else None
    if not data:
        ## Same exception type the bare json_['historical'] lookup raised, with context
        raise KeyError("no 'historical' price rows returned for symbol {!r}".format(symbol))

    df = pd.DataFrame(data).rename(columns={'date': 'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    ## Reverse the DataFrame (presumably the API delivers newest rows first - TODO confirm)
    df = df.iloc[::-1].set_index('Date')
    ## 'label' is a display string; tolerate responses that omit it
    return df.drop(columns='label', errors='ignore')

def get_float_data_FMP(symbol):
    """Return the first record of the FMP Cloud ``shares_float`` response for ``symbol``.

    The request goes to ``fmpbase_urlv4 + 'shares_float'`` with ``symbol`` and
    the module-level ``api_key`` as query parameters.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the response is empty.
    """
    ## Let requests build and escape the query string instead of concatenating it
    resp_data = requests.get(
        fmpbase_urlv4 + 'shares_float',
        params={'symbol': symbol, 'apikey': api_key},
        timeout=30,   ## without a timeout a stalled connection blocks the notebook indefinitely
    )
    resp_data.raise_for_status()
    json_ = resp_data.json()
    if not json_:
        ## Same exception type as indexing an empty list, but with a readable message
        raise IndexError('FMP returned no share-float data for symbol {!r}'.format(symbol))
    return json_[0]

def get_company_profile_FMP_json(symbol):
    """Return the first record of the FMP Cloud ``profile`` response for ``symbol``.

    Example request: https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'
    The module-level ``api_key`` is sent as the ``apikey`` query parameter.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the response is empty.
    """
    ## Let requests build and escape the query string instead of concatenating it
    resp_data = requests.get(
        fmpbase_urlv3 + 'profile/' + symbol,
        params={'apikey': api_key},
        timeout=30,   ## without a timeout a stalled connection blocks the notebook indefinitely
    )
    resp_data.raise_for_status()
    json_response = resp_data.json()
    if not json_response:
        ## Same exception type as indexing an empty list, but with a readable message
        raise IndexError('FMP returned no company profile for symbol {!r}'.format(symbol))
    return json_response[0]

# def save_and_export_raw_df_csv(data, symbol, path='None'):
#     ## Can set custom path (useful for testing) otherwise will default to below path
#     if path=='None':
#         path = ('../FilesExportIndividualStockDFs_Big/'+symbol+'_combined_df.csv')
#     data.to_csv(path)
    
def save_and_export_raw_df_pkl(data, symbol, path=None):
    """Pickle ``data`` to ``path`` via ``save_obj``.

    Parameters
    ----------
    data
        Object to store.
    symbol : str
        Ticker used to build the default file name.
    path : optional
        Custom destination (useful for testing). When omitted, ``None``, or the
        legacy string sentinel ``'None'``, the file is written to
        ``../FilesExport_TimeSeries_DFs/<symbol>_combined_df.pkl``.
    """
    ## Accept a real None as well as the historical 'None' string, so an explicit
    ## path=None no longer ends up in open(None)
    if path is None or path == 'None':
        path = ('../FilesExport_TimeSeries_DFs/'+symbol+'_combined_df.pkl')
    save_obj(data, path)

In [8]:
def get_time_series_data(symbol):
    """Build one combined frame of prices, FTDs and short volume for ``symbol``.

    Steps, in order:
      1. FTD rows from ``return_ftd_data_symbol`` with the ``SYMBOL`` column removed.
      2. Price history from ``get_FMP_historical_data``.
      3. Outer merge of prices and FTDs on ``Date``. Gaps in QUANTITY_FAILS,
         volume, unadjustedVolume and vwap are set to 0.
      4. Outer merge with ``get_short_data_QUANDL`` on ``Date``.
      5. Every remaining missing value is replaced by 0.
    """
    fails = return_ftd_data_symbol(symbol).drop(columns=['SYMBOL'])
    prices = get_FMP_historical_data(symbol)

    prices_and_fails = pd.merge(prices, fails, on='Date', how='outer')
    for column in ('QUANTITY_FAILS', 'volume', 'unadjustedVolume', 'vwap'):
        prices_and_fails[column] = prices_and_fails[column].fillna(0)

    short_volume = get_short_data_QUANDL(symbol)
    combined = pd.merge(prices_and_fails, short_volume, on='Date', how='outer')
    return combined.fillna(0)

In [9]:
## Import dataframe objects using Pickle
## NOTE(review): with the two loads below commented out, ftd_df and symbol_df are not
## defined anywhere in the visible cells, so return_ftd_data_cusip and
## return_CUSIP_from_symbol (and get_time_series_data, which calls them) would raise
## NameError on a fresh kernel - confirm whether they are loaded elsewhere.
# ftd_df = load_obj('../Resources/ftd_all_data.pkl')
# symbol_df = load_obj('../Resources/symbol_all_list.pkl')

## Import machine learning list
## Import symbol list - the pickled object is loaded as-is; its length is the cell output
path = Path('../Resources/06_01_ML_symbol_success_list.pkl')
symbol_list = load_obj(path)
len(symbol_list)

770

In [10]:
## Fetch the FMP price history for a single ticker, using the function's default
## date window, and display the resulting frame
symbol = 'AMC' 
fmp_data = get_FMP_historical_data(symbol)
fmp_data

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-01-03,27.415,28.13,26.42,26.52,26.52,26670653.0,26670653.0,-0.895,-3.265,27.02333,-0.03265
2022-01-04,26.67,26.67,24.64,25.49,25.49,33327577.0,33327577.0,-1.18,-4.424,25.6,-0.04424
2022-01-05,25.17,25.3,22.3618,22.75,22.75,44564219.0,44564219.0,-2.42,-9.615,23.4706,-0.09615
2022-01-06,22.96,23.77,20.8,22.46,22.46,51554688.0,51554688.0,-0.5,-2.178,22.34333,-0.02178
2022-01-07,23.61,24.3,22.44,22.99,22.99,49283273.0,49283273.0,-0.62,-2.626,23.24333,-0.02626
2022-01-10,22.42,22.8703,21.25,22.78,22.78,37582417.0,37582417.0,0.36,1.606,22.3001,0.01606
2022-01-11,22.4,23.75,22.09,22.79,22.79,35808007.0,35808007.0,0.39,1.741,22.87667,0.01741
2022-01-12,22.86,23.3598,22.06,22.72,22.72,27138920.0,27138920.0,-0.14,-0.612,22.71327,-0.00612
2022-01-13,22.65,23.15,20.525,20.66,20.66,40550856.0,40550856.0,-1.99,-8.786,21.445,-0.08786
2022-01-14,20.33,21.08,19.51,20.57,20.57,56216232.0,56216232.0,0.24,1.181,20.38667,0.01181


In [11]:
## Combine the price history with the FINRA short-volume data for the same symbol.
## Work on a copy so fmp_data stays as displayed above.
df = fmp_data.copy()

## Outer join on Date keeps days present in either source; the gaps become 0
df2 = get_short_data_QUANDL(symbol)
df = df.merge(df2, on='Date', how='outer').fillna(0)
df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-01-03,27.415,28.13,26.42,26.52,26.52,26670653.0,26670653.0,-0.895,-3.265,27.02333,-0.03265,5292484.0,44459.0,9390062.0,2093490.0,2845.0,3746389.0
2022-01-04,26.67,26.67,24.64,25.49,25.49,33327577.0,33327577.0,-1.18,-4.424,25.6,-0.04424,7072977.0,63860.0,12463612.0,2602348.0,1103.0,5111742.0
2022-01-05,25.17,25.3,22.3618,22.75,22.75,44564219.0,44564219.0,-2.42,-9.615,23.4706,-0.09615,9577390.0,249837.0,16175676.0,3312263.0,93068.0,6513238.0
2022-01-06,22.96,23.77,20.8,22.46,22.46,51554688.0,51554688.0,-0.5,-2.178,22.34333,-0.02178,11492366.0,353673.0,19315710.0,3865361.0,121152.0,7915602.0
2022-01-07,23.61,24.3,22.44,22.99,22.99,49283273.0,49283273.0,-0.62,-2.626,23.24333,-0.02626,12018312.0,63014.0,18804410.0,4016683.0,3719.0,7362813.0
2022-01-10,22.42,22.8703,21.25,22.78,22.78,37582417.0,37582417.0,0.36,1.606,22.3001,0.01606,8221128.0,94324.0,13156446.0,2661041.0,9267.0,5302938.0
2022-01-11,22.4,23.75,22.09,22.79,22.79,35808007.0,35808007.0,0.39,1.741,22.87667,0.01741,7798679.0,12440.0,12826102.0,2560727.0,631.0,5112768.0
2022-01-12,22.86,23.3598,22.06,22.72,22.72,27138920.0,27138920.0,-0.14,-0.612,22.71327,-0.00612,5882322.0,54648.0,9477339.0,2045353.0,2361.0,3992401.0
2022-01-13,22.65,23.15,20.525,20.66,20.66,40550856.0,40550856.0,-1.99,-8.786,21.445,-0.08786,9243672.0,67940.0,14425582.0,2980229.0,4623.0,5231661.0
2022-01-14,20.33,21.08,19.51,20.57,20.57,56216232.0,56216232.0,0.24,1.181,20.38667,0.01181,12938269.0,82862.0,19696907.0,3633282.0,5485.0,7062274.0


In [15]:
## Import old dataframe, drop FTDs, join data and save
## The pickle is indexed by ticker below (data[symbol]) - presumably a mapping of
## symbol -> DataFrame; TODO confirm against the notebook that wrote it.
path = Path('../FilesExport_Complete_DFs_TI_noShift/'+symbol+'_TI_DF_no_shift.pkl')
data = load_obj(path)
## QUANTITY_FAILS is removed so the columns line up with the newly downloaded
## frame, which carries no FTD data
import_df = data[symbol].drop(columns='QUANTITY_FAILS')
import_df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,...,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE,bollinger_signal,dema_signal,adl_signal,rsi_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-02-02,21.070000,21.190001,20.440001,20.850000,15.812827,224700.0,224700.0,-0.220,-1.044,20.82667,...,40735.0,0.0,60870.0,2186.0,0.0,15799.0,0.730563,0.602513,-0.883333,0.335557
2016-02-03,21.010000,21.150000,20.530001,21.030001,15.949338,147200.0,147200.0,0.020,0.095,20.90333,...,18085.0,0.0,38897.0,1465.0,0.0,1970.0,0.745308,0.623677,-0.883333,0.415610
2016-02-04,21.059999,21.780001,21.059999,21.610001,16.389210,253500.0,253500.0,0.550,2.612,21.48333,...,43055.0,0.0,54305.0,6432.0,0.0,63258.0,0.656836,0.690476,-0.900000,0.590394
2016-02-05,21.889999,21.980000,20.040001,20.270000,15.372950,412100.0,412100.0,-1.620,-7.401,20.76333,...,61798.0,0.0,80519.0,7440.0,0.0,10964.0,0.741957,0.590608,-0.650000,0.381588
2016-02-08,20.059999,20.780001,19.280001,20.250000,15.357783,333100.0,333100.0,0.190,0.947,20.10333,...,38585.0,0.0,72351.0,2665.0,0.0,4373.0,0.717158,0.525794,-0.100000,0.440961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,28.350000,29.390000,27.010000,28.700000,28.700000,30717695.0,30717695.0,0.350,1.235,28.36667,...,6649591.0,98983.0,12127507.0,2184990.0,6127.0,4834408.0,0.905496,0.958995,0.600000,0.748499
2021-12-28,28.180000,29.740000,27.590000,27.720000,27.720000,31447233.0,31447233.0,-0.460,-1.632,28.35000,...,7937352.0,77168.0,13726265.0,2397892.0,8937.0,5222430.0,0.910858,0.953042,0.383333,0.701134
2021-12-29,27.745000,28.350000,26.650000,27.950000,27.950000,30894171.0,30894171.0,0.205,0.739,27.65000,...,6844497.0,58725.0,12268925.0,2436367.0,2984.0,4880260.0,0.855898,0.954365,0.200000,0.649099
2021-12-30,27.910000,30.190000,27.695000,28.940000,28.940000,35796504.0,35796504.0,1.030,3.690,28.94167,...,7822306.0,106996.0,14723214.0,2080364.0,20391.0,5653288.0,0.420912,0.962963,-0.041841,0.781855


In [16]:
## Stack the newly downloaded rows (df) underneath the historical frame (import_df).
## DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is
## the supported equivalent. Columns that exist only in import_df (the *_signal
## columns) are NaN for the added rows.
updated_df = pd.concat([import_df, df])
#updated_df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,...,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE,bollinger_signal,dema_signal,adl_signal,rsi_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-02-02,21.070000,21.190001,20.440001,20.850000,15.812827,224700.0,224700.0,-0.22,-1.044,20.82667,...,40735.0,0.0,60870.0,2186.0,0.0,15799.0,0.730563,0.602513,-0.883333,0.335557
2016-02-03,21.010000,21.150000,20.530001,21.030001,15.949338,147200.0,147200.0,0.02,0.095,20.90333,...,18085.0,0.0,38897.0,1465.0,0.0,1970.0,0.745308,0.623677,-0.883333,0.415610
2016-02-04,21.059999,21.780001,21.059999,21.610001,16.389210,253500.0,253500.0,0.55,2.612,21.48333,...,43055.0,0.0,54305.0,6432.0,0.0,63258.0,0.656836,0.690476,-0.900000,0.590394
2016-02-05,21.889999,21.980000,20.040001,20.270000,15.372950,412100.0,412100.0,-1.62,-7.401,20.76333,...,61798.0,0.0,80519.0,7440.0,0.0,10964.0,0.741957,0.590608,-0.650000,0.381588
2016-02-08,20.059999,20.780001,19.280001,20.250000,15.357783,333100.0,333100.0,0.19,0.947,20.10333,...,38585.0,0.0,72351.0,2665.0,0.0,4373.0,0.717158,0.525794,-0.100000,0.440961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-25,15.890000,16.619400,15.550000,16.020000,16.020000,42220722.0,42220722.0,0.13,0.818,16.06313,...,8158731.0,176867.0,14548715.0,2597484.0,84667.0,5789849.0,,,,
2022-01-26,16.210000,18.155000,15.650000,15.940000,15.940000,76432209.0,76432209.0,-0.27,-1.666,16.58167,...,18838070.0,182242.0,29763994.0,5638109.0,16451.0,11711555.0,,,,
2022-01-27,16.110000,16.580000,14.395000,14.520000,14.520000,50074448.0,50074448.0,-1.59,-9.870,15.16500,...,12476790.0,153690.0,19039258.0,3704413.0,5565.0,6834055.0,,,,
2022-01-28,14.600000,15.250000,13.400000,15.060000,15.060000,53945344.0,53945344.0,0.46,3.151,14.57000,...,11830875.0,136735.0,19465415.0,4404829.0,3169.0,7848759.0,,,,


In [18]:
#updated_df.tail(30)