In [1]:
import json
import os
from string import Template

import numpy
import pandas as pd
import requests
import talib

In [2]:
#Indicators we don't want
# TA-Lib function names that calculate_and_append_indicators skips.
excluded_indicators = ['MAVP', 'COS', 'COSH', 'ACOS', 'SIN', 'SINH', 'ASIN', 'TAN', 'TANH', 'ATAN']

#Assuming we're using AlphaVantage
# Read the key from the ALPHAVANTAGE_API_KEY environment variable so the real
# credential never has to be saved inside the notebook; when the variable is
# not set, the placeholder literal below is used exactly as before.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'REDACTED')

In [3]:
#Returns a dataframe populated with OHLC data for the passed symbol

def api_req(symb):
    """Download the full daily price history for one symbol from AlphaVantage.

    Parameters
    ----------
    symb : str
        Ticker symbol. Surrounding whitespace is stripped before the request.

    Returns
    -------
    pandas.DataFrame
        One row per date, sorted by the date label in ascending order, with
        the columns 'open', 'high', 'low', 'close' and 'volume'. Values are
        left exactly as they were parsed from the JSON payload; no numeric
        conversion happens here.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    requests.Timeout
        If the server does not answer within 30 seconds.
    KeyError
        If the JSON payload has no 'Time Series (Daily)' entry.
    """
    cleaned_symb = symb.strip()

    # Let requests build and URL-encode the query string instead of
    # concatenating raw text into the URL.
    api_result = requests.get(
        url='https://www.alphavantage.co/query',
        params={
            'function': 'TIME_SERIES_DAILY',
            'symbol': cleaned_symb,
            'outputsize': 'full',
            'apikey': api_key,
        },
        timeout=30,
    )
    api_result.raise_for_status()
    api_res_dict = api_result.json()

    if 'Time Series (Daily)' not in api_res_dict:
        # NOTE(review): invalid-symbol / rate-limit answers appear to arrive as
        # ordinary JSON without the series - confirm against the API docs.
        # KeyError is kept as the exception type (that is what the plain
        # lookup used to raise), but now carries the payload for diagnosis.
        raise KeyError(
            "No 'Time Series (Daily)' in the AlphaVantage response for symbol "
            + repr(cleaned_symb) + ": " + repr(api_res_dict)
        )

    # The payload maps date -> {field: value}, so the dates come out as
    # columns; transpose to get one row per date.
    api_res_df = pd.DataFrame(api_res_dict['Time Series (Daily)']).T
    api_res_df = api_res_df.rename(columns={
        '1. open': 'open',
        '2. high': 'high',
        '3. low': 'low',
        '4. close': 'close',
        '5. volume': 'volume',
    })

    # Downstream code (pct_change, the TA-Lib indicators) is order dependent
    # and needs oldest-first rows. The row order otherwise depends on the
    # payload order and on how pandas orders dict keys, so sort explicitly.
    # NOTE(review): assumes the date labels are 'YYYY-MM-DD' strings, for which
    # a plain lexical sort is chronological - confirm.
    return api_res_df.sort_index()

In [4]:
#Helper that turns a row's percent change into the binary goal label

def percent_change_to_binary(pc):
    """Return 1 when ``pc['percent_change']`` is strictly positive, else 0.

    ``pc`` is anything that can be indexed with the key 'percent_change'.
    Every value for which the ``> 0`` comparison is false - zero, negatives
    and NaN - is labelled 0.
    """
    return 1 if pc['percent_change'] > 0 else 0

In [5]:
#Returns the fully constructed dataframe, ready for the AI

def get_symbol_data_df(symb):
    """Build the feature/label frame for one symbol.

    The price history from ``api_req`` is cast to float64 and extended with
    the columns produced by ``calculate_and_append_indicators``. Two more
    columns are then added:

    * 'percent_change' - the percent change of 'close' shifted up one row, so
      each row carries the change into the FOLLOWING row.
    * 'goal' - ``percent_change_to_binary`` applied to each row.

    Finally the last row (it has no following close) is dropped, and so is
    every row up to and including the last row that still contains a NaN.

    Parameters
    ----------
    symb : str
        Ticker symbol, passed straight to ``api_req``.

    Returns
    -------
    pandas.DataFrame
        The rows that come after the last NaN-containing row. If no row
        contains a NaN, all rows except the final one are returned.
    """
    #Make request and convert into a dataframe of floats
    data_df = api_req(symb).astype('f8')

    data_df = calculate_and_append_indicators(data_df)

    #Construct goal column for training, and price/percent change column for profit tracking
    data_df['percent_change'] = data_df['close'].pct_change().shift(periods=-1)
    data_df['goal'] = data_df.apply(percent_change_to_binary, axis=1)

    #We shifted to align the goals with the NEXT percent change, not the previous,
    #so the last row has a NaN percent_change - trim it
    data_df = data_df.iloc[:-1]

    #Locate the rows that still contain a NaN, by position rather than by label,
    #so the trim does not depend on the index being sorted or unique
    row_has_nan = data_df.isna().any(axis=1).to_numpy()
    nan_positions = row_has_nan.nonzero()[0]

    #Keep only what follows the last NaN row. When there is no NaN row nothing
    #is cut (previously an empty-string label was looked up and one valid row
    #was dropped unconditionally)
    if nan_positions.size:
        data_df = data_df.iloc[int(nan_positions[-1]) + 1:]

    return data_df

In [6]:
def _unique_column_names(columns, taken):
    """Map each column to a name absent from ``taken``, adding _2, _3, ... on clashes."""
    reserved = set(taken)
    mapping = {}
    for col in columns:
        candidate = col
        suffix = 2
        while candidate in reserved:
            candidate = str(col) + '_' + str(suffix)
            suffix += 1
        reserved.add(candidate)
        mapping[col] = candidate
    return mapping


def calculate_and_append_indicators(ohlc_df, timeperiod=None):
    """Append every non-excluded TA-Lib indicator to a price frame.

    Each name from ``talib.get_functions()`` that is not listed in
    ``excluded_indicators`` is looked up on ``talib.abstract`` and called with
    ``ohlc_df``. Single-output results are stored in a column named after the
    function; multi-output results keep the column names TA-Lib gives them.
    A column name that already exists gets the suffix '_2', then '_3', and so
    on, so the returned frame never has duplicate column names.

    Parameters
    ----------
    ohlc_df : pandas.DataFrame
        Price frame handed to every indicator function. It is not modified.
    timeperiod : int or None, optional
        When given, passed as ``timeperiod`` to every indicator that lists
        that option in its ``parameters``. With the default ``None`` every
        indicator runs with its own TA-Lib defaults.
        NOTE(review): the former call passed ``periods=10``, which does not
        appear to be an option of any non-excluded function and so had no
        effect; the default here keeps that behaviour - confirm against the
        TA-Lib docs, and pass ``timeperiod=10`` if a 10-bar window was meant.

    Returns
    -------
    pandas.DataFrame
        ``ohlc_df`` followed by the indicator columns, in function order.
    """
    used_columns = set(ohlc_df.columns)
    indicator_frames = []

    #Iterate through the functions, skipping the ones in the exclude list
    for indic_name in talib.get_functions():
        if indic_name in excluded_indicators:
            continue

        #Get the actual function object for the indic
        indic_function = getattr(talib.abstract, indic_name)

        #Only forward the look-back window to indicators that have that option
        call_kwargs = {}
        if timeperiod is not None and 'timeperiod' in indic_function.parameters:
            call_kwargs['timeperiod'] = timeperiod

        #Calculate the indicator from the price columns
        indic_res = indic_function(ohlc_df, **call_kwargs)

        #Handle series
        if isinstance(indic_res, pd.Series):
            indic_res = indic_res.to_frame(indic_name)

        #Several indicators share output names; a single fixed suffix produces
        #duplicate column names on the third clash, so number them instead
        indic_res = indic_res.rename(columns=_unique_column_names(indic_res.columns, used_columns))
        used_columns.update(indic_res.columns)
        indicator_frames.append(indic_res)

    #Attach everything in one go rather than re-joining the growing frame each time
    return pd.concat([ohlc_df] + indicator_frames, axis=1)

In [7]:
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#    print()

In [8]:
#get_symbol_data_df('A')

Unnamed: 0,open,high,low,close,volume,HT_DCPERIOD,HT_DCPHASE,inphase,quadrature,sine,...,MININDEX,min,max,minidx,maxidx,MULT,SUB,SUM,percent_change,goal
2000-03-27,118.20,119.500,113.200,114.30,605700.0,18.900203,287.981985,-7.873095,11.379390,-0.951154,...,59,76.44,159.00,59,73,13527.40000,6.300,3427.77,0.041995,1
2000-03-28,114.00,121.900,113.500,119.10,996100.0,18.420422,-38.556386,-5.502787,13.222156,-0.623285,...,61,81.00,159.00,61,73,13835.65000,8.400,3470.43,-0.059614,0
2000-03-29,119.00,121.000,110.000,112.00,894400.0,18.018728,-16.970237,0.001757,9.562171,-0.291875,...,61,81.00,159.00,61,73,13310.00000,11.000,3500.55,-0.062500,0
2000-03-30,108.20,112.500,99.130,105.00,1455100.0,17.722540,21.449580,1.725736,3.598824,0.365682,...,64,91.50,159.00,64,73,11152.12500,13.370,3524.55,-0.009524,0
2000-03-31,106.00,106.000,90.000,104.00,2670200.0,17.521326,32.533814,2.526425,-2.271657,0.537797,...,64,91.50,159.00,64,73,9540.00000,16.000,3531.55,-0.057692,0
2000-04-03,103.00,103.300,91.750,98.00,1452700.0,17.392408,37.726974,-0.650567,-7.179562,0.611899,...,64,91.50,159.00,64,73,9477.77500,11.550,3535.80,-0.045918,0
2000-04-04,98.00,102.400,82.000,93.50,2517800.0,17.309565,27.464326,-2.573364,-6.113022,0.461196,...,94,93.50,159.00,94,73,8396.80000,20.400,3537.80,0.036791,1
2000-04-05,92.75,99.880,90.130,96.94,1508700.0,17.252584,4.069758,-4.197964,-10.803592,0.070971,...,94,93.50,159.00,94,73,9002.18440,9.750,3535.74,0.083144,1
2000-04-06,98.38,105.000,97.060,105.00,983600.0,17.207267,-9.553371,-10.343984,-10.124633,-0.165966,...,94,93.50,159.00,94,73,10191.30000,7.940,3533.94,0.161905,1
2000-04-07,107.50,125.000,107.100,122.00,2128200.0,17.175120,-7.576260,-11.823903,-2.822432,-0.131846,...,94,93.50,159.00,94,73,13387.50000,17.900,3547.84,-0.115574,0
