# Set-up

In [1]:
# Show the starting directory, then switch to the repository root so that the
# relative references used below (`source`, `data/`, `plots/`) resolve.
%pwd  
%cd /workspaces/image-classification-for-technical-indicators

/workspaces/image-classification-for-technical-indicators


# Introduction

In this script, we walk through the entire project. Running it produces plots that are uploaded to an AWS S3 bucket. 

# Imports

In [2]:
from source import aws
from source import features
from source import plots

import json
import time

import boto3
import pandas as pd
import alpaca_trade_api

# Obtain Raw Data

The data consists of 8 years (1 January 2011 - 1 January 2019) of open, high, low, and close data for 505 tickers. All data comes from Alpaca Inc. (https://alpaca.markets/algotrading).

We connected to AWS Secrets Manager to obtain the Alpaca API log-in information. Then, we iterated through the list of S&P 500 firms in the `data/list_of_SP_500.csv` file and obtained the data from the Alpaca API for each firm. 

In [None]:
# Where the Alpaca credentials are stored in AWS Secrets Manager.
secret_name = "AlpacaAPI"
region_name = "us-east-2"

# Open a Secrets Manager client in the region that holds the secret.
session = boto3.session.Session()
client = session.client(service_name='secretsmanager', region_name=region_name)

# The secret string is a JSON document carrying the API key and API secret.
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
secret_string = get_secret_value_response['SecretString']
secret = json.loads(secret_string)
api_key = secret["api_key"]
api_secret = secret["api_secret"]

# Build the Alpaca REST client (v2 API) used by the download cell below.
base_url = "https://api.alpaca.markets"
api = alpaca_trade_api.REST(
    api_key,
    api_secret,
    base_url,
    api_version='v2',
)

# Fetch the account record; a failure here surfaces bad credentials early.
account = api.get_account()

In [None]:
# The ticker list has no header row; name the two columns explicitly.
SP_500_df = pd.read_csv('data/list_of_SP_500.csv', header=None, names=['symbol', 'name'])
SP_500 = SP_500_df["symbol"].tolist()

list_of_dataframes = []

# The full period is requested in three consecutive windows
# (presumably so each request stays under the limit=1000 bars passed below).
start_dates = ['2011-01-01 09:30', '2014-01-02 09:30', '2017-01-02 09:30']
end_dates = ['2014-01-01 09:30', '2017-01-01 09:30', '2019-01-01 09:30']

# The window bounds are the same for every firm, so convert them to
# New York-localised ISO-8601 strings once instead of once per request.
date_windows = [
    (
        pd.Timestamp(start, tz='America/New_York').isoformat(),
        pd.Timestamp(end, tz='America/New_York').isoformat(),
    )
    for start, end in zip(start_dates, end_dates)
]

for firm in SP_500:
    time.sleep(1)  # ensures we don't hit any API limits
    for window_start, window_end in date_windows:
        # .df has (symbol, field) columns; stacking level 0 moves the symbol
        # into the row index next to the timestamp.
        get_data = api.get_barset(
            symbols=firm,
            timeframe="day",
            start=window_start,
            end=window_end,
            limit=1000,
        ).df.stack(level=0)
        list_of_dataframes.append(get_data)

# One long frame indexed by (time, firm).
df = pd.concat(list_of_dataframes)
df.index.names = ['time', 'firm']

In [None]:
# Print a summary of the combined frame: index, columns, dtypes and non-null counts.
df.info()

In [None]:
# Preview the first rows of the combined frame.
df.head()

# Add Features

We compute three technical indicators: [moving average convergence/divergence](https://en.wikipedia.org/wiki/MACD) (MACD), [relative strength index](https://en.wikipedia.org/wiki/Relative_strength_index) (RSI), and [Bollinger Bands](https://en.wikipedia.org/wiki/Bollinger_Bands) (BB). The MACD is measured over 26 days, RSI over 27 days, and BB over 20 days. A buy signal is triggered when:

**MACD:** the MACD crosses above the MACD signal line.

**RSI:** the RSI crosses above 30. 

**BB:** the close value crosses below the lower band.

We obtained the technical indicator values, then created a column in the DataFrame to indicate when a buy signal was triggered. 

In [None]:
# RSI with a window of 27, plus the buy flag derived from it.
df['rsi'] = features.build_indicator(df, "rsi", 27)
df['rsi_buy'] = features.rsi_buy_indicator(df['rsi'])

# Bollinger Bands with a window of 20; only the lower band is kept because the
# buy flag compares it against the close price.
df['bb_lower_band'] = features.build_indicator(df, "bbands", 20)['BBL_20_2.0']
df['bb_buy'] = features.bb_buy_indicator(bb_lower_band_col=df['bb_lower_band'], close_col=df['close'])

# MACD: build the indicator once and read both columns from the same result
# instead of recomputing it for each column.
# NOTE(review): assumes build_indicator's output does not depend on indicator
# columns already added to df - confirm in source/features.py.
macd_frame = features.build_indicator(df, "macd", 26)
df['macd_signal'] = macd_frame['MACDs_12_26_9']
df['macd'] = macd_frame['MACD_12_26_9']
df['macd_buy'] = features.macd_buy_indicator(macd_signal_col=df['macd_signal'], macd_col=df['macd'])

In [3]:
# Reload the feature-augmented data from a local parquet file. This rebinds
# `df`, replacing whatever the cells above produced.
# NOTE(review): nothing visible in this notebook writes this file - presumably
# it was saved from an earlier run; confirm.
df = pd.read_parquet('data/historical_data_added_features.gzip')

We checked to make sure that we had at least 20 buy indicators to sample from.

In [None]:
def _firms_below_minimum(frame, signal_col, minimum=20):
    """Return (firm, signal_count) pairs for firms whose summed `signal_col` is below `minimum`."""
    shortfalls = []
    for firm, data in frame.groupby('firm'):
        signal_count = data[signal_col].sum()
        if signal_count < minimum:
            shortfalls.append((firm, signal_count))
    return shortfalls


# One list per indicator: firms with fewer than 20 buy signals to sample from.
too_few_rsi = _firms_below_minimum(df, 'rsi_buy')
too_few_bb = _firms_below_minimum(df, 'bb_buy')
too_few_macd = _firms_below_minimum(df, 'macd_buy')


In [None]:
# Number of distinct firms, used as the denominator for every percentage below.
firm_count = len(df.groupby("firm"))

rsi_pct = round(len(too_few_rsi) / firm_count * 100, 2)
print(f'{rsi_pct}% of firms have fewer than 20 RSI buy signals.')

bb_pct = round(len(too_few_bb) / firm_count * 100, 2)
print(f'{bb_pct}% of firms have fewer than 20 BB buy signals.')

macd_pct = round(len(too_few_macd) / firm_count * 100, 2)
print(f'{macd_pct}% of firms have fewer than 20 MACD buy signals.')

Because none of the firms have at least 20 RSI buy signals, we will exclude that technical indicator from all future analyses.  

# Create AWS s3 Bucket

We created an AWS S3 bucket to receive the data. 

In [None]:
# Name of the S3 bucket that will hold the generated plots.
bucket = "line-candle-ohlc-plot"

# Project helper from source/aws.py.
# NOTE(review): behaviour when the bucket already exists is not visible here - confirm.
aws.create_bucket(bucket)

# Create Plots

We randomly sampled 20 buy signals and 20 non-buy signals per firm for the BB and MACD technical indicators. This resulted in [505 firms X 2 indicators X 40 (20 buy signals + 20 non-buy signals) = 40400] sampled signals. For each signal in our sample, we created two plots: an OHLC plot and a line graph. Thus, we made 80800 plots.

We created the plots for the technical indicators and saved them locally. Next, we sent the files to the AWS bucket and deleted the local files.

In [4]:
# macd, buy
macd_buy_sampled_indices = df[df["macd_buy"] == 1.0].pipe(plots.check_signals,'firm','macd_buy',1.0,20).groupby('firm').sample(20, random_state=748574).index
df['macd_buy_sampled'] = [True if x in macd_buy_sampled_indices else False for x in df.index]        

plots.plot_sampled(df, 'macd_buy_sampled', 'macd', 'buy', 26, 'plots/', 'close', 'line')
plots.build_h2o_del_dir("plots/", 1, "data/macd_buy_line.parquet.gzip", True)

plots.plot_sampled(df, 'macd_buy_sampled', 'macd', 'buy', 26, 'plots/', 'close', 'candle')
plots.build_h2o_del_dir("plots/", 1, "data/macd_buy_candle.parquet.gzip", True)


ImportError: Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [None]:
# macd, no buy
macd_nobuy_sampled_indices = df[df["macd_buy"] == 0.0].pipe(plots.check_signals,'firm','macd_buy',0.0,20).groupby('firm').sample(20, random_state=748574).index
df['macd_nobuy_sampled'] = [True if x in macd_nobuy_sampled_indices else False for x in df.index]

plots.plot_sampled(df, 'macd_nobuy_sampled', 'macd','nobuy',  26, 'plots/', 'close', 'line')
plots.build_h2o_del_dir("plots/", 0, "data/macd_nobuy_line.parquet.gzip", True)

plots.plot_sampled(df, 'macd_nobuy_sampled', 'macd', 'nobuy', 26, 'plots/','close', 'candle')
plots.build_h2o_del_dir("plots/", 0, "data/macd_nobuy_line.parquet.gzip", True)


In [None]:

# bb, buy
# Restrict to rows where a Bollinger Band buy signal fired, pass them through
# plots.check_signals (project helper; presumably vets the per-firm signal
# count against 20 - confirm), then draw 20 rows per firm with a fixed seed.
bb_buy_sampled_indices = (
    df[df["bb_buy"] == 1.0]
    .pipe(plots.check_signals, 'firm', 'bb_buy', 1.0, 20)
    .groupby('firm')
    .sample(20, random_state=9224279)
    .index
)
# Vectorised membership test: True for rows whose index label was sampled.
# Replaces a Python-level `x in index` check repeated for every row of df.
df['bb_buy_sampled'] = df.index.isin(bb_buy_sampled_indices)

# Line plots of the close price for the sampled rows.
plots.plot_sampled(df, 'bb_buy_sampled', 'bb', 'buy', 20, 'plots/', 'close', 'line')
plots.build_h2o_del_dir("plots/", 1, "data/bb_buy_line.parquet.gzip", True)

# Same sampled rows again, this time as candle plots.
plots.plot_sampled(df, 'bb_buy_sampled', 'bb', 'buy', 20, 'plots/', 'close', 'candle')
plots.build_h2o_del_dir("plots/", 1, "data/bb_buy_candle.parquet.gzip", True)


In [None]:
# bb, no buy
# Restrict to rows without a Bollinger Band buy signal, pass them through
# plots.check_signals (project helper; presumably vets the per-firm row
# count against 20 - confirm), then draw 20 rows per firm with a fixed seed.
bb_nobuy_sampled_indices = (
    df[df["bb_buy"] == 0.0]
    .pipe(plots.check_signals, 'firm', 'bb_buy', 0.0, 20)
    .groupby('firm')
    .sample(20, random_state=9224279)
    .index
)
# Vectorised membership test: True for rows whose index label was sampled.
# Replaces a Python-level `x in index` check repeated for every row of df.
df['bb_nobuy_sampled'] = df.index.isin(bb_nobuy_sampled_indices)

# Line plots of the close price for the sampled rows.
plots.plot_sampled(df, 'bb_nobuy_sampled', 'bb', 'nobuy', 20, 'plots/', 'close', 'line')
plots.build_h2o_del_dir("plots/", 0, "data/bb_nobuy_line.parquet.gzip", True)

# Same sampled rows again, this time as candle plots.
plots.plot_sampled(df, 'bb_nobuy_sampled', 'bb', 'nobuy', 20, 'plots/', 'close', 'candle')
plots.build_h2o_del_dir("plots/", 0, "data/bb_nobuy_candle.parquet.gzip", True)
