In [1]:
import alpaca_trade_api as tradeapi
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.arima_model import ARIMA
from pandas.plotting import autocorrelation_plot
from sklearn import metrics

%matplotlib inline

In [2]:
# Import the live NASDAQ stock list from nasdaq.com
nasdaq_url = 'https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nasdaq&render=download'
nasdaq_df = pd.read_csv(nasdaq_url)

print('\nThere are {} stocks in the NASDAQ'.format(len(nasdaq_df)))
nasdaq_df.head()


There are 3537 stocks in the NASDAQ


Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Unnamed: 8
0,TXG,"10x Genomics, Inc.",67.85,$6.52B,2019.0,Capital Goods,Biotechnology: Laboratory Analytical Instruments,https://old.nasdaq.com/symbol/txg,
1,YI,"111, Inc.",5.2,$424.63M,2018.0,Health Care,Medical/Nursing Services,https://old.nasdaq.com/symbol/yi,
2,PIH,"1347 Property Insurance Holdings, Inc.",4.62,$27.81M,2014.0,Finance,Property-Casualty Insurers,https://old.nasdaq.com/symbol/pih,
3,PIHPP,"1347 Property Insurance Holdings, Inc.",25.6981,$17.99M,,Finance,Property-Casualty Insurers,https://old.nasdaq.com/symbol/pihpp,
4,TURN,180 Degree Capital Corp.,2.165,$67.38M,,Finance,Finance/Investors Services,https://old.nasdaq.com/symbol/turn,


In [3]:
# Import the live NYSE stock list from nasdaq.com
nyse_url = 'https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nyse&render=download'
nyse_df = pd.read_csv(nyse_url)

print('There are {} stocks in the NYSE'.format(len(nyse_df)))
nyse_df.head()

There are 3133 stocks in the NYSE


Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Unnamed: 8
0,DDD,3D Systems Corporation,8.99,$1.06B,,Technology,Computer Software: Prepackaged Software,https://old.nasdaq.com/symbol/ddd,
1,MMM,3M Company,171.88,$98.84B,,Health Care,Medical/Dental Instruments,https://old.nasdaq.com/symbol/mmm,
2,WBAI,500.com Limited,8.9,$382.48M,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://old.nasdaq.com/symbol/wbai,
3,WUBA,58.com Inc.,51.55,$7.66B,2013.0,Technology,"Computer Software: Programming, Data Processing",https://old.nasdaq.com/symbol/wuba,
4,EGHT,8x8 Inc,20.69,$2.07B,,Technology,EDP Services,https://old.nasdaq.com/symbol/eght,


In [4]:
# Concat both the NYSE and NASDAQ stocks 
col = ['Symbol', 'Name', 'MarketCap', 'Sector', 'industry']
df = pd.concat([nasdaq_df[col], nyse_df[col]])
df.shape

(6670, 5)

In [26]:
api = ['PK5ZLGMKZ4LUIPPTQREM', 'mPGGruw0J5TzflIiWgndeBNh6wVUmFVYnjdH847r', 
       'https://paper-api.alpaca.markets']

api = tradeapi.REST(api[0], api[1], api[2])

# Daily OHLCV dataframe for Ferrari
race_daily = api.polygon.historic_agg('day', 'RACE', limit=250).df

race_daily.head()

Unnamed: 0_level_0,open,high,low,close,volume
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-11-19 00:00:00-05:00,110.5,110.5397,108.64,108.82,358289
2018-11-20 00:00:00-05:00,105.76,107.04,104.97,105.24,630640
2018-11-21 00:00:00-05:00,105.4,107.79,105.38,106.73,294077
2018-11-23 00:00:00-05:00,106.71,107.66,106.66,106.66,180802
2018-11-26 00:00:00-05:00,107.99,109.39,107.79,109.22,301700


In [27]:
def add_features(dataframe):
    """Add additional features to the OHLCV dataframes
    Parameters: (dataframe) to add features to
    Return 20, 50, 200 moving averages; bollinger bands; 
    and average volume"""

    df = dataframe
    # Create features for 20, 50, 200 moving averages
    df['20MA'] = df['close'].rolling(window=20).mean()
    df['50MA'] = df['close'].rolling(window=50).mean()
    df['200MA'] = df['close'].rolling(window=200).mean()

    # Create features for upper and lower bollinger bands
    df['20_day_std'] = df['close'].rolling(window=20).std()
    df['upper_band'] = df['20MA'] + (df['20_day_std']*2)
    df['lower_band'] = df['20MA'] - (df['20_day_std']*2)
    df['width_%'] = ((df['upper_band']-
                              df['lower_band'])/df['20MA']) * 100

    # Create feature for average volume
    df['avg_volume'] = df['volume'].rolling(window=20).mean()
    
    return df

In [28]:
race_daily = add_features(race_daily)
if race_daily['20MA'][-1] > race_daily['50MA'][-1] > race_daily['200MA'][-1]:
    if race_daily['width_%'][-1] < 6:
        print('RACE')
    else:
        print('nope large %')
else:
    print('nope Bearish trend')

nope large %


In [29]:
race_daily.tail()

Unnamed: 0_level_0,open,high,low,close,volume,20MA,50MA,200MA,20_day_std,upper_band,lower_band,width_%,avg_volume
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-11-11 00:00:00-05:00,163.8,164.74,163.54,164.4,321036,159.6315,155.7118,147.41075,4.802755,169.23701,150.02599,12.034605,430303.75
2019-11-12 00:00:00-05:00,164.71,165.31,163.59,165.02,260996,160.0775,155.852,147.6831,4.87123,169.81996,150.33504,12.172179,433389.55
2019-11-13 00:00:00-05:00,163.99,164.78,163.39,163.86,323041,160.277,155.9294,147.93895,4.943454,170.163908,150.390092,12.337276,428652.95
2019-11-14 00:00:00-05:00,163.95,165.89,163.62,165.47,310300,160.67,156.0012,148.1348,5.031911,170.733822,150.606178,12.527319,425799.7
2019-11-15 00:00:00-05:00,166.91,167.5,166.61,167.0,223370,161.2275,156.079,148.33415,5.08715,171.4018,151.0532,12.621047,420893.35


## Alpaca
Very bad servers constant error 500. Not very reliable

In [23]:
def stock_scan(df, api, timeframe='day'):
    
    """
    Parameters: 
    dataframe: Pandas dataframe of stock tickers
    api: api key info to scrape live data
    timeframe: daily, hourly, 5 min, 1 min etc. 
    
    Returns a generated list of tickers that meet the following:

    Bullish Trend: 20 MA > 50 MA > 200 MA
    Bollinger Band Squeeze: Width of bollinger bands < 6 percent
    """
    
    # list of stocks
    stock_list = []
    
    # Log into API
    api = tradeapi.REST(api[0], api[1], api[2])
    
    # Generate timeframe OHLCV datasets for each ticker
    for x in df.iloc[:,0]:
        tmp_df = api.polygon.historic_agg(timeframe, x, limit=250).df
        
        # Add additional features
        tmp_df = add_features(tmp_df)

        # Append to stock list if meets parameters
        if tmp_df['20MA'][-1] > tmp_df['50MA'][-1] > tmp_df['200MA'][-1]:
            if tmp_df['width_%'][-1] < 6:
                stock_list.append(x)
                print(x)
            else:
                print('nope large %')
        else:
            print('nope Bearish trend')

    return stock_list

In [24]:
api = ['PK5ZLGMKZ4LUIPPTQREM', 'mPGGruw0J5TzflIiWgndeBNh6wVUmFVYnjdH847r', 
       'https://paper-api.alpaca.markets']

watch_list = stock_scan(df, api)
print(watch_list)

nope Bearish trend
nope Bearish trend
nope Bearish trend


HTTPError: 500 Server Error: Internal Server Error for url: https://api.polygon.io/v1/historic/agg/day/PIHPP?limit=250&apiKey=PK5ZLGMKZ4LUIPPTQREM

## Alphavantage
Issues after 5-8 stocks, either with the generate features function or server timeouts

In [35]:
def stock_scan(df):
    
    """
    Parameters: 
    dataframe: Pandas dataframe of stock tickers
    
    Returns a generated list of tickers that meet the following:

    Bullish Trend: 20 MA > 50 MA > 200 MA
    Bollinger Band Squeeze: Width of bollinger bands < 6 percent
    """
    
    # list of stocks
    stock_list = []
    
    # Generate timeframe OHLCV datasets for each ticker
    for x in df.iloc[:,0]:
        url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={}&datatype=csv&apikey=B6RB0B3JW7VDR8B4'.format(x)
        tmp_df = pd.read_csv(url)
        
        # Add additional features
        tmp_df = add_features(tmp_df)

        # Append to stock list if meets parameters
        if tmp_df['20MA'].iloc[-1] > tmp_df['50MA'].iloc[-1] > tmp_df['200MA'].iloc[-1]:
            if tmp_df['width_%'].iloc[-1] < 6:
                stock_list.append(x)
                print(x)
            else:
                print('nope large % {}'.format(x))
        else:
            print('nope Bearish trend {}'.format(x))

    return stock_list

In [36]:
watch_list = stock_scan(df)
print(watch_list)

nope Bearish trend TXG
nope Bearish trend YI
nope Bearish trend PIH
nope Bearish trend PIHPP
nope Bearish trend TURN


KeyError: 'close'

In [38]:
df.head(10)

Unnamed: 0,Symbol,Name,MarketCap,Sector,industry
0,TXG,"10x Genomics, Inc.",$6.52B,Capital Goods,Biotechnology: Laboratory Analytical Instruments
1,YI,"111, Inc.",$424.63M,Health Care,Medical/Nursing Services
2,PIH,"1347 Property Insurance Holdings, Inc.",$27.81M,Finance,Property-Casualty Insurers
3,PIHPP,"1347 Property Insurance Holdings, Inc.",$17.99M,Finance,Property-Casualty Insurers
4,TURN,180 Degree Capital Corp.,$67.38M,Finance,Finance/Investors Services
5,FLWS,"1-800 FLOWERS.COM, Inc.",$826.76M,Consumer Services,Other Specialty Stores
6,BCOW,"1895 Bancorp of Wisconsin, Inc.",$48.33M,Finance,Banks
7,FCCY,1st Constitution Bancorp (NJ),$168.18M,Finance,Savings Institutions
8,SRCE,1st Source Corporation,$1.31B,Finance,Major Banks
9,VNET,"21Vianet Group, Inc.",$907.01M,Technology,"Computer Software: Programming, Data Processing"


## Tiingo
I believe this is the best option of the three. Reliable servers. Am able to download entire maret data as much as I need for 10USD a month. Onl issue is the Unbound Local Error in the function below. I do not know how to fix this error.

In [39]:
api = '73d3a6aeca232c1374f0c610b84ad43e5700afd1'

In [47]:
import os
import pandas_datareader as pdr
x = 'RACE'
df1 = pdr.get_data_tiingo(x, api_key=api, )
df1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,adjClose,adjHigh,adjLow,adjOpen,adjVolume,close,divCash,high,low,open,splitFactor,volume
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
RACE,2015-10-21 00:00:00+00:00,53.083762,58.845763,53.083762,57.909558,22498775,55.0,0.0,60.97,55.0,60.0,1.0,22498775
RACE,2015-10-22 00:00:00+00:00,54.772791,56.172272,53.759373,55.081642,4545111,56.75,0.0,58.2,55.7,57.07,1.0,4545111
RACE,2015-10-23 00:00:00+00:00,54.415682,55.97924,54.309514,55.757253,1967749,56.38,0.0,58.0,56.27,57.77,1.0,1967749
RACE,2015-10-26 00:00:00+00:00,53.103065,55.014081,52.634963,55.014081,1466277,55.02,0.0,57.0,54.535,57.0,1.0,1466277
RACE,2015-10-27 00:00:00+00:00,51.973829,53.07411,47.640263,52.89073,5949211,53.85,0.0,54.99,49.36,54.8,1.0,5949211


In [54]:
def stock_scan(df, api):
    
    """
    Parameters: 
    dataframe: Pandas dataframe of stock tickers
    
    Returns a generated list of tickers that meet the following:

    Bullish Trend: 20 MA > 50 MA > 200 MA
    Bollinger Band Squeeze: Width of bollinger bands < 6 percent
    """
    
    # list of stocks
    stock_list = []
    
    # Generate timeframe OHLCV datasets for each ticker
    for x in df.iloc[:,0]:
        temp_df = pdr.get_data_tiingo(x, api_key=api)
        
        # Add additional features
        tmp_df = add_features(tmp_df)

        # Append to stock list if meets parameters
        if tmp_df['20MA'].iloc[-1] > tmp_df['50MA'].iloc[-1] > tmp_df['200MA'].iloc[-1]:
            if tmp_df['width_%'].iloc[-1] < 6:
                stock_list.append(x)
                print(x)
            else:
                print('nope large % {}'.format(x))
        else:
            print('nope Bearish trend {}'.format(x))

    return stock_list

In [55]:
watch_list = stock_scan(df, api)
print(watch_list)

UnboundLocalError: local variable 'tmp_df' referenced before assignment

In [53]:
x = ['RACE', 'AMZN', 'AAPL']
for y in x:
    temp_df = pdr.get_data_tiingo(y, api_key=api)
    print(temp_df.head())

                                   adjClose    adjHigh     adjLow    adjOpen  \
symbol date                                                                    
RACE   2015-10-21 00:00:00+00:00  53.083762  58.845763  53.083762  57.909558   
       2015-10-22 00:00:00+00:00  54.772791  56.172272  53.759373  55.081642   
       2015-10-23 00:00:00+00:00  54.415682  55.979240  54.309514  55.757253   
       2015-10-26 00:00:00+00:00  53.103065  55.014081  52.634963  55.014081   
       2015-10-27 00:00:00+00:00  51.973829  53.074110  47.640263  52.890730   

                                  adjVolume  close  divCash   high     low  \
symbol date                                                                  
RACE   2015-10-21 00:00:00+00:00   22498775  55.00      0.0  60.97  55.000   
       2015-10-22 00:00:00+00:00    4545111  56.75      0.0  58.20  55.700   
       2015-10-23 00:00:00+00:00    1967749  56.38      0.0  58.00  56.270   
       2015-10-26 00:00:00+00:00    1466277  55.0

# Notes  Issues
### 1
The functions within the scan works. The main issue is the server timing out error. The highest number of tickers i could run through was 9 before the error, so it seems like it is a server dropping issue. Below is the documentation for ALPACA

ALPACA = https://github.com/alpacahq/alpaca-trade-api-python

**Solution idea: is there a way for me to reset the function maybe a nested try and except function before line 24 when the for loop starts? 
**
Solution Ideas: I may need to buy a server subscription to pull 7k stock info at once. 

### 2
Tiingo function having issues. Out of the three potential APIs to use, Tiingo is looking like the best / most reliable. Just need to fix the unbound errors