In [3]:
smallcap_tickers = pd.read_csv("small_cap_volume.csv")
smallcap_tickers.head()

Unnamed: 0,Symbol,Name,Last,Change,%Chg,High,Low,Volume,Time
0,HTZ,Hertz Global Holdings Inc,2.84,-0.23,-7.49%,3.04,2.8,25626701.0,5/22/20
1,SAVE,Spirit Airlines Inc,10.17,0.1,0.99%,10.47,9.86,17173000.0,5/22/20
2,BBBY,Bed Bath & Beyond,6.7,-0.1,-1.47%,7.13,6.57,12453100.0,5/22/20
3,KDMN,Kadmon Holdings Llc,4.66,0.11,2.42%,4.78,4.5,11744300.0,5/22/20
4,DVAX,Dynavax Technologies,5.13,-0.53,-9.36%,5.71,5.0,11450400.0,5/22/20


In [3]:
#!/usr/pythonq
import pandas as pd
import os
import pandas_datareader.data as web
import pandas_datareader as pdr
from datetime import datetime
import matplotlib
import yfinance as yf
from datetime import timedelta
import numpy as np
yf.pdr_override()
ntickers = pd.read_csv("nasdaq_ticker_info.txt", sep="\t")
top_tickers = pd.read_csv("nasdaq_volume_leaders.csv")
midcap_tickers = pd.read_csv("mid_cap_volume_leaders.csv")
largecap_tickers = pd.read_csv("large_cap_volume_leaders.csv")
smallcap_tickers = pd.read_csv("small_cap_volume.csv")
# allsymbols = [i for i in set([x for x in largecap_tickers.Symbol] + [y for y in top_tickers.Symbol])]
allsymbols = [i for i in smallcap_tickers.Symbol]

def get_normalize_close(ticker, start, end):
    df = yf.download(ticker, auto_adjust = True, start = start, end = end)
    df[ticker] = df['Close']/df['Close'][0]
    return df[[ticker]]

def get_return(mdf, symbol, start, end):
    mdf = yf.download(symbol, start=start, end=end, progress=False)
    df = pd.DataFrame(mdf['Close'])
    dfout = df.loc[(df.index >= pd.to_datetime(start)) & (df.index <= pd.to_datetime(end))].dropna()
    returnval = (dfout['Close'][-1]-dfout['Close'][0])/dfout['Close'][0]
    return returnval

def add_days(date, ndays=5):
    return str(pd.to_datetime(date) + pd.tseries.offsets.BusinessDay(ndays)).split(" ")[0]


def get_combined_return(allreturns):
    return np.mean([np.mean(x) for x in [[i for i in j[1] if str(i) != 'nan'] for j in allreturns]])

def open_close_time(teststart):
    openstart, closestart = pd.to_datetime(str(teststart).split(" ")[0] + ' 13:30:00'), pd.to_datetime(str(teststart).split(" ")[0] + ' 20:00:00')
    if pd.to_datetime(teststart) > pd.to_datetime(openstart) and pd.to_datetime(teststart) < pd.to_datetime(closestart):
        return "midday"
    elif pd.to_datetime(teststart) > pd.to_datetime(closestart):
        return 'aftermarket'
    else:
        return 'premarket'

def next_date(date):
    return pd.to_datetime(date) + pd.tseries.offsets.BusinessDay(1)

def get_adj_date(release_type, date):
    if release_type == 'midday' or release_type == 'aftermarket':
        return next_date(date)
    else:
        return date
    
def get_up_ratings(symbol, start, end):
    dft = pd.DataFrame(index=pd.bdate_range(start, end))
    ticker = yf.Ticker(symbol)
    try:
        openstart = str(start) + ' 13:30:00'
        closestart = str(start) + ' 20:00:00'
        recs = ticker.recommendations
        recs.loc[(recs.index >= start) & (recs.index <= end)]
        recs['date'] = recs.copy().index.astype('str')
        recs['timing'] = recs.apply(lambda x: open_close_time(x['date']), axis=1)
        recs['adj_date'] = recs.apply(lambda x: get_adj_date(x['timing'], x['date']), axis=1)
        return [str(i).split(' ')[0] for i in recs[recs['Action'] == "up"].adj_date]
    except:
        return None

def get_ratings_returns(mdf, symbol, start, end, ndays):  
    up_days = get_up_ratings(symbol, start, end)
    returns = [get_return(mdf, symbol, day, add_days(day, ndays)) for day in up_days]
    print("stock: " + symbol + " return: " + str(np.mean(returns)) + " std: " + str(np.std(returns)))
    return returns
    
def get_returns_recs(mdf, symbols, startdate, enddate, ndays):
    allreturns = []
    for symbol in symbols:
        try:
            print("running for: " + str(symbol))
            returns = get_ratings_returns(mdf, symbol, startdate, enddate, ndays)
            allreturns.append([symbol, returns])
        except:
            print("failed for: " + str(symbol))
    return allreturns

# startdate = sys.argsv[1]
# enddate = sys.argsv[2]
# ndays = sys.argsv[3]


In [None]:
rated_stocks = []
startdate = '2019-01-01'
enddate = '2020-01-30'
symbols = allsymbols[:100]
for symbol in symbols:
    if get_up_ratings(symbol, startdate, enddate):
        rated_stocks.append(symbol)
mdf = yf.download(symbols, start=startdate, end=enddate)['Close']

symbols = rated_stocks

for ndays in [20, 30, 50, 100]:
    starttime = datetime.now()
    allreturns = get_returns_recs(mdf, symbols, startdate, enddate, ndays)
    combined_return = get_combined_return(allreturns)
    print("combined return for " + str(ndays) + " day hold: " + str(combined_return))
    print("all return length: " + str(len(allreturns)))
    outdf = pd.DataFrame.from_dict({'symbol': [i[0] for i in allreturns], 'returns': [i[1] for i in allreturns]}, orient='columns')
    outdf.to_csv("out_allreturns_smallcap_" + str(ndays) + "days_first100_2019-01-01_2_2019-04-03_may25.txt", sep="\t")
    endtime = datetime.now()
    print("this took: " + str(endtime-starttime) + " for " + str(ndays) + " days!")
    
rated_stocks = []
startdate = '2019-01-01'
enddate = '2019-04-30'
symbols = allsymbols[101:200]
for symbol in symbols:
    if get_up_ratings(symbol, startdate, enddate):
        rated_stocks.append(symbol)
mdf = yf.download(symbols, start=startdate, end=enddate)['Close']

symbols = rated_stocks

for ndays in [20, 30, 50, 100]:
    starttime = datetime.now()
    allreturns = get_returns_recs(mdf, symbols, startdate, enddate, ndays)
    combined_return = get_combined_return(allreturns)
    print("combined return for " + str(ndays) + " day hold: " + str(combined_return))
    print("all return length: " + str(len(allreturns)))
    outdf = pd.DataFrame.from_dict({'symbol': [i[0] for i in allreturns], 'returns': [i[1] for i in allreturns]}, orient='columns')
    outdf.to_csv("out_allreturns_smallcap_" + str(ndays) + "days_100to200stocks_2019-01-01_2_2019-04-03_may25.txt", sep="\t")
    endtime = datetime.now()
    print("this took: " + str(endtime-starttime) + " for " + str(ndays) + " days!")

rated_stocks = []
startdate = '2019-01-01'
enddate = '2019-04-30'
symbols = allsymbols[201:300]
for symbol in symbols:
    if get_up_ratings(symbol, startdate, enddate):
        rated_stocks.append(symbol)
mdf = yf.download(symbols, start=startdate, end=enddate)['Close']

symbols = rated_stocks

for ndays in [20, 30, 50, 100]:
    starttime = datetime.now()
    allreturns = get_returns_recs(mdf, symbols, startdate, enddate, ndays)
    combined_return = get_combined_return(allreturns)
    print("combined return for " + str(ndays) + " day hold: " + str(combined_return))
    print("all return length: " + str(len(allreturns)))
    outdf = pd.DataFrame.from_dict({'symbol': [i[0] for i in allreturns], 'returns': [i[1] for i in allreturns]}, orient='columns')
    outdf.to_csv("out_allreturns_smallcap_" + str(ndays) + "days_200to300stocks_2019-01-01_2_2019-04-03_may25.txt", sep="\t")
    endtime = datetime.now()
    print("this took: " + str(endtime-starttime) + " for " + str(ndays) + " days!")

[*********************100%***********************]  100 of 100 completed

1 Failed download:
- GAN: Data doesn't exist for startDate = 1546329600, endDate = 1580371200
running for: SAVE
stock: SAVE return: -0.00780574436398953 std: 0.07737823909849947
running for: BBBY
stock: BBBY return: 0.017859759455806753 std: 0.12074184663226732
running for: KDMN
stock: KDMN return: -0.014285700905079517 std: 0.0
running for: CLVS
stock: CLVS return: 0.026477298582897702 std: 0.10387414901370717
running for: INSG
stock: INSG return: 0.05434783805858864 std: 0.0
running for: IVR
stock: IVR return: 0.008175353455407399 std: 0.048783478163430455
running for: ATRA
stock: ATRA return: 0.5257693907791129 std: 0.5359314504633725
running for: AR
stock: AR return: -0.027337391955430158 std: 0.09780533557182573
running for: NAT
stock: NAT return: -0.03917630380424263 std: 0.0498250472238621
running for: RWT
stock: RWT return: 0.004058085323037648 std: 0.08005843555260819
running for: HOME

1 Failed download