In [None]:
# default_exp priceHist

# priceHist

> Parse historical stock price data files.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import datetime
import numpy as np
import os
import pandas as pd

from secscan import utils

# Exchange codes used as the default `exchs` argument of getCombDayMap.
USExchanges = ['AMEX','NASDAQ','NYSE','OTCBB']

This module uses historical price data from eoddata.com,
which comes as one CSV file per exchange per day. Sample:

```
Symbol,Date,Open,High,Low,Close,Volume
AACG,08-Jan-2020,1.41,1.58,1.3642,1.5112,90800
AAL,08-Jan-2020,27.1,28.09,27.07,27.84,10497200
...
```


In [None]:
#export

def getHistFStuff(exch,dateStr) :
    """
    Locate the historical price CSV for one exchange on one date.

    Returns the tuple (fName, fDir, fPath):
      - fName is '<exch>_<dateStr>.csv'
      - fDir is a subdirectory of utils.stockPriceRoot named after the exchange
        plus the first four characters following the underscore
        (e.g. 'NASDAQ_2024' for dateStr '20240510')
      - fPath is fName joined onto fDir

    Nothing is checked on disk; the file may or may not exist.
    """
    fName = '_'.join((exch, dateStr)) + '.csv'
    # the directory name is the leading '<exch>_YYYY' portion of the file name
    dirName = fName[:len(exch) + len('_YYYY')]
    fDir = os.path.join(utils.stockPriceRoot, dirName)
    return fName, fDir, os.path.join(fDir, fName)

In [None]:
# test getHistFStuff:
# checks the file name, the <exchange>_<year> directory, and the full path

t = getHistFStuff('NASDAQ','20240510')
expectedDir = os.path.join(utils.stockPriceRoot,'NASDAQ_2024')
assert t[0]=='NASDAQ_20240510.csv'
assert t[1]==expectedDir
assert t[2]==os.path.join(expectedDir,t[0])

In [None]:
#export

def getDayMap(dateStr, exch, symCol='Symbol', priceCol='Close') :
    """
    Load the price file for one exchange on one day as a dict:
    <stock symbol> -> <value taken from priceCol>

    symCol and priceCol name the CSV columns holding the symbol and the value.
    Returns an empty dict if the file for that exchange and date does not exist.
    Rows whose symbol is not a string are reported with a printed message
    and left out of the result.
    """
    csvPath = getHistFStuff(exch,dateStr)[2]
    if os.path.exists(csvPath) :
        # na_filter=False keeps cell text as-is, so pandas does not turn
        # NA-like strings (for instance a symbol spelled 'NA') into NaN
        table = pd.read_csv(csvPath, na_filter=False)
        symToVal = {}
        for symbol,value in zip(table[symCol],table[priceCol]) :
            if not isinstance(symbol,str) :
                print(dateStr,exch,'non-string symbol',repr(symbol))
                continue
            symToVal[symbol] = value
        return symToVal
    return {}

In [None]:
# test getDayMap:
# the price root is pointed at the test data directory before loading

utils.setStockPriceRoot('testdata')

m = getDayMap('20200108','AMEX')
assert len(m)==2024
assert min(m)=='AAAU' and m['AAAU']==15.56
assert max(m)=='ZSL' and m['ZSL']==26.04

m = getDayMap('20200108','NASDAQ')
assert len(m)==3233
assert min(m)=='AACG' and m['AACG']==1.5112
assert max(m)=='ZYXI' and m['ZYXI']==8.47

In [None]:
#export

@utils.delegates(getDayMap)
def getCombDayMap(dateStr, exchs=USExchanges, checkDups=True, **kwargs) :
    """
    Merge the day maps of several exchanges for one date into a single dict.

    Exchanges are processed in the order given by exchs; when a symbol appears
    on more than one exchange, the value from the later exchange wins.
    If checkDups is true, a message listing up to ten such symbols (sorted)
    is printed for each exchange that repeats an already-seen symbol.
    Extra keyword arguments are passed on to getDayMap.
    """
    merged = {}
    for exchange in exchs :
        exchMap = getDayMap(dateStr,exchange,**kwargs)
        if checkDups :
            repeated = sorted(sym for sym in exchMap if sym in merged)
            if repeated :
                print('duplicated keys:',exchange,dateStr,repeated[:10])
        merged.update(exchMap)
    return merged

In [None]:
# test getCombDayMap:
# the combined map should hold as many symbols as the two exchanges have separately

amexCount, nasdaqCount = 2024, 3233
m = getCombDayMap('20200108', exchs=['AMEX','NASDAQ'])
assert len(m) == amexCount + nasdaqCount

In [None]:
#export

@utils.delegates(getCombDayMap)
def getCombDayMapsForRange(d1, d2, **kwargs) :
    """
    Collect the combined day map for every date that utils.dateStrsBetween
    yields for d1 (inclusive) to d2 (exclusive).
    Returns a dict:dateStr -> {<stock symbol> -> <value>}
    Extra keyword arguments are passed on to getCombDayMap.
    """
    return {dayStr : getCombDayMap(dayStr, **kwargs)
            for dayStr in utils.dateStrsBetween(d1, d2)}

def getDayMapWithLookback(combDayMapsForRange, forD, lookback) :
    """
    Build the day map for forD, filling in symbols missing on that day with
    their most recent value from up to `lookback` days earlier.

    combDayMapsForRange is a dict:dateStr -> {<stock symbol> -> <value>} and
    must contain an entry for every date in the lookback window.
    With lookback == 0 the stored map for forD is returned as-is (not a copy);
    otherwise a new dict is built.
    """
    if lookback == 0 :
        return combDayMapsForRange[forD]
    windowStart = utils.toDate(forD) + datetime.timedelta(-lookback)
    windowEnd = utils.toDate(forD) + datetime.timedelta(1)
    # days are merged in the order dateStrsBetween yields them, so each
    # later day's values overwrite those merged before it
    merged = {}
    for dayStr in utils.dateStrsBetween(windowStart, windowEnd) :
        merged.update(combDayMapsForRange[dayStr])
    return merged

@utils.delegates(getCombDayMap)
def getCombDayMapsForRangeWithLookback(d1, d2, lookback=7, **kwargs) :
    """
    Get the combined day maps from d1 (inclusive) to d2 (exclusive), where each
    day's map is filled in from the preceding `lookback` days
    (see getDayMapWithLookback).
    Returns a dict:dateStr -> {<stock symbol> -> <value>}
    Weekend dates are left out of the result.
    Extra keyword arguments are passed on to getCombDayMap.
    """
    # load starting `lookback` days before d1 so the first dates have history to draw on
    loadStart = utils.toDate(d1) + datetime.timedelta(-lookback)
    rawDayMaps = getCombDayMapsForRange(loadStart, d2, **kwargs)
    return {dayStr : getDayMapWithLookback(rawDayMaps, dayStr, lookback)
            for dayStr in utils.dateStrsBetween(d1, d2)
            if not utils.isWeekend(dayStr)}

@utils.delegates(getCombDayMapsForRangeWithLookback)
def getCleanedPriceData(d1, d2, minPrice=None, **kwargs) :
    """
    Build a dense price matrix for the dates from d1 (inclusive) to d2 (exclusive).

    Only symbols that have a value on every date in the range (after lookback
    filling) are kept. If minPrice is given, symbols whose value is below
    minPrice on any date are dropped as well.
    Extra keyword arguments are passed on to getCombDayMapsForRangeWithLookback.

    Returns the tuple (syms, dateList, priceMat):
      - syms is the sorted list of symbols kept
      - dateList is the sorted list of date strings covered
      - priceMat is a numpy array of shape (len(syms), len(dateList)) with
        priceMat[i,j] holding the value of syms[i] on dateList[j]

    If the range produces no dates at all, returns ([], [], <0x0 array>).
    Prints a one-line summary of the symbol and date counts.
    """
    dayMaps = getCombDayMapsForRangeWithLookback(d1, d2, **kwargs)
    dateList = sorted(dayMaps.keys())
    if dayMaps :
        commonSyms = set.intersection(*(set(dayMap.keys())
                                        for dayMap in dayMaps.values()))
    else :
        # set.intersection needs at least one set to work on; with no dates
        # in the range there are simply no symbols to report
        commonSyms = set()
    if minPrice is not None :
        # every common symbol is present in every day map, so only those need
        # checking; a symbol is dropped if any of its values is below minPrice
        commonSyms = {sym for sym in commonSyms
                      if not any(dayMap[sym] < minPrice
                                 for dayMap in dayMaps.values())}
    symsPresentAllDays = sorted(commonSyms)
    priceMat = np.zeros((len(symsPresentAllDays),len(dateList)))
    for dateNo,d in enumerate(dateList) :
        dayMap = dayMaps[d]
        for symNo,sym in enumerate(symsPresentAllDays) :
            priceMat[symNo,dateNo] = dayMap[sym]
    print(len(symsPresentAllDays),'symbols',len(dateList),'dates')
    return (symsPresentAllDays, dateList, priceMat)

@utils.delegates(getCleanedPriceData)
def getForwardReturns(d1, d2, d3, weekdaysForward=20, **kwargs) :
    """
    Compute forward return ratios for each start date from d1 (inclusive)
    to d2 (exclusive).

    Prices are loaded from d1 up to d3 (exclusive) via getCleanedPriceData.
    For each start date, the return is the price `weekdaysForward` columns
    later in the price matrix divided by the price on the start date
    (a ratio, not a percentage change). d3 must therefore be late enough to
    supply weekdaysForward further dates after the last start date; otherwise
    the column lookup runs past the end of the price matrix.
    Start dates are selected by comparing the date strings against d2.
    Extra keyword arguments are passed on to getCleanedPriceData.

    Returns the tuple (syms, dateStrs, returnMat) where returnMat has shape
    (len(syms), len(dateStrs)).
    """
    syms, allDates, priceMat = getCleanedPriceData(d1,d3,**kwargs)
    startDates = [d for d in allDates if d<d2]
    nStarts = len(startDates)
    returnMat = np.zeros((len(syms), nStarts))
    for startCol in range(nStarts) :
        endCol = startCol + weekdaysForward
        returnMat[:,startCol] = priceMat[:,endCol]/priceMat[:,startCol]
    return syms,startDates,returnMat

def getClosestReturn(sym, syms, dateStrs, returnMat) :
    """
    Rank all symbols by how closely their return history matches that of sym.

    syms is the list of symbols labelling the rows of returnMat. The distance
    used is the sum of squared differences between a symbol's row and sym's row,
    so sym itself has distance 0.
    dateStrs is not used; it is accepted so that the tuple returned by
    getForwardReturns can be unpacked straight into the arguments.

    Returns a list of (symbol, distance) pairs sorted by increasing distance.
    Raises ValueError if sym is not in syms.
    """
    targetRow = returnMat[syms.index(sym)]
    deltas = returnMat - targetRow
    sqDists = (deltas * deltas).sum(axis=1)
    return sorted(zip(syms, sqDists), key=lambda pair : pair[1])

In [None]:
# m = getForwardReturns('20230101','20240101','20240201',minPrice=1.0,priceCol='Open',lookback=4)

duplicated keys: NYSE 20230306 ['AMB.W']
duplicated keys: NYSE 20230307 ['AMB.W']
duplicated keys: NYSE 20230308 ['AMB.W']
duplicated keys: NYSE 20230309 ['AMB.W']
duplicated keys: NYSE 20230310 ['AMB.W']
duplicated keys: NYSE 20230313 ['AMB.W']
duplicated keys: NYSE 20230314 ['AMB.W']
duplicated keys: NYSE 20230315 ['AMB.W']
duplicated keys: NYSE 20230316 ['AMB.W']
duplicated keys: NYSE 20230317 ['AMB.W']
duplicated keys: NYSE 20230320 ['AMB.W']
duplicated keys: NYSE 20230321 ['AMB.W']
duplicated keys: NYSE 20230322 ['AMB.W']
duplicated keys: OTCBB 20230323 ['CETY']
duplicated keys: OTCBB 20230614 ['LTRY', 'LTRYW']
8541 symbols 283 dates


In [None]:
# getClosestReturn('MNDO',*m)[:20]

[('MNDO', 0.0),
 ('FPE', 0.3939572159416754),
 ('FPEI', 0.4037418391021024),
 ('LOAN', 0.4058725785512839),
 ('CHI', 0.4152784927005114),
 ('IHTA', 0.44170900864517204),
 ('JPC', 0.452376416767534),
 ('EFSCP', 0.45330330487244486),
 ('JPI', 0.4547505807153403),
 ('HIO', 0.46793309830180163),
 ('SPN-B', 0.4700152766777501),
 ('PRE-J', 0.47212628763457065),
 ('ACR-C', 0.47461934816855417),
 ('HIPS', 0.47686817879050203),
 ('JHPI', 0.47973534129229467),
 ('FINS', 0.4845376741716778),
 ('PGZ', 0.4862788449951719),
 ('PQDI', 0.4975648213837466),
 ('OAK-A', 0.4991444262088841),
 ('GPM-A', 0.5011538793366852)]

In [None]:
# getClosestReturn('TAIT',*m)[:20]

[('TAIT', 0.0),
 ('NNI', 0.7368105665953435),
 ('SPN-B', 0.771899859504044),
 ('ICVT', 0.81698375759366),
 ('COD-B', 0.8352495377240741),
 ('CLSM', 0.8366884639203913),
 ('CET', 0.8384678465364781),
 ('CWB', 0.8487942439973535),
 ('ET-D', 0.8519271383152587),
 ('TLGPY', 0.8523217702721007),
 ('CSF', 0.8566815134696606),
 ('MGMT', 0.858181003231267),
 ('TDSC', 0.8644684103862601),
 ('WEDXF', 0.8715553578868909),
 ('WBIF', 0.8718234798733043),
 ('NCV-A', 0.8744353593018128),
 ('STT-D', 0.8744449134382206),
 ('QRMI', 0.8770904594690824),
 ('ET-C', 0.8804238997785947),
 ('XRMI', 0.8821812102633625)]

In [None]:
# getClosestReturn('RELL',*m)[:20]

[('RELL', 0.0),
 ('CASS', 2.3632986384989505),
 ('POWI', 2.8332377785971987),
 ('GWRS', 2.8404981620316088),
 ('ANGO', 2.8471128912348176),
 ('OARK', 2.9107047683916987),
 ('RRBI', 2.9644257369823506),
 ('RICK', 2.9732589455373617),
 ('GTY', 2.982627077477133),
 ('RGCO', 2.9997206622373067),
 ('TMP', 3.0179666671694),
 ('KLIC', 3.0608739237335296),
 ('SEAS', 3.0614640639877395),
 ('AMBA', 3.0634315959649765),
 ('CWE.A', 3.0799862214034004),
 ('CNRG', 3.136485251345724),
 ('CWEN', 3.1397049276381717),
 ('KEYS', 3.1410272634367935),
 ('MYE', 3.1592785252589612),
 ('DENN', 3.1643088355572795)]

In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()