In [40]:
import sys
import requests

import pandas as pd

from tqdm import tqdm_notebook as tqdm
from concurrent.futures import ThreadPoolExecutor

In [41]:
BASEURL = 'https://query1.finance.yahoo.com'

In [42]:
notnone = lambda x: x is not None

In [43]:
stocks = pd.read_csv('../datasets/stocks.csv.gz', index_col=0)
stocks = stocks[~stocks['Ticker'].str.contains('\.')]

stocks.head()

Unnamed: 0,Ticker,Name,Exchange,Category Name,Country
0,OEDV,"Osage Exploration and Development, Inc.",PNK,,USA
1,AAPL,Apple Inc.,NMS,Electronic Equipment,USA
2,BAC,Bank of America Corporation,NYQ,Money Center Banks,USA
3,AMZN,"Amazon.com, Inc.",NMS,Catalog & Mail Order Houses,USA
4,T,AT&T Inc.,NYQ,Telecom Services - Domestic,USA


In [44]:
tickers = stocks['Ticker']

tickers.head()

0    OEDV
1    AAPL
2     BAC
3    AMZN
4       T
Name: Ticker, dtype: object

In [45]:
def download_info(ticker):
    url = f"{BASEURL}/v7/finance/quote?symbols={ticker}"
    
    try:
        res = requests.get(url=url).json()
        quote = res.get('quoteResponse', {})
        result = quote.get('result', [None])[0]
        
        return result
    except:
        return None

In [63]:
with ThreadPoolExecutor() as executor:
    gen = executor.map(download_info, tickers)
    track = list(tqdm(gen, total=len(tickers)))
    infos = pd.DataFrame(filter(notnone, track))

infos.head()

Unnamed: 0,language,region,quoteType,quoteSourceName,exchangeDataDelayedBy,priceHint,exchange,fullExchangeName,tradeable,sourceInterval,...,priceToBook,ytdReturn,trailingThreeMonthReturns,trailingThreeMonthNavReturns,underlyingSymbol,openInterest,expireDate,expireIsoDate,headSymbolAsString,contractSymbol
0,en-US,US,MUTUALFUND,Delayed Quote,0.0,2.0,YHD,YHD,False,15.0,...,,,,,,,,,,
1,en-US,US,EQUITY,Delayed Quote,0.0,2.0,NMS,NasdaqGS,True,15.0,...,10.005637,,,,,,,,,
2,en-US,US,EQUITY,Nasdaq Real Time Price,0.0,2.0,NYQ,NYSE,True,15.0,...,1.017305,,,,,,,,,
3,en-US,US,EQUITY,Nasdaq Real Time Price,0.0,2.0,NMS,NasdaqGS,True,15.0,...,16.87937,,,,,,,,,
4,en-US,US,EQUITY,Nasdaq Real Time Price,0.0,2.0,NYQ,NYSE,True,15.0,...,1.395282,,,,,,,,,


In [62]:
for x in filter(notnone, track):
    try:
        x.keys()
    except:
        print(x)

In [64]:
extras = pd.merge(stocks, infos, how='right', left_on='Ticker', right_on='symbol')

extras.head()

Unnamed: 0,Ticker,Name,Exchange,Category Name,Country,language,region,quoteType,quoteSourceName,exchangeDataDelayedBy,...,priceToBook,ytdReturn,trailingThreeMonthReturns,trailingThreeMonthNavReturns,underlyingSymbol,openInterest,expireDate,expireIsoDate,headSymbolAsString,contractSymbol
0,OEDV,"Osage Exploration and Development, Inc.",PNK,,USA,en-US,US,MUTUALFUND,Delayed Quote,0.0,...,,,,,,,,,,
1,AAPL,Apple Inc.,NMS,Electronic Equipment,USA,en-US,US,EQUITY,Delayed Quote,0.0,...,10.005637,,,,,,,,,
2,BAC,Bank of America Corporation,NYQ,Money Center Banks,USA,en-US,US,EQUITY,Nasdaq Real Time Price,0.0,...,1.017305,,,,,,,,,
3,AMZN,"Amazon.com, Inc.",NMS,Catalog & Mail Order Houses,USA,en-US,US,EQUITY,Nasdaq Real Time Price,0.0,...,16.87937,,,,,,,,,
4,T,AT&T Inc.,NYQ,Telecom Services - Domestic,USA,en-US,US,EQUITY,Nasdaq Real Time Price,0.0,...,1.395282,,,,,,,,,


In [67]:
extras.to_csv('../datasets/extras.csv.gz')

In [154]:
def download_data(ticker, range='5y', interval='1d', events='div,splits'):
    url = f"{BASEURL}/v8/finance/chart/{ticker}"
    
    params = dict(
        range=range,
        events=events,
        interval=interval,
    )
    
    try:
        res = requests.get(url=url, params=params).json()
        chart = res.get('chart', {}).get('result', {})[0]
        quotes = chart['indicators']['quote'][0]
        dividends = chart['events']['dividends']
        
        indics = pd.DataFrame(quotes).assign(date=chart['timestamp'], ticker=ticker)
        events = pd.DataFrame(dividends.values()).assign(ticker=ticker)
        
        return indics, events
    except:
        return None

indics, events = download_data('aapl')

indics.head()

Unnamed: 0,close,low,high,open,volume,date,ticker
0,100.57,99.949997,101.089996,100.440002,52699000,1408541400,aapl
1,100.580002,100.110001,100.940002,100.57,33478000,1408627800,aapl
2,101.32,100.190002,101.470001,100.290001,44184000,1408714200,aapl
3,101.540001,101.279999,102.169998,101.790001,40270000,1408973400,aapl
4,100.889999,100.860001,101.5,101.419998,33152000,1409059800,aapl


In [156]:
with ThreadPoolExecutor() as executor:
    gen = executor.map(download_data, tickers)
    track = list(tqdm(gen, total=len(tickers)))
    indics, events = zip(*filter(notnone, track))

len(indics), len(events)

(6780, 6780)

In [158]:
indics = pd.concat(indics)
print(indics.shape)
indics.head()

Unnamed: 0,close,date,high,low,open,ticker,volume
0,100.57,1408541400,101.089996,99.949997,100.440002,AAPL,52699000.0
1,100.580002,1408627800,100.940002,100.110001,100.57,AAPL,33478000.0
2,101.32,1408714200,101.470001,100.190002,100.290001,AAPL,44184000.0
3,101.540001,1408973400,102.169998,101.279999,101.790001,AAPL,40270000.0
4,100.889999,1409059800,101.5,100.860001,101.419998,AAPL,33152000.0


In [159]:
indics.to_csv('../datasets/indics.csv.gz')

In [160]:
events = pd.concat(events)
print(events.shape)
events.head()

(103713, 3)


Unnamed: 0,amount,date,ticker
0,0.57,1478179800,AAPL
1,0.63,1494509400,AAPL
2,0.73,1541687400,AAPL
3,0.63,1510324200,AAPL
4,0.73,1526045400,AAPL


In [161]:
events.to_csv('../datasets/events.csv.gz')