In [46]:
import datetime as dt
import logging
import logging as logger
import os
from multiprocessing import Pool, cpu_count
from string import digits

import numpy as np
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
# from constants import *

# Install a default handler on the root logger (no-op if one already exists)
# and lower the root level to NOTSET so that no record is filtered by level.
# A second basicConfig() call would do nothing once a handler is installed,
# so the level is set explicitly on the root logger instead.
logging.basicConfig()
logging.root.setLevel(logging.NOTSET)
# Notebook-wide logger used by the cells below. This rebinds the name
# `logger`, which the import cell had aliased to the logging module itself.
logger = logging.getLogger("ExtractData")


In [3]:
# Alpha Vantage rate limits: 5 API requests per minute; 500 API requests per day.
# SECURITY: an API key used to be hardcoded in this cell. Treat that key as
# leaked and revoke/rotate it. The key is now read from the environment; only
# when it is missing is the user prompted for it (the input is not echoed and
# is never written into the notebook source).
if not os.environ.get("ALPHAVANTAGE_API_KEY"):
    from getpass import getpass
    os.environ["ALPHAVANTAGE_API_KEY"] = getpass("Alpha Vantage API key: ")

In [93]:
# # Constants
# default_days_daily = 90
# default_days_intra = 14

In [199]:
class Stock(object):
    '''
    Extract stock tickers and historical prices.

    Parameters
    ----------
    tickers_list : iterable of str, optional
        Tickers to fetch. Copied on construction; defaults to an empty list.
    stock_index : str, optional
        'SP500' or 'NASDAQ'; used by get_tickers_index() to look up tickers.
    price_type : str
        'daily' or 'intraday'; selects which API call is made.
    ts : object
        Client exposing get_daily(ticker, outputsize=...) and
        get_intraday(ticker, outputsize=...), each returning a
        (prices, meta_data) pair.
    after_hours : bool
        When False (default), intraday rows outside 09:30-16:00 are dropped.
    start_date : optional
        Earliest timestamp to keep. Anything pd.Timestamp accepts; when
        falsy, a default look-back window is used.
    '''

    # Look-back windows, in calendar days, used when no start_date is given.
    DEFAULT_DAYS_DAILY = 90
    DEFAULT_DAYS_INTRA = 14

    _SP500_URL = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    _NASDAQ_URL = 'https://en.wikipedia.org/wiki/NASDAQ-100#Components'

    def __init__(self, tickers_list=None, stock_index=None,
                 price_type=None, ts=None, after_hours=False,
                 start_date=None):
        self.tickers_df = None
        # A fresh list per instance: a mutable default argument would be
        # shared (and mutated) across every Stock created without tickers.
        self.tickers_list = list(tickers_list) if tickers_list is not None else []
        self.stock_index = stock_index
        self.price_type = price_type
        self.ts = ts
        self.prices_df = None
        self.after_hours = after_hours
        self.start_date = start_date

    def _remove_digits(self, input_str):
        '''Return input_str with every ASCII digit removed.'''
        remove_digits = str.maketrans('', '', digits)
        return input_str.translate(remove_digits)

    def get_tickers_index(self):
        '''
        Get the list of all stocks in a given index.

        Populates self.tickers_df (with a 'Ticker' column) and
        self.tickers_list. Falls back to 'SP500' when no index was given.

        Raises
        ------
        ValueError
            If stock_index is not 'SP500'/'NASDAQ', or if the scraped table
            has fewer rows than expected for that index.

        TODO: Change this to a regular ETL that saves any changes to file system
              and read from there for stability.
        '''
        if not self.stock_index:
            self.stock_index = 'SP500'
            logger.warning('"stock_index" not specified, using default of "SP500".')

        if self.stock_index == 'SP500':
            logger.info('Getting stock tickers for SP500 from Wiki .....')
            payload = pd.read_html(self._SP500_URL, header=0)
            self.tickers_df = payload[0]
            # Row-count sanity check on the scraped table.
            if len(self.tickers_df.index) < 450:
                raise ValueError('Check wikipedia data source for SP 500 at '
                                 + self._SP500_URL)
            self.tickers_df = self.tickers_df.head(2)  # TODO: remove this
            # Normalise the column name so both indices expose 'Ticker'.
            self.tickers_df = self.tickers_df.rename({'Symbol': 'Ticker'}, axis=1)
            self.tickers_list = list(self.tickers_df['Ticker'])
            logger.info('Fetched stock tickers from SP500.')

        elif self.stock_index == 'NASDAQ':
            logger.info('Getting stock tickers for NASDAQ from Wiki .....')
            payload = pd.read_html(self._NASDAQ_URL, header=0)
            # NOTE(review): the table position (3) is tied to the current page
            # layout; the row-count check below is the only guard.
            self.tickers_df = payload[3]
            if len(self.tickers_df.index) < 90:
                raise ValueError('Check wikipedia data source for NASDAQ at '
                                 + self._NASDAQ_URL)
            self.tickers_df = self.tickers_df.head(2)  # TODO: remove this
            self.tickers_list = list(self.tickers_df['Ticker'])
            logger.info('Fetched stock tickers from NASDAQ.')

        else:
            raise ValueError('stock_index value {} not defined'.format(self.stock_index))

    def get_list_stock_prices(self):
        '''
        Fetch stock price for list of tickers.

        Runs get_individual_stock_price() for every ticker in a worker pool
        and stores the concatenated result in self.prices_df.

        Raises
        ------
        KeyError
            If self.tickers_list is empty.
        '''
        if not self.tickers_list:
            raise KeyError('tickers_list not provided, either pass as argument or '
                           'call get_tickers_index() method with index name to fetch tickers.')
        # Always keep at least one worker (cpu_count() - 1 is 0 on one core),
        # and let the context manager release the pool's processes.
        n_workers = max(1, cpu_count() - 1)
        with Pool(n_workers) as pool:
            prices_df_list = pool.map(self.get_individual_stock_price, self.tickers_list)
        self.prices_df = pd.concat(prices_df_list)

    def get_individual_stock_price(self, stock_ticker):
        '''
        Fetch stock price using the Alphavantage API.

        Returns a DataFrame indexed by ('ticker', 'ts') containing only rows
        at or after the resolved start date. For intraday data, rows outside
        09:30-16:00 are dropped unless after_hours is set.

        Raises
        ------
        ValueError
            If price_type is neither 'daily' nor 'intraday'.
        '''
        if self.price_type == 'intraday':
            default_days = self.DEFAULT_DAYS_INTRA
            fetch = self.ts.get_intraday
        elif self.price_type == 'daily':
            default_days = self.DEFAULT_DAYS_DAILY
            fetch = self.ts.get_daily
        else:
            raise ValueError('"price_type" must be one of "daily" or "intraday"')

        start = self._resolve_start_date(stock_ticker, default_days)
        price, meta_data = fetch(stock_ticker, outputsize='full')

        # The payload is keyed by timestamp; transpose so timestamps are rows.
        price_df = pd.DataFrame(price).transpose()
        price_df.index = pd.to_datetime(price_df.index)
        price_df = price_df[price_df.index >= start]

        if self.price_type == 'intraday' and not self.after_hours:
            logger.info('%s: Truncating pre-market and after-hours data.', stock_ticker)
            times = price_df.index.time
            price_df = price_df[(times >= dt.time(9, 30)) & (times <= dt.time(16, 0))]

        return self._format_prices(price_df, stock_ticker)

    def _resolve_start_date(self, stock_ticker, default_days):
        '''Normalise self.start_date to a pd.Timestamp (defaulting if unset) and return it.'''
        if not self.start_date:
            self.start_date = pd.Timestamp(dt.date.today() - dt.timedelta(days=default_days))
            logger.info('%s: "start_date" not specified, using default of %s days, '
                        'with start date of %s.',
                        stock_ticker, default_days, self.start_date.isoformat())
        elif isinstance(self.start_date, pd.Timestamp):
            logger.info('%s: Using previously defined "start_date" of %s.',
                        stock_ticker, self.start_date)
        else:
            self.start_date = pd.Timestamp(self.start_date)
        return self.start_date

    def _format_prices(self, price_df, stock_ticker):
        '''Clean column labels and index the frame by (ticker, ts).'''
        # Drop digits, then the leftover '. ' (a label like '1. open' becomes 'open').
        price_df.columns = [self._remove_digits(col).replace('. ', '')
                            for col in price_df.columns]
        return (price_df.assign(ticker=stock_ticker)
                        .reset_index()
                        .rename({'index': 'ts'}, axis=1)
                        .set_index(['ticker', 'ts']))

    def __getstate__(self):
        '''Pickle support: worker pools cannot be pickled, so never ship one.'''
        self_dict = self.__dict__.copy()
        # pop() rather than del: the instance may have no 'pool' attribute.
        self_dict.pop('pool', None)
        return self_dict

    def __setstate__(self, state):
        self.__dict__.update(state)

### MAIN

In [66]:
# No key is passed here, so the client has to pick it up on its own —
# presumably from the ALPHAVANTAGE_API_KEY environment variable set in an
# earlier cell (confirm against the alpha_vantage documentation).
ts = TimeSeries()

In [46]:
# Intraday extractor for the NASDAQ index; start_date and after_hours are
# left at their constructor defaults (None and False).
s = Stock(price_type='intraday', ts=ts, stock_index='NASDAQ')

In [47]:
# Scrape the index constituents into s.tickers_list, then display the list.
s.get_tickers_index()
s.tickers_list

['ATVI', 'ADBE']

In [48]:
# Fetch prices for every ticker in s.tickers_list using a multiprocessing
# Pool; the concatenated result is stored on s.prices_df.
s.get_list_stock_prices()

In [49]:
# Show only a bounded preview instead of rendering the whole frame.
s.prices_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
ticker,ts,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ATVI,2020-07-31 16:00:00,82.5000,82.7500,82.3900,82.6200,926913
ATVI,2020-07-31 15:45:00,82.6200,82.6800,82.4800,82.4900,290119
ATVI,2020-07-31 15:30:00,82.6850,82.7300,82.5300,82.6150,312216
ATVI,2020-07-31 15:15:00,82.4700,82.7500,82.4350,82.6800,207790
ATVI,2020-07-31 15:00:00,82.2800,82.4900,82.2300,82.4600,131777
ATVI,2020-07-31 14:45:00,82.1000,82.3000,82.0150,82.2900,128427
ATVI,2020-07-31 14:30:00,81.9100,82.1200,81.8600,82.1200,110064
ATVI,2020-07-31 14:15:00,82.2500,82.3400,81.8700,81.9300,133366
ATVI,2020-07-31 14:00:00,82.2800,82.4300,82.2200,82.2450,113280
ATVI,2020-07-31 13:45:00,82.4200,82.4400,82.1800,82.2850,107389


### Scratch

In [36]:
# NOTE(review): no cell visible in this notebook assigns a top-level
# `meta_data`, so this cell depends on leftover kernel state and is
# expected to fail under Restart & Run All.
meta_data

{'1. Information': 'Intraday (15min) open, high, low, close prices and volume',
 '2. Symbol': 'GOOGL',
 '3. Last Refreshed': '2020-07-31 19:30:00',
 '4. Interval': '15min',
 '5. Output Size': 'Compact',
 '6. Time Zone': 'US/Eastern'}

In [15]:
# Sanity check: inspect the type of the 'ts' level of the prices index.
type(s.prices_df.index.get_level_values('ts'))

pandas.core.indexes.datetimes.DatetimeIndex

In [47]:
# x = s.prices_df[s.prices_df.index.get_level_values('ticker')=='MMM']

Sample new message

In [9]:
# Debug aid: print the file path of the alpha_vantage package the kernel
# actually imports. NOTE(review): scratch-only import outside the import cell.
import alpha_vantage
print(alpha_vantage.__file__)

/anaconda3/lib/python3.7/site-packages/alpha_vantage/__init__.py
