In [1]:
import pandas as pd
import numpy as np
import os
from string import digits
import datetime as dt
from alpha_vantage.timeseries import TimeSeries
from multiprocessing import Pool, cpu_count

In [2]:
# TODO: Add this to envy
# SECURITY: both keys below are committed in plain text; rotate them and load them
# from the environment (or a secrets store) instead of the notebook.
# quandl.ApiConfig.api_key = "s4sS8qy8LcvoW3Ls3dRT"
# setdefault keeps the literal only as a fallback, so a key that is already
# exported in the environment is not overwritten.
os.environ.setdefault("ALPHAVANTAGE_API_KEY", "100HED30C7HC9JMI") #alphavantage

In [3]:
def remove_digits(input_str):
    '''
    Return a copy of input_str with every ASCII digit (0-9) removed.
    '''
    # Translation table mapping each digit's code point to None (i.e. delete it).
    digit_stripper = {ord(ch): None for ch in digits}
    return input_str.translate(digit_stripper)

In [4]:
class Stock(object):
    '''
    Fetch and hold prices for a list of stock tickers.

    Tickers are either passed in directly or scraped for a named index with
    get_tickers_index(); prices are then fetched per ticker through ``ts`` and
    combined into a single DataFrame by get_list_stock_prices().

    Attributes:
        tickers_df: table the tickers were read from (None until
            get_tickers_index() has run).
        tickers_list: ticker symbols to fetch prices for.
        stock_index: name of the index to look tickers up for; only 'SP500'
            is handled.
        price_type: kind of price series to fetch; only 'intraday' is handled.
        ts: client exposing get_intraday(ticker) -> (prices, meta_data), e.g.
            an alpha_vantage TimeSeries instance.
        prices_df: combined prices indexed by (ticker, ts) (None until
            get_list_stock_prices() has run).
        after_hours: when False, intraday rows outside 09:30-16:00 are dropped.
    '''
    def __init__(self, tickers_list=None, stock_index='SP500',
                 price_type=None, ts=None, after_hours=False):
        self.tickers_df = None
        # A literal [] default would be a single list shared by every instance.
        self.tickers_list = [] if tickers_list is None else tickers_list
        self.stock_index = stock_index
        self.price_type = price_type
        self.ts = ts
        self.prices_df = None
        self.after_hours = after_hours


    def get_tickers_index(self):
        '''
        Get the list of all stocks in a given index.

        Populates tickers_df and tickers_list.

        Raises:
            ValueError: if stock_index is not 'SP500', or if the scraped table
                has fewer than 450 rows.
        '''
        if self.stock_index != 'SP500':
            # format() also copes with a non-string stock_index such as None.
            raise ValueError('stock_index value {} not defined'.format(self.stock_index))

        source_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
        # There are 2 tables on the Wikipedia page, the first contains tickers
        payload = pd.read_html(source_url, header=0)
        self.tickers_df = payload[0]
        # Sanity check on the scrape: far fewer rows than expected means the
        # wrong table was picked up, so fail loudly instead of carrying on.
        if len(self.tickers_df.index) < 450:
            raise ValueError('Check wikipedia data source for SP 500 at ' + source_url)
        self.tickers_df = self.tickers_df.head(2) #TODO: remove this
        self.tickers_list = list(self.tickers_df['Symbol'])


    def get_list_stock_prices(self):
        '''
        Fetch stock price for list of tickers.

        Runs get_individual_stock_price() for every ticker in a process pool
        and stores the concatenated result, indexed by (ticker, ts), in
        prices_df.

        Raises:
            KeyError: if tickers_list is empty.
        '''
        if not self.tickers_list:
            raise KeyError('tickers_list not provided, either pass as argument or \
                call get_tickers_index() method with index name to fetch tickers.')
        # Leave one core free, but never ask for zero workers (single-core hosts).
        n_workers = max(1, cpu_count() - 1)
        self.pool = Pool(n_workers)
        try:
            prices_df_list = self.pool.map(self.get_individual_stock_price,
                                           self.tickers_list)
        finally:
            # Always release the worker processes, even if a fetch failed.
            self.pool.close()
            self.pool.join()
        self.prices_df = pd.concat(prices_df_list) \
            .reset_index() \
            .rename({'index':'ts'}, axis=1) \
            .set_index(['ticker', 'ts'])


    def get_individual_stock_price(self, stock_ticker):
        '''
        Fetch stock price using the Alphavantage API.

        Args:
            stock_ticker: ticker symbol passed to ts.get_intraday().

        Returns:
            DataFrame with a datetime index, the cleaned price columns and a
            'ticker' column holding stock_ticker.

        Raises:
            ValueError: if price_type is not 'intraday'.
        '''
        if self.price_type != 'intraday':
            # Previously this fell through and returned None, which only
            # surfaced later as an unrelated pd.concat failure.
            raise ValueError('price_type value {} not defined'.format(self.price_type))

        price, _ = self.ts.get_intraday(stock_ticker)
        # presumably `price` is keyed by timestamp, hence the transpose so
        # that timestamps become the row index - TODO confirm against the API
        price_df = pd.DataFrame(price).transpose()
        price_df.index = pd.to_datetime(price_df.index)
        if not self.after_hours:
            # Keep only rows between 09:30 and 16:00 inclusive.
            price_df = price_df[(price_df.index.time>=dt.time(9,30)) &
                                (price_df.index.time<=dt.time(16,0))]
        # Strip digits and then the '. ' separator from the column names
        # (presumably '1. open' -> 'open').
        price_df.columns = [remove_digits(col) for col in price_df.columns]
        price_df.columns = [col.replace('. ', '') for col in price_df.columns]
        price_df['ticker'] = stock_ticker
        return price_df


    def __getstate__(self):
        '''
        Return the instance state for pickling, without the process pool.

        The bound method handed to the pool pickles ``self``; the pool itself
        is left out of that state.
        '''
        self_dict = self.__dict__.copy()
        # pop() rather than del: the pool only exists once
        # get_list_stock_prices() has been called.
        self_dict.pop('pool', None)
        return self_dict

    def __setstate__(self, state):
        '''
        Restore the instance attributes from a pickled state dict.
        '''
        self.__dict__.update(state)

### MAIN

In [5]:
# Alpha Vantage time-series client. No key is passed here; presumably it is read
# from the ALPHAVANTAGE_API_KEY environment variable set earlier - TODO confirm.
ts = TimeSeries()

In [6]:
# Stock helper configured for intraday prices of the S&P 500 constituents,
# fetched through the `ts` client.
s = Stock(price_type='intraday', ts=ts, stock_index='SP500')

In [7]:
# Scrape the index constituents from Wikipedia, then display the resulting tickers.
# Only two are kept for now because of the temporary head(2) in get_tickers_index().
s.get_tickers_index()
s.tickers_list

['MMM', 'ABT']

In [8]:
# Fetch prices for every ticker in s.tickers_list using a pool of worker
# processes; the combined result is stored on s.prices_df.
s.get_list_stock_prices()

In [9]:
# Combined prices for all fetched tickers, indexed by (ticker, ts).
s.prices_df

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume
ticker,ts,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MMM,2020-07-31 16:00:00,149.8600,150.6300,149.7700,150.5300,392033
MMM,2020-07-31 15:45:00,149.8800,149.9900,149.6200,149.8400,132858
MMM,2020-07-31 15:30:00,149.5200,150.0000,149.3800,149.8550,108522
MMM,2020-07-31 15:15:00,149.4600,149.7300,149.4201,149.5300,77637
MMM,2020-07-31 15:00:00,149.6200,149.6200,149.2600,149.4400,64927
MMM,2020-07-31 14:45:00,149.1200,149.6500,149.0800,149.6000,55627
MMM,2020-07-31 14:30:00,148.9400,149.1000,148.9300,149.0800,55737
MMM,2020-07-31 14:15:00,149.0600,149.3700,148.9350,148.9500,67152
MMM,2020-07-31 14:00:00,149.1100,149.2800,149.0300,149.1000,95505
MMM,2020-07-31 13:45:00,149.4300,149.5600,149.0941,149.1300,49801


### Scratch

In [36]:
# NOTE(review): meta_data is not assigned in any visible top-level cell (it is only
# a local inside Stock.get_individual_stock_price), so on a fresh kernel this would
# raise NameError unless a cell not shown here defines it. The output below is
# presumably left over from an earlier, since-removed cell.
meta_data

{'1. Information': 'Intraday (15min) open, high, low, close prices and volume',
 '2. Symbol': 'GOOGL',
 '3. Last Refreshed': '2020-07-31 19:30:00',
 '4. Interval': '15min',
 '5. Output Size': 'Compact',
 '6. Time Zone': 'US/Eastern'}

In [15]:
# Sanity check: the 'ts' level of the prices index is a DatetimeIndex, i.e. the
# timestamps were parsed rather than left as strings.
type(s.prices_df.index.get_level_values('ts'))

pandas.core.indexes.datetimes.DatetimeIndex

In [47]:
# x = s.prices_df[s.prices_df.index.get_level_values('ticker')=='MMM']