# Fetch trade data from Polygon API

Fetch market data and historical data from the [Polygon API](https://polygon.io/docs/) on tickers and trades.

In [126]:
import time
import datetime
import logging
import contextlib

import pandas as pd

import config

# Log lines are formatted as "<HH:MM:SS> <message>".
logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%H:%M:%S')
# Root logger, set to INFO so the logging.info() progress messages are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)


# IPython magic (only valid inside IPython/Jupyter): runs the database
# notebook, which is expected to provide the Database class used below.
%run database.ipynb # import Database class

# Database handle used for all reads and writes in this notebook.
db = Database()

## DataFetcher class

`DataFetcher` talks to the Polygon API, fetching information on tickers and trades. Access to the API is granted through [Alpaca](https://alpaca.markets/). For trades, the ticker, time, price, and volume are stored in the database. Additional information exists in quotes and news, both of which are currently not fetched.

In [96]:
class DataFetcher:
    """Fetch market data and historical data from the Polygon API.

    The frequency of requests is capped to avoid going beyond what Polygon
    permits. A request is stalled while the number of requests sent during
    the last 60 seconds is at or above ``MAX_REQUEST_PER_MINUTE``.

    """

    MAX_REQUEST_PER_MINUTE = 200
    # Seconds to sleep between two checks of the request budget.
    STALL_TIME_UPON_MAX_REQUESTS = 3
    # Number of retries granted after the first failed attempt.
    MAX_ATTEMPTS = 5
    # Seconds to wait before retrying a failed request.
    RETRY_DELAY = 5

    def __init__(self):
        # Send times (``time.time()`` values) of the most recent requests.
        self._recent_requests = []

    def count_recent_requests(self):
        """Return how many requests were sent during the last 60 seconds.

        Entries older than 60 seconds are dropped from the internal list as
        a side effect.
        """
        now = time.time()
        self._recent_requests = [
            sent_at for sent_at in self._recent_requests if now - sent_at < 60
        ]
        return len(self._recent_requests)

    def _request(self, url, params=None, attempts_left=None):
        """Send a GET request to the Polygon API and return the parsed JSON.

        Args:
            url: Path appended to ``https://api.polygon.io``.
            params: Optional query parameters. The mapping is copied, so the
                caller's dict is never modified; the API key is added to the
                copy.
            attempts_left: Number of retries allowed after a failed attempt.
                Defaults to ``MAX_ATTEMPTS``.

        Returns:
            The decoded JSON body when the response has status 200 and its
            ``success`` field (if present) is truthy; ``None`` once all
            attempts have failed.
        """
        # The default cannot be written as ``DataFetcher.MAX_ATTEMPTS`` in the
        # signature: the class name is not bound yet while its body executes.
        if attempts_left is None:
            attempts_left = self.MAX_ATTEMPTS

        query = dict(params) if params else {}
        query['apiKey'] = config.api_key

        while True:
            while self.count_recent_requests() >= self.MAX_REQUEST_PER_MINUTE:
                time.sleep(self.STALL_TIME_UPON_MAX_REQUESTS)
                logging.info('Stalled because of too many requests.')

            # Record every attempt (retries included); without this the
            # rate limiter above would never see any request.
            self._recent_requests.append(time.time())
            result = requests.get(f'https://api.polygon.io{url}', params=query)

            if result.status_code == 200:
                payload = result.json()
                if payload.get('success', True):
                    return payload

            if attempts_left <= 0:
                return None

            logging.error(
                f'Could not complete request {url} '
                f'(Error: {result.status_code}, attempts left: {attempts_left})'
            )
            time.sleep(self.RETRY_DELAY)
            attempts_left -= 1

    def get_ticker_details(self, ticker):
        """Return the company details of ``ticker``, or ``None`` on failure."""
        # https://polygon.io/docs/get_v1_meta_symbols__stocksTicker__company_anchor
        url = f'/v1/meta/symbols/{ticker}/company'
        return self._request(url)

    def get_daily_trades(self, ticker, date, start_time=0):
        """Return all trades of ``ticker`` on ``date``.

        Trades are requested in pages of 50000; further pages are requested
        until a page reports fewer results than the page size.

        Args:
            ticker: Ticker symbol.
            date: A ``datetime.date`` (or subclass), or a value that is
                inserted into the request path as is.
            start_time: Value sent as the ``timestamp`` parameter of the
                first page. When it is greater than 0, the first trade of
                the response is skipped.

        Returns:
            A list with the trades of all pages, or ``None`` if any of the
            page requests failed.
        """
        # https://polygon.io/docs/get_v2_ticks_stocks_trades__ticker___date__anchor

        TRADES_PER_REQUEST = 50000

        if isinstance(date, datetime.date):
            date = date.strftime('%Y-%m-%d')

        url = f'/v2/ticks/stocks/trades/{ticker}/{date}'
        trades = []

        # Repeat requests until all daily trades have been fetched.
        while True:
            params = {
                'timestamp': start_time,
                'limit': TRADES_PER_REQUEST
            }
            response = self._request(url, params)
            if response is None:
                # A partial day is not returned: callers cannot tell it apart
                # from a complete one.
                return None

            # Exclude first trade in responses as it was already present in
            # the previous request.
            page = response['results'][int(start_time > 0):]
            trades.extend(page)

            if response['results_count'] < TRADES_PER_REQUEST:
                return trades
            if not page:
                # No new trade to continue from; stop instead of requesting
                # the same page forever.
                return trades

            start_time = page[-1]['t']
        
        
# Fetcher instance used by the download cell below.
api = DataFetcher()

## Fetch the data and store it in the database

In [None]:
# Download parameters: the ticker and the date range to fetch.
ticker = 'MSFT'
date_from = '2015-01-01'
date_to = '2020-12-31'

# Use the ticker details from the database when present; otherwise fetch
# them from the API and store them.
ticker_details = db.get_ticker_details(ticker)
if ticker_details is None:
    fetched_details = api.get_ticker_details(ticker)
    if fetched_details is None:
        # get_ticker_details returns None when the request failed; stop here
        # with a clear message rather than handing None to the database.
        raise RuntimeError(f'Could not fetch ticker details for {ticker}.')
    ticker_details = db.store_ticker_details(fetched_details)

# Only fetch the dates that are not stored yet.
dates_with_trades = db.get_open_dates(ticker_details['exchange'], date_from, date_to)
dates_already_stored = db.get_stored_dates('trades', ticker)
dates_to_fetch = [d for d in dates_with_trades if d not in dates_already_stored]

logging.info(f'Fetching {len(dates_to_fetch)} days of {ticker} trades.')
for date in dates_to_fetch:

    time_before_fetch = time.time()
    trades = api.get_daily_trades(ticker, date)

    if trades is None:
        # The fetch failed; store nothing for this date.
        # NOTE(review): presumably the date is then picked up again on the
        # next run, since it is not stored — confirm against get_stored_dates.
        logging.error(f'{ticker} {date} - could not fetch trades, skipping.')
        continue

    time_before_store = time.time()
    db.store_trades(ticker, date, trades)

    time_to_fetch = int(round(time_before_store - time_before_fetch))
    time_to_store = int(round(time.time() - time_before_store))
    logging.info(f'{ticker} {date} - fetch time: {time_to_fetch}s, store time: {time_to_store}s')

23:30:24 Fetching 1474 days of MSFT trades.
23:30:28 MSFT 2015-01-02 - fetch time: 2s, store time: 2s
23:30:33 MSFT 2015-01-05 - fetch time: 3s, store time: 2s
23:30:38 MSFT 2015-01-06 - fetch time: 3s, store time: 2s
23:30:41 MSFT 2015-01-07 - fetch time: 2s, store time: 2s
23:30:45 MSFT 2015-01-08 - fetch time: 2s, store time: 2s
23:30:48 MSFT 2015-01-09 - fetch time: 2s, store time: 1s
23:30:50 MSFT 2015-01-12 - fetch time: 1s, store time: 1s
23:30:55 MSFT 2015-01-13 - fetch time: 2s, store time: 2s
23:30:58 MSFT 2015-01-14 - fetch time: 2s, store time: 2s
23:31:02 MSFT 2015-01-15 - fetch time: 2s, store time: 2s
23:31:05 MSFT 2015-01-16 - fetch time: 2s, store time: 2s
23:31:09 MSFT 2015-01-20 - fetch time: 2s, store time: 2s
23:31:13 MSFT 2015-01-21 - fetch time: 2s, store time: 2s
23:31:17 MSFT 2015-01-22 - fetch time: 2s, store time: 2s
23:31:20 MSFT 2015-01-23 - fetch time: 1s, store time: 1s
23:31:24 MSFT 2015-01-26 - fetch time: 2s, store time: 2s
23:31:38 MSFT 2015-01-27 - f