In [None]:
# polygon.io API key, read from the local `config` module rather than written in the notebook
# (presumably config.py is kept out of version control - confirm)
import config
api_key = config.api_key

In [None]:
#lib
import pandas as pd
import matplotlib.pyplot as plt
import requests as req
import datetime
import time
from pandas_market_calendars import get_calendar

In [None]:
#ISSUES (API limits this notebook works around):
#1 length limit - only 2 years of data
#2 call limit - only 5 calls / min
#3 query limit - max 50000 data points per request
#4 historical data - only the last 2 years are available

#symbol: stock ticker symbol
#date_A: from date, as a 'YYYY-MM-DD' string
#date_B: to date, as a 'YYYY-MM-DD' string
#timespan: minute, hour, day, week, month, quarter, year

In [None]:
def api_call(symbol, date_a, date_b, timespan, api_key, multiplier=1, timeout=30):
    """Request aggregate bars for one ticker from the polygon.io v2 aggs endpoint.

    Parameters
    ----------
    symbol : str
        Ticker symbol, placed in the request path.
    date_a, date_b : str
        Start and end of the requested range, placed in the request path.
    timespan : str
        Size unit of each bar (e.g. 'minute', 'day').
    api_key : str
        API key, sent as the ``apiKey`` query parameter.
    multiplier : int, optional
        Number of ``timespan`` units per bar. Defaults to 1, the value that
        was previously fixed in the URL.
    timeout : float, optional
        Seconds to wait on the server before ``requests`` gives up, so a
        stalled connection cannot block the notebook forever.

    Returns
    -------
    The decoded JSON body of the response, returned as-is (HTTP status is
    not inspected here).

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when ``timeout`` elapses.
    ValueError
        If the response body is not valid JSON.
    """
    #request url (path only; the query string is assembled by requests)
    url = f'https://api.polygon.io/v2/aggs/ticker/{symbol}/range/{multiplier}/{timespan}/{date_a}/{date_b}'
    query = {
        'adjusted': 'true',   #default
        'sort': 'asc',        #ascending order (oldest at top)
        'limit': 50000,       #API default is 5000, maximum is 50000
        'apiKey': api_key,
    }
    #requesting
    response = req.get(url, params=query, timeout=timeout)
    return response.json()


def json_to_df(data):
    """Turn an aggregates response payload into a price DataFrame.

    Parameters
    ----------
    data : dict
        Decoded JSON response. Each entry of ``data['results']`` must carry
        the keys 't' (epoch milliseconds), 'o', 'h', 'l', 'c' and 'v'.

    Returns
    -------
    pandas.DataFrame
        Columns 'time', 'open', 'high', 'low', 'close', 'volume', one row per
        entry of ``results`` in the order received. Empty (columns only) when
        the payload has no bars.

    Raises
    ------
    KeyError
        If the payload has no 'results' but does carry an 'error' field, or
        if a result entry lacks one of the expected keys.
    """
    columns = ['time', 'open', 'high', 'low', 'close', 'volume']
    results = data.get('results')

    if results is None and 'error' in data:
        # Same exception type a missing 'results' key always produced, now
        # carrying the API's own explanation instead of just the key name.
        raise KeyError(f"'results' missing from API response: {data['error']}")

    if not results:
        # A range without bars may come back with no 'results' at all
        # (presumably what the API does for e.g. non-trading days - confirm).
        return pd.DataFrame(columns=columns)

    # Build every column in one pass; growing a frame row by row is quadratic
    # and DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame({
        'time': pd.to_datetime([bar['t'] for bar in results], unit='ms'),
        'open': [bar['o'] for bar in results],
        'high': [bar['h'] for bar in results],
        'low': [bar['l'] for bar in results],
        'close': [bar['c'] for bar in results],
        'volume': [bar['v'] for bar in results],
    }, columns=columns)

In [None]:
#Historical Data Limit check
def historicalDataLimit(day_a, day_b, year = 2):
    """Tell whether ``day_a`` lies before the historical-data window.

    day_a : start date as a 'YYYY-MM-DD' string.
    day_b : end date; not used by the check.
    year  : window length in years, each counted as 365 days
            (leap days are not accounted for).

    Returns True when ``day_a`` is strictly earlier than today minus the
    window, False otherwise.
    """
    window = datetime.timedelta(days = 365 * year)
    cutoff = datetime.date.today() - window
    requested_start = datetime.datetime.strptime(day_a, '%Y-%m-%d').date()
    return requested_start < cutoff

In [None]:
def reqSplit(day_a, day_b):
    """Decide whether a date range is long enough to need several requests.

    Both arguments are 'YYYY-MM-DD' strings. Returns True when ``day_b`` is
    35 or more days after ``day_a``, False otherwise.
    """
    date_format = "%Y-%m-%d"
    start = datetime.datetime.strptime(day_a, date_format)
    end = datetime.datetime.strptime(day_b, date_format)
    return end - start >= datetime.timedelta(days=35)

def splitTime(day_a, day_b):
    """Cut the range ``day_a``..``day_b`` into consecutive per-month pieces.

    Parameters
    ----------
    day_a, day_b : str
        First and last day of the range as 'YYYY-MM-DD' strings.

    Returns
    -------
    list of [start, end] pairs
        Each piece ends on a month end that falls inside the range, and the
        next piece starts the day after. The first piece starts at ``day_a``
        and the last one ends at ``day_b`` (both passed through unchanged).
        A range containing no month end comes back as ``[[day_a, day_b]]``.
    """
    # Offset object instead of the 'M' string alias, which newer pandas deprecates.
    month_ends = pd.date_range(day_a, day_b, freq=pd.offsets.MonthEnd())

    arranged = []
    start = day_a
    for month_end in month_ends:
        arranged.append([start, month_end.strftime('%Y-%m-%d')])
        start = (month_end + pd.DateOffset(days=1)).strftime('%Y-%m-%d')

    # Add the trailing partial month only if something is left after the last
    # month end. When day_b is itself a month end, the remainder would start
    # one day after day_b, i.e. an inverted range.
    if len(month_ends) == 0 or month_ends[-1] < pd.Timestamp(day_b):
        arranged.append([start, day_b])
    return arranged

In [None]:
def pricedownload(symbol, date_A, date_B, timespan, api_key, maxCall = 5):
    """Download price bars for ``symbol`` between ``date_A`` and ``date_B``.

    Parameters
    ----------
    symbol : str
        Ticker symbol.
    date_A, date_B : str
        Start and end of the range as 'YYYY-MM-DD' strings.
    timespan : str
        Bar size passed through to ``api_call``.
    api_key : str
        API key passed through to ``api_call``.
    maxCall : int, optional
        Allowed API calls per minute; the pause after each chunked call is
        ``60 // maxCall`` seconds.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'time', 'open', 'high', 'low', 'close', 'volume'. ``None``
        (after printing a message) when ``historicalDataLimit`` rejects the
        start date.
    """
    if historicalDataLimit(date_A, date_B):
        print('requested date range before the API limit.')
        return None

    if not reqSplit(date_A, date_B):
        # Short range: a single request covers it.
        return json_to_df(api_call(symbol, date_A, date_B, timespan, api_key))

    stopTime = 60 // maxCall
    frames = []
    for chunk_start, chunk_end in splitTime(date_A, date_B):
        print(f'Downloading {symbol} stock price data from {chunk_start} to {chunk_end}.')
        frame = json_to_df(api_call(symbol, chunk_start, chunk_end, timespan, api_key))
        # Empty chunks add no rows; leaving them out keeps concat's dtypes clean.
        if not frame.empty:
            frames.append(frame)
        print(f'API call completed. Waiting {stopTime} seconds for next call.')
        # Pause after every call, the last one included, so back-to-back
        # downloads also stay inside the per-minute call limit.
        time.sleep(stopTime)

    if not frames:
        return pd.DataFrame(columns=['time', 'open', 'high', 'low', 'close', 'volume'])
    # One concat at the end: DataFrame.append is gone in pandas 2.x and
    # re-copying the growing frame on every iteration was quadratic.
    return pd.concat(frames, ignore_index=True)

In [None]:
#Download
# Read the clock once so both ends of the range come from the same calendar day.
today = datetime.date.today()
day1 = (today - datetime.timedelta(days=365*2)).strftime('%Y-%m-%d')
day2 = today.strftime('%Y-%m-%d')
timespan = 'minute'
symbol = 'VOO'

print(f'From: {day1}')
print(f'To: {day2}')
print()

voo_data = pricedownload(symbol, day1, day2, timespan, api_key)
# pricedownload returns None when it rejects the date range; only write a
# file when there is a frame to write.
if voo_data is not None:
    voo_data.to_csv(r'voo_price.csv')

In [None]:
# Show the download result as this cell's output
voo_data