In [1]:
import datetime as dt
import pandas as pd
import requests
import re
import time
import calendar as cal
from io import StringIO

class Fetcher:
    """Download historical price, dividend and split data for one ticker
    from the Yahoo Finance CSV download endpoint.

    Constructing an instance performs one HTTP request (see ``init``) to
    obtain the cookie/crumb pair that the download endpoint requires.

    Parameters
    ----------
    ticker : str
        Symbol to query; it is upper-cased before use.
    start : sequence of int
        Positional arguments for ``datetime.datetime`` (e.g. ``[year, month,
        day]``). Converted to an epoch timestamp with ``calendar.timegm``,
        i.e. the fields are treated as UTC.
    end : sequence of int, optional
        Same format as ``start``. When omitted, the current time is used.
    interval : str
        Sampling interval; validated in ``getData`` against
        ``valid_intervals``.
    """
    api_url = "https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%s&period2=%s&interval=%s&events=%s&crumb=%s"
    # Intervals accepted by getData(); any other value raises ValueError there.
    valid_intervals = ("1d", "1wk", "1mo")
    # Raw string so that `\{` reaches the regex engine without relying on an
    # invalid (and deprecated) string escape sequence.
    _crumb_pattern = re.compile(r'.*"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}')

    def __init__(self, ticker, start, end=None, interval="1d"):
        """Store the query parameters and fetch the cookie/crumb pair."""
        self.ticker = ticker.upper()
        self.interval = interval

        # Convert the dates first so malformed input fails before any
        # network traffic is generated.
        self.start = int(cal.timegm(dt.datetime(*start).timetuple()))
        if end is not None:
            self.end = int(cal.timegm(dt.datetime(*end).timetuple()))
        else:
            self.end = int(time.time())

        self.cookie, self.crumb = self.init()

    @classmethod
    def _extract_crumb(cls, content):
        """Return the crumb token embedded in the raw bytes of a history page.

        Each line is decoded as UTF-8 and matched against the CrumbStore
        pattern; if several lines match, the last one wins. Escaped
        ``\\u002F`` sequences in the token are turned back into ``/``.

        Raises
        ------
        ValueError
            If no line contains a crumb.
        """
        crumb = None
        for line in content.splitlines():
            m = cls._crumb_pattern.match(line.decode("utf-8"))
            if m is not None:
                crumb = m.group('crumb').replace(u'\\u002F', '/')
        if crumb is None:
            raise ValueError("Could not find a crumb in the Yahoo Finance response")
        return crumb

    def init(self):
        """Request the ticker's history page and return ``(cookie, crumb)``.

        The cookie is the value of the response's ``B`` cookie (``KeyError``
        if it is absent); the crumb is parsed from the page body.
        """
        url = 'https://finance.yahoo.com/quote/%s/history' % (self.ticker)
        r = requests.get(url)
        cookie = r.cookies['B']
        return cookie, self._extract_crumb(r.content)

    def getData(self, events):
        """Download the CSV for the given ``events`` type and return it as a DataFrame.

        ``events`` is inserted verbatim into the download URL; the wrappers
        in this class pass 'history', 'div' or 'split'.

        Raises
        ------
        ValueError
            If ``self.interval`` is not one of ``valid_intervals``.
        """
        if self.interval not in self.valid_intervals:
            raise ValueError("Incorrect interval: valid intervals are 1d, 1wk, 1mo")

        url = self.api_url % (self.ticker, self.start, self.end, self.interval, events, self.crumb)

        data = requests.get(url, cookies={'B':self.cookie})
        content = StringIO(data.content.decode("utf-8"))
        return pd.read_csv(content, sep=',')

    def getHistorical(self, events='history'):
        """Return a DataFrame of historical data for ``events`` (default: prices)."""
        # Forward the argument instead of silently ignoring it.
        return self.getData(events)

    def getDividends(self):
        """Return a DataFrame of historical dividend data."""
        return self.getData('div')

    def getSplits(self):
        """Return a DataFrame of historical split data."""
        return self.getData('split')

    def getDatePrice(self):
        """Return the first and fifth columns of getHistorical() (Date and Close)."""
        # .ix was removed from pandas; .iloc is the positional equivalent.
        return self.getHistorical().iloc[:, [0, 4]]

    def getDateVolume(self):
        """Return the first and seventh columns of getHistorical() (Date and Volume)."""
        return self.getHistorical().iloc[:, [0, 6]]

In [2]:
import datetime as datetime
import numpy as np

# Work with tomorrow's date (today + 1 day) and expose its components as
# integers; `today_date` ends up as the 'YYYY-MM-DD' string of that date.
today_date = datetime.datetime.today() + datetime.timedelta(days=1)
year, month, day = today_date.year, today_date.month, today_date.day
today_date = today_date.date().isoformat()

In [3]:
# Fetch 30 years of SPY history ending at (year, month, day), which an
# earlier cell sets.
LOOKBACK_YEARS = 30
start_year = year - LOOKBACK_YEARS
# Clamp the day to the length of the start month: if the end date is Feb 29
# and the start year is not a leap year, datetime() would raise ValueError.
start_day = min(day, cal.monthrange(start_year, month)[1])

data = Fetcher('SPY', [start_year, month, start_day], [year, month, day])
original_data=data.getHistorical()

# Report how many rows were downloaded and keep a local copy of the data.
print(len(original_data['Close']))
original_data.to_csv('SPY.csv',index=False)

7118


In [4]:
# Preview the first five rows of the downloaded frame.
original_data.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1993-01-29,43.96875,43.96875,43.75,43.9375,25.884184,1003200
1,1993-02-01,43.96875,44.25,43.96875,44.25,26.068277,480500
2,1993-02-02,44.21875,44.375,44.125,44.34375,26.123499,201300
3,1993-02-03,44.40625,44.84375,44.375,44.8125,26.399649,529400
4,1993-02-04,44.96875,45.09375,44.46875,45.0,26.510111,531500
