# Yahoo! Finance Scraper
Extract historical stock data

In [1]:
import csv
from datetime import datetime, timezone

import requests

### Setting up the API call

In [2]:
# base endpoint of the chart API; the query string is supplied separately
# through the `params` argument of the request
url = "https://query1.finance.yahoo.com/v8/finance/chart/"

In [3]:
# query-string parameters sent along with the request
params = {
    "symbol": "F",                      # ticker symbol to look up
    "region": "US",
    "lang": "en-US",
    "includePrePost": "false",          # presumably excludes pre/post-market data -- verify
    "interval": "1d",                   # one data point per day
    "range": "5y",                      # five years of history
    "corsDomain": "finance.yahoo.com",
    ".tsrc": "finance",
}

In [4]:
# HTTP headers sent with the request; the values resemble those of a
# desktop Edge/Chrome browser session
# NOTE(review): advertising "br" may require a brotli decoder to be installed
# for the response to be decodable -- confirm in the target environment
headers = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-US,en;q=0.9",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36 Edg/85.0.564.44",
}

In [5]:
# Request the data from the Yahoo! API.
# The timeout stops the call from blocking indefinitely when the server does
# not answer (requests applies no timeout by default), and raise_for_status()
# raises requests.HTTPError on a 4xx/5xx reply so a failed request is reported
# here instead of as a confusing error when the body is parsed later.
response = requests.get(url, params=params, headers=headers, timeout=30)
response.raise_for_status()

In [6]:
# extract json data: parse the response body as JSON into Python objects
# (the cells below show that the top level is a dict)
json_data = response.json()

### Explore the data dictionary to find the data that you want

In [7]:
# list the top-level keys of the parsed payload
json_data.keys()

dict_keys(['chart'])

In [8]:
# list the keys nested under 'chart'
json_data['chart'].keys()

dict_keys(['result', 'error'])

In [9]:
# 'result' is indexed like a list; list the keys of its first entry
json_data['chart']['result'][0].keys()

dict_keys(['meta', 'timestamp', 'indicators'])

In [10]:
# list the kinds of series available under 'indicators'
json_data['chart']['result'][0]['indicators'].keys()

dict_keys(['quote', 'adjclose'])

In [11]:
# list the fields of the first 'quote' entry
json_data['chart']['result'][0]['indicators']['quote'][0].keys()

dict_keys(['volume', 'high', 'low', 'close', 'open'])

In [13]:
# peek at the first 10 values of the 'high' series
json_data['chart']['result'][0]['indicators']['quote'][0]['high'][:10]

[13.789999961853027,
 14.369999885559082,
 14.760000228881836,
 14.880000114440918,
 14.479999542236328,
 14.430000305175781,
 14.050000190734863,
 13.949999809265137,
 13.699999809265137,
 13.869999885559082]

### Extract the data into a list

In [14]:
# drill down once to the first result entry and its first quote entry instead
# of repeating the full key path for every column
result = json_data['chart']['result'][0]
quote = result['indicators']['quote'][0]

# get quote information (one series per field)
v = quote['volume']
h = quote['high']
l = quote['low']
o = quote['open']
c = quote['close']

# get timestamps and convert to standard date format (YYYY-MM-DD, in UTC).
# datetime.utcfromtimestamp() is deprecated since Python 3.12, so a
# timezone-aware UTC datetime is built instead; the formatted date is the same.
close_dates = [
    datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d')
    for ts in result['timestamp']
]

In [15]:
# consolidate the separate columns into a list of row tuples:
# (date, volume, high, low, open, close)
columns = (close_dates, v, h, l, o, c)
stock_data = list(zip(*columns))

### Save the data to a file (or a database, or wherever you need it)

In [16]:
# preview the first 5 (date, volume, high, low, open, close) records
preview = stock_data[:5]
for record in preview:
    print(record)

('2015-09-14', 26093500, 13.789999961853027, 13.630000114440918, 13.720000267028809, 13.779999732971191)
('2015-09-15', 46666700, 14.369999885559082, 13.789999961853027, 13.800000190734863, 14.3100004196167)
('2015-09-16', 41675000, 14.760000228881836, 14.25, 14.319999694824219, 14.640000343322754)
('2015-09-17', 37709000, 14.880000114440918, 14.460000038146973, 14.609999656677246, 14.600000381469727)
('2015-09-18', 40712200, 14.479999542236328, 14.210000038146973, 14.4399995803833, 14.279999732971191)


In [17]:
# save data to csv file: one header row followed by one row per record
header = ['CloseDate', 'Volume', 'High', 'Low', 'Open', 'Close']
with open('ford_stock_prices_5y_daily.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)
    csv_writer.writerows(stock_data)