In [8]:
#https://www.scrapehero.com/scrape-yahoo-finance-stock-market-data/
#Construct the URL of the search results page from Yahoo Finance. 
#   For example, here is the one for Apple-http://finance.yahoo.com/quote/AAPL?p=AAPL
#Step 1) Download HTML of the search result page using Python Requests
#Step 2) Inspect Data Source via web browser (IE) and Python (Print, BS4 Lib)
#Step 3) Parse the page using BS4 HTML Parser + HTML Tag Information
#Step 4) Save the data for Storage - CSV & JSON files.

import requests  #GET/POST/PUT API requests
from contextlib import closing  #utilities for common tasks involving the "with" statement.
from bs4 import BeautifulSoup #BeautifulSoup4 - HTML Web Scraping #Scrapy
import csv #write stock information to csv file
import json #write stock informaton to json file, for future API use
#more info on bs4:  https://realpython.com/python-web-scraping-practical-introduction/

In [9]:
#Attempts to get the content at `url` by making an HTTP GET request.
#If the content-type of response is some kind of HTML/XML, return the
#text content, otherwise return None.
def simple_get(url):
    try:
        with closing(requests.get(url, stream=True)) as resp:
            if is_good_response(resp):
                print('http request successful')
                return resp.content
            else:
                return None

    except RequestException as e:
        print('Error during requests to {0} : {1}'.format(url, str(e)))
        return None

In [10]:
#Returns True if the response seems to be HTML, False otherwise.
def is_good_response(resp):

    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200 
            and content_type is not None 
            and content_type.find('html') > -1)

In [11]:
#Web Scrape - bloomberg stock market data - S&P 500 example:  https://www.bloomberg.com/quote/SPX:IND
raw_html = simple_get('https://finance.yahoo.com/quote/AAPL/history?p=AAPL')

http request successful


In [12]:
#using BeautifulSoap to scrape and inspect web page
html = BeautifulSoup(raw_html, 'html.parser')
print(html.prettify())

<!DOCTYPE html>
<html class="NoJs featurephone" id="atomic" lang="en-US">
 <head prefix="og: http://ogp.me/ns#">
  <script>
   window.performance && window.performance.mark && window.performance.mark('PageStart');
  </script>
  <meta charset="utf-8"/>
  <title>
   Apple Inc. (AAPL) Stock Historical Prices &amp; Data
  </title>
  <meta content="AAPL, Apple Inc., AAPL historical prices, Apple Inc. historical prices, historical prices, stocks, quotes, finance" name="keywords"/>
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <meta content="on" property="twitter:dnt"/>
  <meta content="90376669494" property="fb:app_id"/>
  <meta content="#400090" name="theme-color"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="Discover historical prices for AAPL stock on Yahoo Finance. View daily, weekly or monthly format back to when Apple Inc. stock was issued." lang="en-US" name="description"/>
  <meta content="guce.yahoo.com" name="oath:guce:con

In [13]:
# Take out the <div> of name and get the stock's value
stock_date = html.findAll("td", attrs={"class": "Py(10px) Ta(start) Pend(10px)"})
#print(stock_date)

In [14]:
#iterate through elements to get stock dates
stock_date = []
for i, text in enumerate(html.findAll("td", attrs={"class": "Py(10px) Ta(start) Pend(10px)"})):
    stock_date.insert(i,text.span.string)
#print(stock_date)

#create list to store other stock data
stock_data = []
for i, text in enumerate(html.findAll("td", attrs={"class": "Py(10px) Pstart(10px)"})):
    stock_data.insert(i,text.span.string)
#print(stock_data)

In [15]:
#combine date and stock information into dictionary data structure
i = 0
stock_info = {}
for i in range(len(stock_date)-1):  #get stock info for every day
    stock_info[i] = {"date": stock_date[i], "open": stock_data[(i*6)+0], "close": stock_data[(i*6)+3], "volume": stock_data[(i*6)+5]}
    print(stock_info[i])

{'date': 'Jun 12, 2019', 'open': '193.95', 'close': '194.47', 'volume': '12,210,229'}
{'date': 'Jun 11, 2019', 'open': '194.86', 'close': '194.81', 'volume': '26,910,000'}
{'date': 'Jun 10, 2019', 'open': '191.81', 'close': '192.58', 'volume': '26,220,900'}
{'date': 'Jun 07, 2019', 'open': '186.51', 'close': '190.15', 'volume': '30,684,400'}
{'date': 'Jun 06, 2019', 'open': '183.08', 'close': '185.22', 'volume': '22,526,300'}
{'date': 'Jun 05, 2019', 'open': '184.28', 'close': '182.54', 'volume': '29,773,400'}
{'date': 'Jun 04, 2019', 'open': '175.44', 'close': '179.64', 'volume': '30,968,000'}
{'date': 'Jun 03, 2019', 'open': '175.60', 'close': '173.30', 'volume': '40,396,100'}
{'date': 'May 31, 2019', 'open': '176.23', 'close': '175.07', 'volume': '27,043,600'}
{'date': 'May 30, 2019', 'open': '177.95', 'close': '178.30', 'volume': '21,218,400'}
{'date': 'May 29, 2019', 'open': '176.42', 'close': '177.38', 'volume': '28,481,200'}
{'date': 'May 28, 2019', 'open': '178.92', 'close': '1

IndexError: list index out of range

In [16]:
#export stock information to csv
i = 0
with open('C:\\Users\\JTB Ventures LLC\\Documents\\GitHub\\ODSA-PythonAdvModels\\Data\\stock_hist.csv', 'w') as f:  # Just use 'w' mode in 3.x
    w = csv.DictWriter(f, stock_info[i].keys(), lineterminator = '\n')
    w.writeheader() 
    while(i<len(stock_info)):
        w.writerow(stock_info[i])
        i += 1

In [None]:
#export stock information to json
jsonarray = json.dumps(stock_info)
print(jsonarray)

#write json file to local drive, for future use
with open('C:\\Python\\Data\\stock_hist.json', 'w') as f:
    json.dump(jsonarray, f)
.

In [18]:
#Purpose: REST API Example using Stored stock market data, along with Google Fusion Tables
import json
import csv
import requests  #GET/POST/PUT API requests
from contextlib import closing  #utilities for common tasks involving the "with" statement

#Step 1 - Identify Resource: set url for json request
view_url = 'https://accounts.google.com/o/oauth2/token'
#Step 2 & 3 - Identify Endpoints, Methods and Set request parameters
client_id='615800458288-5fktbjo6kmu18bpgl18glnj2l12mvg8i.apps.googleusercontent.com'
client_secret='QowLGwV4wueqHpV_t1sotQVh'
refresh_token = '1/vz7AvnbcT05ZJI-SQcFACqhaif9hXCfeuTM8n1DI12E'
refresh_body = "refresh_token=" + refresh_token + '&client_id=' + client_id + '&client_secret=' + client_secret + '&grant_type=refresh_token'

#Step 1-3) Google Fusion Tables OAuth keys
def API_POST_OAuth():
    #set url for json request, then obtain json response/payload
    headers = {'Content-type': 'application/x-www-form-urlencoded', 'Accept': 'text/plain'}
    myResponse = requests.post(view_url, data=refresh_body, headers=headers)
    # For successful API call, response code will be 200 (OK)
    if(myResponse.ok):
        # Loads (Load String) takes a Json file and converts into python data structure 
        # (dict or list, depending on JSON structure and number of records returned
        jData = myResponse.content 
        # Loading the response data into a dict variable
        jData = json.loads(jData) 
        print("The API Get Request Was Successful")
        print("\n")
        return jData
        
    else:
        # If response code is not ok (200), print the resulting http error code with description
        print('API Error')
        return ''

#Step 1-3) Google Fusion Table - Replace
def API_POST_Data(access_token, table_id, data):
    data_view_url = "https://www.googleapis.com/upload/fusiontables/v2/tables/" + table_id + '/replace?&access_token=' + access_token + '&isStrict=false'
    headers = {'Content-type': 'application/octet-stream'}
    #create post response body - comma seperated string of all obs
    print(range(len(data)))
    print('iterate list')
    data2 = ""
    for i in range(len(data)):
        dict1 = data[i]
        dict2 = dict1['date'].replace(",","") + ', ' + dict1['open'] + ', ' + dict1['close'] + ', ' + dict1['volume'].replace(",","") + '\n'
        data2 = data2 + dict2  
    myResponse = requests.post(data_view_url, data=data2, headers=headers)
    print (myResponse.status_code)
    # For successful API call, response code will be 200 (OK)
    if(myResponse.status_code == 200):
        return "The API Post Request Was Successful"
    else:
        #If response code is not ok (200), print the resulting http error code with description
        return "The API Post Request Was Not Successful"

#step 4 - create request data structure from csv file, to replace data in the "OSDA Stock History" google fusion table
#https://fusiontables.google.com/data?docid=1a8EPfomscPkMYksFrlGoyU4utoT0QdLCpH9tySDP#rows:id=1 
reader = csv.DictReader(open('C:\\Users\\JTB Ventures LLC\\Documents\\GitHub\\ODSA-PythonAdvModels\\Data\\stock_hist.csv'))
stock_data = list(reader)
print(type(stock_data)) #list of dictionaries
print(stock_data[0])
#Obtain Google Fusion OAuth token 
data = API_POST_OAuth()
access_token = data['access_token']

#Put data in google fusion table
table_id = '1a8EPfomscPkMYksFrlGoyU4utoT0QdLCpH9tySDP'
#convert stock_data to json field
status = API_POST_Data(access_token, table_id, stock_data)  #"put" data using API class
print(status)

<class 'list'>
OrderedDict([('date', 'Jun 12, 2019'), ('open', '193.95'), ('close', '194.47'), ('volume', '12,210,229')])
The API Get Request Was Successful


range(0, 98)
iterate list
200
The API Post Request Was Successful
