{"payload":{"allShortcutsEnabled":false,"fileTree":{"":{"items":[{"name":"Pricingpost71019.ipynb","path":"Pricingpost71019.ipynb","contentType":"file"}],"totalCount":1}},"fileTreeProcessingTime":3.295516,"foldersToFetch":[],"repo":{"id":196313551,"defaultBranch":"master","name":"Pricing-Post","ownerLogin":"faroanalytics","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2019-07-11T03:29:34.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/51890640?v=4","public":true,"private":false,"isOrgOwned":false},"symbolsExpanded":false,"treeExpanded":true,"refInfo":{"name":"master","listCacheKey":"v0:1562815828.0","canEdit":false,"refType":"branch","currentOid":"504e6f7a230fcf6f482df215518b74977d6d2bfb"},"path":"Pricingpost71019.ipynb","currentUser":null,"blob":{"rawLines":["{"," \"cells\": ["," {"," \"cell_type\": \"code\","," \"execution_count\": 1,"," \"metadata\": {},"," \"outputs\": [],"," \"source\": ["," \"import pandas as pd\\n\","," \"import numpy as np\\n\","," \"from datetime import date\\n\","," \"from datetime import datetime\\n\","," \"from selenium import webdriver\\n\","," \"from bs4 import BeautifulSoup\\n\","," \"import urllib3\\n\","," \"from requests import get\\n\","," \"import time\\n\","," \"import re\\n\","," \"import os\\n\","," \"import math\\n\","," \"\\n\","," \"pd.options.display.max_rows = 8\""," ]"," },"," {"," \"cell_type\": \"markdown\","," \"metadata\": {},"," \"source\": ["," \"## Auto pull oil futures price data from CME\""," ]"," },"," {"," \"cell_type\": \"markdown\","," \"metadata\": {},"," \"source\": ["," \"### Open web browser, navigate to site, pull raw data\""," ]"," },"," {"," \"cell_type\": \"code\","," \"execution_count\": null,"," \"metadata\": {},"," \"outputs\": [],"," \"source\": ["," \"#write url based on current time\\n\","," \"url = \\\"https://www.cmegroup.com/CmeWS/mvc/Quotes/Future/425/G?pageSize=50&_=\\\"\\n\","," \"urlext = np.round(time.time(), decimals=0)\\n\","," \"urlext = str(urlext)\\n\","," \"url = url + urlext\\n\","," \"\\n\","," \"# create a new Firefox session\\n\","," \"driver = webdriver.Firefox(executable_path=r'C:/Users/yourdirectory/geckodriver-v0.23.0-win64/geckodriver.exe')\\n\","," \"driver.get(url)\\n\","," \"soup = BeautifulSoup(url)\\n\","," \"\\n\","," \"#scrape site source page\\n\","," \"urltext = soup.findAll(text=True)\\n\","," \"urltext = driver.current_url\\n\","," \"httptext = urllib3.PoolManager()\\n\","," \"responsetext = httptext.request('GET', urltext)\\n\","," \"\\n\","," \"#close browser\\n\","," \"driver.close()\\n\","," \"\\n\","," \"#convert BeautifulSoup object to text\\n\","," \"souptext = BeautifulSoup(responsetext.data)\\n\","," \"souptext2 = str(souptext)\""," ]"," },"," {"," \"cell_type\": \"markdown\","," \"metadata\": {},"," \"source\": ["," \"### Parse and clean pulled raw text\""," ]"," },"," {"," \"cell_type\": \"code\","," \"execution_count\": 3,"," \"metadata\": {},"," \"outputs\": [],"," \"source\": ["," \"#create lists for pricing and dates\\n\","," \"settlePri = []\\n\","," \"settleDate = []\\n\","," \"\\n\","," \"#state the patterns to search on within raw data\\n\","," \"pricetxt2 = '\\\"priorSettle\\\":\\\"\\\\d+.\\\\d\\\\d\\\"'\\n\","," \"pricetxt3 = '\\\"priorSettle\\\":\\\"-\\\"'\\n\","," \"\\n\","," \"#find the above patterns in the text and append them to their corresponding lists from above\\n\","," \"p = re.compile(\\\"(%s|%s)\\\" % (pricetxt2, pricetxt3)).findall(souptext2) \\n\","," \"d = re.findall('\\\"expirationDate\\\":\\\"\\\\d{8}\\\"', souptext2)\\n\","," \"if p:\\n\","," \" settlePri.append(p)\\n\","," \"if d:\\n\","," \" settleDate.append(d)\\n\","," \"\\n\","," \"#combine those two lists\\n\","," \"pricelist = {'Date':d,'Oil_Price':p}\\n\","," \"\\n\","," \"#convert to dataframe\\n\","," \"pricelist = pd.DataFrame(pricelist)\\n\","," \"\\n\","," \"#remove unnecessary text, format numbers and dates, remove non-numeric price place holders from web site\\n\","," \"pricelist['Date'] = pricelist['Date'].map(lambda x: x.replace('\\\"expirationDate\\\":\\\"', \\\"\\\"))\\n\","," \"pricelist['Date'] = [datetime(year=int(x[0:4]), month=int(x[4:6]), day=int(x[6:8])) for x in pricelist['Date']]\\n\","," \"pricelist['Month'] = pricelist['Date'].dt.month.astype(int)\\n\","," \"pricelist['Year'] = pricelist['Date'].dt.year.astype(int)\\n\","," \"pricelist['Oil_Price'] = pricelist['Oil_Price'].map(lambda x: x.replace('\\\"priorSettle\\\":\\\"', \\\"\\\").rstrip('\\\"'))\\n\","," \"pricelist['Oil_Price'] = pricelist['Oil_Price'].replace('-', '0.00')\\n\","," \"pricelist['Oil_Price'] = pricelist['Oil_Price'].astype(float)\\n\","," \"\\n\","," \"#final dataframe\\n\","," \"pricelist = pricelist[['Year', 'Month', 'Oil_Price']]\""," ]"," },"," {"," \"cell_type\": \"code\","," \"execution_count\": 4,"," \"metadata\": {},"," \"outputs\": ["," {"," \"data\": {"," \"text/html\": ["," \"
\\n\","," \" | Year | \\n\","," \"Month | \\n\","," \"Oil_Price | \\n\","," \"
---|---|---|---|
0 | \\n\","," \"2019 | \\n\","," \"8 | \\n\","," \"60.43 | \\n\","," \"
1 | \\n\","," \"2019 | \\n\","," \"9 | \\n\","," \"60.52 | \\n\","," \"
2 | \\n\","," \"2019 | \\n\","," \"10 | \\n\","," \"60.46 | \\n\","," \"
3 | \\n\","," \"2019 | \\n\","," \"11 | \\n\","," \"60.34 | \\n\","," \"
... | \\n\","," \"... | \\n\","," \"... | \\n\","," \"... | \\n\","," \"
123 | \\n\","," \"2029 | \\n\","," \"11 | \\n\","," \"55.62 | \\n\","," \"
124 | \\n\","," \"2029 | \\n\","," \"12 | \\n\","," \"55.62 | \\n\","," \"
125 | \\n\","," \"2030 | \\n\","," \"1 | \\n\","," \"55.62 | \\n\","," \"
126 | \\n\","," \"2030 | \\n\","," \"2 | \\n\","," \"55.62 | \\n\","," \"
127 rows × 3 columns
\\n\","," \"\\n\","," \" | Year | \\n\","," \"Oil_Price | \\n\","," \"
---|---|---|
0 | \\n\","," \"2019 | \\n\","," \"60.378000 | \\n\","," \"
1 | \\n\","," \"2020 | \\n\","," \"58.386667 | \\n\","," \"
2 | \\n\","," \"2021 | \\n\","," \"56.021667 | \\n\","," \"
3 | \\n\","," \"2022 | \\n\","," \"55.008333 | \\n\","," \"
... | \\n\","," \"... | \\n\","," \"... | \\n\","," \"
8 | \\n\","," \"2027 | \\n\","," \"55.553333 | \\n\","," \"
9 | \\n\","," \"2028 | \\n\","," \"55.566667 | \\n\","," \"
10 | \\n\","," \"2029 | \\n\","," \"55.620000 | \\n\","," \"
11 | \\n\","," \"2030 | \\n\","," \"55.620000 | \\n\","," \"
12 rows × 2 columns
\\n\","," \"\\n\","," \" | Year | \\n\","," \"Month | \\n\","," \"Oil_Price | \\n\","," \"
---|---|---|---|
0 | \\n\","," \"1986 | \\n\","," \"1 | \\n\","," \"22.93 | \\n\","," \"
1 | \\n\","," \"1986 | \\n\","," \"2 | \\n\","," \"15.46 | \\n\","," \"
2 | \\n\","," \"1986 | \\n\","," \"3 | \\n\","," \"12.61 | \\n\","," \"
3 | \\n\","," \"1986 | \\n\","," \"4 | \\n\","," \"12.84 | \\n\","," \"
... | \\n\","," \"... | \\n\","," \"... | \\n\","," \"... | \\n\","," \"
398 | \\n\","," \"2019 | \\n\","," \"3 | \\n\","," \"58.15 | \\n\","," \"
399 | \\n\","," \"2019 | \\n\","," \"4 | \\n\","," \"63.86 | \\n\","," \"
400 | \\n\","," \"2019 | \\n\","," \"5 | \\n\","," \"60.83 | \\n\","," \"
401 | \\n\","," \"2019 | \\n\","," \"6 | \\n\","," \"54.66 | \\n\","," \"
402 rows × 3 columns
\\n\","," \"\\n\","," \" | Year | \\n\","," \"Oil_Price | \\n\","," \"
---|---|---|
0 | \\n\","," \"1986 | \\n\","," \"15.036667 | \\n\","," \"
1 | \\n\","," \"1987 | \\n\","," \"19.171667 | \\n\","," \"
2 | \\n\","," \"1988 | \\n\","," \"15.982500 | \\n\","," \"
3 | \\n\","," \"1989 | \\n\","," \"19.640833 | \\n\","," \"
... | \\n\","," \"... | \\n\","," \"... | \\n\","," \"
30 | \\n\","," \"2016 | \\n\","," \"43.144167 | \\n\","," \"
31 | \\n\","," \"2017 | \\n\","," \"50.884167 | \\n\","," \"
32 | \\n\","," \"2018 | \\n\","," \"64.938333 | \\n\","," \"
33 | \\n\","," \"2019 | \\n\","," \"57.305000 | \\n\","," \"
34 rows × 2 columns
\\n\","," \"