## Extracting Stock Data By Web Scraping



###  Netflix's Stock Data

In [17]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime

In [18]:
# URL of the web page that holds the Netflix stock data table.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/netflix_data_webpage.html"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# fast on an HTTP error instead of handing an error page to the HTML parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

In [19]:
# Build a BeautifulSoup tree from the downloaded page text (html5lib parser)
soup = BeautifulSoup(markup=data, features="html5lib")


In [20]:
# Convert the HTML table into a pandas DataFrame.
#
# The rows are collected in a plain list and the frame is built once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies the whole frame on every iteration.
#
# "Adj Close" is listed last so the column order stays the same as in the
# output shown below this cell.
netflix_columns = ["Date", "Open", "High", "Low", "Close", "Volume", "Adj Close"]

netflix_rows = []
for row in soup.find("tbody").find_all("tr"):
    col = row.find_all("td")
    # Each <tr> is read positionally; the cell text is kept as a string.
    netflix_rows.append({
        "Date": col[0].text,
        "Open": col[1].text,
        "High": col[2].text,
        "Low": col[3].text,
        "Close": col[4].text,
        "Adj Close": col[5].text,
        "Volume": col[6].text,
    })

netflix_data = pd.DataFrame(netflix_rows, columns=netflix_columns)

In [21]:
# Preview the first five rows to confirm the table was scraped into the frame
netflix_data.head(n=5)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,"Jun 01, 2021",504.01,536.13,482.14,528.21,78560600,528.21
1,"May 01, 2021",512.65,518.95,478.54,502.81,66927600,502.81
2,"Apr 01, 2021",529.93,563.56,499.0,513.47,111573300,513.47
3,"Mar 01, 2021",545.57,556.99,492.85,521.66,90183900,521.66
4,"Feb 01, 2021",536.79,566.65,518.28,538.85,61902300,538.85


###  Amazon's Stock Data

In [22]:
# URL of the web page that holds the Amazon stock data table.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/amazon_data_webpage.html"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# fast on an HTTP error instead of handing an error page to the HTML parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_data = response.text

Parse the HTML data using `BeautifulSoup`.


In [23]:
# Re-bind `soup` to the parsed Amazon page (html5lib parser)
soup = BeautifulSoup(markup=html_data, features="html5lib")

In [24]:
# Show the page's <title> tag as a quick check of which page was parsed
soup.title

<title>Amazon.com, Inc. (AMZN) Stock Historical Prices &amp; Data - Yahoo Finance</title>

In [25]:
# Extract the Amazon table into a pandas DataFrame.
#
# The rows are collected in a plain list and the frame is built once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies the whole frame on every iteration.
amazon_columns = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]

amazon_rows = []
for row in soup.find("tbody").find_all("tr"):
    col = row.find_all("td")
    # Each <tr> is read positionally; the cell text is kept as a string.
    amazon_rows.append({
        "Date": col[0].text,
        "Open": col[1].text,
        "High": col[2].text,
        "Low": col[3].text,
        "Close": col[4].text,
        "Adj Close": col[5].text,
        "Volume": col[6].text,
    })

amazon_data = pd.DataFrame(amazon_rows, columns=amazon_columns)

In [26]:
# Display the scraped Amazon frame as the cell output
amazon_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,"Jan 01, 2021",3270.00,3363.89,3086.00,3206.20,3206.20,71528900
1,"Dec 01, 2020",3188.50,3350.65,3072.82,3256.93,3256.93,77556200
2,"Nov 01, 2020",3061.74,3366.80,2950.12,3168.04,3168.04,90810500
3,"Oct 01, 2020",3208.00,3496.24,3019.00,3036.15,3036.15,116226100
4,"Sep 01, 2020",3489.58,3552.25,2871.00,3148.73,3148.73,115899300
...,...,...,...,...,...,...,...
56,"May 01, 2016",663.92,724.23,656.00,722.79,722.79,90614500
57,"Apr 01, 2016",590.49,669.98,585.25,659.59,659.59,78464200
58,"Mar 01, 2016",556.29,603.24,538.58,593.64,593.64,94009500
59,"Feb 01, 2016",578.15,581.80,474.00,552.52,552.52,124144800


In [27]:
# List the column labels of the Amazon frame
amazon_data.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [28]:
# Look up the single "Open" value stored at row label 60
amazon_data.at[60, "Open"]

'656.29'

## Stock Data Visualised


In [29]:
#Creating a candlestick chart to visualise the stock information for both the companies
import plotly.graph_objects as go

In [30]:
# Candlestick chart of the Netflix data.
#
# The scraped dates are strings such as "Jun 01, 2021". Plotly does not
# recognise that format as a date, so passing the strings directly gives a
# categorical x-axis in row order (newest first). Parsing them into
# timestamps gives a true, chronologically ordered time axis.
netflix_dates = pd.to_datetime(netflix_data['Date'].str.strip(), format="%b %d, %Y")

fig = go.Figure(
    data=[
        go.Candlestick(
            x=netflix_dates,
            open=netflix_data['Open'],
            high=netflix_data['High'],
            low=netflix_data['Low'],
            close=netflix_data['Close'],
        )
    ]
)

# Title and axis labels let the figure stand on its own; the range slider is
# hidden as before.
fig.update_layout(
    title="Netflix Stock Price",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_rangeslider_visible=False,
)
fig.show()


In [33]:
# Candlestick chart of the Amazon data.
#
# The scraped dates are strings such as "Jan 01, 2021". Plotly does not
# recognise that format as a date, so passing the strings directly gives a
# categorical x-axis in row order (newest first). Parsing them into
# timestamps gives a true, chronologically ordered time axis.
amazon_dates = pd.to_datetime(amazon_data['Date'].str.strip(), format="%b %d, %Y")

fig1 = go.Figure(
    data=[
        go.Candlestick(
            x=amazon_dates,
            open=amazon_data['Open'],
            high=amazon_data['High'],
            low=amazon_data['Low'],
            close=amazon_data['Close'],
        )
    ]
)

# Title and axis labels let the figure stand on its own; the range slider is
# hidden as before.
fig1.update_layout(
    title="Amazon Stock Price",
    xaxis_title="Date",
    yaxis_title="Price",
    xaxis_rangeslider_visible=False,
)
fig1.show()