In [8]:
#Project Target: At the automobile sector, Comparison of Revenues of two pioneer company which are Tesla and Ford Motors. 
#Required Data for satisfy project target: Revenues, stock prices
#Desired Outputs: Plots of two figures which are Revenue vs Date and Stock price vs Date for each company.

from bs4 import BeautifulSoup
import yfinance as yf
import pandas as pd
import requests
import warnings
#Future warnings are suppressed
warnings.simplefilter(action='ignore', category=FutureWarning)

#1) DATA COLLECTION PHASE

#1.A)Stock data of Tesla and Ford Motors are extracted from Yahoo Finance with yfinance function 

#Data Extraction of Tesla
data=yf.Ticker("TSLA")
tesla_data=data.history(period="max")
tesla_data.reset_index(inplace=True)


#Data Extraction of Ford Motors
data=yf.Ticker("F")
ford_data=data.history(period="max")
ford_data.reset_index(inplace=True)


#1.B)Stock data of Tesla and Ford Motors are extracted with webscrapting

#Revenue data of Tesla is extracted with using Webscrabting.
url1 = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
html_data  = requests.get(url1).text
soup = BeautifulSoup(html_data, 'html5lib')
tesla_revenue = pd.DataFrame(columns=["Date", "Revenue"])

# First we isolate the body of the table which contains all the information
# Then we loop through each row and find all the column values for each row
for row in soup.find("tbody").find_all('tr'):
    col = row.find_all("td")
    date = col[0].text
    revenue= col[1].text
    
    # Finally we append the data of each row to the table
    tesla_revenue = tesla_revenue.append({"Date":date, "Revenue":revenue}, ignore_index=True)    

#Revenue data of Ford motors is extracted with using Webscrabting.
url1 = 'https://www.macrotrends.net/stocks/charts/F/ford-motor/stock-price-history#:~:text=The%20Ford%20Motor%2052%2Dweek,last%2052%20weeks%20is%2013.57.'

html_data  = requests.get(url1).text
soup = BeautifulSoup(html_data, 'html5lib')
ford_revenue = pd.DataFrame(columns=["Date", "Revenue"])

# First we isolate the body of the table which contains all the information
# Then we loop through each row and find all the column values for each row
for row in soup.find("tbody").find_all('tr'):
    col = row.find_all("td")
    date = col[0].text
    revenue= col[1].text
    
    # Finally we append the data of each row to the table
    ford_revenue = ford_revenue.append({"Date":date, "Revenue":revenue}, ignore_index=True)    

In [10]:
#2) DATA PREPERATION PHASE

#Future warnings are suppressed
warnings.simplefilter(action='ignore', category=FutureWarning)

display("Tesla Stock Prices")
display(tesla_data.head())
display("Ford Motors Stock Prices")
display(ford_data.head())

display("Tesla Revenue")
display(tesla_revenue.head())
display("Ford Motors Revenue")
display(ford_revenue.head())


#Values at Revenue column have dollar signs and commas instead of dots. 
#Data need to be manipulated for numerical calculations such as finding mean value of Revenue.
#For this reason dolar sign ("$") is neglected and commas (",") are replaced with dot (".")


tesla_revenue["Revenue"] = tesla_revenue['Revenue'].str.replace('|\$',"")
tesla_revenue["Revenue"] = tesla_revenue['Revenue'].str.replace(',',".")
tesla_revenue.dropna(inplace=True)
tesla_revenue = tesla_revenue[tesla_revenue['Revenue'] != ""] 

display("Manipulated Data")


display("Tesla Revenue")
display(tesla_revenue.head())

'Tesla Stock Prices'

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


'Ford Motors Stock Prices'

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1972-06-01 00:00:00-04:00,0.0,0.253818,0.250977,0.25145,1091238,0.0,0.0
1,1972-06-02 00:00:00-04:00,0.25145,0.253818,0.25003,0.250977,1174468,0.0,0.0
2,1972-06-05 00:00:00-04:00,0.250977,0.253344,0.25003,0.250977,5209582,0.0,0.0
3,1972-06-06 00:00:00-04:00,0.250977,0.251924,0.247188,0.248135,1424158,0.0,0.0
4,1972-06-07 00:00:00-04:00,0.248135,0.249556,0.246715,0.246715,675088,0.0,0.0


'Tesla Revenue'

Unnamed: 0,Date,Revenue
0,2021,53.823
1,2020,31.536
2,2019,24.578
3,2018,21.461
4,2017,11.759


'Ford Motors Revenue'

Unnamed: 0,Date,Revenue
0,2023,12.3351
1,2022,13.7362
2,2021,12.9056
3,2020,6.379
4,2019,8.0179


'Manipulated Data'

'Tesla Revenue'

Unnamed: 0,Date,Revenue
0,2021,53.823
1,2020,31.536
2,2019,24.578
3,2018,21.461
4,2017,11.759


In [4]:
#3) DATA VISUALIZATION PHASE

#Visualization function is created
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    stock_data_specific = stock_data[stock_data.Date <= '2021--06-14']
    revenue_data_specific = revenue_data[revenue_data.Date <= '2021-04-30']
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific.Date, infer_datetime_format=True), y=stock_data_specific.Close.astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=pd.to_datetime(revenue_data_specific.Date, infer_datetime_format=True), y=revenue_data_specific.Revenue.astype("float"), name="Revenue"), row=2, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(showlegend=False,
    height=900,
    title=stock,
    xaxis_rangeslider_visible=True)
    fig.show()

#Desired outputs are created.
make_graph(tesla_data, tesla_revenue, 'Tesla')
make_graph(ford_data, ford_revenue, 'Ford Motors')