In [2]:
!pip install yfinance
!pip install bs4
!pip install nbformat
!pip install --upgrade plotly

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4
Successfully installed bs4-0.0.2
Collecting plotly
  Downloading plotly-6.0.1-py3-none-any.whl.metadata (6.7 kB)
Collecting narwhals>=1.15.1 (from plotly)
  Downloading narwhals-1.35.0-py3-none-any.whl.metadata (9.2 kB)
Downloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
   ---------------------------------------- 0.0/14.8 MB ? eta -:--:--
   ---------------- ----------------------- 6.3/14.8 MB 32.2 MB/s eta 0:00:01
   -------------------------------------- - 14.4/14.8 MB 34.9 MB/s eta 0:00:01
   ---------------------------------------- 14.8/14.8 MB 32.1 MB/s eta 0:00:00
Downloading narwhals-1.35.0-py3-none-any.whl (325 kB)
Installing collected packages: narwhals, plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 5.24.1
    Uninstalling plotly-5.24.1:
      Successfully uninstalled plotly-5.24

In [4]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [5]:
import plotly.io as pio
pio.renderers.default = "iframe"

In [6]:
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [7]:
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    stock_data_specific = stock_data[stock_data.Date <= '2021-06-14']
    revenue_data_specific = revenue_data[revenue_data.Date <= '2021-04-30']
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific.Date, infer_datetime_format=True), y=stock_data_specific.Close.astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=pd.to_datetime(revenue_data_specific.Date, infer_datetime_format=True), y=revenue_data_specific.Revenue.astype("float"), name="Revenue"), row=2, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(showlegend=False,
    height=900,
    title=stock,
    xaxis_rangeslider_visible=True)
    fig.show()
    from IPython.display import display, HTML
    fig_html = fig.to_html()
    display(HTML(fig_html))

In [8]:
import yfinance as yf

# Create a ticker object for Tesla
tesla = yf.Ticker("TSLA")

In [9]:
# Get historical market data
tesla_data = tesla.history(period="max")

# Display the first few rows
print(tesla_data.head())

                               Open      High       Low     Close     Volume  \
Date                                                                           
2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667  281494500   
2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667  257806500   
2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000  123282000   
2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   77097000   
2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000  103003500   

                           Dividends  Stock Splits  
Date                                                
2010-06-29 00:00:00-04:00        0.0           0.0  
2010-06-30 00:00:00-04:00        0.0           0.0  
2010-07-01 00:00:00-04:00        0.0           0.0  
2010-07-02 00:00:00-04:00        0.0           0.0  
2010-07-06 00:00:00-04:00        0.0           0.0  


In [17]:
# Reset the index of the DataFrame so 'Date' becomes a column
tesla_data.reset_index(inplace=True)

# Display the first five rows
print(tesla_data.head())

                       Date      Open      High       Low     Close  \
0 2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667   
1 2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667   
2 2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000   
3 2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   
4 2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000   

      Volume  Dividends  Stock Splits  
0  281494500        0.0           0.0  
1  257806500        0.0           0.0  
2  123282000        0.0           0.0  
3   77097000        0.0           0.0  
4  103003500        0.0           0.0  


In [19]:
import requests

# Define the URL
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"

# Make a GET request to the URL
response = requests.get(url)

# Save the response text to a variable
html_data = response.text

In [21]:
from bs4 import BeautifulSoup

# Parse the HTML data using BeautifulSoup
soup = BeautifulSoup(html_data, "html.parser")

In [23]:
import pandas as pd

# Extract all tables from the HTML using pandas
tables = pd.read_html(html_data)

# Find the table that contains "Tesla Revenue"
for table in tables:
    if "Revenue" in table.columns:
        tesla_revenue = table
        break

# Display the first few rows
print(tesla_revenue.head())


                                              Sector  \
0                                  Auto/Tires/Trucks   
1  Tesla is the market leader in battery-powered ...   

                                            Industry  \
0                      Auto Manufacturers - Domestic   
1  Tesla is the market leader in battery-powered ...   

                                          Market Cap  \
0                                          $549.575B   
1  Tesla is the market leader in battery-powered ...   

                                             Revenue  
0                                           $53.823B  
1  Tesla is the market leader in battery-powered ...  


In [23]:
import pandas as pd

# Extract all tables from the HTML using pandas
tables = pd.read_html(html_data)

# Find the table that contains "Tesla Revenue"
for table in tables:
    if "Revenue" in table.columns:
        tesla_revenue = table
        break

# Display the first few rows
print(tesla_revenue.head())


                                              Sector  \
0                                  Auto/Tires/Trucks   
1  Tesla is the market leader in battery-powered ...   

                                            Industry  \
0                      Auto Manufacturers - Domestic   
1  Tesla is the market leader in battery-powered ...   

                                          Market Cap  \
0                                          $549.575B   
1  Tesla is the market leader in battery-powered ...   

                                             Revenue  
0                                           $53.823B  
1  Tesla is the market leader in battery-powered ...  


In [25]:
print(tesla_revenue.tail())


                                              Sector  \
0                                  Auto/Tires/Trucks   
1  Tesla is the market leader in battery-powered ...   

                                            Industry  \
0                      Auto Manufacturers - Domestic   
1  Tesla is the market leader in battery-powered ...   

                                          Market Cap  \
0                                          $549.575B   
1  Tesla is the market leader in battery-powered ...   

                                             Revenue  
0                                           $53.823B  
1  Tesla is the market leader in battery-powered ...  


In [27]:
import yfinance as yf

# Create the Ticker object for GameStop
gme = yf.Ticker("GME")


In [29]:
# Extract historical stock data for GameStop (GME)
gme_data = gme.history(period="max")

# Display the first few rows of the data
print(gme_data.head())


                               Open      High       Low     Close    Volume  \
Date                                                                          
2002-02-13 00:00:00-05:00  1.620129  1.693350  1.603296  1.691667  76216000   
2002-02-14 00:00:00-05:00  1.712707  1.716073  1.670626  1.683250  11021600   
2002-02-15 00:00:00-05:00  1.683250  1.687458  1.658001  1.674834   8389600   
2002-02-19 00:00:00-05:00  1.666418  1.666418  1.578047  1.607504   7410400   
2002-02-20 00:00:00-05:00  1.615920  1.662210  1.603296  1.662210   6892800   

                           Dividends  Stock Splits  
Date                                                
2002-02-13 00:00:00-05:00        0.0           0.0  
2002-02-14 00:00:00-05:00        0.0           0.0  
2002-02-15 00:00:00-05:00        0.0           0.0  
2002-02-19 00:00:00-05:00        0.0           0.0  
2002-02-20 00:00:00-05:00        0.0           0.0  


In [31]:
# Reset the index of the gme_data DataFrame
gme_data.reset_index(inplace=True)

# Display the first five rows of the DataFrame
print(gme_data.head())


                       Date      Open      High       Low     Close    Volume  \
0 2002-02-13 00:00:00-05:00  1.620129  1.693350  1.603296  1.691667  76216000   
1 2002-02-14 00:00:00-05:00  1.712707  1.716073  1.670626  1.683250  11021600   
2 2002-02-15 00:00:00-05:00  1.683250  1.687458  1.658001  1.674834   8389600   
3 2002-02-19 00:00:00-05:00  1.666418  1.666418  1.578047  1.607504   7410400   
4 2002-02-20 00:00:00-05:00  1.615920  1.662210  1.603296  1.662210   6892800   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  


In [33]:
import requests

# URL of the webpage
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"

# Use requests to get the webpage content
response = requests.get(url)

# Save the text of the response as html_data_2
html_data_2 = response.text

# Print the first 500 characters to verify it's working
print(html_data_2[:500])


<!DOCTYPE html>
<!-- saved from url=(0105)https://web.archive.org/web/20200814131437/https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue -->
<html class=" js flexbox canvas canvastext webgl no-touch geolocation postmessage websqldatabase indexeddb hashchange history draganddrop websockets rgba hsla multiplebgs backgroundsize borderimage borderradius boxshadow textshadow opacity cssanimations csscolumns cssgradients cssreflections csstransforms csstransforms3d csstransitions fontface g


In [35]:
from bs4 import BeautifulSoup

# Parse the HTML data using BeautifulSoup with html.parser
soup = BeautifulSoup(html_data_2, 'html.parser')

# If you want to use html5lib, make sure it's installed:
# pip install html5lib
# soup = BeautifulSoup(html_data_2, 'html5lib')

# Print the parsed HTML to check
print(soup.prettify()[:500])  # Prints the first 500 characters to verify the content


<!DOCTYPE html>
<!-- saved from url=(0105)https://web.archive.org/web/20200814131437/https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue -->
<html class="js flexbox canvas canvastext webgl no-touch geolocation postmessage websqldatabase indexeddb hashchange history draganddrop websockets rgba hsla multiplebgs backgroundsize borderimage borderradius boxshadow textshadow opacity cssanimations csscolumns cssgradients cssreflections csstransforms csstransforms3d csstransitions fontface ge


In [41]:
import pandas as pd
from bs4 import BeautifulSoup

# Parse the HTML data using BeautifulSoup with html.parser
soup = BeautifulSoup(html_data_2, 'html.parser')

# Find the table that contains the "GameStop Revenue" information
# Let's assume the table is the first table in the HTML content.
table = soup.find('table')

# Use pandas read_html to extract the table directly into a DataFrame
gme_revenue = pd.read_html(str(table))[0]

# Print the column names to inspect the structure
print("Columns in the dataframe:", gme_revenue.columns)

# Clean the 'Revenue' column: remove dollar signs and commas
gme_revenue['Revenue'] = gme_revenue['Revenue'].replace({'\$': '', ',': ''}, regex=True).astype(float)

# Display the first few rows of the gme_revenue dataframe
print(gme_revenue.head())


Columns in the dataframe: Index(['GameStop Annual Revenue (Millions of US $)', 'GameStop Annual Revenue (Millions of US $).1'], dtype='object')



invalid escape sequence '\$'


invalid escape sequence '\$'


invalid escape sequence '\$'


invalid escape sequence '\$'



KeyError: 'Revenue'

In [43]:
# Display the last five rows of the gme_revenue dataframe
print(gme_revenue.tail())


    GameStop Annual Revenue (Millions of US $)  \
11                                        2009   
12                                        2008   
13                                        2007   
14                                        2006   
15                                        2005   

   GameStop Annual Revenue (Millions of US $).1  
11                                       $8,806  
12                                       $7,094  
13                                       $5,319  
14                                       $3,092  
15                                       $1,843  


In [49]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Assuming tesla_data contains the stock data, which includes a 'Date' column and a 'Close' column

def make_graph(stock_data, stock):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Historical Share Price"), vertical_spacing=0.3)

    # Filter data to show only up to June 2021
    stock_data_specific = stock_data[stock_data['Date'] <= '2021-06-14']

    # Add trace for stock prices
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific['Date'], infer_datetime_format=True), 
                             y=stock_data_specific['Close'].astype("float"), 
                             name="Share Price"))

    # Update the axis and layout
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Price ($US)")
    fig.update_layout(
        showlegend=False,
        height=600,
        title=f"{stock} Stock Price History",
        xaxis_rangeslider_visible=True
    )

    # Display the graph
    fig.show()

# Call the make_graph function for Tesla data
make_graph(tesla_data, "Tesla")

    



The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.



In [51]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Assuming gme_data contains the stock data with 'Date' and 'Close' columns

def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Historical Share Price"), vertical_spacing=0.3)

    # Filter data to show only up to June 2021
    stock_data_specific = stock_data[stock_data['Date'] <= '2021-06-14']

    # Add trace for stock prices
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific['Date'], infer_datetime_format=True),
                             y=stock_data_specific['Close'].astype("float"),
                             name="Share Price"))

    # Update the axis and layout
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Price ($US)")
    fig.update_layout(
        showlegend=False,
        height=600,
        title=f"{stock} Stock Price History",
        xaxis_rangeslider_visible=True
    )

    # Display the graph
    fig.show()

# Call the make_graph function for GameStop data
make_graph(gme_data, gme_revenue, 'GameStop')



The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.

