# **Analyzing Historical Stock/Revenue Data and Building a Dashboard by Sudarshan Gnanavendan**

# **Question 1: Extracting Tesla Stock Data Using yfinance**

In [None]:
# Install necessary libraries
!pip install yfinance --quiet

# Import libraries
import yfinance as yf
import pandas as pd

# Download Tesla's full price history and turn the date index into a column
tesla = yf.Ticker("TSLA")
tesla_stock = tesla.history(period="max").reset_index()

# Show the first five rows
print("Tesla Stock Data:")
display(tesla_stock.head())


Tesla Stock Data:


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


# **Question 2: Extracting Tesla Revenue Data Using Webscraping**

In [None]:
# Install required packages
!pip install requests beautifulsoup4 lxml --quiet

import requests
from bs4 import BeautifulSoup
import pandas as pd

from io import StringIO

# Wikipedia article for Tesla; one of its "wikitable" tables lists yearly revenue
url = "https://en.wikipedia.org/wiki/Tesla,_Inc."

# Fetch the page. The timeout keeps the cell from hanging indefinitely and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "lxml")

# Collect every table carrying the "wikitable" class
tables = soup.find_all("table", {"class": "wikitable"})

# Take the first table whose markup mentions "Revenue"; None if there is no match
target_table = next((table for table in tables if "Revenue" in str(table)), None)
if target_table is None:
    raise ValueError("No wikitable containing 'Revenue' was found on the Tesla page")

# read_html expects a path, URL or file-like object; passing literal HTML is
# deprecated, so wrap the markup in StringIO
tesla_revenue = pd.read_html(StringIO(str(target_table)))[0]

# Display last 5 rows
tesla_revenue.tail()

  tesla_revenue = pd.read_html(str(target_table))[0]


Unnamed: 0,Year,Revenue (US$ m),Net income (US$ m),Total assets (US$ m),Employees,Sources
15,2020,31536,721,52148,70757.0,[564]
16,2021,53823,5519,62131,99290.0,[564]
17,2022,81462,12556,82338,127855.0,[564]
18,2023,96773,14997,106618,140473.0,[564]
19,2024,97690,7091,122070,125665.0,[564]


# **Question 3: Extracting GameStop Stock Data Using yfinance**

In [None]:
# Download GameStop's full price history and turn the date index into a column
gme = yf.Ticker("GME")
gme_stock = gme.history(period="max").reset_index()

# Show the first five rows
print("GameStop Stock Data:")
display(gme_stock.head())


GameStop Stock Data:


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620129,1.69335,1.603296,1.691667,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716073,1.670626,1.68325,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.683251,1.687459,1.658002,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666417,1.666417,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.61592,1.66221,1.603296,1.66221,6892800,0.0,0.0


# **Question 4: Extracting GameStop Revenue Data Using Webscraping**

In [None]:
# Install required libraries
!pip install requests beautifulsoup4 lxml --quiet

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Page that lists GameStop revenue by fiscal year
url = "https://www.stockanalysis.com/stocks/gme/revenue/"

# Browser-like User-Agent sent with the request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Fetch the page. The timeout keeps the cell from hanging indefinitely and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.text, "lxml")

# The revenue table is expected to be the first <table> on the page
table = soup.find("table")
if table is None:
    raise ValueError(f"No <table> element found at {url}")

# Column names come from the table's <th> cells
columns = [th.text.strip() for th in table.find_all("th")]

# One list of cell texts per data row; rows without <td> cells are skipped
data = []
for row in table.find_all("tr")[1:]:
    row_data = [cell.text.strip() for cell in row.find_all("td")]
    if row_data:
        data.append(row_data)

# Create DataFrame
# NOTE(review): in the saved output the older fiscal years contain the
# placeholder text "Pro" instead of numbers, so the value columns hold raw
# strings and are not guaranteed to be numeric.
gamestop_revenue = pd.DataFrame(data, columns=columns)

# Display the last five rows
print("Last 5 rows of GameStop revenue data:")
display(gamestop_revenue.tail())


Last 5 rows of GameStop revenue data:
  Fiscal Year End Revenue Change Growth
5     Feb 1, 2020     Pro    Pro    Pro
6     Feb 2, 2019     Pro    Pro    Pro
7     Feb 3, 2018     Pro    Pro    Pro
8    Jan 28, 2017     Pro    Pro    Pro
9    Jan 30, 2016     Pro    Pro    Pro


# **Question 5: Tesla Stock and Revenue Dashboard**

In [None]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objs as go

from io import StringIO

# Fetch Tesla stock data using yfinance
tesla = yf.Ticker('TSLA')
tesla_data = tesla.history(period="max").reset_index()

# Keep only the plotted columns. .copy() makes an independent frame so the
# column assignment below does not raise SettingWithCopyWarning.
tesla_stock = tesla_data[["Date", "Close"]].copy()
tesla_stock["Date"] = pd.to_datetime(tesla_stock["Date"])

# Wikipedia page for Tesla
url = "https://en.wikipedia.org/wiki/Tesla,_Inc."

# Fetch and parse the page; fail loudly on an HTTP error or a hung connection
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "lxml")

# Find all wikitable tables
tables = soup.find_all("table", {"class": "wikitable"})

# Find the first table containing "Revenue"
target_table = None
for table in tables:
    if "Revenue" in str(table):
        target_table = table
        break
if target_table is None:
    raise ValueError("No wikitable containing 'Revenue' was found on the Tesla page")

# Convert table to DataFrame (literal HTML must be wrapped in StringIO)
tesla_revenue = pd.read_html(StringIO(str(target_table)))[0]

# Flatten a two-level header if present, then name the first two columns
if isinstance(tesla_revenue.columns, pd.MultiIndex):
    tesla_revenue.columns = tesla_revenue.columns.droplevel(0)
tesla_revenue = tesla_revenue.rename(
    columns={tesla_revenue.columns[0]: "Year", tesla_revenue.columns[1]: "Revenue"}
)

# Keep the two relevant columns and drop incomplete rows
tesla_revenue = tesla_revenue[["Year", "Revenue"]].dropna().copy()
tesla_revenue["Year"] = tesla_revenue["Year"].astype(str).str[:4]  # keep only year digits
tesla_revenue["Date"] = pd.to_datetime(tesla_revenue["Year"] + "-12-31")

# Convert revenue to a float number of millions of USD. The table scraped
# earlier in this notebook labels the column "Revenue (US$ m)" (e.g. 97690 for
# 2024), so plain figures are already in millions and must NOT be rescaled.
# Only values explicitly written with "billion" are multiplied by 1000.
revenue_text = tesla_revenue["Revenue"].astype(str)
in_billions = revenue_text.str.contains("billion")
revenue_values = pd.to_numeric(
    revenue_text.str.replace(r"[$,]|\s*billion", "", regex=True).str.strip()
).astype(float)
tesla_revenue["Revenue"] = revenue_values.where(~in_billions, revenue_values * 1000)

# Final format
tesla_revenue = tesla_revenue[["Date", "Revenue"]].sort_values("Date")
# A bare expression in the middle of a cell renders nothing, so display explicitly
display(tesla_revenue.tail())

def plot_stock_and_revenue(stock_df, revenue_df, title):
    """Show closing price and revenue over time in a single Plotly figure.

    The two series have unrelated units and scales, so each gets its own
    y-axis: share price on the left, revenue on the right. On a shared axis the
    smaller-valued series is flattened against zero.

    Args:
        stock_df: DataFrame with "Date" and "Close" columns.
        revenue_df: DataFrame with "Date" and "Revenue" columns.
        title: Text used as the figure title.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    fig = go.Figure()

    # Stock price on the primary (left) axis
    fig.add_trace(go.Scatter(
        x=stock_df["Date"], y=stock_df["Close"],
        mode="lines", name="Stock Price", line=dict(color="cyan")
    ))

    # Revenue on the secondary (right) axis
    fig.add_trace(go.Scatter(
        x=revenue_df["Date"], y=revenue_df["Revenue"],
        mode="lines+markers", name="Revenue", line=dict(color="orange"),
        yaxis="y2"
    ))

    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title="Stock Price (USD)",
        # Overlay the second axis on the same plot area, ticks on the right
        yaxis2=dict(
            title=dict(text="Revenue (USD Millions)"),
            overlaying="y",
            side="right"
        ),
        # Horizontal legend above the plot so it does not cover the right axis
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        template="plotly_dark"
    )

    fig.show()

# Render the Tesla dashboard
plot_stock_and_revenue(
    stock_df=tesla_stock,
    revenue_df=tesla_revenue,
    title="Tesla Stock Price and Revenue",
)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



# **Question 6: GameStop Stock and Revenue Dashboard**

In [None]:
# Install Required Libraries
!pip install yfinance plotly requests beautifulsoup4 lxml pandas --quiet

# Import Libraries
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objs as go

# Step 1: Download GameStop Stock Data
gme = yf.Ticker('GME')
gme_data = gme.history(period="max").reset_index()
# .copy() makes an independent frame so the column assignment below does not
# raise SettingWithCopyWarning
gme_stock = gme_data[["Date", "Close"]].copy()
gme_stock["Date"] = pd.to_datetime(gme_stock["Date"])

# Step 2: Scrape GameStop Revenue from StockAnalysis.com
url = "https://www.stockanalysis.com/stocks/gme/revenue/"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
# Fail loudly on an HTTP error or a hung connection instead of parsing bad HTML
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")

# Find revenue table (expected to be the first <table> on the page)
table = soup.find("table")
if table is None:
    raise ValueError(f"No <table> element found at {url}")
columns = [th.text.strip() for th in table.find_all("th")]

# Extract table rows; rows without <td> cells are skipped
data = []
for row in table.find_all("tr")[1:]:
    row_data = [cell.text.strip() for cell in row.find_all("td")]
    if row_data:
        data.append(row_data)

# Create DataFrame and give the first two columns stable names
gamestop_revenue = pd.DataFrame(data, columns=columns)
gamestop_revenue = gamestop_revenue.rename(
    columns={gamestop_revenue.columns[0]: "Date", gamestop_revenue.columns[1]: "Revenue"}
)

# Parse the fiscal-year-end strings into timestamps
gamestop_revenue["Date"] = pd.to_datetime(gamestop_revenue["Date"])

def convert_revenue(value):
    """Convert a scraped revenue cell into a float number of millions.

    Args:
        value: Raw cell content. Usually a string such as "$5.27B",
            "5,272.80M" or "1,234"; numbers and None are also accepted.

    Returns:
        The revenue as a float in millions, or None when the cell is empty,
        a missing-value marker, NaN, or not parseable as a number.
        A trailing "B" is treated as billions (multiplied by 1000), a trailing
        "M" as millions, and a bare number is returned unchanged.
    """
    if value is None:
        return None

    # Already numeric: nothing to strip. NaN is the only float unequal to itself.
    if isinstance(value, (int, float)):
        return None if value != value else float(value)

    text = str(value).replace("$", "").replace(",", "").strip()
    if text in ("", "—", "N/A", "-"):
        return None

    try:
        if text.endswith("B"):
            return float(text[:-1]) * 1000  # billions to millions
        if text.endswith("M"):
            return float(text[:-1])         # millions
        return float(text)
    except ValueError as e:
        # Only number-parsing failures are expected here; report and skip the cell
        print(f"Skipping invalid value: {text} — {e}")
        return None

# Parse the revenue strings, discard rows that could not be parsed,
# keep the two plotted columns and order them chronologically
gamestop_revenue = (
    gamestop_revenue
    .assign(Revenue=gamestop_revenue["Revenue"].apply(convert_revenue))
    .dropna(subset=["Revenue"])
    .loc[:, ["Date", "Revenue"]]
    .sort_values("Date")
)

# Step 3: Plot Dashboard
def plot_stock_and_revenue(stock_df, revenue_df, title):
    """Show closing price and revenue over time in a single Plotly figure.

    The two series have unrelated units and scales, so each gets its own
    y-axis: share price on the left, revenue on the right. On a shared axis the
    smaller-valued series is flattened against zero.

    Args:
        stock_df: DataFrame with "Date" and "Close" columns.
        revenue_df: DataFrame with "Date" and "Revenue" columns.
        title: Text used as the figure title.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    fig = go.Figure()

    # Stock price on the primary (left) axis
    fig.add_trace(go.Scatter(
        x=stock_df["Date"], y=stock_df["Close"],
        mode="lines", name="Stock Price", line=dict(color="lime")
    ))

    # Revenue on the secondary (right) axis
    fig.add_trace(go.Scatter(
        x=revenue_df["Date"], y=revenue_df["Revenue"],
        mode="lines+markers", name="Revenue", line=dict(color="orange"),
        yaxis="y2"
    ))

    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title="Stock Price (USD)",
        # Overlay the second axis on the same plot area, ticks on the right
        yaxis2=dict(
            title=dict(text="Revenue (USD Millions)"),
            overlaying="y",
            side="right"
        ),
        # Horizontal legend above the plot so it does not cover the right axis
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        template="plotly_dark"
    )

    fig.show()

# Render the GameStop dashboard
plot_stock_and_revenue(
    stock_df=gme_stock,
    revenue_df=gamestop_revenue,
    title="GameStop Stock Price and Revenue (StockAnalysis Source)",
)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Skipping invalid value: Pro — could not convert string to float: 'Pro'
Skipping invalid value: Pro — could not convert string to float: 'Pro'
Skipping invalid value: Pro — could not convert string to float: 'Pro'
Skipping invalid value: Pro — could not convert string to float: 'Pro'
Skipping invalid value: Pro — could not convert string to float: 'Pro'
