In [1]:
pip install yfinance pandas requests beautifulsoup4 plotly

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yfinance as yf
import pandas as pd

# Function to get stock data
def get_stock_data(ticker_symbol):
    """Download the full available price history for one ticker via yfinance.

    Args:
        ticker_symbol (str): Ticker symbol passed to ``yf.Ticker``
            (e.g. 'TSLA' or 'GME').

    Returns:
        pandas.DataFrame: The frame returned by
        ``Ticker.history(period="max")`` with its index moved into a regular
        'Date' column (passed through ``pd.to_datetime``). When the history
        comes back with no rows, an empty DataFrame is returned so callers
        can simply test ``.empty``.

    Note:
        Exceptions raised by yfinance itself (for example rate limiting) are
        not caught here and propagate to the caller.
    """
    # period="max" asks for all available historical data
    price_history = yf.Ticker(ticker_symbol).history(period="max")

    # With no rows there may be no 'Date' index to promote, so looking up
    # the 'Date' column below could raise KeyError; hand back an empty frame.
    if price_history.empty:
        return pd.DataFrame()

    # Promote the index to a regular column without mutating the frame
    # that yfinance returned.
    stock_data = price_history.reset_index()

    # Make sure the 'Date' column has a proper datetime dtype
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])

    return stock_data

# --- Tesla: download the full history and preview its first rows ---
print("Getting Tesla stock data...")
tesla_stock = get_stock_data("TSLA")
# A single print emits the heading, the preview and the trailing blank lines
print("Tesla Stock Data (first 5 rows):", tesla_stock.head(), "\n", sep="\n")

# --- GameStop: same steps for the second ticker ---
print("Getting GameStop stock data...")
gamestop_stock = get_stock_data("GME")
print("GameStop Stock Data (first 5 rows):", gamestop_stock.head(), "\n", sep="\n")

Getting Tesla stock data...


YFRateLimitError: Too Many Requests. Rate limited. Try after a while.

In [None]:
from io import StringIO

import pandas as pd
import plotly.graph_objects as go
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from plotly.subplots import make_subplots

# --- (Other functions like get_stock_data and create_dashboard remain the same) ---

# Revenue scraping from Macrotrends: a private table-cleaning helper plus the public entry point
def _clean_revenue_table(raw_table):
    """Turn a scraped date/revenue table into a date-indexed 'Revenue' frame.

    Args:
        raw_table (pandas.DataFrame): Table as returned by ``pd.read_html``.
            The first column is taken as the date and the second as revenue.

    Returns:
        pandas.DataFrame: One 'Revenue' column indexed by 'Date', sorted by
        date, with unparseable rows removed. An empty DataFrame is returned
        when the table has fewer than two columns.
    """
    # Without two columns this cannot be the date/revenue table.
    if raw_table.shape[1] < 2:
        return pd.DataFrame()

    # Select by position: the scraped header names are not known in advance.
    # Casting to str first keeps bare integer years from being read as
    # epoch offsets by pd.to_datetime.
    dates = pd.to_datetime(raw_table.iloc[:, 0].astype(str), errors='coerce')

    # Strip '$' and ',' before numeric conversion; anything else becomes NaN.
    amounts = (
        raw_table.iloc[:, 1]
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )
    revenue = pd.to_numeric(amounts, errors='coerce')

    # Build a fresh frame rather than modifying a slice of the scraped table
    # in place, which is what triggers pandas' SettingWithCopy warnings.
    tidy = pd.DataFrame({'Date': dates, 'Revenue': revenue})
    return (
        tidy
        .dropna(subset=['Date', 'Revenue'])
        .sort_values('Date')
        .set_index('Date')
    )


def get_revenue_data(ticker_symbol):
    """
    Scrapes revenue data from Macrotrends for a given ticker symbol.

    The page is downloaded with ``requests`` (explicit User-Agent and timeout)
    and its tables are extracted with ``pandas.read_html``.

    Args:
        ticker_symbol (str): The stock ticker symbol. Only "TSLA" and "GME"
            are supported.

    Returns:
        pandas.DataFrame: A single 'Revenue' column indexed by 'Date' and
        sorted by date. An empty DataFrame is returned (after printing a
        message) when the ticker is unsupported, the download or HTML parsing
        fails, or the expected table is missing. An empty DataFrame is also
        returned when the table has fewer than two columns.
    """
    # Macrotrends URLs carry a company slug next to the ticker symbol.
    company_slugs = {"TSLA": "tesla", "GME": "gamestop"}
    slug = company_slugs.get(ticker_symbol)
    if slug is None:
        print(f"Revenue data scraping not implemented for {ticker_symbol}. Returning empty DataFrame.")
        return pd.DataFrame()

    url = f"https://www.macrotrends.net/stocks/charts/{ticker_symbol}/{slug}/revenue"

    try:
        # NOTE(review): Macrotrends is reported to reject requests that use a
        # default library user agent (HTTP 403) - confirm against the live site.
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
        response.raise_for_status()

        # Wrap the markup in StringIO: read_html expects a URL or file-like
        # object rather than a literal HTML string.
        tables = pd.read_html(StringIO(response.text))

        # The original notebook takes the revenue table from index 1.
        raw_table = tables[1]
    except IndexError:
        print(f"Could not find the expected revenue table (index 1) for {ticker_symbol} at {url}")
        return pd.DataFrame()
    except Exception as e:
        # Best-effort scraping: report the problem and let callers check .empty
        print(f"An error occurred while reading HTML for {ticker_symbol} from {url}: {e}")
        return pd.DataFrame()

    return _clean_revenue_table(raw_table)

# --- (Rest of your main execution code would follow here, calling this corrected function) ---

# Example of how you would integrate this:
if __name__ == "__main__":
    # One-time setup, if the packages are missing:
    #   !pip install yfinance pandas requests beautifulsoup4 plotly
    #
    # get_stock_data and create_dashboard are not defined in this cell; they
    # must already exist in the session before this block runs.

    print("--- Starting Data Extraction and Dashboard Generation ---")

    # Tesla: download prices and revenue, then plot only when both are present
    print("\nFetching Tesla data...")
    tesla_stock = get_stock_data("TSLA")
    tesla_revenue = get_revenue_data("TSLA")

    if tesla_stock.empty or tesla_revenue.empty:
        print("Skipping Tesla dashboard due to missing stock or revenue data.")
    else:
        # Move 'Date' into the index while it is still a column, then drop
        # rows that contain missing values.
        if 'Date' in tesla_stock.columns:
            tesla_stock = tesla_stock.set_index('Date')
        tesla_stock = tesla_stock.dropna()

        print("Generating Tesla Stock and Revenue Dashboard...")
        create_dashboard(tesla_stock, tesla_revenue, "Tesla")

    # GameStop: identical steps for the second company
    print("\nFetching GameStop data...")
    gamestop_stock = get_stock_data("GME")
    gamestop_revenue = get_revenue_data("GME")

    if gamestop_stock.empty or gamestop_revenue.empty:
        print("Skipping GameStop dashboard due to missing stock or revenue data.")
    else:
        if 'Date' in gamestop_stock.columns:
            gamestop_stock = gamestop_stock.set_index('Date')
        gamestop_stock = gamestop_stock.dropna()

        print("Generating GameStop Stock and Revenue Dashboard...")
        create_dashboard(gamestop_stock, gamestop_revenue, "GameStop")

    print("\n--- Dashboard Generation Complete ---")

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Build and display the two-panel price / revenue dashboard
def create_dashboard(stock_data, revenue_data, company_name):
    """Show a stacked Plotly figure: closing price on top, revenue bars below.

    Args:
        stock_data: Frame whose index supplies the x values and whose 'Close'
            column supplies the price line.
        revenue_data: Frame whose index supplies the x values and whose
            'Revenue' column supplies the bar heights.
        company_name (str): Label interpolated into the subplot titles and the
            figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    price_title = f"{company_name} Historical Share Price"
    revenue_title = f"{company_name} Historical Revenue"

    # Two stacked panels on a shared x axis; the spacing leaves a gap between them
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        subplot_titles=(price_title, revenue_title),
        vertical_spacing=0.3,
    )

    # Build both traces, then place them: price line in row 1, revenue bars in row 2
    price_line = go.Scatter(
        x=stock_data.index,
        y=stock_data['Close'],
        name="Share Price",
        line={"color": 'blue'},
    )
    revenue_bars = go.Bar(
        x=revenue_data.index,
        y=revenue_data['Revenue'],
        name="Revenue",
        marker_color='green',
    )
    for row_number, trace in enumerate((price_line, revenue_bars), start=1):
        fig.add_trace(trace, row=row_number, col=1)

    # Axis titles: both panels share the "Date" x label; y labels differ per row
    y_axis_titles = {1: "Price (USD)", 2: "Revenue (Millions USD)"}
    for row_number, y_title in y_axis_titles.items():
        fig.update_xaxes(title_text="Date", row=row_number, col=1)
        fig.update_yaxes(title_text=y_title, row=row_number, col=1)

    # Range slider, overall title, fixed height, and no legend
    fig.update_layout(
        xaxis_rangeslider_visible=True,
        title_text=f"{company_name} Stock Price vs. Revenue Dashboard",
        height=800,
        showlegend=False,
    )

    fig.show()

# --- Create Tesla Dashboard ---
print("Creating Tesla dashboard...")
if not tesla_stock.empty and not tesla_revenue.empty:
    # 'Date' may already be the index (an earlier cell moves it there, and so
    # does a previous run of this cell); only promote it while it is still a
    # column, otherwise set_index('Date') raises KeyError.
    if 'Date' in tesla_stock.columns:
        tesla_stock = tesla_stock.set_index('Date')
    tesla_stock = tesla_stock.dropna() # Remove any rows with missing values that might interfere

    create_dashboard(tesla_stock, tesla_revenue, "Tesla")
else:
    print("Cannot create Tesla dashboard: Missing stock or revenue data.")
print("\n")

# --- Create GameStop Dashboard ---
print("Creating GameStop dashboard...")
if not gamestop_stock.empty and not gamestop_revenue.empty:
    # Same guard as above so the cell can be re-run safely
    if 'Date' in gamestop_stock.columns:
        gamestop_stock = gamestop_stock.set_index('Date')
    gamestop_stock = gamestop_stock.dropna() # Remove any rows with missing values

    create_dashboard(gamestop_stock, gamestop_revenue, "GameStop")
else:
    print("Cannot create GameStop dashboard: Missing stock or revenue data.")
print("\n")