In [27]:
import pandas as pd
import os
import yfinance as yf
from datetime import datetime, timedelta
import plotly
import plotly.graph_objects as go
import plotly.express as px
import sys

In [18]:
def get_combined_sp_tickers():
    """Fetch stock tickers from S&P indices and return as a combined list"""
    print("Fetching ticker lists...")
    try:
        # Fetch tickers from Wikipedia
        sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()
        sp400_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_400_companies')[0]['Symbol'].tolist()
        sp600_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_600_companies')[0]['Symbol'].tolist()
        
        # Clean tickers: remove any that contain dots
        all_tickers = [ticker for ticker in sp400_tickers + sp600_tickers + sp500_tickers
                       if '.' not in ticker and len(ticker) > 0]
        
        # Remove duplicates
        all_tickers = list(set(all_tickers))
        print(f"Found {len(all_tickers)} unique tickers")
        
        return all_tickers
    except Exception as e:
        print(f"Error fetching tickers: {e}")
        return []


In [19]:
def get_stock_data(ticker, data_dir=None):
    """
    Get historical stock price data for a given ticker and save to a CSV file.
    
    Args:
        ticker (str): Stock ticker symbol
        data_dir (str, optional): Directory to save data. Defaults to script directory.
        
    Returns:
        str: Path to the saved CSV file
    """
    if data_dir is None:
        data_dir = os.path.dirname(os.path.abspath(__file__))
    
    # Create the data directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)
    
    file_path = os.path.join(data_dir, f"{ticker}_price_data.csv")
    
    # Default start date is Jan 1, 2000
    start_date = "2000-01-01"
    end_date = datetime.now().strftime("%Y-%m-%d")
    
    # Check if file exists and get the last date
    if os.path.exists(file_path):
        try:
            existing_data = pd.read_csv(file_path)
            if not existing_data.empty:
                # Get the last date and add one day to avoid duplication
                last_date = pd.to_datetime(existing_data['Date'], format='%d/%m/%Y').max()
                start_date = (last_date + timedelta(days=1)).strftime("%Y-%m-%d")
                
                # If the last date is today or in the future, no need to update
                if start_date > end_date:
                    print(f"Data for {ticker} is already up to date.")
                    return file_path
                
                print(f"Updating {ticker} data from {start_date} to {end_date}")
            else:
                print(f"Existing file for {ticker} is empty. Fetching all data.")
        except Exception as e:
            print(f"Error reading existing file for {ticker}: {e}")
            print(f"Fetching all data for {ticker}")
    else:
        print(f"Fetching historical data for {ticker} from {start_date} to {end_date}")
    
    # Fetch data from Yahoo Finance
    try:
        stock = yf.Ticker(ticker)
        hist_data = stock.history(start=start_date, end=end_date)
        
        if hist_data.empty:
            print(f"No data available for {ticker} in the specified date range.")
            return file_path
        
        # Get the stock name
        try:
            stock_name = stock.info.get('shortName', ticker)
        except:
            stock_name = ticker
        
        # Process the data
        df = pd.DataFrame({
            'Date': hist_data.index.strftime('%d/%m/%Y'),
            'Stock_Ticker': ticker,
            'Stock_Name': stock_name,
            'Price': hist_data['Close']
        })
        
        # Save or append to file
        if os.path.exists(file_path) and not existing_data.empty:
            df.to_csv(file_path, mode='a', header=False, index=False)
            print(f"Appended {len(df)} new rows to {file_path}")
        else:
            df.to_csv(file_path, index=False)
            print(f"Saved {len(df)} rows to {file_path}")
        
        return file_path
    
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

In [21]:
tickers = get_combined_sp_tickers()
# Export tickers to CSV using the current working directory (Jupyter notebooks don't define __file__)
data_dir = os.getcwd()
tickers_df = pd.DataFrame(tickers, columns=['Ticker'])
csv_path = os.path.join(data_dir, 'sp_tickers.csv')
tickers_df.to_csv(csv_path, index=False)

Fetching ticker lists...
Found 1502 unique tickers


In [22]:
AAPL_stock_data = get_stock_data('AAPL', data_dir=data_dir)

$AAPL: possibly delisted; no price data found  (1d 2025-03-27 -> 2025-03-27)


Updating AAPL data from 2025-03-27 to 2025-03-27
No data available for AAPL in the specified date range.


In [35]:
def visualize_stock(ticker):
    """
    Visualize stock price data for a given ticker.
    
    Args:
        ticker (str): Stock ticker symbol
        
    Returns:
        plotly.graph_objs._figure.Figure or None: Plotly figure object if successful, otherwise None.
    """
    # Get stock data file path
    stock_file = get_stock_data(ticker, data_dir=data_dir)
    if stock_file is None or not os.path.exists(stock_file):
        print(f"Could not retrieve data for {ticker}")
        return None

    # Read the stock data and ensure it is not empty
    try:
        stock_df = pd.read_csv(stock_file)
    except Exception as e:
        print(f"Error reading CSV file for {ticker}: {e}")
        return None

    if stock_df.empty:
        print(f"No data available in {stock_file}")
        return None

    # Check that expected columns exist
    required_columns = ['Date', 'Stock_Ticker', 'Stock_Name', 'Price']
    missing = [col for col in required_columns if col not in stock_df.columns]
    if missing:
        print(f"Missing columns in the data: {missing}")
        return None

    # Convert date to datetime format
    try:
        stock_df['Date'] = pd.to_datetime(stock_df['Date'], format='%d/%m/%Y')
    except Exception as e:
        print(f"Error converting Date column for {ticker}: {e}")
        return None

    # Create an interactive plot
    fig = go.Figure()

    # Add price line
    try:
        fig.add_trace(go.Scatter(
            x=stock_df['Date'],
            y=stock_df['Price'],
            mode='lines',
            name=stock_df['Stock_Ticker'].iloc[0],
            line=dict(color='rgb(67, 133, 254)', width=2)
        ))
    except Exception as e:
        print(f"Error adding trace: {e}")
        return None

    # Update layout with interactive features
    fig.update_layout(
        title=f"{stock_df['Stock_Name'].iloc[0]} ({stock_df['Stock_Ticker'].iloc[0]}) Stock Price",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode="x unified",
        xaxis=dict(
            rangeselector=dict(
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all")
                ]
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
    )

    # Try to display the figure
    try:
        fig.show()
    except Exception as e:
        print(f"Error displaying figure: {e}")
        import plotly.io as pio
        output_file = f'{ticker}_stock_chart.html'
        pio.write_html(fig, file=output_file, auto_open=True)
        print(f"Figure has been saved as '{output_file}' and should open in your browser")
    
    return fig


In [36]:
visualize_stock('AAPL')

$AAPL: possibly delisted; no price data found  (1d 2025-03-27 -> 2025-03-27)


Updating AAPL data from 2025-03-27 to 2025-03-27
No data available for AAPL in the specified date range.
Error displaying figure: Mime type rendering requires nbformat>=4.2.0 but it is not installed
Figure has been saved as 'AAPL_stock_chart.html' and should open in your browser


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed