In [3]:
import os
import pandas as pd
import nasdaqdatalink as ndl
from dotenv import load_dotenv
import tkinter as tk
from tkinter import ttk, messagebox
import datetime
import schedule

In [4]:
env_path = '/Users/paigeblackstone/Desktop/Portfolio29/Portfolio29/env/nasdaq.env'
load_dotenv(env_path)

True

In [6]:
api_key = os.getenv('NASDAQ_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please check your nasdaq.env file.")

In [4]:
import requests
from bs4 import BeautifulSoup
import os

def get_tickers_from_wikipedia(url):
    """Fetch all tickers from a Wikipedia page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    tickers = []

    # Find all tables on the page
    tables = soup.find_all('table', {'class': 'wikitable'})
    for table in tables:
        # Process each row in the table
        for row in table.find_all('tr')[1:]:  # Skip the header row
            cells = row.find_all('td')
            if len(cells) > 0:
                # Check if the cell content might be a ticker symbol
                for cell in cells:
                    ticker = cell.get_text(strip=True)
                    if ticker and ticker.isupper() and len(ticker) <= 5:
                        tickers.append(ticker)
    
    # Remove duplicates and sort tickers
    tickers = sorted(set(tickers))
    return tickers

def print_and_store_tickers_from_indices():
    """Print tickers from multiple indices and store them in environment variables."""
    indices = {
        "S&P_500": "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        "DJIA": "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average",
        "NASDAQ_100": "https://en.wikipedia.org/wiki/NASDAQ-100",
        "Russell_2000": "https://en.wikipedia.org/wiki/Russell_2000_Index",
        "FTSE_100": "https://en.wikipedia.org/wiki/FTSE_100_Index",
        "DAX_30": "https://en.wikipedia.org/wiki/DAX",
        "CAC_40": "https://en.wikipedia.org/wiki/CAC_40",
        "IBEX_35": "https://en.wikipedia.org/wiki/IBEX_35",
        "NIKKEI_225": "https://en.wikipedia.org/wiki/Nikkei_225",
        "Hang_Seng_Index": "https://en.wikipedia.org/wiki/Hang_Seng_Index",
        "MSCI_World_Index": "https://en.wikipedia.org/wiki/MSCI_World_Index",
        "MSCI_Emerging_Markets_Index": "https://en.wikipedia.org/wiki/MSCI_Emerging_Markets_Index",
        "Wilshire_5000": "https://en.wikipedia.org/wiki/Wilshire_5000"
    }

    for index_name, url in indices.items():
        print(f"Fetching tickers for {index_name}...")
        tickers = get_tickers_from_wikipedia(url)
        print(f"Tickers for {index_name}:")
        print(tickers)  # Print all tickers

        # Convert the list of tickers to a comma-separated string
        tickers_string = ','.join(tickers)

        # Set the environment variable (note: this will only affect the current process)
        os.environ[f'TICKERS_{index_name}'] = tickers_string
        print(f"Environment variable TICKERS_{index_name} set.")
        print()

# Example usage
if __name__ == "__main__":
    print_and_store_tickers_from_indices()


Fetching tickers for S&P_500...
Tickers for S&P_500:
['3M', 'A', 'AA', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABK', 'ABMD', 'ABNB', 'ABS', 'ABT', 'ACAS', 'ACE', 'ACGL', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADS', 'ADSK', 'ADT', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AJG', 'AKAM', 'AKS', 'ALB', 'ALGN', 'ALK', 'ALL', 'ALLE', 'ALTR', 'ALXN', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMG', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AN', 'ANDV', 'ANET', 'ANF', 'ANR', 'ANSS', 'AON', 'AOS', 'APA', 'APC', 'APD', 'APH', 'APOL', 'APTV', 'ARE', 'ARG', 'ARNC', 'AT&T', 'ATI', 'ATO', 'ATVI', 'AV', 'AVB', 'AVGO', 'AVP', 'AVY', 'AWK', 'AXON', 'AXP', 'AYE', 'AYI', 'AZO', 'BA', 'BAC', 'BALL', 'BAX', 'BBBY', 'BBWI', 'BBY', 'BC', 'BCR', 'BDX', 'BEAM', 'BEN', 'BF.B', 'BG', 'BHF', 'BIG', 'BIIB', 'BIO', 'BJS', 'BK', 'BKNG', 'BKR', 'BLDR', 'BLK', 'BMC', 'BMS', 'BMY', 'BR', 'BRCM', 'BRK.B', 'BRO', 'BS', 'BSX', 'BTU', 'BWA', 'BX', 'BXLT', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM', 'CARR', 'CAT', 'CB', 'CBE', 'CBOE', 'CBRE

In [7]:
# Function to fetch dataset list
def fetch_datasets():
    # For demonstration, we'll list some datasets; update based on your needs
    try:
        datasets = ndl.get('WIKI/AAPL', api_key=api_key)
        return datasets.head()  # Preview the first few rows of the dataset
    except Exception as e:
        messagebox.showerror("Error", str(e))
        return pd.DataFrame()

# Test fetching datasets
datasets_preview = fetch_datasets()
print(datasets_preview)

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1980-12-12  28.75  28.87  28.75  28.75  2093900.0          0.0          1.0   
1980-12-15  27.38  27.38  27.25  27.25   785200.0          0.0          1.0   
1980-12-16  25.37  25.37  25.25  25.25   472000.0          0.0          1.0   
1980-12-17  25.87  26.00  25.87  25.87   385900.0          0.0          1.0   
1980-12-18  26.63  26.75  26.63  26.63   327900.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1980-12-12   0.422706   0.424470  0.422706    0.422706  117258400.0  
1980-12-15   0.402563   0.402563  0.400652    0.400652   43971200.0  
1980-12-16   0.373010   0.373010  0.371246    0.371246   26432000.0  
1980-12-17   0.380362   0.382273  0.380362    0.380362   21610400.0  
1980-12-18   0.391536   0.

In [6]:
import os
import yfinance as yf
import pandas as pd

# Retrieve the list of tickers from the environment variable
tickers_string = os.environ.get('TICKERS_S&P_500', '')
tickers = tickers_string.split(',')

def fetch_trade_volume(tickers):
    """Fetch trade volume for a list of tickers."""
    data = {}
    for ticker in tickers:
        try:
            stock = yf.Ticker(ticker)
            # Fetch historical data
            hist = stock.history(period='1d')
            # Use the latest volume data
            volume = hist['Volume'].iloc[-1] if not hist.empty else None
            data[ticker] = volume
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")
            data[ticker] = None
    return data

def get_top_tickers_by_volume(volume_data, top_n=5):
    """Get the top N tickers by trade volume."""
    # Convert the volume data to a DataFrame
    df = pd.DataFrame(list(volume_data.items()), columns=['Ticker', 'Volume'])
    # Drop rows where volume is None
    df = df.dropna(subset=['Volume'])
    # Sort by volume in descending order and get the top N
    top_tickers = df.sort_values(by='Volume', ascending=False).head(top_n)
    return top_tickers

def main():
    print("Fetching trade volume data...")
    volume_data = fetch_trade_volume(tickers)
    print("Identifying top tickers...")
    top_tickers = get_top_tickers_by_volume(volume_data)
    print("Top 5 tickers by trade volume:")
    print(top_tickers)

if __name__ == "__main__":
    main()


Fetching trade volume data...


3M: No data found, symbol may be delisted
ABK: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
$ABMD: possibly delisted; No price data found  (period=1d)


$ABMD: possibly delisted; No price data found  (period=1d)


ABS: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
ACAS: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
ACE: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
ADS: No data found, symbol may be delisted
AET: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
AGN: No data found, symbol may be delisted
AKS: No data found, symbol may be delisted
ALXN: No data found, symbol may be delisted
ANDV: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
ANR: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
APC: No data found, symbol may be delisted
APOL: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
ARG: Period '1d' is invalid, must be one of ['

$BMS: possibly delisted; No price data found  (period=1d)


BRCM: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
BRK.B: No data found, symbol may be delisted
BS: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
BXLT: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
CA: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
CAM: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
CBE: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
CCE: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
CDAY: No data found, symbol may be delisted
CELG: No data found, symbol may be delisted
CEPH: Period '1d' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
CERN: No data found, symbo

In [9]:
def download_dataset(symbol, start_date, end_date):
    try:
        df = ndl.get(f'WIKI/{symbol}', start_date=start_date, end_date=end_date, api_key=api_key)
        filename = f'data_{symbol}_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        df.to_csv(filename, index=False)
        messagebox.showinfo("Success", f"Data for {symbol} saved to {filename}")
    except Exception as e:
        messagebox.showerror("Error", f"Error fetching data for {symbol}: {str(e)}")

class App:
    def __init__(self, root):
        self.root = root
        self.root.title("NASDAQ Data Browser")

        # Ticker entry
        ttk.Label(root, text="Dataset Tickers (comma-separated):").grid(column=0, row=0, padx=10, pady=5)
        self.ticker_entry = ttk.Entry(root, width=50)
        self.ticker_entry.grid(column=1, row=0, padx=10, pady=5)

        # Start date entry
        ttk.Label(root, text="Start Date (YYYY-MM-DD):").grid(column=0, row=1, padx=10, pady=5)
        self.start_date_entry = ttk.Entry(root)
        self.start_date_entry.grid(column=1, row=1, padx=10, pady=5)

        # End date entry
        ttk.Label(root, text="End Date (YYYY-MM-DD):").grid(column=0, row=2, padx=10, pady=5)
        self.end_date_entry = ttk.Entry(root)
        self.end_date_entry.grid(column=1, row=2, padx=10, pady=5)

        # Download button
        self.download_button = ttk.Button(root, text="Download Data", command=self.download)
        self.download_button.grid(column=1, row=3, padx=10, pady=10)

    def download(self):
        tickers = [ticker.strip() for ticker in self.ticker_entry.get().split(',')]
        start_date = self.start_date_entry.get()
        end_date = self.end_date_entry.get()
        for ticker in tickers:
            download_dataset(ticker, start_date, end_date)



In [11]:
def job():
    tickers = ['AAPL', 'GOOGL', 'MSFT']  # Replace with your list of tickers
    start_date = '2023-01-01'
    end_date = '2024-01-01'
    for ticker in tickers:
        download_dataset(ticker, start_date, end_date)
