In [1]:
pip install requests pandas numpy matplotlib python-dotenv --user


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
# ========================
# 📦 PART 1: DATA PREPARATION (NASDAQ COMPANIES)
# ========================

import os
import time
import requests
import pandas as pd
from dotenv import load_dotenv
import os

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# CONFIG
# The API key is read from the environment / .env file so it never lives in the notebook.
API_KEY = os.getenv('API_KEY')
if not API_KEY:
    # Fail here with a clear message instead of sending "apikey=None" to the API later on.
    raise RuntimeError("API_KEY is not set; define it in the environment or in a .env file.")

# HOME is not defined on every platform (e.g. Windows); fall back to the user's home directory
# so os.path.join() below never receives None.
home = os.getenv("HOME") or os.path.expanduser("~")
DATA_DIR = "historical_data"
# Per-symbol price history CSVs are stored under <home>/test-rwx/historical_data.
DATA_PATH = os.path.join(home, "test-rwx", DATA_DIR)
os.makedirs(DATA_PATH, exist_ok=True)




# Fetch all NASDAQ companies
def fetch_nasdaq_companies(api_key):
    """Download the NASDAQ constituent list and cache it as a CSV file.

    The list is requested from the Financial Modeling Prep ``nasdaq_constituent``
    endpoint and written to ``<home>/test-rwx/nasdaq_companies.csv``.

    Args:
        api_key: API key sent as the ``apikey`` query parameter.

    Returns:
        pandas.DataFrame built from the JSON records returned by the API.

    Raises:
        requests.HTTPError: if the API answers with an error status. Note that the
            error message contains the request URL, including the API key.
        requests.RequestException: on connection problems or when the timeout expires.
        ValueError: if the payload is not a non-empty JSON list of records.
    """
    url = "https://financialmodelingprep.com/api/v3/nasdaq_constituent"
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get(url, params={"apikey": api_key}, timeout=30)
    response.raise_for_status()
    companies = response.json()
    # Refuse to cache anything that is not a usable list of records; otherwise a broken
    # CSV would be written and only fail later when the 'symbol' column is read.
    if not isinstance(companies, list) or not companies:
        raise ValueError(
            f"Unexpected NASDAQ constituents payload: expected a non-empty list, "
            f"got {type(companies).__name__}"
        )
    df = pd.DataFrame(companies)
    output_path = os.path.join(home, "test-rwx", "nasdaq_companies.csv")
    df.to_csv(output_path, index=False)
    return df

# Download historical price data for a company
def download_historical_data(symbol, api_key):
    """Download closing-price history for one symbol and save it as a CSV file.

    Requests up to 1000 data points of the "line" series from the Financial Modeling
    Prep ``historical-price-full`` endpoint, sorts them by date (oldest first) and
    writes them to ``<DATA_PATH>/<symbol>.csv``.

    Args:
        symbol: Ticker symbol to download.
        api_key: API key sent as the ``apikey`` query parameter.

    Returns:
        The sorted pandas.DataFrame, or None when the request fails, the status is
        not 200, the body is not JSON, or no historical rows are returned. No file
        is written in those cases.
    """
    url = f"https://financialmodelingprep.com/api/v3/historical-price-full/{symbol}"
    params = {"serietype": "line", "timeseries": 1000, "apikey": api_key}
    try:
        # A timeout keeps one stalled request from blocking the whole download loop.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message embeds the URL and API key.
        print(f"[WARN] {symbol}: request failed ({type(exc).__name__})")
        return None
    if response.status_code != 200:
        return None
    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON.
        return None

    historical = data.get("historical") if isinstance(data, dict) else None
    if not historical:
        # Missing or empty history: an empty frame has no 'date' column to parse.
        return None

    df = pd.DataFrame(historical)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')
    df.to_csv(os.path.join(DATA_PATH, f"{symbol}.csv"), index=False)
    return df

# Build the local data set: fetch the constituent list, then download every
# symbol whose CSV is not already present in DATA_PATH.
nasdaq_df = fetch_nasdaq_companies(API_KEY)
symbols = nasdaq_df['symbol'].tolist()

for symbol in symbols:
    cached_csv = f"{DATA_PATH}/{symbol}.csv"
    if os.path.exists(cached_csv):
        # Already downloaded in an earlier run; nothing to do (and no pause needed).
        continue
    print(f"Downloading: {symbol}")
    download_historical_data(symbol, API_KEY)
    # Pause between requests to avoid hitting API rate limits.
    time.sleep(1)

Downloading: ADBE
Downloading: AMAT
Downloading: CSCO
Downloading: FAST
Downloading: MSFT
Downloading: PAYX
Downloading: QCOM
Downloading: AXON
Downloading: MSTR
Downloading: PLTR
Downloading: APP
Downloading: ARM
Downloading: LIN
Downloading: CCEP
Downloading: DASH
Downloading: MDB
Downloading: ROP
Downloading: TTD
Downloading: ON
Downloading: GEHC
Downloading: BKR
Downloading: FANG
Downloading: GFS
Downloading: WBD
Downloading: AZN
Downloading: CEG
Downloading: ODFL
Downloading: TEAM
Downloading: ABNB
Downloading: FTNT
Downloading: PANW
Downloading: ZS
Downloading: DDOG
Downloading: CRWD
Downloading: HON
Downloading: AEP
Downloading: NFLX
Downloading: KDP
Downloading: PDD
Downloading: DXCM
Downloading: ANSS
Downloading: CDW
Downloading: CPRT
Downloading: CSGP
Downloading: EXC
Downloading: AMD
Downloading: LULU
Downloading: XEL
Downloading: PEP
Downloading: ASML
Downloading: SNPS
Downloading: TTWO
Downloading: WDAY
Downloading: MELI
Downloading: IDXX
Downloading: CSX
Downloading: TMUS

In [None]:
# ========================
# 📊 PART 2: SIMULATION & VISUALIZATION
# ========================

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
# HOME is not defined on every platform (e.g. Windows); fall back to the user's home directory
# so os.path.join() below never receives None.
home = os.getenv("HOME") or os.path.expanduser("~")
DATA_PATH = os.path.join(home, "test-rwx", "historical_data")
PLOTS_DIR = os.path.join(home, "test-rwx", "plots")
CSV_OUTPUT = os.path.join(home, "test-rwx", "nasdaq_simulation_summary.csv")

# Ensure directories exist
os.makedirs(PLOTS_DIR, exist_ok=True)
os.makedirs(DATA_PATH, exist_ok=True)

# CONFIG
N_SIMULATIONS = 10000
N_DAYS = 252  # ~1 trading year
# Fixed seed for NumPy's global generator so a full re-run reproduces the same
# simulated paths, summary CSV and plots.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load list of symbols (written by the data-preparation step)
nasdaq_df = pd.read_csv(os.path.join(home, "test-rwx", "nasdaq_companies.csv"))
symbols = nasdaq_df['symbol'].tolist()

# Run Monte Carlo simulation for a symbol
def run_simulation(symbol, n_days=None, n_simulations=None, data_path=None):
    """Simulate future price paths for one symbol with i.i.d. normal daily returns.

    The mean and standard deviation of the historical daily percentage changes of
    the ``close`` column are estimated from ``<data_path>/<symbol>.csv``. Each
    simulated day multiplies the previous price by ``1 + N(mu, sigma)``, starting
    from the most recent close.

    Args:
        symbol: Ticker symbol; selects the CSV file to read.
        n_days: Number of simulated days. Defaults to the module-level ``N_DAYS``.
        n_simulations: Number of independent paths. Defaults to ``N_SIMULATIONS``.
        data_path: Directory holding the CSV files. Defaults to ``DATA_PATH``.

    Returns:
        numpy.ndarray of shape ``(n_days, n_simulations)`` where column ``i`` is
        path ``i``, or None if the data could not be read or is too short (the
        error is printed, not raised).
    """
    n_days = N_DAYS if n_days is None else n_days
    n_simulations = N_SIMULATIONS if n_simulations is None else n_simulations
    data_path = DATA_PATH if data_path is None else data_path
    try:
        df = pd.read_csv(os.path.join(data_path, f"{symbol}.csv"), parse_dates=["date"])
        # pct_change() is only meaningful on chronologically ordered rows.
        prices = df.sort_values("date")["close"]
        returns = prices.pct_change().dropna()
        if len(returns) < 2:
            # With fewer than two returns the sample std is NaN and every path would be NaN.
            raise ValueError("need at least 3 closing prices to estimate mean and volatility")
        mu = returns.mean()
        sigma = returns.std()
        if not (np.isfinite(mu) and np.isfinite(sigma)):
            raise ValueError("historical returns are not finite")
        last_price = prices.iloc[-1]

        if n_days <= 0 or n_simulations <= 0:
            return np.zeros((max(n_days, 0), max(n_simulations, 0)))

        # Draw all daily growth factors at once. The (n_simulations, n_days) layout makes
        # the row-major draw order match "for each path, for each day".
        growth = 1.0 + np.random.normal(mu, sigma, size=(n_simulations, n_days))
        # Fold the starting price into day 1 so the running product yields prices directly.
        growth[:, 0] *= last_price
        paths = np.cumprod(growth, axis=1)
        # Callers index the result as [day, simulation].
        return np.ascontiguousarray(paths.T)
    except Exception as e:
        # Best effort: report the problem and let the caller skip this symbol.
        print(f"[ERROR] {symbol}: {e}")
        return None

# Plot final price distribution
def plot_distribution(symbol, ending_prices):
    """Save a 50-bin histogram of the simulated final prices for ``symbol``.

    The figure is written to ``<PLOTS_DIR>/<symbol>_distribution.png`` and then
    closed so figures do not accumulate when this is called in a loop.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(ending_prices, bins=50, alpha=0.75, color='skyblue', edgecolor='black')
    ax.set_title(f"{symbol} - Final Price Distribution (1 Year)")
    ax.set_xlabel("Price")
    ax.set_ylabel("Frequency")
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(f"{PLOTS_DIR}/{symbol}_distribution.png")
    plt.close(fig)

# Plot all simulation paths for a symbol
def plot_simulation_paths(symbol, simulations):
    """Save a line chart of the first simulated price paths for ``symbol``.

    ``simulations`` is indexed as ``[day, path]``; at most the first 100 columns
    are drawn. The figure is written to ``<PLOTS_DIR>/<symbol>_paths.png`` and
    then closed.
    """
    max_paths = min(100, simulations.shape[1])  # Limit to 100 paths
    fig, ax = plt.subplots(figsize=(12, 6))
    # Transposing the column slice lets us iterate path by path.
    for path in simulations[:, :max_paths].T:
        ax.plot(path, linewidth=0.5, alpha=0.6)
    ax.set_title(f"{symbol} - Monte Carlo Simulation Paths (1 Year)")
    ax.set_xlabel("Trading Days")
    ax.set_ylabel("Price")
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(f"{PLOTS_DIR}/{symbol}_paths.png")
    plt.close(fig)

# Start timer
start_time = datetime.now()

# Run all simulations & collect results
results = []            # one summary dict per successfully simulated symbol
all_final_prices = []   # final-day prices of every path, one array per symbol
all_simulations = []    # full [day, path] arrays, one per symbol

for symbol in symbols:
    print(f"Simulating: {symbol}")
    sims = run_simulation(symbol)
    if sims is None:
        # run_simulation already reported the error; skip this symbol.
        continue

    ending_prices = sims[-1, :]
    # The simulated array starts one step *after* today: sims[0, 0] is already the price
    # after the first random daily move of path 0. The true starting price is the most
    # recent observed close, so read it from the symbol's history file (which
    # run_simulation has just read successfully).
    history = pd.read_csv(f"{DATA_PATH}/{symbol}.csv", parse_dates=["date"])
    initial_price = float(history.sort_values("date")["close"].iloc[-1])
    expected_price = np.mean(ending_prices)
    expected_return = (expected_price - initial_price) / initial_price
    std_dev = np.std(ending_prices)

    results.append({
        "symbol": symbol,
        "initial_price": initial_price,
        "expected_price": expected_price,
        "expected_return_1yr": expected_return,
        "std_dev_1yr": std_dev
    })

    all_final_prices.append(ending_prices)
    all_simulations.append(sims)

    # Generate plots
    plot_distribution(symbol, ending_prices)
    plot_simulation_paths(symbol, sims)

# Save results to CSV (write once at end)
# Naming the columns up front keeps the sort and the CSV header valid even when every
# simulation failed and `results` is empty (sorting a column-less frame raises KeyError).
RESULT_COLUMNS = [
    "symbol",
    "initial_price",
    "expected_price",
    "expected_return_1yr",
    "std_dev_1yr",
]
results_df = (
    pd.DataFrame(results, columns=RESULT_COLUMNS)
    .sort_values(by="expected_return_1yr", ascending=False)  # best expected return first
)
results_df.to_csv(CSV_OUTPUT, index=False)

# Plot and save portfolio distribution
if all_final_prices:
    # savefig() does not create missing folders, and only PLOTS_DIR itself is created
    # during setup, so make sure the "composite" sub-folder exists first.
    composite_dir = os.path.join(PLOTS_DIR, "composite")
    os.makedirs(composite_dir, exist_ok=True)

    # Element-wise average across symbols: entry i is the mean final price of path i.
    portfolio_distribution = np.mean(all_final_prices, axis=0)
    plt.figure(figsize=(12, 6))
    plt.hist(portfolio_distribution, bins=60, color='lightgreen', edgecolor='black', alpha=0.8)
    plt.title("Equal-Weighted NASDAQ Portfolio – Final Price Distribution (1 Year)")
    plt.xlabel("Portfolio Value (Average of All Stocks)")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(composite_dir, "nasdaq_portfolio_distribution.png"))
    plt.close()

# Plot aggregate index as average of all simulations (simulated index)
if all_simulations:
    # savefig() does not create missing folders, and only PLOTS_DIR itself is created
    # during setup, so make sure the "composite" sub-folder exists first.
    composite_dir = os.path.join(PLOTS_DIR, "composite")
    os.makedirs(composite_dir, exist_ok=True)

    # Average the per-symbol [day, path] arrays across symbols.
    aggregate_index = np.mean(all_simulations, axis=0)  # shape: (252, N_SIMULATIONS)
    plt.figure(figsize=(12, 6))
    for i in range(min(100, aggregate_index.shape[1])):  # draw at most 100 paths
        plt.plot(aggregate_index[:, i], linewidth=0.5, alpha=0.6)
    plt.title("Simulated NASDAQ Index (Equal-Weighted, 1 Year)")
    plt.xlabel("Trading Days")
    plt.ylabel("Index Value")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(composite_dir, "nasdaq_simulated_index_paths.png"))
    plt.close()

# Stop the clock and report how long the whole simulation run took.
end_time = datetime.now()
duration = end_time - start_time
completion_message = (
    f"\n✅ All simulations complete in {duration}. "
    f"Check CSV and plots folder at {PLOTS_DIR}."
)
print(completion_message)


Simulating: ADBE
Simulating: AMAT
Simulating: CSCO
Simulating: FAST
Simulating: MSFT
Simulating: PAYX
Simulating: QCOM
Simulating: AXON
Simulating: MSTR
Simulating: PLTR
Simulating: APP
Simulating: ARM
Simulating: LIN
Simulating: CCEP
Simulating: DASH
Simulating: MDB
Simulating: ROP
Simulating: TTD
Simulating: ON
Simulating: GEHC
Simulating: BKR
Simulating: FANG
Simulating: GFS
Simulating: WBD
Simulating: AZN
Simulating: CEG
Simulating: ODFL
Simulating: TEAM
Simulating: ABNB
Simulating: FTNT
Simulating: PANW
Simulating: ZS
Simulating: DDOG
Simulating: CRWD
Simulating: HON
Simulating: AEP
Simulating: NFLX
Simulating: KDP
Simulating: PDD
Simulating: DXCM
Simulating: ANSS
Simulating: CDW
Simulating: CPRT
Simulating: CSGP
Simulating: EXC
Simulating: AMD
Simulating: LULU
Simulating: XEL
Simulating: PEP
Simulating: ASML
Simulating: SNPS
Simulating: TTWO
Simulating: WDAY
Simulating: MELI
Simulating: IDXX
Simulating: CSX
Simulating: TMUS
Simulating: PYPL
Simulating: KHC
Simulating: GOOG
Simula