In [140]:
import sys
import os
# Make the parent of the current working directory importable so the
# `data.*`, `config.*`, `modeling.*` and `observability.*` imports in this
# cell can resolve (assumes the notebook is run from a folder one level
# below the project root - TODO confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same entry to sys.path again and again.
_project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if _project_root not in sys.path:
    sys.path.append(_project_root)

import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from datetime import datetime, timezone, timedelta
import logging
import pickle
import math
import asyncio


# Notebook-wide console logging: records at INFO and above, each line
# prefixed with its timestamp and level name. StreamHandler() without a
# stream argument writes to sys.stderr.
# Note: basicConfig() is a no-op when the root logger already has handlers,
# so edits to this cell only take effect on a fresh kernel.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.StreamHandler()],
)

%reload_ext autoreload
%autoreload 2
from data.raw.retrievers.alpaca_markets_retriever import AlpacaMarketsRetriever
from config.constants import *
from data.raw.retrievers.alpaca_markets_utils import get_daily_stats, select_portfolio
from data.processed.dataset_creation import DatasetCreator
from data.processed.indicators import *
from data.processed.targets import Balanced3ClassClassification
from data.processed.normalization import ZScoreOverWindowNormalizer, ZScoreNormalizer, MinMaxNormalizer
from data.processed.missing_values_handling import DummyMissingValuesHandler
from data.processed.dataset_pytorch import DatasetPytorch
from modeling.trainer import Trainer
from observability.mlflow_integration import log_experiment

from config.experiments.cur_experiment import config
from config.constants import *


In [2]:
from alpaca.data.timeframe import TimeFrame

# Build the Alpaca retriever and ask it for its full collection of symbols.
# `all_symbols` is the universe passed to get_daily_stats / select_portfolio
# in the cells below; the trailing expression displays how many came back.
retriever = AlpacaMarketsRetriever()
all_symbols = retriever.get_all_symbols()
len(all_symbols)

4140

In [None]:
daily_stats, skipped_symbols = await get_daily_stats(
    all_symbols, 
    date=datetime(2026, 1, 14)
)

In [132]:
portfolio = await select_portfolio(
    all_symbols, 
    start_date=datetime(2025, 11, 1), 
    end_date=datetime(2025, 12, 1), 
    portfolio_size=100, 
    criteria='E_1m'
)

2026-01-16 15:28:01,656 - INFO - Starting performance sweep for 19 days...
2026-01-16 15:28:01,657 - INFO - Processing day 2025-11-03 00:00:00
2026-01-16 15:29:25,046 - INFO - Processing day 2025-11-04 00:00:00
2026-01-16 15:30:54,781 - INFO - Processing day 2025-11-05 00:00:00
2026-01-16 15:32:13,349 - INFO - Retrying in 5 seconds...
2026-01-16 15:33:53,619 - INFO - Processing day 2025-11-06 00:00:00
2026-01-16 15:35:24,303 - INFO - Processing day 2025-11-07 00:00:00
2026-01-16 15:37:07,411 - INFO - Processing day 2025-11-10 00:00:00
2026-01-16 15:38:52,400 - INFO - Processing day 2025-11-12 00:00:00
2026-01-16 15:40:31,407 - INFO - Processing day 2025-11-13 00:00:00
2026-01-16 15:42:25,036 - INFO - Processing day 2025-11-14 00:00:00
2026-01-16 15:44:27,611 - INFO - Processing day 2025-11-17 00:00:00
2026-01-16 15:46:02,978 - INFO - Processing day 2025-11-18 00:00:00
2026-01-16 15:47:46,000 - INFO - Processing day 2025-11-19 00:00:00
2026-01-16 15:49:17,063 - INFO - Processing day 202

In [133]:
# Display the selection as returned by select_portfolio: a list of
# (symbol, value) tuples, shown here in descending order of value.
portfolio

[('SPY', 8.18363474204872),
 ('QQQ', 7.934694240224004),
 ('NVDA', 6.5201487856798295),
 ('IWM', 4.244290950704236),
 ('DIA', 3.7565078971540817),
 ('XLK', 3.655747243793037),
 ('TSLA', 3.4107324019505496),
 ('TQQQ', 3.1739125625259996),
 ('SOXL', 2.878788064575373),
 ('IVV', 2.8403873541685067),
 ('VOO', 2.653083892765402),
 ('AMZN', 2.5592582700926343),
 ('XLY', 2.5137933693414296),
 ('QQQM', 2.439419113800385),
 ('GOOGL', 2.2883690904255767),
 ('RSP', 2.2559186626498224),
 ('AAPL', 2.2474007358011363),
 ('IBIT', 2.1233273635752914),
 ('PLTR', 2.12250678075985),
 ('UPRO', 2.0562202612075686),
 ('XLI', 2.0187208040288454),
 ('XLV', 2.002494924855493),
 ('GDX', 1.9897654481542872),
 ('FBTC', 1.949414144843745),
 ('GOOG', 1.922922361895664),
 ('RGTI', 1.8680747202060526),
 ('XBI', 1.8250465738320822),
 ('SPXL', 1.8046329155386769),
 ('IWD', 1.7873919811514165),
 ('CIFR', 1.756881460374934),
 ('MSFT', 1.7524127617728187),
 ('SOFI', 1.7137868562368723),
 ('TNA', 1.707006528183861),
 ('VTI

In [138]:
# Keep only the ticker from each (ticker, value) pair, preserving the
# portfolio's ranking order, then print the tickers alphabetically.
symbols = [ticker for ticker, _value in portfolio]
print(sorted(symbols))

['AAPL', 'ACWI', 'AMD', 'AMZN', 'APLD', 'AVGO', 'BAC', 'BITB', 'BITU', 'BMY', 'BOIL', 'C', 'CIFR', 'CLSK', 'CSCO', 'DIA', 'DKNG', 'ETHA', 'EWY', 'FBTC', 'GBTC', 'GDX', 'GLD', 'GOOG', 'GOOGL', 'HOOD', 'IBIT', 'IGV', 'IJR', 'INTC', 'IONQ', 'IREN', 'ITOT', 'IVV', 'IVW', 'IWD', 'IWM', 'IYR', 'KO', 'KRE', 'MARA', 'META', 'MRK', 'MRVL', 'MSFT', 'MSTR', 'MU', 'NFLX', 'NKE', 'NVDA', 'NVO', 'ORCL', 'PLTR', 'PYPL', 'QBTS', 'QLD', 'QQQ', 'QQQM', 'QUAL', 'RGTI', 'RIOT', 'RSP', 'SLV', 'SMCI', 'SMH', 'SMR', 'SOFI', 'SOXL', 'SOXX', 'SPXL', 'SPY', 'SPYG', 'SSO', 'TNA', 'TQQQ', 'TSLA', 'TSM', 'UBER', 'UPRO', 'VONG', 'VOO', 'VT', 'VTI', 'VTV', 'VTWO', 'VYM', 'WFC', 'WMT', 'WULF', 'XBI', 'XLB', 'XLC', 'XLE', 'XLI', 'XLK', 'XLU', 'XLV', 'XLY', 'XOM', 'XRT']


In [134]:
# Share of the universe's traded value captured by the selected portfolio:
# sum of price * daily_volume over the portfolio symbols, divided by the
# same sum over every symbol present in daily_stats.
# NOTE(review): `cap` / `total_cap` hold price * volume (traded value), not
# market capitalisation; the names are kept in case later cells use them.
# NOTE(review): a portfolio symbol absent from daily_stats (e.g. one that
# ended up in skipped_symbols) would raise KeyError here - confirm.
cap = sum(
    daily_stats[ticker]['price'] * daily_stats[ticker]['daily_volume']
    for ticker, _value in portfolio
)
total_cap = sum(
    daily_stats[ticker]['price'] * daily_stats[ticker]['daily_volume']
    for ticker in daily_stats
)
cap / total_cap

0.4177936957656109