In [2]:
# --- SETUP & ENGINE IGNITION ---
import sys 
import os 
import pandas as pd 
import numpy as np 
import datetime

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))

from metrics_engine import FinancialMetrics
from genetic_engine import GeneticOptimizer 

# 1. Run configuration
# Price history is requested from 2015 onward -- the "Modern Veteran" window,
# chosen so the sample captures the AI shift.
LOOKBACK_START = '2015-01-01'
RISK_FREE_RATE = 0.045  # 4.5%, approximating the current US 10Y yield

# 2. Manual overrides: tickers on this blocklist are kept out of every
#    candidate pool built later in the notebook.
BLOCKLIST = ['EQIX', 'CSGP']

# 3. Metrics engine: later cells call engine.load_prices(...) and pass
#    engine.engine to pd.read_sql.
engine = FinancialMetrics()

print(">>> Engine Initialized.")
print(f"Targeting Regime: {LOOKBACK_START} to Present")
print(f"Blocklisted Assets: {BLOCKLIST}")

>>> Engine Initialized.
Targeting Regime: 2015-01-01 to Present
Blocklisted Assets: ['EQIX', 'CSGP']


In [None]:
# --- CELL 2: LOAD & FILTER UNIVERSE ---
# Builds the two price frames consumed by the optimizers:
#   df_stocks -- tickers with enough history that are not Real Estate
#   df_reits  -- tickers with enough history whose sector is 'Real Estate'
# Both exclude the broad ETFs listed below and everything in BLOCKLIST.
print(">>> Loading fresh market data...")

# 1. Load Data
full_data = engine.load_prices(start_date=LOOKBACK_START)

# Promote a 'date' column to the index when the loader returns one.
# Rebinding (rather than inplace=True) keeps the step chain-friendly and
# avoids an in-place structural change to the loaded frame.
if 'date' in full_data.columns:
    full_data = full_data.set_index('date')

full_data.index = pd.to_datetime(full_data.index)

# Remove duplicate columns (duplicated() flags every repeat after the first,
# so the first occurrence of each ticker is the one that survives).
full_data = full_data.loc[:, ~full_data.columns.duplicated()]

print(f"   -> Data Dimensions: {full_data.shape} (Rows, Tickers)")
print(f"   -> Date Range: {full_data.index.min().date()} to {full_data.index.max().date()}")

# 2. Filter for "Tradeable History"
# MIN_HISTORY is the LATEST acceptable first-quote date: a ticker qualifies
# only if its first valid price is on or before this timestamp.
MIN_HISTORY = pd.Timestamp('2018-01-01')

# First non-null date per ticker; tickers with no valid price become NaT
# after coercion and are dropped.
valid_starts = full_data.apply(pd.Series.first_valid_index)
valid_starts = pd.to_datetime(valid_starts, errors='coerce').dropna()

tradeable_assets = valid_starts[valid_starts <= MIN_HISTORY].index.tolist()

# 3. Segment into Stocks vs. REITs
meta = pd.read_sql("SELECT ticker, sector FROM assets_metadata", engine.engine)
# Drop duplicates in metadata too just in case
meta = meta.drop_duplicates(subset=['ticker'])

all_reits = meta.loc[meta['sector'] == 'Real Estate', 'ticker'].tolist()
exclude_etfs = ['VOO', 'VEA', 'VWO', 'VNQ', 'VNQI', 'VGK', 'VPL', 'VT', 'cetes']

# 4. Create Final Pools
# Set lookups replace repeated list scans; the public list names above are
# kept as lists for any later cell that reads them.
reit_lookup = set(all_reits)
excluded_lookup = set(exclude_etfs) | set(BLOCKLIST)

# Order of tradeable_assets is preserved in both pools. A ticker missing from
# the metadata table is not in reit_lookup and therefore lands in the stock pool.
eligible_assets = [t for t in tradeable_assets if t not in excluded_lookup]
pool_stocks = [t for t in eligible_assets if t not in reit_lookup]
pool_reits = [t for t in eligible_assets if t in reit_lookup]

# 5. Extract Data
# dropna() removes every date on which ANY ticker in the pool has no price,
# so the resulting frames are fully populated.
df_stocks = full_data[pool_stocks].dropna()
df_reits = full_data[pool_reits].dropna()

print("--- Production Universe Ready ---")
print(f"Stocks Available: {len(df_stocks.columns)}")
print(f"REITs Available:  {len(df_reits.columns)}")

# Surface an unusable universe here instead of letting it reach the optimizer
# unnoticed (no columns, or no date shared by all tickers after dropna()).
for universe_label, universe_frame in (("stock", df_stocks), ("REIT", df_reits)):
    if universe_frame.empty:
        print(f"WARNING: the {universe_label} universe is empty after filtering "
              f"(shape={universe_frame.shape}); downstream optimization will not be meaningful.")

>>> Loading fresh market data...
Fetching data from database...
Loaded prices for 511 assets over 2787 days.
   -> Data Dimensions: (2787, 511) (Rows, Tickers)
   -> Date Range: 2015-01-02 to 2026-02-02
--- Production Universe Ready ---
Stocks Available: 446
REITs Available:  28


In [5]:
# OPTIMIZATION EXPERIMENT (15 VS 30)
# Runs the same genetic search twice -- once for a 30-name portfolio and once
# for a 15-name portfolio -- then sets TARGET_STOCK_COUNT to whichever size
# scores at least as well. SEED and TARGET_STOCK_COUNT are read again by the
# final buy-list cell.
print(">>> Running Concentration Test...")
print("Checking if a concentrated portfolio (15) beats a diversified one (30)...")

# One optimizer and one seed are shared by both scenarios
ga_test = GeneticOptimizer(df_stocks, risk_free_rate=RISK_FREE_RATE)
SEED = 927


def _evolve_and_score(size):
    """Evolve a `size`-ticker portfolio with ga_test and return (tickers, fitness).

    The fitness function takes positions into ga_test.tickers, so the selected
    tickers are translated back to their positions before scoring.
    """
    picks = ga_test.run_optimization(population_size=60, generations=30, portfolio_size=size, seed=SEED)
    positions = [ga_test.tickers.index(ticker) for ticker in picks]
    return picks, ga_test.fitness_function(positions)


# Scenario A: the diversified book
print("  > Optimizing for 30 stocks...")
port_30, score_30 = _evolve_and_score(30)

# Scenario B: the concentrated book
print("  > Optimizing for 15 Stocks...")
port_15, score_15 = _evolve_and_score(15)

banner = "=" * 40
print("\n" + banner)
print("SCENARIO RESULTS (Sharpe Ratio):")
print(f"  > 30 Stocks: {score_30:.4f}")
print(f"  > 15 Stocks: {score_15:.4f}")

# Auto-decision: a tie goes to the concentrated portfolio
go_tight = score_15 >= score_30
TARGET_STOCK_COUNT = 15 if go_tight else 30
if go_tight:
    print(">>> DECISION: GO TIGHT. 15 Stocks provide better/equal risk-adjusted returns.")
else:
    print(">>> DECISION: GO WIDE. 30 Stocks are needed for safety.")
print(banner)

>>> Running Concentration Test...
Checking if a concentrated portfolio (15) beats a diversified one (30)...
  > Optimizing for 30 stocks...
--- Starting Evolution: 30 Gen, 60 Pop, 30 Assets ---
Gen 0: Best Sharpe = 0.5446
Gen 5: Best Sharpe = 0.6942
Gen 10: Best Sharpe = 0.7108
Gen 15: Best Sharpe = 0.7306
Gen 20: Best Sharpe = 0.7874
Gen 25: Best Sharpe = 0.8014
--- Evolution Complete. Top Sharpe: 0.8378 ---
  > Optimizing for 15 Stocks...
--- Starting Evolution: 30 Gen, 60 Pop, 15 Assets ---
Gen 0: Best Sharpe = 0.5959
Gen 5: Best Sharpe = 0.7881
Gen 10: Best Sharpe = 0.8764
Gen 15: Best Sharpe = 0.9127
Gen 20: Best Sharpe = 0.9219
Gen 25: Best Sharpe = 0.9442
--- Evolution Complete. Top Sharpe: 0.9705 ---

SCENARIO RESULTS (Sharpe Ratio):
  > 30 Stocks: 0.8378
  > 15 Stocks: 0.9705
>>> DECISION: GO TIGHT. 15 Stocks provide better/equal risk-adjusted returns.


In [8]:
# ---  GENERATE OFFICIAL BUY LIST ---
# Final, larger-budget optimization runs that produce the tickers to buy.
# Reads TARGET_STOCK_COUNT and SEED set by the concentration-test cell.

# The REIT sleeve is fixed at 5 for ease of management. Naming the value keeps
# the banner below and the optimizer call from drifting apart.
TARGET_REIT_COUNT = 5
# Search budget shared by both final runs
FINAL_POPULATION_SIZE = 100
FINAL_GENERATIONS = 50

print(f">>> Generating final Orders (Targeting {TARGET_STOCK_COUNT} Stocks, {TARGET_REIT_COUNT} REITs)")

# 1. Final Stock Optimization (Using the decided count)
ga_stocks = GeneticOptimizer(df_stocks, risk_free_rate=RISK_FREE_RATE)
final_stocks = ga_stocks.run_optimization(
    population_size=FINAL_POPULATION_SIZE,
    generations=FINAL_GENERATIONS,
    portfolio_size=TARGET_STOCK_COUNT,
    seed=SEED,
)

# 2. Final REIT optimization
ga_reits = GeneticOptimizer(df_reits, risk_free_rate=RISK_FREE_RATE)
final_reits = ga_reits.run_optimization(
    population_size=FINAL_POPULATION_SIZE,
    generations=FINAL_GENERATIONS,
    portfolio_size=TARGET_REIT_COUNT,
    seed=SEED,
)

# 3. Formatting Output
def print_ticket(title, tickers):
    """Print an order ticket: a header, the tickers, and the per-ticker weight.

    Args:
        title: Heading shown in the ticket banner.
        tickers: Iterable of ticker strings. Any iterable is accepted
            (list, tuple, generator); ``None`` is treated as an empty list.

    Returns:
        None. Everything is written to stdout.

    The tickers are printed alphabetically, five per line. The allocation line
    assumes equal weighting (100 / number of tickers). With no tickers there is
    nothing to allocate, so a placeholder line is printed instead of dividing
    by zero.
    """
    # Sort alphabetically for easier entry into broker. Materializing the
    # sorted list first means the count and the printed rows always agree,
    # and inputs without len() (e.g. generators) work too.
    sorted_tickers = sorted(tickers) if tickers is not None else []
    ticker_count = len(sorted_tickers)

    print("\n" + "="*50)
    print(f"   {title} ({ticker_count} Tickers)")
    print("="*50)

    # Print in groups of 5
    for i in range(0, ticker_count, 5):
        print(", ".join(sorted_tickers[i:i+5]))

    print("-" * 50)
    print("STRATEGY: EQUAL WEIGHT")
    if ticker_count == 0:
        # Empty selection: avoid ZeroDivisionError and say so explicitly
        print("Allocation per ticker: n/a (no tickers selected).")
        return
    print(f"Allocation per ticker: {100/ticker_count:.2f}% of category capital.")

# 4. Display
# One ticket per sleeve, stocks first.
for ticket_title, ticket_tickers in (
    ("OFFICIAL STOCK BUY LIST", final_stocks),
    ("OFFICIAL REIT BUY LIST", final_reits),
):
    print_ticket(ticket_title, ticket_tickers)

# Broker-entry reminders. NOTE: the tickers named here are hard-coded
# literals; they are not derived from final_stocks / final_reits.
print("\n>>> EXECUTION NOTES:")
execution_notes = (
    "1. For tickers like 'BF-B', search as 'BFB' or 'BF B' in GBM.",
    "2. For low volume stocks (LII, TDG), use LIMIT ORDERS.",
    "3. Re-run this notebook in 3 months (Quarterly Rebalance).",
)
for note in execution_notes:
    print(note)

>>> Generating final Orders (Targeting 15 Stocks, 5 REITs)
--- Starting Evolution: 50 Gen, 100 Pop, 15 Assets ---
Gen 0: Best Sharpe = 0.5959
Gen 5: Best Sharpe = 0.8655
Gen 10: Best Sharpe = 0.9050
Gen 15: Best Sharpe = 0.9217
Gen 20: Best Sharpe = 0.9615
Gen 25: Best Sharpe = 1.0620
Gen 30: Best Sharpe = 1.0758
Gen 35: Best Sharpe = 1.1023
Gen 40: Best Sharpe = 1.1049
Gen 45: Best Sharpe = 1.1123
--- Evolution Complete. Top Sharpe: 1.1183 ---
--- Starting Evolution: 50 Gen, 100 Pop, 5 Assets ---
Gen 0: Best Sharpe = 0.2645
Gen 5: Best Sharpe = 0.4373
Gen 10: Best Sharpe = 0.4539
Gen 15: Best Sharpe = 0.4539
Gen 20: Best Sharpe = 0.4539
Gen 25: Best Sharpe = 0.4539
Gen 30: Best Sharpe = 0.4539
Gen 35: Best Sharpe = 0.4539
Gen 40: Best Sharpe = 0.4539
Gen 45: Best Sharpe = 0.4539
--- Evolution Complete. Top Sharpe: 0.4539 ---

   OFFICIAL STOCK BUY LIST (15 Tickers)
ABBV, AJG, ANET, COST, FIX
GOOG, GWW, IBKR, LLY, NEM
NVDA, STX, TKO, TPL, WRB
-------------------------------------------