# Import Packages

In [102]:
# ===========================
# Suppress Warnings
# ===========================
import warnings
warnings.filterwarnings("ignore")

# ===========================
# Standard Libraries
# ===========================
import os
import sys
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# matplotlib.use('Agg')  

# ===========================
# Enable Inline Plotting (Jupyter)
# ===========================
%matplotlib inline

# ===========================
# FinRL Imports
# ===========================
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split

# ===========================
# Custom Imports (model.py)
# ===========================
sys.path.append(os.path.abspath("."))  
from models import DRLEnsembleAgent

sys.path.append("../FinRL-Library")


## `process_csv_to_features(csv_path)`

Processes financial data from a CSV by adding technical indicators and turbulence features.

### **Parameters**
- `csv_path` *(str)*: Path to the raw financial data CSV.

### **Workflow**
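1. Load data.
2. Identify 5-day and 7-day tickers (tickers matching neither pattern are dropped).
3. Apply technical indicators to each group.
4. Combine the two groups into one dataset.
5. Remove dates on which only one ticker has data.
6. Add the turbulence feature.
7. Replace `NaN` and infinite values with `0`.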

### **Returns**
- `processed` *(DataFrame)*: Feature-enhanced, cleaned DataFrame for modeling.


In [103]:
def process_csv_to_features(csv_path):
    """Load a raw price CSV and return it with technical indicators and turbulence.

    The CSV must provide at least the ``tic``, ``day`` and ``date`` columns
    that this function reads directly; any further columns are whatever
    ``FeatureEngineer.preprocess_data`` requires.

    Parameters
    ----------
    csv_path : str
        Path to the raw financial data CSV.

    Returns
    -------
    pandas.DataFrame
        Feature-enhanced frame sorted by ``date`` then ``tic``, with ``NaN``
        and both positive and negative infinity replaced by ``0``.

    Notes
    -----
    Tickers whose set of ``day`` values is neither exactly ``0..4`` nor
    exactly ``0..6`` are not part of either group and are therefore dropped.
    """
    # Step 1: Load Data
    df = pd.read_csv(csv_path)

    # Step 2: Identify 5-day and 7-day tickers from the distinct `day` values
    day_values_per_tic = df.groupby('tic')['day'].apply(lambda x: sorted(x.unique())).reset_index()
    day_values_per_tic.columns = ['tic', 'unique_days']

    tics_5day = day_values_per_tic[day_values_per_tic['unique_days'].apply(lambda x: x == list(range(5)))]['tic']
    tics_7day = day_values_per_tic[day_values_per_tic['unique_days'].apply(lambda x: x == list(range(7)))]['tic']

    # Copy the slices so the feature engineer never works on a view of `df`.
    df_5day_full = df[df['tic'].isin(tics_5day)].copy()
    df_7day_full = df[df['tic'].isin(tics_7day)].copy()

    # Step 3: Apply Technical Indicators (each group separately)
    fe_ti = FeatureEngineer(
        use_technical_indicator=True,
        use_turbulence=False,
        user_defined_feature=False
    )
    frames = []
    for label, subset in (("df_5day_full", df_5day_full), ("df_7day_full", df_7day_full)):
        if subset.empty:
            # Either group may legitimately be missing from a dataset.
            print(f"[Info] {label} is empty. Skipping technical indicators.")
            frames.append(subset)
        else:
            frames.append(fe_ti.preprocess_data(subset))

    # Step 4: Combine with a fresh 0..n-1 index
    combined_df = pd.concat(frames, ignore_index=True)

    # Step 5: Remove dates with only one ticker
    combined_df['date'] = pd.to_datetime(combined_df['date'])
    combined_df = combined_df[combined_df.groupby('date')['date'].transform('count') > 1]
    combined_df = combined_df.sort_values(['date', 'tic']).reset_index(drop=True)

    # Step 6: Apply Turbulence Feature
    fe_turb = FeatureEngineer(
        use_technical_indicator=False,
        use_turbulence=True,
        user_defined_feature=False
    )
    processed = fe_turb.preprocess_data(combined_df)

    # Step 7: Final Cleaning -- NaN and BOTH signs of infinity become 0
    processed = processed.copy()
    processed = processed.fillna(0)
    processed = processed.replace([np.inf, -np.inf], 0)

    return processed


## Data Processing

Apply `process_csv_to_features` to prepare each dataset with technical indicators and the turbulence index.

### **Datasets Processed**
- `processed_0` : `2007-2025_no_crypto.csv`
- `processed_1` : `2015-2025_crypto.csv`
- `processed_2` : `2015-2025_no_crypto.csv`

In [104]:
# Build one feature-enriched frame per raw dataset, in the same order as the
# "Datasets Processed" list: processed_0, processed_1, processed_2.
processed_0, processed_1, processed_2 = [
    process_csv_to_features(csv_path)
    for csv_path in (
        '2007-2025_no_crypto.csv',
        '2015-2025_crypto.csv',
        '2015-2025_no_crypto.csv',
    )
]

Successfully added technical indicators
[Info] df_7day_full is empty. Skipping technical indicators.
Successfully added turbulence index
Successfully added technical indicators
Successfully added technical indicators
Successfully added turbulence index
Successfully added technical indicators
[Info] df_7day_full is empty. Skipping technical indicators.
Successfully added turbulence index


## `process_df_for_mvo(df)`

Prepares price data for Mean-Variance Optimization (MVO) by converting a DataFrame into a structured format with dates as index and tickers as columns.

### **Features**
- Sorts data by `date` and `tic`.
- Generates a price matrix (`DataFrame`) for MVO.
- Includes only dates where all tickers have available prices.

### **Returns**
- `mvo` *(DataFrame)*: Cleaned price matrix for MVO, with tickers as columns and dates as index.


In [105]:
def process_df_for_mvo(df):
    """Pivot long-format prices into a date x ticker matrix of closing prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with at least ``date``, ``tic`` and ``close`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per date and one column per ticker (sorted), holding the
        ``close`` price. Only dates on which every ticker has a non-missing
        close are kept. Index and column axes are unnamed.

    Notes
    -----
    If a (date, ticker) pair occurs more than once, the last occurrence after
    sorting by ``date`` and ``tic`` is used.
    """
    prices = df.sort_values(['date', 'tic'], ignore_index=True)[['date', 'tic', 'close']]
    all_tickers = sorted(prices['tic'].unique())

    # Rows without a date cannot be placed in the matrix.
    prices = prices.dropna(subset=['date'])
    if prices.empty:
        return pd.DataFrame(columns=all_tickers)

    mvo = (
        prices
        .drop_duplicates(subset=['date', 'tic'], keep='last')
        .pivot(index='date', columns='tic', values='close')
        .reindex(columns=all_tickers)   # every known ticker, in sorted order
        .dropna(how='any')              # only include dates with all tickers
    )

    # pivot() names the axes 'date'/'tic'; callers expect unnamed axes.
    mvo.index.name = None
    mvo.columns.name = None
    return mvo


## `StockReturnsComputing(StockPrice, Rows, Columns)`

Calculates daily percentage returns from a stock price matrix.

### **Features**
- Computes daily returns for each asset using price differences.
- Returns percentage values.

### **Parameters**
- `StockPrice`: 2D array of stock prices.
- `Rows`: Number of rows (days).
- `Columns`: Number of assets.

### **Returns**
- `StockReturn` *(ndarray)*: Daily percentage return matrix.

> *Note: Inspired by Dr. G A Vijayalakshmi Pai's MVO implementation.*


In [106]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai

# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns):
    """Compute period-over-period percentage returns from a price matrix.

    Parameters
    ----------
    StockPrice : array-like
        2D prices with one row per period and one column per asset. Anything
        ``np.asarray`` can convert to a float array is accepted.
    Rows : int
        Number of leading rows (periods) to use.
    Columns : int
        Number of leading columns (assets) to use.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(Rows - 1, Columns)`` where entry ``[i, j]`` is
        ``(price[i+1, j] - price[i, j]) / price[i, j] * 100``.

    Raises
    ------
    IndexError
        If ``StockPrice`` has fewer than ``Rows`` rows or ``Columns`` columns.
    """
    prices = np.asarray(StockPrice, dtype=float)
    if prices.ndim != 2 or prices.shape[0] < Rows or prices.shape[1] < Columns:
        raise IndexError(
            f"StockPrice with shape {prices.shape} is smaller than "
            f"the requested ({Rows}, {Columns})"
        )

    prices = prices[:Rows, :Columns]
    previous, current = prices[:-1], prices[1:]
    # A zero price yields inf/nan under NumPy's division rules.
    return (current - previous) / previous * 100

## `run_naive_portfolio_pipeline(...)`

Executes a naive equal-weighted portfolio strategy, holding fixed shares throughout the period.

### **Features**
- Allocates initial capital equally across all assets.
- Calculates portfolio value and daily returns over time.
- Saves both daily returns and fixed positions to CSV.
- Outputs organized in: `<dataset>/naive/`

### **Parameters**
- `df`: Processed DataFrame.
- `start_date`, `end_date`: Trading period.
- `initial_fund`: Starting capital.
- `buy_cost_pct`: Buy transaction cost as a fraction of the invested capital (e.g. `0.001` = 0.1%).
- `original_csv_path`: For folder structuring.

### **Outputs**
- `df_daily_return`: Daily return CSV.
- `df_positions`: Fixed percentage allocation (weight) per asset, repeated for every trading date, saved as CSV.

### **Returns**
- Tuple: `(df_daily_return, df_positions)`


In [107]:
import os
import pandas as pd
import numpy as np

def run_naive_portfolio_pipeline(df,
                                 start_date,
                                 end_date,
                                 initial_fund=1_000_000,
                                 buy_cost_pct=0.0,
                                 output_return_csv='df_daily_return_naive.csv',
                                 output_position_csv='df_positions_naive.csv',
                                 original_csv_path='data.csv',
                                 model_name='naive'):
    """Run an equal-weighted buy-and-hold portfolio and persist its results.

    Capital (net of ``buy_cost_pct``) is split equally across all tickers at
    the first trading date's close; the resulting share counts are then held
    unchanged for the whole period.

    Parameters
    ----------
    df : pandas.DataFrame
        Processed long-format data with ``date``, ``tic`` and ``close``.
    start_date, end_date
        Trading period bounds, passed to ``data_split``.
    initial_fund : float
        Starting capital.
    buy_cost_pct : float
        Fraction of the capital lost to the initial purchase cost.
    output_return_csv, output_position_csv : str
        File names of the two CSV outputs.
    original_csv_path : str
        Its base name (without extension) names the top-level output folder.
    model_name : str
        Sub-folder name inside the dataset folder.

    Returns
    -------
    tuple
        ``(df_daily_return, df_positions)``: daily returns with columns
        ``date`` and ``daily_return`` (first day forced to ``0.0``), and the
        initial percentage allocation repeated for each trading date.

    Raises
    ------
    ValueError
        If no usable trading dates exist or a first-day price is zero.
    """
    # --- Output location: <csv base name>/<model_name> ---
    dataset_stem, _ext = os.path.splitext(os.path.basename(original_csv_path))
    target_folder = os.path.join(dataset_stem, model_name)
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
        print(f"[INFO] Created folder: {target_folder}")

    # --- Trading slice, ordered by date then ticker ---
    trade_df = (
        data_split(df, start_date, end_date)
        .reset_index(drop=True)
        .sort_values(['date', 'tic'])
        .reset_index(drop=True)
    )

    # --- Date x ticker price matrix ---
    trade_mvo = process_df_for_mvo(trade_df)
    if trade_mvo.empty:
        raise ValueError("Insufficient data for naive portfolio.")

    stock_dimension = trade_mvo.shape[1]
    first_prices = trade_mvo.iloc[0].to_numpy()
    if np.any(first_prices == 0):
        raise ValueError("Zero price detected in first trading day.")

    # --- Equal capital per asset, converted to a fixed number of shares ---
    capital_per_asset = initial_fund * (1 - buy_cost_pct) * (1.0 / stock_dimension)
    shares = capital_per_asset / first_prices

    # --- Account value per date and the derived daily returns ---
    account_frame = pd.DataFrame({
        "date": trade_mvo.index,
        "account_value": trade_mvo @ shares
    })
    account_frame["date"] = pd.to_datetime(account_frame["date"])
    account_frame = account_frame.set_index("date")

    df_daily_return = account_frame.assign(
        daily_return=account_frame["account_value"].pct_change()
    ).reset_index()
    df_daily_return.loc[0, "daily_return"] = 0.0  # no prior day to compare with
    df_daily_return = df_daily_return[["date", "daily_return"]]

    csv_return_path = os.path.join(target_folder, output_return_csv)
    df_daily_return.to_csv(csv_return_path, index=False)
    print(f"[INFO] Naive portfolio daily returns saved to {csv_return_path}")

    # --- Positions: the initial percentage split, repeated on every date ---
    invested = shares * first_prices
    percentage_allocations = invested / np.sum(invested)

    dates = pd.to_datetime(trade_mvo.index)
    df_positions = pd.DataFrame(
        [percentage_allocations] * len(dates),
        columns=range(stock_dimension)
    )
    df_positions.insert(0, 'date', dates)

    csv_position_path = os.path.join(target_folder, output_position_csv)
    df_positions.to_csv(csv_position_path, index=False)
    print(f"[INFO] Naive portfolio positions saved to {csv_position_path}")

    return df_daily_return, df_positions


## Naive Portfolio Execution

Runs the `run_naive_portfolio_pipeline` across three datasets using an equal-weighted buy-and-hold strategy.

### **Workflow**
- Allocates \$1,000,000 equally across assets.
- Applies a **0.1% buy transaction cost**.
- Holds fixed shares throughout the trading period (2023-04-05 to 2025-04-10).
- Saves daily returns and initial positions in `/naive/` folder for each dataset.

### **Datasets Processed**
1. `2007-2025_no_crypto.csv`
2. `2015-2025_crypto.csv`
3. `2015-2025_no_crypto.csv`

### **Outputs**
- `df_daily_return_naive.csv` : Daily returns  
- `df_positions_naive.csv` : Fixed percentage allocations (weights), repeated for every trading date


In [108]:
TRADE_START_DATE = '2023-04-05'
TRADE_END_DATE = '2025-04-10'

# (source CSV name, processed frame) for every dataset to evaluate.
naive_runs = [
    ('2007-2025_no_crypto.csv', processed_0),
    ('2015-2025_crypto.csv', processed_1),
    ('2015-2025_no_crypto.csv', processed_2),
]

# Keep every dataset's (df_daily_return, df_positions) tuple, keyed by CSV
# name, instead of overwriting a single variable on each call.
naive_results = {}
for csv_name, processed_df in naive_runs:
    naive_results[csv_name] = run_naive_portfolio_pipeline(
        df=processed_df,
        start_date=TRADE_START_DATE,
        end_date=TRADE_END_DATE,
        initial_fund=1_000_000,
        buy_cost_pct=0.001,
        output_return_csv='df_daily_return_naive.csv',
        output_position_csv='df_positions_naive.csv',
        original_csv_path=csv_name,
        model_name='naive'
    )

# Backward-compatible name: as before, it ends up holding the
# (df_daily_return, df_positions) tuple of the last dataset.
df_daily_return_naive = naive_results['2015-2025_no_crypto.csv']



[INFO] Naive portfolio daily returns saved to 2007-2025_no_crypto/naive/df_daily_return_naive.csv
[INFO] Naive portfolio positions saved to 2007-2025_no_crypto/naive/df_positions_naive.csv
[INFO] Naive portfolio daily returns saved to 2015-2025_crypto/naive/df_daily_return_naive.csv
[INFO] Naive portfolio positions saved to 2015-2025_crypto/naive/df_positions_naive.csv
[INFO] Naive portfolio daily returns saved to 2015-2025_no_crypto/naive/df_daily_return_naive.csv
[INFO] Naive portfolio positions saved to 2015-2025_no_crypto/naive/df_positions_naive.csv


## `run_mvo_portfolio_pipeline(...)`

Executes a Mean-Variance Optimization (MVO) strategy to compute optimal portfolio weights, track returns, and save outputs.

### **Features**
- Optimizes portfolio using **Max Sharpe Ratio** with `PyPortfolioOpt`.
- Static allocation based on training data.
- Calculates portfolio value and daily returns over trading period.
- Saves outputs in `<dataset>/mvo/` folder.

### **Parameters**
- `train_start_date`, `train_end_date`: Training period for optimization.
- `trade_start_date`, `trade_end_date`: Trading period for applying weights.
- `initial_fund`: Starting capital.
- `weight_bounds`: Constraints for asset weights.
- `original_csv_path`: For folder structuring.

### **Outputs**
- `df_daily_return_mvo.csv`: Daily returns.
- `df_positions_mvo.csv`: Static portfolio weights.

### **Returns**
- Tuple: `(df_daily_return, df_positions)`


In [109]:
import os
import pandas as pd
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier

def run_mvo_portfolio_pipeline(df,
                               train_start_date,
                               train_end_date,
                               trade_start_date,
                               trade_end_date,
                               initial_fund=1_000_000,
                               weight_bounds=(0.01, 0.25),
                               output_return_csv='df_daily_return_mvo.csv',
                               output_position_csv='df_positions_mvo.csv',
                               original_csv_path='data.csv'):
    """Run a static max-Sharpe mean-variance portfolio and persist its results.

    Weights are optimised once on the training period, converted to share
    counts at the first trading date's close, and held for the whole trading
    period. Outputs are written to ``<csv base name>/mvo/``.

    Parameters
    ----------
    df : pandas.DataFrame
        Processed long-format data with ``date``, ``tic`` and ``close``.
    train_start_date, train_end_date
        Training period bounds, passed to ``data_split``.
    trade_start_date, trade_end_date
        Trading period bounds, passed to ``data_split``.
    initial_fund : float
        Starting capital.
    weight_bounds : tuple
        Per-asset weight bounds handed to ``EfficientFrontier``.
    output_return_csv, output_position_csv : str
        File names of the two CSV outputs.
    original_csv_path : str
        Its base name (without extension) names the top-level output folder.

    Returns
    -------
    tuple
        ``(df_daily_return, df_positions)``: daily returns with columns
        ``date`` and ``daily_return`` (first day forced to ``0.0``), and the
        optimised weights repeated for each trading date.

    Raises
    ------
    ValueError
        If the training or trading data is insufficient, the two periods do
        not cover the same tickers, or a first-day trade price is zero.
    """
    # === Step 0: Setup Folder Structure ===
    base_name = os.path.splitext(os.path.basename(original_csv_path))[0]
    model_name = "mvo"
    target_folder = os.path.join(base_name, model_name)

    if not os.path.exists(target_folder):
        os.makedirs(target_folder, exist_ok=True)
        print(f"[INFO] Created folder: {target_folder}")

    output_csv_full_path = os.path.join(target_folder, output_return_csv)
    output_position_csv_full_path = os.path.join(target_folder, output_position_csv)

    # === Step 1: Split Data ===
    train = data_split(df, train_start_date, train_end_date).reset_index(drop=True)
    trade = data_split(df, trade_start_date, trade_end_date).reset_index(drop=True)

    # === Step 2: Process Data for MVO (date x ticker close prices) ===
    StockData = process_df_for_mvo(train)
    TradeData = process_df_for_mvo(trade)

    # Fail early with a clear message instead of deep inside the optimiser.
    # Three price rows give two return observations, the minimum for np.cov.
    if len(StockData) < 3:
        raise ValueError("Insufficient training data for MVO portfolio.")
    if TradeData.empty:
        raise ValueError("Insufficient trade data for MVO portfolio.")
    # Weights are matched to trade prices by position, so the columns must agree.
    if list(StockData.columns) != list(TradeData.columns):
        raise ValueError("Training and trading data do not cover the same tickers.")

    # === Step 3: Compute Returns, Mean, Covariance ===
    arStockPrices = np.asarray(StockData, dtype=float)
    Rows, Cols = arStockPrices.shape
    arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)  # percent returns

    meanReturns = np.mean(arReturns, axis=0)
    covReturns = np.cov(arReturns, rowvar=False)

    # === Step 4: Perform Mean-Variance Optimization ===
    # Plain arrays are passed, so the weight keys are presumably positional
    # labels rather than ticker names -- confirm against PyPortfolioOpt.
    ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=weight_bounds)
    ef_mean.max_sharpe()
    cleaned_weights_mean = ef_mean.clean_weights()

    # === Step 5: Allocate Capital ===
    allocation = np.array([initial_fund * weight for weight in cleaned_weights_mean.values()])
    first_prices = TradeData.iloc[0].to_numpy(dtype=float)
    if np.any(first_prices == 0):
        raise ValueError("Zero price detected in first trading day.")
    shares = allocation / first_prices

    # === Step 6: Compute Portfolio Value Over Time ===
    portfolio_values = TradeData @ shares
    MVO_result = pd.DataFrame({
        "date": TradeData.index,
        "account_value": portfolio_values
    })

    # === Step 7: Calculate Daily Returns ===
    MVO_result["date"] = pd.to_datetime(MVO_result["date"])
    MVO_result = MVO_result.set_index("date")

    df_daily_return = MVO_result.copy()
    df_daily_return["daily_return"] = df_daily_return["account_value"].pct_change()

    df_daily_return = df_daily_return.reset_index()
    df_daily_return.loc[0, "daily_return"] = 0.0  # no prior day to compare with
    df_daily_return = df_daily_return[["date", "daily_return"]]

    # === Step 8: Save Daily Return CSV ===
    df_daily_return.to_csv(output_csv_full_path, index=False)
    print(f"[INFO] MVO daily returns saved to {output_csv_full_path}")

    # === Step 9: Save Positions CSV ===
    # Since MVO is static allocation, repeat weights for each trade date
    df_positions = pd.DataFrame([cleaned_weights_mean] * len(df_daily_return))
    df_positions.insert(0, 'date', df_daily_return['date'])

    df_positions.to_csv(output_position_csv_full_path, index=False)
    print(f"[INFO] MVO portfolio positions saved to {output_position_csv_full_path}")

    return df_daily_return, df_positions


## Mean-Variance Optimization (MVO) Execution

Applies the `run_mvo_portfolio_pipeline` across three datasets using a static Max Sharpe portfolio strategy.

### **Workflow**
- Trains on historical data to compute optimal weights (bounded between **1%** and **25%** per asset).
- Applies static allocation over the trading period (**2023-04-05** to **2025-04-10**).
- Initial fund: **\$1,000,000**.
- Outputs saved in `/mvo/` folder within each dataset directory.

### **Datasets Processed**
1. `2007-2025_no_crypto.csv`  
   - **Train**: 2007-06-01 to 2023-04-04
2. `2015-2025_crypto.csv`  
   - **Train**: 2015-02-02 to 2023-04-04
3. `2015-2025_no_crypto.csv`  
   - **Train**: 2015-02-02 to 2023-04-04

### **Outputs**
- `df_daily_return_mvo.csv` : Daily portfolio returns  
- `df_positions_mvo.csv` : Static optimized weights


In [110]:
TRAIN_START_DATE = '2015-02-02'
TRAIN_END_DATE = '2023-04-04'
TRADE_START_DATE = '2023-04-05'
TRADE_END_DATE = '2025-04-10'

# (source CSV name, processed frame, training start date) per dataset.
# The 2007 dataset has a longer history and therefore its own training start.
mvo_runs = [
    ('2007-2025_no_crypto.csv', processed_0, '2007-06-01'),
    ('2015-2025_crypto.csv', processed_1, TRAIN_START_DATE),
    ('2015-2025_no_crypto.csv', processed_2, TRAIN_START_DATE),
]

# Keep every dataset's (df_daily_return, df_positions) tuple, keyed by CSV
# name, instead of overwriting a single variable on each call.
mvo_results = {}
for csv_name, processed_df, train_start in mvo_runs:
    mvo_results[csv_name] = run_mvo_portfolio_pipeline(
        df=processed_df,
        train_start_date=train_start,
        train_end_date=TRAIN_END_DATE,
        trade_start_date=TRADE_START_DATE,
        trade_end_date=TRADE_END_DATE,
        initial_fund=1_000_000,
        weight_bounds=(0.01, 0.25),
        output_return_csv='df_daily_return_mvo.csv',
        output_position_csv='df_positions_mvo.csv',
        original_csv_path=csv_name
    )

# Backward-compatible name: as before, it ends up holding the
# (df_daily_return, df_positions) tuple of the last dataset.
df_daily_return_mvo = mvo_results['2015-2025_no_crypto.csv']


[INFO] MVO daily returns saved to 2007-2025_no_crypto/mvo/df_daily_return_mvo.csv
[INFO] MVO portfolio positions saved to 2007-2025_no_crypto/mvo/df_positions_mvo.csv
[INFO] MVO daily returns saved to 2015-2025_crypto/mvo/df_daily_return_mvo.csv
[INFO] MVO portfolio positions saved to 2015-2025_crypto/mvo/df_positions_mvo.csv
[INFO] MVO daily returns saved to 2015-2025_no_crypto/mvo/df_daily_return_mvo.csv
[INFO] MVO portfolio positions saved to 2015-2025_no_crypto/mvo/df_positions_mvo.csv


## `run_rolling_mvo_pipeline(...)`

Implements a **Rolling Mean-Variance Optimization (MVO)** strategy with periodic rebalancing based on a sliding window.

### **Features**
- Dynamically recalculates optimal portfolio weights every `window_size` days.
- Uses **Max Sharpe Ratio** or falls back to **Min Volatility** if needed.
- Supports cumulative training data extension (`train_window_extend`).
- Tracks daily portfolio returns and expands positions across rebalancing periods.
- Saves outputs in `<dataset>/adaptive_mvo/` folder.

### **Parameters**
- `train_start_date`, `train_end_date`: Initial training period.
- `trade_start_date`, `trade_end_date`: Trading & rebalancing period.
- `window_size`: Days per rebalance window.
- `weight_bounds`: Allocation constraints per asset.
- `initial_fund`: Starting capital.
- `buy_cost_pct`, `sell_cost_pct`: Transaction costs.

### **Outputs**
- `df_daily_return_adaptive_mvo.csv`: Daily returns.
- `df_positions_adaptive_mvo.csv`: Expanded daily positions (weights).

### **Returns**
- Tuple: `(df_daily_return, df_positions)`


In [111]:
import os
import pandas as pd
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier

def run_rolling_mvo_pipeline(df,
                             train_start_date,
                             train_end_date,
                             trade_start_date,
                             trade_end_date,
                             window_size=63,
                             train_window_extend=True,
                             initial_fund=1_000_000,
                             weight_bounds=(0.0, 0.5),
                             buy_cost_pct=0.0,
                             sell_cost_pct=0.0,
                             original_csv_path='data.csv',
                             model_name='adaptive_mvo',
                             output_return_csv='df_daily_return_adaptive_mvo.csv',
                             output_position_csv='df_positions_adaptive_mvo.csv'):
    """Run rolling mean-variance optimisation with periodic rebalancing.

    The trading period is cut into consecutive windows of ``window_size``
    unique dates. At the start of each window the weights are re-optimised
    (max Sharpe, falling back to min volatility if that fails) on the current
    training data, the available capital is re-invested at the window's first
    prices, and the shares are held until the window ends.

    Parameters
    ----------
    df : pandas.DataFrame
        Processed long-format data with ``date``, ``tic`` and ``close``.
    train_start_date, train_end_date
        Initial training period bounds, passed to ``data_split``.
    trade_start_date, trade_end_date
        Trading period bounds, passed to ``data_split``.
    window_size : int
        Number of unique trade dates per rebalancing window.
    train_window_extend : bool
        If true, each traded window is appended to the training data used by
        later windows.
    initial_fund : float
        Starting capital.
    weight_bounds : tuple
        Per-asset weight bounds handed to ``EfficientFrontier``.
    buy_cost_pct, sell_cost_pct : float
        Fractions deducted when entering and when leaving a window.
    original_csv_path : str
        Its base name (without extension) names the top-level output folder.
    model_name : str
        Sub-folder name inside the dataset folder.
    output_return_csv, output_position_csv : str
        File names of the two CSV outputs.

    Returns
    -------
    tuple
        ``(df_daily_return, df_positions)``: daily returns with columns
        ``date`` and ``daily_return`` (first day forced to ``0.0``), and the
        weights of each window repeated for each of its dates.

    Raises
    ------
    ValueError
        If no window could be traded.

    Notes
    -----
    The window count uses floor division, so trailing trade dates that do not
    fill a complete window are not traded.
    """
    # === Step 1: Setup Folder Structure ===
    base_csv_name = os.path.splitext(os.path.basename(original_csv_path))[0]
    target_folder = os.path.join(base_csv_name, model_name)
    if not os.path.exists(target_folder):
        os.makedirs(target_folder, exist_ok=True)
        print(f"[INFO] Created folder: {target_folder}")

    output_return_csv_path = os.path.join(target_folder, output_return_csv)
    output_position_csv_path = os.path.join(target_folder, output_position_csv)

    # === Step 2: Data Preparation ===
    train_df = data_split(df, train_start_date, train_end_date).reset_index(drop=True)
    trade_df = data_split(df, trade_start_date, trade_end_date).reset_index(drop=True)

    unique_dates = trade_df.date.unique()
    total_windows = len(unique_dates) // window_size

    window_values = []     # one account-value Series per traded window
    portfolio_dates = []   # dates matching the concatenated account values
    weights_log = []       # weights of each traded window
    rebalance_dates = []   # first date of each traded window

    capital = initial_fund
    train_df_window = train_df.copy()

    for w in range(total_windows):
        print(f"\n[Rebalancing] Window {w+1}/{total_windows}")

        window_dates = unique_dates[w * window_size:(w + 1) * window_size]
        trade_df_window = trade_df[trade_df['date'].isin(window_dates)].copy()

        train_mvo = process_df_for_mvo(train_df_window)
        trade_mvo = process_df_for_mvo(trade_df_window)

        if train_mvo.empty or len(train_mvo) < 2 or train_mvo.shape[1] < 3:
            print(f"[Window {w}] Skipped due to insufficient data.")
            continue

        if trade_mvo.empty:
            # No date in this window has a price for every ticker.
            print(f"[Window {w}] Skipped: no trade dates with prices for all tickers.")
            continue

        if list(train_mvo.columns) != list(trade_mvo.columns):
            # Weights are matched to trade prices by position.
            print(f"[Window {w}] Skipped: train/trade ticker mismatch.")
            continue

        arReturns = StockReturnsComputing(np.asarray(train_mvo, dtype=float), *train_mvo.shape)
        meanReturns = pd.Series(np.mean(arReturns, axis=0), index=train_mvo.columns)
        covReturns = pd.DataFrame(np.cov(arReturns, rowvar=False),
                                  index=train_mvo.columns, columns=train_mvo.columns)

        if meanReturns.std() < 1e-4:
            print(f"[Window {w}] Skipped: flat mean returns.")
            continue

        try:
            ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=weight_bounds)
            ef_mean.max_sharpe()
            cleaned_weights_mean = ef_mean.clean_weights()
        except Exception:
            # Catch optimiser failures only; KeyboardInterrupt/SystemExit
            # must still propagate.
            print(f"[Window {w}] Fallback to min_volatility.")
            ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=weight_bounds)
            ef_mean.min_volatility()
            cleaned_weights_mean = ef_mean.clean_weights()

        first_prices = trade_mvo.iloc[0].to_numpy(dtype=float)
        if np.any(first_prices == 0):
            print(f"[Window {w}] Skipped: zero prices detected.")
            continue

        # Record the rebalance only once the window is certain to be traded.
        weights_log.append(cleaned_weights_mean)
        rebalance_dates.append(window_dates[0])  # First date of each window

        target_values = capital * (1 - buy_cost_pct) * np.array(
            list(cleaned_weights_mean.values()), dtype=float
        )
        shares = target_values / first_prices
        window_account_value = (trade_mvo @ shares).astype(float)

        window_values.append(window_account_value)
        # Use the price matrix's own dates so values and dates always line up,
        # even if some window dates lacked a price for a ticker.
        portfolio_dates.extend(trade_mvo.index)

        if train_window_extend:
            train_df_window = pd.concat([train_df_window, trade_df_window], ignore_index=True)

        # Capital carried into the next window, net of the selling cost.
        capital = float(window_account_value.iloc[-1]) * (1 - sell_cost_pct)

    if not window_values:
        raise ValueError(
            "Rolling MVO produced no portfolio values: every window was skipped "
            "or the trade period is shorter than window_size."
        )

    # === Step 3: Finalize Portfolio Values ===
    all_dates = pd.to_datetime(portfolio_dates)
    account_value = pd.concat(window_values, ignore_index=True)
    daily_return = account_value.pct_change()
    daily_return.iloc[0] = 0.0  # no prior day to compare with
    df_daily_return = pd.DataFrame({
        "date": all_dates,
        "daily_return": daily_return.to_numpy()
    })

    df_daily_return.to_csv(output_return_csv_path, index=False)
    print(f"[INFO] Rolling MVO daily returns saved to {output_return_csv_path}")

    # === Step 4: Expand Positions for Each Date ===
    expanded_positions = []
    for idx, weights in enumerate(weights_log):
        # Each window's weights apply from its rebalance date up to, but not
        # including, the next rebalance date.
        mask = all_dates >= pd.to_datetime(rebalance_dates[idx])
        if idx + 1 < len(rebalance_dates):
            mask &= all_dates < pd.to_datetime(rebalance_dates[idx + 1])

        weight_values = list(weights.values())
        expanded_positions.extend([d] + weight_values for d in all_dates[mask])

    df_positions = pd.DataFrame(
        expanded_positions,
        columns=['date'] + [str(i) for i in range(len(weights_log[0]))]
    )

    df_positions.to_csv(output_position_csv_path, index=False)
    print(f"[INFO] Rolling MVO expanded positions saved to {output_position_csv_path}")

    return df_daily_return, df_positions

## Rolling MVO Strategy Execution

Applies the `run_rolling_mvo_pipeline` across three datasets with annual (252-day) rebalancing using adaptive Mean-Variance Optimization.

### **Workflow**
- Recalculates optimal portfolio weights every **252 days**.
- Training starts with historical data and extends over time.
- Initial capital: **\$1,000,000** with weight bounds between **1%** and **25%**.
- Trading period: **2023-04-05** to **2025-04-10**.
- Outputs stored in `/adaptive_mvo/` folders for each dataset.

### **Datasets Processed**
1. `2007-2025_no_crypto.csv`  
   - **Train**: 2007-06-01 to 2023-04-04
2. `2015-2025_crypto.csv`  
   - **Train**: 2015-02-02 to 2023-04-04
3. `2015-2025_no_crypto.csv`  
   - **Train**: 2015-02-02 to 2023-04-04

### **Outputs**
- `df_daily_return_adaptive_mvo.csv` : Daily returns  
- `df_positions_adaptive_mvo.csv` : Expanded adaptive portfolio weights


In [112]:
TRAIN_START_DATE = '2015-02-02'
TRAIN_END_DATE = '2023-04-04'
TRADE_START_DATE = '2023-04-05'
TRADE_END_DATE = '2025-04-10'

# (source CSV name, processed frame, training start date) per dataset.
# The 2007 dataset has a longer history and therefore its own training start.
rolling_mvo_runs = [
    ('2007-2025_no_crypto.csv', processed_0, '2007-06-01'),
    ('2015-2025_crypto.csv', processed_1, TRAIN_START_DATE),
    ('2015-2025_no_crypto.csv', processed_2, TRAIN_START_DATE),
]

# Keep every dataset's (df_daily_return, df_positions) tuple, keyed by CSV
# name, instead of overwriting the same two variables on each call.
rolling_mvo_results = {}
for csv_name, processed_df, train_start in rolling_mvo_runs:
    rolling_mvo_results[csv_name] = run_rolling_mvo_pipeline(
        df=processed_df,
        train_start_date=train_start,
        train_end_date=TRAIN_END_DATE,
        trade_start_date=TRADE_START_DATE,
        trade_end_date=TRADE_END_DATE,
        window_size=252,
        initial_fund=1_000_000,
        weight_bounds=(0.01, 0.25),
        original_csv_path=csv_name,
        model_name='adaptive_mvo',
        output_return_csv='df_daily_return_adaptive_mvo.csv',
        output_position_csv='df_positions_adaptive_mvo.csv'
    )

# Backward-compatible names bound to the last dataset's results. Despite its
# name, `weights_log` receives the expanded positions DataFrame, which is the
# second element returned by the pipeline.
df_daily_return, weights_log = rolling_mvo_results['2015-2025_no_crypto.csv']



[Rebalancing] Window 1/2

[Rebalancing] Window 2/2
[INFO] Rolling MVO daily returns saved to 2007-2025_no_crypto/adaptive_mvo/df_daily_return_adaptive_mvo.csv
[INFO] Rolling MVO expanded positions saved to 2007-2025_no_crypto/adaptive_mvo/df_positions_adaptive_mvo.csv

[Rebalancing] Window 1/2

[Rebalancing] Window 2/2
[INFO] Rolling MVO daily returns saved to 2015-2025_crypto/adaptive_mvo/df_daily_return_adaptive_mvo.csv
[INFO] Rolling MVO expanded positions saved to 2015-2025_crypto/adaptive_mvo/df_positions_adaptive_mvo.csv

[Rebalancing] Window 1/2

[Rebalancing] Window 2/2
[INFO] Rolling MVO daily returns saved to 2015-2025_no_crypto/adaptive_mvo/df_daily_return_adaptive_mvo.csv
[INFO] Rolling MVO expanded positions saved to 2015-2025_no_crypto/adaptive_mvo/df_positions_adaptive_mvo.csv
