# Stock NeurIPS2018 Part 3. Backtest
This series is a reproduction of the process in the paper *Practical Deep Reinforcement Learning Approach for Stock Trading*.

This is the third and last part of the NeurIPS2018 series, introducing how to use the agents we trained to run a backtest and compare them with baselines such as Mean Variance Optimization and the DJIA index.

Other demos can be found in the [FinRL-Tutorials](https://github.com/AI4Finance-Foundation/FinRL-Tutorials) repo.

# Part 1. Install Packages

In [136]:
## install required packages
!pip install swig
!pip install wrds
!pip install pyportfolioopt
## install finrl library
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git
!pip install pandas_market_calendars


Collecting git+https://github.com/AI4Finance-Foundation/FinRL.git
  Cloning https://github.com/AI4Finance-Foundation/FinRL.git to /private/var/folders/ks/bjl76g8d4zxgw0m5p8z2pd9r0000gn/T/pip-req-build-ik3f7ypb
  Running command git clone --filter=blob:none --quiet https://github.com/AI4Finance-Foundation/FinRL.git /private/var/folders/ks/bjl76g8d4zxgw0m5p8z2pd9r0000gn/T/pip-req-build-ik3f7ypb
  Resolved https://github.com/AI4Finance-Foundation/FinRL.git to commit d25d902a6de54931a329adc38a2663e8f576adc4
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting elegantrl@ git+https://github.com/AI4Finance-Foundation/ElegantRL.git (from finrl==0.3.8)
  Cloning https://github.com/AI4Finance-Foundation/ElegantRL.git to /private/var/folders/ks/bjl76g8d4zxgw0m5p8z2pd9r0000gn/T/pip-install-ma5qwv3i/elegantrl_4429c015343d4832a2317546f33194d4
  Running command git clone --

In [137]:
# ===========================
# Suppress Warnings
# ===========================
import warnings
warnings.filterwarnings("ignore")

# ===========================
# Standard Libraries
# ===========================
import os
import sys
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
matplotlib.use('Agg')  

# ===========================
# Enable Inline Plotting (Jupyter)
# ===========================
%matplotlib inline

# ===========================
# FinRL Imports
# ===========================
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.main import check_and_make_directories
from finrl.config import (
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
)

# ===========================
# Create Necessary Directories
# ===========================
check_and_make_directories([
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR
])


# ===========================
# Custom Imports (model.py)
# ===========================
sys.path.append(os.path.abspath("."))  
from models import DRLEnsembleAgent

sys.path.append("../FinRL-Library")


NameError: name 'matplotlib' is not defined

# Part 2. Backtesting

In [None]:
# Date boundaries ('YYYY-MM-DD') handed to `data_split` further down: the
# TRAIN_* range supplies the history used to estimate MVO inputs, the TRADE_*
# range is the period the strategies are backtested on.
TRAIN_START_DATE = '2016-02-02'
TRAIN_END_DATE = '2023-04-04'
TRADE_START_DATE = '2023-04-05'
TRADE_END_DATE = '2025-04-10'

In [None]:
def process_csv_to_features(csv_path):
    """
    Load a price CSV and return a cleaned feature frame ready for splitting.

    Tickers are grouped by the set of distinct 'day' values they contain:
    exactly 0-4 ("5-day") or exactly 0-6 ("7-day"). Tickers matching neither
    pattern are dropped. Technical indicators are computed separately for each
    group, the groups are merged, dates that end up with a single row are
    removed, and the turbulence feature is then computed on the merged frame.

    Parameters:
    -----------
    csv_path : str
        Path of the CSV to read. The code below requires at least the
        'tic', 'day' and 'date' columns; `FeatureEngineer` may need more.

    Returns:
    --------
    DataFrame
        Processed data sorted by ('date', 'tic'), with NaN, +inf and -inf
        all replaced by 0.
    """
    # Step 1: Load Data
    df = pd.read_csv(csv_path)

    # Step 2: Identify 5-day and 7-day tickers
    # NOTE(review): 'day' is presumably a day-of-week index (Monday = 0) - confirm.
    day_values_per_tic = df.groupby('tic')['day'].apply(lambda x: sorted(x.unique())).reset_index()
    day_values_per_tic.columns = ['tic', 'unique_days']

    tics_5day = day_values_per_tic[day_values_per_tic['unique_days'].apply(lambda x: x == list(range(5)))]['tic']
    tics_7day = day_values_per_tic[day_values_per_tic['unique_days'].apply(lambda x: x == list(range(7)))]['tic']

    df_5day_full = df[df['tic'].isin(tics_5day)]
    df_7day_full = df[df['tic'].isin(tics_7day)]

    # Step 3: Apply Technical Indicators (per calendar group)
    fe_ti = FeatureEngineer(
        use_technical_indicator=True,
        use_turbulence=False,
        user_defined_feature=False
    )
    df_5day_full = fe_ti.preprocess_data(df_5day_full)
    df_7day_full = fe_ti.preprocess_data(df_7day_full)

    # Step 4: Combine and Clean Index
    combined_df = pd.concat([df_5day_full, df_7day_full], ignore_index=False)
    combined_df.index = range(len(combined_df))

    # Step 5: Remove dates with only one ticker
    combined_df['date'] = pd.to_datetime(combined_df['date'])
    combined_df = combined_df[combined_df.groupby('date')['date'].transform('count') > 1]
    combined_df = combined_df.sort_values(['date', 'tic']).reset_index(drop=True)

    # Step 6: Apply Turbulence Feature
    fe_turb = FeatureEngineer(
        use_technical_indicator=False,
        use_turbulence=True,
        user_defined_feature=False
    )
    processed = fe_turb.preprocess_data(combined_df)

    # Step 7: Final Cleaning
    processed = processed.fillna(0)
    # Replace both signs of infinity; replacing np.inf alone leaves -inf
    # values in the features.
    processed = processed.replace([np.inf, -np.inf], 0)

    return processed


In [None]:
# Build the full feature frame from the local CSV and preview its first rows.
df = process_csv_to_features('data.csv')
print(df.head())

In [None]:
# Cut the processed data into the estimation (train) and backtest (trade)
# periods; each split gets a fresh 0-based row index.
train = data_split(df, TRAIN_START_DATE, TRAIN_END_DATE).reset_index(drop=True)
trade = data_split(df, TRADE_START_DATE, TRADE_END_DATE).reset_index(drop=True)


In [None]:
trade.head()

Unnamed: 0,date,close,high,low,open,volume,tic,day
0,2023-04-05,100.53,100.98,100.41,100.5,9883500.0,agg,2
1,2023-04-05,91.53,91.54,91.53,91.53,6913877.0,bil,2
2,2023-04-05,28175.37,28799.99,27817.32,28178.66,15066.72,btcusd,2
3,2023-04-05,187.83,188.86,186.78,188.34,11440830.0,gld,2
4,2023-04-05,407.6,408.7,405.88,407.91,65200240.0,spy,2


To backtest the agents, upload trade_data.csv to the same directory as this notebook. For Colab users, just upload trade_data.csv to the default directory.

In [None]:

# Number of distinct tickers present in the trade period.
stock_dimension = len(trade.tic.unique())


# Part 3: Mean Variance Optimization

Mean Variance optimization is a very classic strategy in portfolio management. Here, we go through the whole process to do the mean variance optimization and add it as a baseline to compare.

First, process dataframe to the form for MVO weight calculation.

In [None]:
def process_df_for_mvo(df):
    """
    Reshape long-format price rows into a date x ticker matrix of closes.

    Parameters:
    -----------
    df : DataFrame
        Long-format data with 'date', 'tic' and 'close' columns.

    Returns:
    --------
    DataFrame
        One row per date (ascending) and one column per ticker (sorted).
        Only dates on which every ticker has a non-missing close are kept.
        If a (date, tic) pair occurs more than once, the last row wins.
        Index and column axes are left unnamed.
    """
    all_tickers = sorted(df['tic'].unique())
    if df.empty:
        return pd.DataFrame(columns=all_tickers)

    # A single pivot replaces the per-row Python loop and the row-by-row
    # `.loc` enlargement, both of which scale poorly with the number of rows.
    prices = df[['date', 'tic', 'close']].drop_duplicates(
        subset=['date', 'tic'], keep='last'
    )
    mvo = prices.pivot(index='date', columns='tic', values='close')
    mvo = mvo.reindex(columns=all_tickers).sort_index()

    # only include dates with all tickers
    mvo = mvo.dropna(how='any')

    # Callers rely on an unnamed index (e.g. reset_index() -> "index" column).
    mvo.index.name = None
    mvo.columns.name = None
    return mvo


### Helper functions for mean returns and variance-covariance matrix

In [None]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai

# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns):
    """
    Compute period-over-period percentage returns for each asset.

    Parameters:
    -----------
    StockPrice : array-like
        2-D prices, one row per period and one column per asset.
    Rows : int
        Number of leading price rows to use.
    Columns : int
        Number of leading asset columns to use.

    Returns:
    --------
    ndarray
        Array of shape (Rows - 1, Columns) where entry [i, j] is
        (price[i + 1, j] - price[i, j]) / price[i, j] * 100.
    """
    import numpy as np

    prices = np.asarray(StockPrice, dtype=float)[:Rows, :Columns]
    previous = prices[:-1]
    # Same arithmetic as the element-wise loop, evaluated in one array expression.
    return ((prices[1:] - previous) / previous) * 100

In [143]:
def run_naive_portfolio_pipeline(df,
                                  start_date,
                                  end_date,
                                  initial_fund=1_000_000,
                                  buy_cost_pct=0.0,
                                  output_return_csv='df_daily_return_naive.csv'):
    """
    Backtest a buy-and-hold, equal-weight portfolio and save its daily returns.

    The starting capital (net of `buy_cost_pct`) is divided evenly across all
    tickers, converted into share counts at the first available close, and
    then held unchanged for the rest of the period.

    Parameters:
    -----------
    df : DataFrame
        Processed dataframe containing 'date', 'tic', and 'close'.
    start_date : str
        Start date in 'YYYY-MM-DD' format.
    end_date : str
        End date in 'YYYY-MM-DD' format.
    initial_fund : float
        Starting capital.
    buy_cost_pct : float
        Transaction cost percentage on initial buy.
    output_return_csv : str
        Filename for saving daily returns CSV.

    Returns:
    --------
    DataFrame
        Columns 'date' and 'daily_return'; the first return is set to 0.0.

    Raises:
    -------
    ValueError
        If no usable price rows exist, or a first-day price is zero.
    """

    # Restrict to the requested period, ordered by date then ticker.
    window = data_split(df, start_date, end_date).reset_index(drop=True)
    window = window.sort_values(['date', 'tic']).reset_index(drop=True)

    # Date x ticker close matrix (complete dates only).
    price_matrix = process_df_for_mvo(window)
    if price_matrix.empty:
        raise ValueError("Insufficient data for naive portfolio.")

    n_assets = len(price_matrix.columns.tolist())

    # Prices at which the initial purchase happens.
    opening_prices = price_matrix.iloc[0].to_numpy()
    if np.any(opening_prices == 0):
        raise ValueError("Zero price detected in first trading day.")

    # Equal dollar budget per asset, then shares bought with that budget.
    budget_per_asset = initial_fund * (1 - buy_cost_pct) * (1.0 / n_assets)
    holdings = budget_per_asset / opening_prices

    # Mark the fixed holdings to market on every date.
    account_value = price_matrix @ holdings
    ledger = pd.DataFrame({
        "date": price_matrix.index,
        "account_value": account_value
    })
    ledger["date"] = pd.to_datetime(ledger["date"])
    ledger = ledger.set_index("date")

    # Day-over-day change of the account value; first day has no predecessor.
    ledger["daily_return"] = ledger["account_value"].pct_change()
    ledger = ledger.reset_index()
    ledger.loc[0, "daily_return"] = 0.0
    daily_returns = ledger[["date", "daily_return"]]

    daily_returns.to_csv(output_return_csv, index=False)
    print(f"[INFO] Naive portfolio daily returns saved to {output_return_csv}")

    return daily_returns


In [145]:
# Equal-weight baseline over the trade period; a 0.1% cost is taken off the
# capital before the initial purchase.
df_daily_return_naive = run_naive_portfolio_pipeline(
    df=df,
    start_date=TRADE_START_DATE,
    end_date=TRADE_END_DATE,
    initial_fund=1_000_000,
    buy_cost_pct=0.001,  
    output_return_csv='naive_portfolio_2024.csv'
)


[INFO] Naive portfolio daily returns saved to naive_portfolio_2024.csv


### Calculate the weights for mean-variance

In [None]:
trade.head()

Unnamed: 0,date,close,high,low,open,volume,tic,day
0,2023-04-05,100.53,100.98,100.41,100.5,9883500.0,agg,2
1,2023-04-05,91.53,91.54,91.53,91.53,6913877.0,bil,2
2,2023-04-05,28175.37,28799.99,27817.32,28178.66,15066.72,btcusd,2
3,2023-04-05,187.83,188.86,186.78,188.34,11440830.0,gld,2
4,2023-04-05,407.6,408.7,405.88,407.91,65200240.0,spy,2


In [None]:
# Date x ticker close-price matrix for the training period.
StockData = process_df_for_mvo(train)

# Same layout for the trade (backtest) period.
TradeData = process_df_for_mvo(trade)

# Show the trade-period prices as a plain array.
TradeData.to_numpy()

array([[  100.53,    91.53, 28175.37, ...,    81.83,   206.91,    40.25],
       [  100.44,    91.54, 28053.46, ...,    82.39,   207.1 ,    40.49],
       [   99.75,    91.55, 29660.41, ...,    82.77,   208.59,    40.4 ],
       ...,
       [   98.2 ,    91.49, 79140.01, ...,    81.99,   232.36,    40.43],
       [   97.69,    91.51, 76251.64, ...,    79.79,   228.54,    39.89],
       [   97.91,    91.51, 82594.75, ...,    84.55,   246.69,    42.49]])

In [None]:
# Compute asset returns (in percent, one row per consecutive pair of dates)
arStockPrices = np.asarray(StockData)
[Rows, Cols]=arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

# Compute per-asset mean returns and the variance-covariance matrix of returns
# (rowvar=False: each column is one asset, each row one observation)
meanReturns = np.mean(arReturns, axis = 0)
covReturns = np.cov(arReturns, rowvar=False)

# Set precision for printing results
np.set_printoptions(precision=3, suppress = True)

# Display mean returns and variance-covariance matrix of returns
print('Mean returns of assets in k-portfolio 1\n', meanReturns)
print('Variance-Covariance matrix of returns\n', covReturns)

Mean returns of assets in k-portfolio 1
 [-0.004  0.     0.348  0.034  0.05   0.045  0.014  0.045  0.026]
Variance-Covariance matrix of returns
 [[ 0.111  0.001  0.172  0.112  0.042  0.053  0.109  0.055  0.045]
 [ 0.001  0.001  0.001 -0.001 -0.    -0.     0.    -0.    -0.   ]
 [ 0.172  0.001 21.561  0.45   1.286  1.504  1.163  1.378  1.232]
 [ 0.112 -0.001  0.45   0.797  0.046  0.045  0.153  0.067  0.17 ]
 [ 0.042 -0.     1.286  0.046  1.428  1.538  1.266  1.477  1.17 ]
 [ 0.053 -0.     1.504  0.045  1.538  1.971  1.52   1.75   1.343]
 [ 0.109  0.     1.163  0.153  1.266  1.52   1.935  1.437  1.023]
 [ 0.055 -0.     1.378  0.067  1.477  1.75   1.437  1.651  1.264]
 [ 0.045 -0.     1.232  0.17   1.17   1.343  1.023  1.264  1.689]]


### Use PyPortfolioOpt

In [None]:
from pypfopt.efficient_frontier import EfficientFrontier



In [None]:
# Max-Sharpe optimisation with every asset weight constrained to [1%, 25%].
ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0.01, 0.25))
raw_weights_mean = ef_mean.max_sharpe()
# Tidied copy of the optimised weights (see pypfopt `clean_weights`).
cleaned_weights_mean = ef_mean.clean_weights()
# Turn weights into dollar allocations of a 1,000,000 starting fund, in the
# order the weights are keyed.
mvo_weights = np.array([1000000 * cleaned_weights_mean[key] for key in cleaned_weights_mean.keys()])
mvo_weights

array([ 10000., 250000., 200000., 250000., 250000.,  10000.,  10000.,
        10000.,  10000.])

In [None]:
# Reciprocal of each asset's close on the first trade date.
FirstTradePrice = np.array([1/p for p in TradeData.head(1).to_numpy()[0]])
# Shares held per asset = dollar allocation x (1 / first-day price).
Initial_Portfolio = np.multiply(mvo_weights, FirstTradePrice)


In [None]:
# Value the fixed share holdings at every trade date's closes.
Portfolio_Assets = TradeData @ Initial_Portfolio
MVO_result = pd.DataFrame(Portfolio_Assets, columns=["account_value"])
MVO_result

Unnamed: 0,account_value
2023-04-05,1.000000e+06
2023-04-06,9.984852e+05
2023-04-10,1.008480e+06
2023-04-11,1.014441e+06
2023-04-12,1.012220e+06
...,...
2025-04-03,1.605777e+06
2025-04-04,1.580196e+06
2025-04-07,1.537157e+06
2025-04-08,1.512924e+06


In [None]:
# MVO_result is indexed by trade date and has a single 'account_value' column
df_daily_return = MVO_result.copy()

# Compute daily returns as the day-over-day change in account value
df_daily_return["daily_return"] = df_daily_return["account_value"].pct_change()

# Reset index to make 'date' a column (the unnamed index comes back as "index")
df_daily_return = df_daily_return.reset_index().rename(columns={"index": "date"})

# The first day has no previous value, so pct_change() yields NaN; use 0.0 instead
df_daily_return.loc[0, "daily_return"] = 0.0

# Keep only required columns
df_daily_return = df_daily_return[["date", "daily_return"]]

# Preview
df_daily_return.head()

# Note: the default index=True also writes the row numbers as a first column.
df_daily_return.to_csv('df_daily_return_mvo.csv')


In [146]:
def run_mvo_portfolio_pipeline(df, 
                                train_start_date, 
                                train_end_date, 
                                trade_start_date, 
                                trade_end_date, 
                                initial_fund=1_000_000, 
                                weight_bounds=(0.01, 0.25), 
                                output_return_csv='df_daily_return_mvo.csv'):
    """
    Compute MVO (Mean-Variance Optimization) portfolio returns and export daily returns to CSV.

    Weights are estimated once on the training period (max Sharpe), converted
    into share counts at the first trade-period close, and held unchanged.

    Parameters:
    -----------
    df : DataFrame
        Processed dataframe containing 'date', 'tic', and 'close'.
    train_start_date : str
        Training start date in 'YYYY-MM-DD'.
    train_end_date : str
        Training end date in 'YYYY-MM-DD'.
    trade_start_date : str
        Trading start date in 'YYYY-MM-DD'.
    trade_end_date : str
        Trading end date in 'YYYY-MM-DD'.
    initial_fund : float
        Starting capital for the portfolio.
    weight_bounds : tuple
        Bounds for asset weights in optimization.
    output_return_csv : str
        Filename to save the daily return CSV.

    Returns:
    --------
    DataFrame
        Columns 'date' and 'daily_return'; the first return is set to 0.0.

    Raises:
    -------
    ValueError
        If the training period has fewer than three complete dates, the trade
        period has none, the two periods cover different tickers, or a
        first-day trade price is zero.
    """

    # Step 1: Split data
    train = data_split(df, train_start_date, train_end_date).reset_index(drop=True)
    trade = data_split(df, trade_start_date, trade_end_date).reset_index(drop=True)

    # Step 2: Process for MVO
    StockData = process_df_for_mvo(train)
    TradeData = process_df_for_mvo(trade)

    # Validate up front, as the naive pipeline does, instead of failing later
    # with an opaque indexing/broadcast error or silently producing inf/NaN.
    # Three price rows give two return observations, the minimum for a sample
    # covariance.
    if len(StockData) < 3:
        raise ValueError("Insufficient training data for MVO portfolio.")
    if TradeData.empty:
        raise ValueError("Insufficient trade data for MVO portfolio.")
    # Weights are matched to trade prices by position, so the columns must agree.
    if list(StockData.columns) != list(TradeData.columns):
        raise ValueError("Training and trading periods must contain the same tickers.")

    # Step 3: Compute returns, mean, and covariance
    arStockPrices = np.asarray(StockData)
    Rows, Cols = arStockPrices.shape
    arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

    meanReturns = np.mean(arReturns, axis=0)
    covReturns = np.cov(arReturns, rowvar=False)

    # Step 4: Mean-Variance Optimization (Max Sharpe)
    ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=weight_bounds)
    ef_mean.max_sharpe()
    cleaned_weights_mean = ef_mean.clean_weights()

    # Step 5: Allocate capital based on optimized weights
    allocation = np.array([initial_fund * weight for weight in cleaned_weights_mean.values()])
    first_prices = TradeData.iloc[0].to_numpy()
    if np.any(first_prices == 0):
        raise ValueError("Zero price detected in first trading day.")
    shares = allocation / first_prices

    # Step 6: Compute portfolio value over time
    portfolio_values = TradeData @ shares
    MVO_result = pd.DataFrame({
        "date": TradeData.index,
        "account_value": portfolio_values
    })

    # Step 7: Calculate daily returns
    MVO_result["date"] = pd.to_datetime(MVO_result["date"])
    MVO_result.set_index("date", inplace=True)

    df_daily_return = MVO_result.copy()
    df_daily_return["daily_return"] = df_daily_return["account_value"].pct_change()

    df_daily_return = df_daily_return.reset_index()
    # The first day has no predecessor, so its NaN return is replaced by 0.0.
    df_daily_return.loc[0, "daily_return"] = 0.0
    df_daily_return = df_daily_return[["date", "daily_return"]]

    # Step 8: Export to CSV
    df_daily_return.to_csv(output_return_csv, index=False)
    print(f"[INFO] MVO daily returns saved to {output_return_csv}")

    return df_daily_return


In [147]:
# Static MVO baseline: weights fitted on the training period, then held over
# the whole trade period.
df_daily_return_mvo = run_mvo_portfolio_pipeline(
    df=df,
    train_start_date=TRAIN_START_DATE,
    train_end_date=TRAIN_END_DATE,
    trade_start_date=TRADE_START_DATE,
    trade_end_date=TRADE_END_DATE,
    initial_fund=1_000_000,
    weight_bounds=(0.01, 0.25),
    output_return_csv='mvo_portfolio_2024.csv'
)


[INFO] MVO daily returns saved to mvo_portfolio_2024.csv


In [None]:
from pypfopt.efficient_frontier import EfficientFrontier
import pandas as pd
import numpy as np

def compute_rolling_mvo_rebalance_63(trade_df, train_df, window_size=63, train_window=126):
    """
    Backtest an MVO portfolio that is re-optimised every `window_size` dates.

    The trade period is cut into consecutive, non-overlapping windows of
    `window_size` unique dates; leftover dates that do not fill a whole window
    are not traded. For each window the weights are optimised (max Sharpe,
    falling back to minimum volatility if that fails) on the current training
    data, the capital is invested at the window's first-day closes and held to
    the window's end. The first window trains on `train_df`; every later window
    trains only on the window that was traded just before it.

    Parameters:
    -----------
    trade_df : DataFrame
        Trade-period rows with 'date', 'tic' and 'close'.
    train_df : DataFrame
        Training rows used for the first window, same columns.
    window_size : int
        Number of unique trade dates per rebalancing window.
    train_window : int
        Currently unused; kept so existing callers keep working.

    Returns:
    --------
    (DataFrame, list)
        Account values ('account_value' column) indexed by date, and the list
        of cleaned weight mappings, one per optimised window.
    """
    trade_df = trade_df.reset_index(drop=True)
    unique_dates = trade_df.date.unique()
    total_windows = len(unique_dates) // window_size

    # Initialize tracking
    window_results = []
    portfolio_dates = []
    weights_log = []
    initial_fund = 1_000_000
    buy_cost_pct = 0.000
    sell_cost_pct = 0.000

    # Initial train data
    train_df_window = train_df.copy()

    for w in range(total_windows):
        print(f"\nRebalancing window {w+1}/{total_windows}...")

        window_dates = unique_dates[w * window_size:(w + 1) * window_size]
        trade_df_window = trade_df[trade_df['date'].isin(window_dates)].copy()

        # Step 1: Preprocess
        train_mvo = process_df_for_mvo(train_df_window)
        trade_mvo = process_df_for_mvo(trade_df_window)
        if train_mvo.empty or len(train_mvo) < 2:
            print(f"[Window {w}] Skipped: insufficient training data (empty or < 2 rows).")
            continue

        if train_mvo.shape[1] < 3:
            print(f"[Window {w}] Skipped: not enough assets for optimization.")
            continue

        arStockPrices = np.asarray(train_mvo)
        Rows, Cols = arStockPrices.shape
        arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

        # Convert to pandas with proper tickers
        meanReturns = pd.Series(np.mean(arReturns, axis=0), index=train_mvo.columns)
        if meanReturns.std() < 1e-4:
            print(f"[Window {w}] Skipped: mean returns too flat to optimize Sharpe.")
            continue
        covReturns = pd.DataFrame(np.cov(arReturns, rowvar=False), index=train_mvo.columns, columns=train_mvo.columns)

        print(f"[Window {w}] Mean returns stats → std: {meanReturns.std():.8f}, min: {meanReturns.min():.6f}, max: {meanReturns.max():.6f}")
        print(f"[Window {w}] Covariance condition number: {np.linalg.cond(covReturns.values):.2e}")
        print(f"[Window {w}] Num assets: {train_mvo.shape[1]}, Num days: {train_mvo.shape[0]}")

        # Step 2: Optimize
        try:
            ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0.0, 0.5))
            ef_mean.max_sharpe()
        except Exception:
            print(f"[Window {w}] Falling back to min_volatility...")
            ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0.0, 0.5))
            ef_mean.min_volatility()
        # Read the weights from whichever optimiser succeeded. Doing this only
        # on the max-Sharpe path left the fallback using the previous window's
        # weights (or an undefined name in the first window).
        cleaned_weights_mean = ef_mean.clean_weights()

        weights_log.append(cleaned_weights_mean)

        mvo_weights = np.array([
            initial_fund * (1 - buy_cost_pct) * weight
            for weight in cleaned_weights_mean.values()
        ])

        # Step 3: Compute portfolio value
        if trade_mvo.empty:
            print(f"[Window {w}] Skipped: no complete trade dates in window.")
            continue
        first_prices = trade_mvo.iloc[0].to_numpy()
        if np.any(first_prices == 0):
            print(f"[Window {w}] Skipped: contains 0 price(s) in first trade day.")
            continue
        FirstTradePrice = 1 / first_prices
        Initial_Portfolio = np.multiply(mvo_weights, FirstTradePrice)
        Portfolio_Assets = trade_mvo @ Initial_Portfolio
        MVO_result = Portfolio_Assets.to_frame("account_value")

        # Step 4: Collect results
        # Take the dates from the priced matrix so they always line up with the
        # values, even when incomplete dates were dropped from the window.
        portfolio_dates.extend(trade_mvo.index.tolist())
        window_results.append(MVO_result)

        # Step 5: Update training set and capital
        # The next window trains only on the window that was just traded.
        train_df_window = trade_df_window.reset_index(drop=True)

        final_value = MVO_result["account_value"].iloc[-1]
        initial_fund = final_value * (1 - sell_cost_pct)

    # Final formatting
    if window_results:
        portfolio_values = pd.concat(window_results, ignore_index=True)
    else:
        portfolio_values = pd.DataFrame(columns=["account_value"], dtype=float)
    portfolio_values.index = pd.to_datetime(portfolio_dates)

    return portfolio_values, weights_log


In [None]:

# Rolling MVO over the trade period with the default 63-date windows; the
# first window is optimised on the `train` split.
rolling_result, weights_log = compute_rolling_mvo_rebalance_63(trade, train)
rolling_result.head()


Rebalancing window 1/8...
[Window 0] Mean returns stats → std: 0.10898415, min: -0.004482, max: 0.347734
[Window 0] Covariance condition number: 2.45e+04
[Window 0] Num assets: 9, Num days: 1805

Rebalancing window 2/8...
[Window 1] Mean returns stats → std: 0.08097557, min: -0.090125, max: 0.133825
[Window 1] Covariance condition number: 1.26e+03
[Window 1] Num assets: 9, Num days: 63

Rebalancing window 3/8...
[Window 2] Mean returns stats → std: 0.05548225, min: -0.200786, max: -0.000837
[Window 2] Covariance condition number: 7.33e+02
[Window 2] Num assets: 9, Num days: 63
[Window 2] Falling back to min_volatility...

Rebalancing window 4/8...
[Window 3] Mean returns stats → std: 0.23864156, min: -0.001185, max: 0.821228
[Window 3] Covariance condition number: 1.51e+03
[Window 3] Num assets: 9, Num days: 63

Rebalancing window 5/8...
[Window 4] Mean returns stats → std: 0.24680595, min: -0.044944, max: 0.770477
[Window 4] Covariance condition number: 2.47e+03
[Window 4] Num assets

Unnamed: 0,account_value
2023-04-05,1000010.0
2023-04-06,998494.9
2023-04-10,1002841.0
2023-04-11,1006528.0
2023-04-12,1006016.0


In [None]:
# rolling_result is indexed by trade date and has a single 'account_value' column
df_daily_return = rolling_result.copy()

# Compute daily returns as the day-over-day change in account value
df_daily_return["daily_return"] = df_daily_return["account_value"].pct_change()

# Reset index to make 'date' a column (the unnamed index comes back as "index")
df_daily_return = df_daily_return.reset_index().rename(columns={"index": "date"})

# The first day has no previous value, so pct_change() yields NaN; use 0.0 instead
df_daily_return.loc[0, "daily_return"] = 0.0

# Keep only required columns
df_daily_return = df_daily_return[["date", "daily_return"]]

# Preview the last rows
df_daily_return.tail()

Unnamed: 0,date,daily_return
499,2025-04-02,-0.014453
500,2025-04-03,0.002384
501,2025-04-04,0.002068
502,2025-04-07,-0.026462
503,2025-04-08,-0.016667


In [None]:
# Persist the rolling-MVO daily returns; with the default index=True the row
# numbers are written as an extra first column.
df_daily_return.to_csv('df_daily_return_adaptive_mvo.csv')

In [148]:
def run_rolling_mvo_pipeline(df,
                              train_start_date,
                              train_end_date,
                              trade_start_date,
                              trade_end_date,
                              window_size=63,
                              train_window_extend=True,
                              initial_fund=1_000_000,
                              weight_bounds=(0.0, 0.5),
                              buy_cost_pct=0.0,
                              sell_cost_pct=0.0,
                              output_return_csv='df_daily_return_adaptive_mvo.csv'):
    """
    Run Rolling Mean-Variance Optimization (MVO) with rebalancing and export daily returns.

    The trade period is cut into consecutive windows of `window_size` unique
    dates; leftover dates that do not fill a whole window are not traded. Each
    window re-optimises the weights (max Sharpe, falling back to minimum
    volatility if that fails), invests the current capital at the window's
    first-day closes and holds to the window's end.

    Parameters:
    -----------
    df : DataFrame
        Processed dataframe with 'date', 'tic', and 'close'.
    train_start_date, train_end_date : str
        Training period in 'YYYY-MM-DD'.
    trade_start_date, trade_end_date : str
        Trading period in 'YYYY-MM-DD'.
    window_size : int
        Rebalance window size (days).
    train_window_extend : bool
        If True, expand training set with each window. If False, every window
        is optimised on the original training period only.
    initial_fund : float
        Starting capital.
    weight_bounds : tuple
        Bounds for asset weights.
    buy_cost_pct, sell_cost_pct : float
        Transaction cost percentages.
    output_return_csv : str
        Filename to save daily returns.

    Returns:
    --------
    (DataFrame, list)
        Daily returns ('date', 'daily_return') and the list of cleaned weight
        mappings, one per optimised window.
    """

    train_df = data_split(df, train_start_date, train_end_date).reset_index(drop=True)
    trade_df = data_split(df, trade_start_date, trade_end_date).reset_index(drop=True)

    unique_dates = trade_df.date.unique()
    total_windows = len(unique_dates) // window_size

    window_results = []
    portfolio_dates = []
    weights_log = []

    train_df_window = train_df.copy()

    for w in range(total_windows):
        print(f"\n[Rebalancing] Window {w+1}/{total_windows}")

        window_dates = unique_dates[w * window_size:(w + 1) * window_size]
        trade_df_window = trade_df[trade_df['date'].isin(window_dates)].copy()

        train_mvo = process_df_for_mvo(train_df_window)
        trade_mvo = process_df_for_mvo(trade_df_window)

        if train_mvo.empty or len(train_mvo) < 2 or train_mvo.shape[1] < 3:
            print(f"[Window {w}] Skipped due to insufficient data.")
            continue

        arReturns = StockReturnsComputing(np.asarray(train_mvo), *train_mvo.shape)
        meanReturns = pd.Series(np.mean(arReturns, axis=0), index=train_mvo.columns)
        covReturns = pd.DataFrame(np.cov(arReturns, rowvar=False), index=train_mvo.columns, columns=train_mvo.columns)

        if meanReturns.std() < 1e-4:
            print(f"[Window {w}] Skipped: flat mean returns.")
            continue

        try:
            ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=weight_bounds)
            ef_mean.max_sharpe()
        except Exception:
            # `except Exception` rather than a bare `except`, so that
            # KeyboardInterrupt/SystemExit are not swallowed by the fallback.
            print(f"[Window {w}] Fallback to min_volatility.")
            ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=weight_bounds)
            ef_mean.min_volatility()
        cleaned_weights_mean = ef_mean.clean_weights()

        weights_log.append(cleaned_weights_mean)

        mvo_weights = np.array([
            initial_fund * (1 - buy_cost_pct) * weight
            for weight in cleaned_weights_mean.values()
        ])

        if trade_mvo.empty:
            print(f"[Window {w}] Skipped: no complete trade dates in window.")
            continue
        first_prices = trade_mvo.iloc[0].to_numpy()
        if np.any(first_prices == 0):
            print(f"[Window {w}] Skipped: zero prices detected.")
            continue

        shares = mvo_weights / first_prices
        portfolio_series = trade_mvo @ shares
        MVO_result = portfolio_series.to_frame("account_value")

        # Take the dates from the priced matrix so they always line up with the
        # values, even when incomplete dates were dropped from the window.
        portfolio_dates.extend(trade_mvo.index.tolist())
        window_results.append(MVO_result)

        if train_window_extend:
            train_df_window = pd.concat([train_df_window, trade_df_window], ignore_index=True)

        # Capital carried into the next window, net of selling costs.
        initial_fund = MVO_result["account_value"].iloc[-1] * (1 - sell_cost_pct)

    if window_results:
        portfolio_values = pd.concat(window_results, ignore_index=True)
    else:
        portfolio_values = pd.DataFrame(columns=["account_value"], dtype=float)
    portfolio_values.index = pd.to_datetime(portfolio_dates)

    # Calculate daily returns
    df_daily_return = portfolio_values.copy()
    df_daily_return["daily_return"] = df_daily_return["account_value"].pct_change()
    df_daily_return = df_daily_return.reset_index().rename(columns={"index": "date"})
    # Only overwrite the first return when there is one; assigning to row 0 of
    # an empty frame would create a spurious row.
    if not df_daily_return.empty:
        df_daily_return.loc[0, "daily_return"] = 0.0
    df_daily_return = df_daily_return[["date", "daily_return"]]

    df_daily_return.to_csv(output_return_csv, index=False)
    print(f"[INFO] Rolling MVO daily returns saved to {output_return_csv}")

    return df_daily_return, weights_log


In [150]:
# Rolling MVO backtest: re-optimise every 63 trade dates with each weight
# bounded to [0, 0.5]; the training set grows with every traded window
# (train_window_extend defaults to True).
df_daily_return_adaptive_mvo, weights_log = run_rolling_mvo_pipeline(
    df=df,
    train_start_date=TRAIN_START_DATE,
    train_end_date=TRAIN_END_DATE,
    trade_start_date=TRADE_START_DATE,
    trade_end_date=TRADE_END_DATE,
    window_size=63,
    initial_fund=1_000_000,
    weight_bounds=(0.0, 0.5),
    output_return_csv='adaptive_mvo_2024_2025.csv'
)



[Rebalancing] Window 1/8

[Rebalancing] Window 2/8

[Rebalancing] Window 3/8

[Rebalancing] Window 4/8

[Rebalancing] Window 5/8

[Rebalancing] Window 6/8

[Rebalancing] Window 7/8

[Rebalancing] Window 8/8
[INFO] Rolling MVO daily returns saved to adaptive_mvo_2024_2025.csv
