In [9]:
import pandas as pd
import numpy as np
pd.set_option('future.no_silent_downcasting', True)

# Input file path
file_path = "Data For Part B/48_Industry_Portfolios.CSV"

# Step 1: Read data with proper missing value handling
df_firms = pd.read_csv(
    file_path, 
    skiprows=2587, 
    nrows=1182,
    na_values=[-99.99, -999]  # Handle both missing value indicators
)

df_size = pd.read_csv(
    file_path, 
    skiprows=3773, 
    nrows=1182,
    na_values=[-99.99, -999]  # Handle both missing value indicators
)

# Step 2: Process date format
df_firms.rename(columns={df_firms.columns[0]: 'Date'}, inplace=True)
df_size.rename(columns={df_size.columns[0]: 'Date'}, inplace=True)

df_firms['Date'] = pd.to_datetime(df_firms['Date'], format='%Y%m', errors='coerce')
df_size['Date'] = pd.to_datetime(df_size['Date'], format='%Y%m', errors='coerce')

# Step 3: Ensure numerical columns are converted to float type
numeric_cols = df_firms.columns.drop('Date')
for col in numeric_cols:
    df_firms[col] = pd.to_numeric(df_firms[col], errors='coerce')
    df_size[col] = pd.to_numeric(df_size[col], errors='coerce')

# Step 4: Set index for calculations
df_firms.set_index('Date', inplace=True)
df_size.set_index('Date', inplace=True)

# Step 5: Compute market capitalization
df_market_cap = df_firms * df_size

df_market_cap.index = df_market_cap.index.to_period('M')  # Convert index to PeriodIndex

# Step 6: Read the "Sum of BE/Sum of ME" data
df_BM = pd.read_csv(
    file_path, 
    skiprows=4959, 
    nrows=99,
    na_values=[-99.99, -999]  # Handle both missing value indicators
)

df_BM.rename(columns={df_BM.columns[0]: 'Date'}, inplace=True)

# Step 7: Create monthly data
monthly_data = []

for i in range(len(df_BM)):
    year = df_BM.loc[i, 'Date']
    bm_values = df_BM.iloc[i, 1:].values  # Extract B/M ratios excluding 'Date'
    
    for month in range(7, 13):
        month_data = [f"{year}-{month:02d}"]
        month_data.extend(bm_values.tolist())
        monthly_data.append(month_data)
    
    for month in range(1, 7):
        month_data = [f"{year+1}-{month:02d}"]
        month_data.extend(bm_values.tolist())
        monthly_data.append(month_data)

df_BM_monthly = pd.DataFrame(monthly_data, columns=['Date'] + list(df_BM.columns[1:]))

df_BM_monthly['Date'] = pd.to_datetime(df_BM_monthly['Date'], errors='coerce')
df_BM_monthly['Date'] = df_BM_monthly['Date'].dt.to_period('M')
df_BM_monthly.set_index('Date', inplace=True)

# Step 8: Read "Average Value Weighted Returns -- Monthly"
df_return = pd.read_csv(
    file_path, 
    skiprows=11, 
    nrows=1182,
    na_values=[-99.99, -999]  # Handle missing values during import
)

df_return.rename(columns={df_return.columns[0]: 'Date'}, inplace=True)
df_return['Date'] = pd.to_datetime(df_return['Date'], format='%Y%m', errors='coerce')

df_return.set_index('Date', inplace=True)

df_momentum = df_return.rolling(window=12, min_periods=12).mean()
df_momentum.index = df_momentum.index.to_period('M')


In [None]:
#1
print("Market Capitalization Data Sample:")
df_market_cap

Market Capitalization Data Sample:


Unnamed: 0_level_0,Agric,Food,Soda,Beer,Smoke,Toys,Fun,Books,Hshld,Clths,...,Boxes,Trans,Whlsl,Rtail,Meals,Banks,Insur,RlEst,Fin,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1926-07,299.40,1247.60,,21.36,955.52,13.00,171.84,4.33,178.16,220.32,...,212.10,4977.87,2.38,1539.45,64.92,43.50,95.46,45.84,49.60,97.00
1926-08,306.18,1246.00,,20.25,967.52,14.12,176.04,6.50,177.04,237.96,...,227.16,5058.17,1.80,1536.81,66.00,45.51,94.95,47.12,46.65,102.04
1926-09,313.02,1278.00,,25.74,1024.48,16.50,174.60,9.29,169.44,231.48,...,220.92,5281.55,1.90,1521.63,65.64,50.91,97.38,48.94,46.80,108.84
1926-10,308.73,1288.40,,26.76,1030.72,17.88,183.48,8.83,169.84,228.36,...,208.62,5269.14,1.76,1522.95,64.80,49.38,97.98,47.44,44.45,104.64
1926-11,307.02,1236.00,,25.86,1041.28,17.62,174.06,9.31,161.12,228.36,...,196.80,5115.11,1.48,1485.99,61.98,43.56,93.78,44.66,43.95,95.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08,44880.42,429966.50,364008.40,366745.94,266317.80,37403.86,469992.12,31125.00,700076.76,221826.30,...,76173.66,1001728.70,612178.50,4403786.40,715240.96,2955967.19,1897388.87,80933.56,1594860.00,1559522.88
2024-09,45887.28,450420.00,383597.76,369697.79,287486.58,36560.68,497920.56,31633.10,743215.56,234585.90,...,77018.49,1022689.02,604116.25,4431178.18,753160.65,3014625.58,1977556.46,81376.86,1607626.10,1655730.72
2024-10,47034.96,453750.50,384463.03,372284.66,277503.65,37779.04,519229.60,30838.80,750817.98,247426.80,...,79277.58,1047316.50,599328.24,4611681.46,791989.38,2948825.70,1952851.53,86188.68,1646844.28,1632512.40
2024-11,49054.56,433380.00,354876.13,353107.26,300981.70,37887.98,540009.64,30977.20,704199.28,232245.90,...,76416.12,1061504.48,589028.59,4566562.38,786733.02,3093994.32,1867851.40,86779.23,1748620.41,1582960.64


In [None]:
#2
print("\nBook-to-Market Ratio Data Sample:")
df_BM_monthly


Book-to-Market Ratio Data Sample:


Unnamed: 0_level_0,Agric,Food,Soda,Beer,Smoke,Toys,Fun,Books,Hshld,Clths,...,Boxes,Trans,Whlsl,Rtail,Meals,Banks,Insur,RlEst,Fin,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1926-07,0.65,0.63,,2.54,0.79,1.99,0.81,4.34,0.38,0.90,...,0.97,1.41,2.35,0.44,0.66,0.66,0.62,0.88,0.89,0.63
1926-08,0.65,0.63,,2.54,0.79,1.99,0.81,4.34,0.38,0.90,...,0.97,1.41,2.35,0.44,0.66,0.66,0.62,0.88,0.89,0.63
1926-09,0.65,0.63,,2.54,0.79,1.99,0.81,4.34,0.38,0.90,...,0.97,1.41,2.35,0.44,0.66,0.66,0.62,0.88,0.89,0.63
1926-10,0.65,0.63,,2.54,0.79,1.99,0.81,4.34,0.38,0.90,...,0.97,1.41,2.35,0.44,0.66,0.66,0.62,0.88,0.89,0.63
1926-11,0.65,0.63,,2.54,0.79,1.99,0.81,4.34,0.38,0.90,...,0.97,1.41,2.35,0.44,0.66,0.66,0.62,0.88,0.89,0.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02,0.68,0.49,0.1,0.20,0.18,0.36,0.14,0.43,0.15,0.22,...,0.33,0.23,0.26,0.12,0.16,0.66,0.37,0.43,0.35,0.45
2025-03,0.68,0.49,0.1,0.20,0.18,0.36,0.14,0.43,0.15,0.22,...,0.33,0.23,0.26,0.12,0.16,0.66,0.37,0.43,0.35,0.45
2025-04,0.68,0.49,0.1,0.20,0.18,0.36,0.14,0.43,0.15,0.22,...,0.33,0.23,0.26,0.12,0.16,0.66,0.37,0.43,0.35,0.45
2025-05,0.68,0.49,0.1,0.20,0.18,0.36,0.14,0.43,0.15,0.22,...,0.33,0.23,0.26,0.12,0.16,0.66,0.37,0.43,0.35,0.45


In [None]:
#3
print("\nMomentum Data Sample:")
df_momentum


Momentum Data Sample:


Unnamed: 0_level_0,Agric,Food,Soda,Beer,Smoke,Toys,Fun,Books,Hshld,Clths,...,Boxes,Trans,Whlsl,Rtail,Meals,Banks,Insur,RlEst,Fin,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1926-07,,,,,,,,,,,...,,,,,,,,,,
1926-08,,,,,,,,,,,...,,,,,,,,,,
1926-09,,,,,,,,,,,...,,,,,,,,,,
1926-10,,,,,,,,,,,...,,,,,,,,,,
1926-11,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08,1.801667,0.454167,1.231667,-0.335833,2.620833,-0.441667,2.371667,1.851667,1.003333,-0.075833,...,2.010000,1.155000,1.555833,2.253333,0.927500,2.925000,2.440833,1.892500,2.455833,2.653333
2024-09,1.961667,0.890000,1.831667,0.201667,2.714167,0.455000,3.662500,2.173333,1.658333,0.771667,...,2.496667,1.746667,1.720000,3.105000,1.750833,3.085000,2.142500,3.262500,2.962500,2.767500
2024-10,2.813333,0.948333,1.223333,0.134167,3.752500,1.670833,3.764167,2.395000,1.178333,-0.089167,...,2.484167,2.421667,1.663333,2.970833,1.840000,3.752500,1.492500,4.074167,4.036667,2.690833
2024-11,3.255833,0.645833,0.847500,-0.224167,3.535000,1.533333,4.024167,2.186667,1.348333,-0.320833,...,2.133333,2.101667,1.988333,3.246667,1.721667,3.756667,1.859167,3.896667,4.090000,2.757500


In [22]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from collections import defaultdict

###############################################################################
# 0. HELPER: ENSURE WE USE A MONTHLY PeriodIndex
###############################################################################
def ensure_monthly_period_index(df):
    """
    Convert df index to a monthly PeriodIndex if not already.
    If it's already PeriodIndex, we ensure freq='M'.
    """
    if not isinstance(df.index, pd.PeriodIndex):
        df.index = pd.to_datetime(df.index).to_period("M")
    else:
        df.index = df.index.asfreq("M")
    df.sort_index(inplace=True)
    return df

###############################################################################
# 1. CROSS-SECTIONAL STANDARDIZATION
###############################################################################
def cross_sectional_standardize(df_char):
    """
    For each date (row), subtract the cross-sectional mean and divide by
    the cross-sectional std. Ensures zero mean and unit std *across stocks*.
    """
    return df_char.apply(lambda row: (row - row.mean()) / row.std(ddof=1), axis=1)

###############################################################################
# 2. BRANDT ET AL. (2009) EQUATION (16) WEIGHTING
###############################################################################
def brandt_eq16_weights(
    w_bar_i_t,   # baseline weight (float or array), e.g., 0.0
    theta,       # param vector for [z_t ⊗ x_i,t]
    z_t,         # instruments for time t (shape = (#instruments,))
    x_i_t        # cross-sectional chars for asset i (shape = (#chars,))
):
    """
    w_{i,t} = w_bar_{i,t} + theta' [z_t ⊗ x_i,t], possibly * (1/N_t).
    Adjust exact formula to match your specification.
    """
    kron_prod = np.kron(z_t, x_i_t)  # (#instruments * #chars,)
    return w_bar_i_t + np.dot(theta, kron_prod)

def renormalize_weights(weights):
    """Force sum of weights = 1 (budget constraint)."""
    total = weights.sum()
    if abs(total) < 1e-12:
        return weights
    return weights / total

###############################################################################
# 3. CRRA UTILITY (EQ. (6)) AND PORTFOLIO RETURNS
###############################################################################
def portfolio_returns_from_params(
    theta,
    df_returns,      # T x N DataFrame: monthly returns for T months & N stocks
    df_BM_std,       # T x N DataFrame: standardized B/M
    df_MC_std,       # T x N DataFrame: standardized Market Cap
    df_Mom_std,      # T x N DataFrame: standardized Momentum
    z_t_series=None, # if you have time-series instruments
    w_bar=0.0
):
    """
    Given a parameter vector 'theta', form monthly portfolio weights 
    via Eq. (16), then compute realized monthly returns.
    
    If no instruments, set z_t = [1.0].  
    If multiple instruments, shape your 'theta' dimension accordingly.
    """
    common_dates = df_returns.index
    port_rets = []
    
    for date in common_dates:
        # If we have instruments, pull the data for 'date'; else use [1.]
        if z_t_series is not None:
            # e.g., if single instrument => shape (1,)
            z_t = np.array([z_t_series.loc[date]])
        else:
            z_t = np.array([1.0])
        
        # Cross-sectional characteristics at 'date'
        x_bm  = df_BM_std.loc[date, :]
        x_mc  = df_MC_std.loc[date, :]
        x_mom = df_Mom_std.loc[date, :]
        
        # Combine into shape (N_stocks, 3) if you have 3 characteristics
        X = np.vstack([x_bm.values, x_mc.values, x_mom.values]).T
        
        # For each stock, build raw weight
        N_stocks = X.shape[0]
        raw_weights = []
        for i in range(N_stocks):
            w_i_t = brandt_eq16_weights(w_bar, theta, z_t, X[i, :])
            raw_weights.append(w_i_t)
        
        raw_weights = np.array(raw_weights)
        
        # If eq. (16) uses 1/N_t factor outside, do it here
        raw_weights *= (1.0 / N_stocks)
        
        # Renormalize
        final_weights = renormalize_weights(raw_weights)
        
        # Realized return => dot product of final_weights & stock returns
        r_i_t = df_returns.loc[date, :].values
        port_rets.append(np.sum(final_weights * r_i_t))
    
    return np.array(port_rets)

def crra_utility_objective(
    theta, gamma,
    df_returns, df_BM_std, df_MC_std, df_Mom_std
):
    """
    Negative average CRRA utility:
       U = E[ (1 + R_p)^(1-gamma ) / (1-gamma) ].
    Minimizing => multiply by -1.
    """
    p_rets = portfolio_returns_from_params(
        theta=theta,
        df_returns=df_returns,
        df_BM_std=df_BM_std,
        df_MC_std=df_MC_std,
        df_Mom_std=df_Mom_std,
        z_t_series=None  # or your real instruments
    )
    gross = 1.0 + p_rets
    crra_vals = (gross ** (1.0 - gamma)) / (1.0 - gamma)
    return -np.mean(crra_vals)

def estimate_portfolio_policy(
    df_returns, df_BM_std, df_MC_std, df_Mom_std,
    gamma=5.0, dim_x=3
):
    """
    Maximize CRRA utility on (in-sample) returns. 
    If you have 3 characteristics & no instruments, dimension = 3.
    """
    init_theta = np.zeros(dim_x)
    res = minimize(
        fun=lambda th: crra_utility_objective(
            th, gamma=gamma,
            df_returns=df_returns,
            df_BM_std=df_BM_std,
            df_MC_std=df_MC_std,
            df_Mom_std=df_Mom_std
        ),
        x0=init_theta,
        method='BFGS'
    )
    return res.x

###############################################################################
# 4. PUTTING IT ALL TOGETHER: ROLLING OOS WITH SAFEGUARDS
###############################################################################
def run_rolling_oos(df_return, df_BM_monthly, df_market_cap, df_momentum, cutoff_str="1973-12", gamma=5.0):
    """
    Full routine:
      1) Convert all to monthly PeriodIndex
      2) Cross-sectional standardize
      3) Gather common intersection of monthly dates
      4) Partition in-sample vs. out-of-sample
      5) Estimate & roll forward
      6) Compute OOS performance, handle zero stdev
    """
    
    #-------------------- Ensure monthly PeriodIndex
    df_return     = ensure_monthly_period_index(df_return)
    df_BM_monthly = ensure_monthly_period_index(df_BM_monthly)
    df_market_cap = ensure_monthly_period_index(df_market_cap)
    df_momentum   = ensure_monthly_period_index(df_momentum)
    
    #-------------------- Cross-sectional standardization
    df_BM_std  = cross_sectional_standardize(df_BM_monthly)
    df_MC_std  = cross_sectional_standardize(df_market_cap)
    df_Mom_std = cross_sectional_standardize(df_momentum)
    
    #-------------------- Intersection of all indexes
    all_dates = (df_return.index
                 .intersection(df_BM_std.index)
                 .intersection(df_MC_std.index)
                 .intersection(df_Mom_std.index))
    all_dates = sorted(all_dates)
    
    #-------------------- Partition in-sample vs. out-of-sample
    cutoff = pd.Period(cutoff_str, freq="M")
    in_samp_dates = [d for d in all_dates if d <= cutoff]
    oos_dates     = [d for d in all_dates if d >  cutoff]
    
    # If no OOS dates, we can warn or just proceed
    if len(oos_dates) == 0:
        print(f"No out-of-sample dates found after {cutoff_str}. Please check your data.")
        return
    
    df_in_returns = df_return.loc[in_samp_dates]
    df_in_BM_std  = df_BM_std.loc[in_samp_dates]
    df_in_MC_std  = df_MC_std.loc[in_samp_dates]
    df_in_Mom_std = df_Mom_std.loc[in_samp_dates]
    
    #-------------------- Estimate initial theta
    theta_est = estimate_portfolio_policy(
        df_in_returns, df_in_BM_std, df_in_MC_std, df_in_Mom_std,
        gamma=gamma, dim_x=3
    )
    
    #-------------------- Rolling OOS by year
    dates_by_year = defaultdict(list)
    for d in oos_dates:
        dates_by_year[d.year].append(d)
    
    oos_portfolio_returns = []
    
    for year in sorted(dates_by_year.keys()):
        # Use the current theta_est for each month in this year
        for d in dates_by_year[year]:
            # Evaluate OOS return for date d
            monthly_return_arr = portfolio_returns_from_params(
                theta=theta_est,
                df_returns=df_return.loc[[d]],
                df_BM_std=df_BM_std.loc[[d]],
                df_MC_std=df_MC_std.loc[[d]],
                df_Mom_std=df_Mom_std.loc[[d]]
            )
            # monthly_return_arr is length 1 => single float
            oos_portfolio_returns.append(monthly_return_arr[0])
        
        # Expand in-sample window to end of this year, re-estimate
        last_period_this_year = pd.Period(f"{year}-12", freq="M")
        new_in_sample_dates = [dd for dd in all_dates if dd <= last_period_this_year]
        
        df_in_returns = df_return.loc[new_in_sample_dates]
        df_in_BM_std  = df_BM_std.loc[new_in_sample_dates]
        df_in_MC_std  = df_MC_std.loc[new_in_sample_dates]
        df_in_Mom_std = df_Mom_std.loc[new_in_sample_dates]
        
        theta_est = estimate_portfolio_policy(
            df_in_returns, df_in_BM_std, df_in_MC_std, df_in_Mom_std,
            gamma=gamma, dim_x=3
        )
    
    #-------------------- OOS Performance
    oos_portfolio_returns = np.array(oos_portfolio_returns, dtype=float)
    
    if len(oos_portfolio_returns) < 2:
        print("Not enough out-of-sample months to calculate volatility or Sharpe Ratio.")
        print(f"OOS returns: {oos_portfolio_returns}")
        return
    
    mean_monthly_ret = np.mean(oos_portfolio_returns)
    std_monthly_ret  = np.std(oos_portfolio_returns, ddof=1)
    
    # If stdev is extremely small (close to zero), Sharpe -> NaN or infinite
    if np.isclose(std_monthly_ret, 0.0):
        sharpe_ratio = np.nan
    else:
        sharpe_ratio = mean_monthly_ret / std_monthly_ret
    
    annualized_mean = (1 + mean_monthly_ret)**12 - 1
    annualized_std  = std_monthly_ret * np.sqrt(12)
    
    print("===== Out-of-Sample Performance =====")
    print(f"Data from {oos_dates[0]} to {oos_dates[-1]}")
    print(f"Number of OOS months: {len(oos_portfolio_returns)}")
    print(f"Annualized Avg. Monthly Return: {annualized_mean:.2%}")
    print(f"Annualized Std Dev:             {annualized_std:.2%}")
    print(f"Sharpe Ratio:                   {sharpe_ratio:.2f}")

###############################################################################
# 5. EXAMPLE USAGE
###############################################################################
# Suppose you already loaded:
# df_return, df_BM_monthly, df_market_cap, df_momentum
# Then run:
run_rolling_oos(df_return, df_BM_monthly, df_market_cap, df_momentum, 
                 cutoff_str="1973-12", gamma=5.0)





===== Out-of-Sample Performance =====
Data from 1974-01 to 2024-12
Number of OOS months: 612
Annualized Avg. Monthly Return: 0.00%
Annualized Std Dev:             0.00%
Sharpe Ratio:                   nan
