# Univariate Performance Statistics Analysis
### Analyzing MKT, SMB, HML, and Risk-Free Rate
#### Three Subsamples: Beginning-1980, 1981-2001, 2002-End

In [24]:
import pandas as pd
import numpy as np

# Read the Excel file from the 'factors' sheet
df = pd.read_excel('dfa_analysis_data.xlsx', sheet_name='factors')


In [25]:
# Function to calculate performance statistics
def calculate_statistics(returns, rf_rate):
    """
    Calculate mean, volatility, Sharpe ratio, and VaR(0.05)
    
    Parameters:
    returns: Series or array of returns
    rf_rate: Series or array of risk-free rates
    
    Returns:
    Dictionary with statistics
    """
    # Convert to numpy arrays
    returns = np.array(returns)
    rf_rate = np.array(rf_rate)
    
    # Calculate mean (annualized - assuming monthly data, multiply by 12)
    mean_return = np.mean(returns) * 12
    
    # Calculate volatility (annualized - assuming monthly data, multiply by sqrt(12))
    volatility = np.std(returns, ddof=1) * np.sqrt(12)
    
    # Calculate Sharpe ratio
    excess_returns = returns - rf_rate
    sharpe_ratio = (np.mean(excess_returns) * 12) / (np.std(excess_returns, ddof=1) * np.sqrt(12))
    
    # Calculate VaR at 5% level (using historical simulation)
    var_05 = np.percentile(returns, 5)
    
    return {
        'Mean': mean_return,
        'Volatility': volatility,
        'Sharpe': sharpe_ratio,
        'VaR(0.05)': var_05
    }

In [26]:
# Prepare the data - identify date column and convert to datetime
# Assuming first column is date
date_col = df.columns[0]
df[date_col] = pd.to_datetime(df[date_col])
df = df.sort_values(date_col)


In [27]:
# Define the three subsamples
# Period 1: Beginning - 1980 (up to Dec 1980)
period1 = df[df[date_col] <= '1980-12-31']

# Period 2: 1981 - 2001 (Jan 1981 to Dec 2001)
period2 = df[(df[date_col] >= '1981-01-01') & (df[date_col] <= '2001-12-31')]

# Period 3: 2002 - End (Jan 2002 onwards)
period3 = df[df[date_col] >= '2002-01-01']

print("Period 1 (Beginning - 1980):", len(period1), "observations")
print("  Date range:", period1[date_col].min(), "to", period1[date_col].max())
print("\nPeriod 2 (1981 - 2001):", len(period2), "observations")
print("  Date range:", period2[date_col].min(), "to", period2[date_col].max())
print("\nPeriod 3 (2002 - End):", len(period3), "observations")
print("  Date range:", period3[date_col].min(), "to", period3[date_col].max())

Period 1 (Beginning - 1980): 654 observations
  Date range: 1926-07-31 00:00:00 to 1980-12-31 00:00:00

Period 2 (1981 - 2001): 252 observations
  Date range: 1981-01-31 00:00:00 to 2001-12-31 00:00:00

Period 3 (2002 - End): 284 observations
  Date range: 2002-01-31 00:00:00 to 2025-08-31 00:00:00


In [28]:
# Calculate statistics for all three periods and all factors
def analyze_all_periods(period_data, period_name, factors, rf_col):
    """
    Analyze all factors for a given period
    """
    print(f"\n{'='*80}")
    print(f"{period_name}")
    print(f"{'='*80}")
    
    results = {}
    
    for factor in factors:
        stats = calculate_statistics(period_data[factor], period_data[rf_col])
        results[factor] = stats
        
        print(f"\n{factor.upper()}:")
        print(f"  Mean (annualized):     {stats['Mean']:>10.4f}")
        print(f"  Volatility (annualized): {stats['Volatility']:>10.4f}")
        print(f"  Sharpe Ratio:          {stats['Sharpe']:>10.4f}")
        print(f"  VaR(0.05):             {stats['VaR(0.05)']:>10.4f}")
    
    return results

In [29]:

mkt_col = 'Mkt-RF'
smb_col = 'SMB'
hml_col = 'HML'
rf_col = 'RF'

In [30]:
# Run analysis for all three periods
if all([mkt_col, smb_col, hml_col, rf_col]):
    factors_dict = {'mkt': mkt_col, 'smb': smb_col, 'hml': hml_col}
    
    results_p1 = analyze_all_periods(period1, "Period 1: Beginning - 1980", 
                                      [mkt_col, smb_col, hml_col], rf_col)
    results_p2 = analyze_all_periods(period2, "Period 2: 1981 - 2001", 
                                      [mkt_col, smb_col, hml_col], rf_col)
    results_p3 = analyze_all_periods(period3, "Period 3: 2002 - End", 
                                      [mkt_col, smb_col, hml_col], rf_col)
else:
    print("Error: Could not identify all required columns. Please check column names.")


Period 1: Beginning - 1980

MKT-RF:
  Mean (annualized):         0.0810
  Volatility (annualized):     0.2050
  Sharpe Ratio:              0.2588
  VaR(0.05):                -0.0841

SMB:
  Mean (annualized):         0.0339
  Volatility (annualized):     0.1143
  Sharpe Ratio:              0.0539
  VaR(0.05):                -0.0419

HML:
  Mean (annualized):         0.0503
  Volatility (annualized):     0.1342
  Sharpe Ratio:              0.1677
  VaR(0.05):                -0.0442

Period 2: 1981 - 2001

MKT-RF:
  Mean (annualized):         0.0779
  Volatility (annualized):     0.1572
  Sharpe Ratio:              0.0934
  VaR(0.05):                -0.0641

SMB:
  Mean (annualized):        -0.0020
  Volatility (annualized):     0.1173
  Sharpe Ratio:             -0.5520
  VaR(0.05):                -0.0459

HML:
  Mean (annualized):         0.0646
  Volatility (annualized):     0.1099
  Sharpe Ratio:              0.0135
  VaR(0.05):                -0.0416

Period 3: 2002 - End

MKT-RF:


In [31]:
# Create a summary table
def create_summary_table(results_dict, period_names):
    """
    Create a formatted summary table of all results
    """
    summary_data = []
    
    for period_name, results in zip(period_names, results_dict):
        for factor_name, stats in results.items():
            summary_data.append({
                'Period': period_name,
                'Factor': factor_name,
                'Mean': stats['Mean'],
                'Volatility': stats['Volatility'],
                'Sharpe': stats['Sharpe'],
                'VaR(0.05)': stats['VaR(0.05)']
            })
    
    summary_df = pd.DataFrame(summary_data)
    return summary_df

if all([mkt_col, smb_col, hml_col, rf_col]):
    period_names = ['Beginning - 1980', '1981 - 2001', '2002 - End']
    summary_df = create_summary_table([results_p1, results_p2, results_p3], period_names)
    
    print("\n" + "="*80)
    print("SUMMARY TABLE")
    print("="*80)
    print(summary_df.to_string(index=False))
    
    # Also create pivot tables for easier reading
    print("\n" + "="*80)
    print("MEAN RETURNS (Annualized)")
    print("="*80)
    print(summary_df.pivot(index='Factor', columns='Period', values='Mean'))
    
    print("\n" + "="*80)
    print("VOLATILITY (Annualized)")
    print("="*80)
    print(summary_df.pivot(index='Factor', columns='Period', values='Volatility'))
    
    print("\n" + "="*80)
    print("SHARPE RATIO")
    print("="*80)
    print(summary_df.pivot(index='Factor', columns='Period', values='Sharpe'))
    
    print("\n" + "="*80)
    print("VaR(0.05)")
    print("="*80)
    print(summary_df.pivot(index='Factor', columns='Period', values='VaR(0.05)'))


SUMMARY TABLE
          Period Factor      Mean  Volatility    Sharpe  VaR(0.05)
Beginning - 1980 Mkt-RF  0.080958    0.204988  0.258799  -0.084090
Beginning - 1980    SMB  0.033914    0.114277  0.053857  -0.041875
Beginning - 1980    HML  0.050321    0.134228  0.167743  -0.044245
     1981 - 2001 Mkt-RF  0.077852    0.157183  0.093351  -0.064135
     1981 - 2001    SMB -0.002014    0.117260 -0.552021  -0.045880
     1981 - 2001    HML  0.064557    0.109863  0.013491  -0.041640
      2002 - End Mkt-RF  0.091306    0.153529  0.490639  -0.077265
      2002 - End    SMB  0.007931    0.088448 -0.088683  -0.039195
      2002 - End    HML  0.001204    0.106442 -0.137055  -0.041485

MEAN RETURNS (Annualized)
Period  1981 - 2001  2002 - End  Beginning - 1980
Factor                                           
HML        0.064557    0.001204          0.050321
Mkt-RF     0.077852    0.091306          0.080958
SMB       -0.002014    0.007931          0.033914

VOLATILITY (Annualized)
Period  1981 