In [1]:
import pandas as pd
import numpy as np
import math

starting_year_to_filter = 1963
end_year_to_filter = 2020
number_of_lookback_periods = 120
data_to_read_address = "data/25_Portfolios_ME_OP_5x5_monthly.CSV"
number_of_PPs_to_consider = 3
number_of_PEPs_to_consider = 3
number_of_PAPs_to_consider = 3

def rank_and_map(df):
    # Make a copy to avoid modifying the original DataFrame
    df_copy = df.copy()
    # Exclude the 'date' column for ranking
    data_columns = df_copy.columns[1:]
    
    # Apply ranking and scaling row-wise (for each date)
    def rank_row(row):
        # Get the ranks (min rank is 1)
        ranks = row.rank(method='min')
        # Normalize the ranks to range between 0 and 1
        ranks_normalized = (ranks - 1) / (len(row) - 1)
        # Map to range [-0.5, 0.5]
        return ranks_normalized - 0.5
    
    # Apply rank_row function to each row, excluding the 'date' column
    df_copy[data_columns] = df_copy[data_columns].apply(rank_row, axis=1)
    return df_copy



def cross_sectional_demean(df):
    # Make a copy to avoid modifying the original DataFrame
    df_copy = df.copy()
    # Exclude the 'date' column
    data_columns = df_copy.columns[1:]
    
    # Apply demeaning row-wise (for each date)
    def demean_row(row):
        row_mean = row.mean()  # Compute the mean of the row
        return row - row_mean  # Subtract the mean from each element in the row
    
    # Apply demean_row function to each row, excluding the 'date' column
    df_copy[data_columns] = df_copy[data_columns].apply(demean_row, axis=1)
    return df_copy


def compute_rs_product(df1, df2):
    # Ensure the date columns match
    if not df1['date'].equals(df2['date']):
        raise ValueError("Date columns of both dataframes must match.")
    
  # Convert to numeric, set invalid values as NaN
    df1 = df1.astype({col: 'float64' for col in df1.columns if col != 'date'})
    df2 = df2.astype({col: 'float64' for col in df2.columns if col != 'date'})
    result = {}
    
    # Iterate over each row (each date)
    for index, date in enumerate(df1['date']):
        # Get the R vector (from df1) and S' vector (from df2) for the current date
        R = df1.iloc[index, 1:].values.reshape(-1, 1)  # n x 1 vector
        S_transpose = df2.iloc[index, 1:].values.reshape(1, -1)  # 1 x n vector
        # Compute the outer product (RS')
        matrix_rs = np.dot(R, S_transpose)  # n x n matrix
        # Store the result in a dictionary, with date as the key
        result[date] = matrix_rs

    return result


def get_prediction_matrix(input_date, result_matrices, n_periods):
    # Sort the dates in result_matrices to ensure they're in order
    sorted_dates = sorted(result_matrices.keys())
    # Find the index of the input date in the sorted list of dates
    if input_date not in sorted_dates:
        raise ValueError("The input date is not found in the result_matrices.")
    
    input_date_index = sorted_dates.index(input_date)
    # Select the last n_periods (excluding the input date)
    start_index = max(0, input_date_index - n_periods)  # Ensure we don't go below index 0
    selected_dates = sorted_dates[start_index:input_date_index]
    
    if len(selected_dates) == 0:
        raise ValueError(f"There are no previous periods to calculate the average for the given number: {n_periods}.")
    
    # Initialize a matrix to accumulate the sum
    matrix_shape = result_matrices[sorted_dates[0]].shape
    sum_matrix = np.zeros(matrix_shape, dtype=float)
    # Sum all the selected matrices
    for date in selected_dates:
        sum_matrix += np.array(result_matrices[date], dtype=float)
    
    # Calculate the element-wise average
    average_matrix = sum_matrix / len(selected_dates)
    return average_matrix



# i should start from 0. In other words, to get the first PP's expected return you must set i=0.
def get_ith_PPs_expected_return(S,i):
    return S[i]

# i should start from 0. In other words, to get the first PP you must set i=0.
def get_ith_position_matrix(U,VT,i):
    u_column = U[:, i]
    v_column = VT[i, :]
    return np.outer(v_column,u_column)

def first_n_PPs_expected_return(S,n):
    sum = 0
    for i in range(n):
        sum += get_ith_PPs_expected_return(S,i)
    return sum

def first_n_PPs_position_matrix(U,VT,number_of_PPs):
    matrix_shape = U.shape
    sum_matrix = np.zeros(matrix_shape, dtype=float)
    for i in range(number_of_PPs):
        sum_matrix += get_ith_position_matrix(U,VT,i)
    return sum_matrix/number_of_PPs

# i should start from 0. In other words, to get the first PEP you must set i=0.
def get_ith_PEPs_expected_return(eigenvalues,i):
    return eigenvalues[i]

def get_ith_symmetric_position_matrix(eigenvectors,i):
    w = eigenvectors[:, i]
    return np.outer(w,w)

def first_n_PEPs_expected_return(eigenvalues,n):
    sum = 0
    for i in range(n):
        sum += abs(get_ith_PEPs_expected_return(eigenvalues,i))
    return sum

def first_n_PEPs_position_matrix(eigenvectors,number_of_PEPs):
    matrix_shape = eigenvectors.shape
    sum_matrix = np.zeros(matrix_shape, dtype=float)
    for i in range(number_of_PEPs):
        sum_matrix += get_ith_symmetric_position_matrix(eigenvectors,i)
    return sum_matrix/number_of_PEPs

# i should start from 0. In other words, to get the first PEP you must set i=0.
def get_ith_PAPs_expected_return(filtered_eigenvalues_ta,i):
    return 2 * filtered_eigenvalues_ta[i]

def get_ith_asymmetric_position_matrix(sorted_eigenvectors_ta_real_part,sorted_eigenvectors_ta_imaginary_part,i):
    return np.outer(sorted_eigenvectors_ta_real_part[:,i],sorted_eigenvectors_ta_imaginary_part[:,i]) - np.outer(sorted_eigenvectors_ta_imaginary_part[:,i],sorted_eigenvectors_ta_real_part[:,i])
    
def first_n_PAPs_expected_return(filtered_eigenvalues_ta,n):
    sum = 0
    for i in range(n):
        sum += get_ith_PAPs_expected_return(filtered_eigenvalues_ta,i)
    return sum

def first_n_PAPs_position_matrix(sorted_eigenvectors_ta_real_part,sorted_eigenvectors_ta_imaginary_part,number_of_PAPs):
    number_of_rows = sorted_eigenvectors_ta_real_part.shape[0]
    sum_matrix = np.zeros((number_of_rows,number_of_rows), dtype=float)
    for i in range(number_of_PAPs):
        sum_matrix += get_ith_asymmetric_position_matrix(sorted_eigenvectors_ta_real_part,sorted_eigenvectors_ta_imaginary_part,i)
    return sum_matrix/number_of_PAPs

def calculate_sharpe_ratio(returns):
    # Compute excess returns
    
    # Compute average excess return
    average_return = returns.mean()
    
    # Compute standard deviation of returns
    std_dev_returns = returns.std()
    
    # Compute Sharpe Ratio
    sharpe_ratio = average_return / std_dev_returns
    
    return sharpe_ratio

In [2]:
df_25_ff_size_value_sorted_monthly = pd.read_csv(data_to_read_address)
df_25_ff_size_value_sorted_monthly['date'] = pd.to_datetime(df_25_ff_size_value_sorted_monthly['date'], format='%Y%m') + pd.offsets.MonthEnd(1)
df_25_ff_size_value_sorted_monthly.head(5)

Unnamed: 0,date,SMALL LoOP,ME1 OP2,ME1 OP3,ME1 OP4,SMALL HiOP,ME2 OP1,ME2 OP2,ME2 OP3,ME2 OP4,...,ME4 OP1,ME4 OP2,ME4 OP3,ME4 OP4,ME4 OP5,BIG LoOP,ME5 OP2,ME5 OP3,ME5 OP4,BIG HiOP
0,1963-07-31,-0.588,1.3361,2.5678,-0.8866,-0.9773,-1.1724,-1.98,-0.9323,0.8199,...,-3.4911,-0.445,-2.0771,-1.2839,0.0144,0.2983,1.5917,0.0529,-0.9568,0.6882
1,1963-08-31,2.3656,4.0255,2.4627,2.6665,4.1942,6.5111,5.8692,3.8409,4.8916,...,5.4381,5.3484,6.4204,4.4289,6.5983,4.8572,4.2866,5.3068,5.6946,5.9835
2,1963-09-30,-1.3983,-0.8718,-1.9406,4.2566,-1.3751,-1.838,-1.7595,0.0814,-2.1056,...,-1.7807,-1.0764,-2.2074,-2.6623,-2.3075,-2.4348,-2.2749,0.8966,-1.8203,-1.2376
3,1963-10-31,0.2669,-0.9385,1.3085,3.1559,0.3955,3.6192,3.1351,1.2895,-1.2666,...,3.0527,0.4481,1.7736,-0.6654,3.0401,-0.0258,0.1551,2.4051,3.152,7.7794
4,1963-11-30,-2.1755,-1.413,-3.3743,-2.3083,-3.1325,-1.3337,-0.1868,-0.0751,-1.1251,...,-0.1035,-1.0495,-0.0628,-0.4844,0.6525,-0.9922,-1.1321,1.2581,0.7443,-2.7711


Note that I shift signals one period forward to make computations easier. 

In [3]:
signal_df = pd.DataFrame()
signal_df["date"] = df_25_ff_size_value_sorted_monthly["date"]
signal_df= signal_df.join(df_25_ff_size_value_sorted_monthly.iloc[:, 1:].shift(1))
signal_df.head()

Unnamed: 0,date,SMALL LoOP,ME1 OP2,ME1 OP3,ME1 OP4,SMALL HiOP,ME2 OP1,ME2 OP2,ME2 OP3,ME2 OP4,...,ME4 OP1,ME4 OP2,ME4 OP3,ME4 OP4,ME4 OP5,BIG LoOP,ME5 OP2,ME5 OP3,ME5 OP4,BIG HiOP
0,1963-07-31,,,,,,,,,,...,,,,,,,,,,
1,1963-08-31,-0.588,1.3361,2.5678,-0.8866,-0.9773,-1.1724,-1.98,-0.9323,0.8199,...,-3.4911,-0.445,-2.0771,-1.2839,0.0144,0.2983,1.5917,0.0529,-0.9568,0.6882
2,1963-09-30,2.3656,4.0255,2.4627,2.6665,4.1942,6.5111,5.8692,3.8409,4.8916,...,5.4381,5.3484,6.4204,4.4289,6.5983,4.8572,4.2866,5.3068,5.6946,5.9835
3,1963-10-31,-1.3983,-0.8718,-1.9406,4.2566,-1.3751,-1.838,-1.7595,0.0814,-2.1056,...,-1.7807,-1.0764,-2.2074,-2.6623,-2.3075,-2.4348,-2.2749,0.8966,-1.8203,-1.2376
4,1963-11-30,0.2669,-0.9385,1.3085,3.1559,0.3955,3.6192,3.1351,1.2895,-1.2666,...,3.0527,0.4481,1.7736,-0.6654,3.0401,-0.0258,0.1551,2.4051,3.152,7.7794


I can think of this matrix as $S_{t-1}$.

In [4]:
normalized_signal_df = rank_and_map(signal_df)
normalized_signal_df = normalized_signal_df[(normalized_signal_df['date'].dt.year > starting_year_to_filter) & (normalized_signal_df['date'].dt.year < end_year_to_filter)].reset_index(drop=True)
normalized_signal_df.head(5)

Unnamed: 0,date,SMALL LoOP,ME1 OP2,ME1 OP3,ME1 OP4,SMALL HiOP,ME2 OP1,ME2 OP2,ME2 OP3,ME2 OP4,...,ME4 OP1,ME4 OP2,ME4 OP3,ME4 OP4,ME4 OP5,BIG LoOP,ME5 OP2,ME5 OP3,ME5 OP4,BIG HiOP
0,1964-01-31,-0.458333,-0.333333,-0.291667,-0.166667,-0.5,-0.041667,-0.083333,0.041667,-0.25,...,0.458333,0.166667,0.083333,0.125,-0.375,0.0,0.5,0.208333,0.375,0.333333
1,1964-02-29,0.5,0.375,0.208333,0.416667,0.166667,-0.375,0.125,0.0,-0.208333,...,0.041667,-0.333333,-0.166667,-0.5,-0.458333,-0.041667,0.458333,0.083333,-0.083333,0.25
2,1964-03-31,0.458333,-0.416667,-0.5,-0.083333,0.25,0.375,-0.25,-0.375,-0.458333,...,-0.166667,-0.041667,0.5,0.041667,0.333333,0.166667,-0.125,-0.375,-0.25,0.083333
3,1964-04-30,-0.125,0.0,-0.375,-0.041667,-0.291667,0.375,0.458333,0.041667,0.208333,...,0.416667,0.166667,0.291667,-0.166667,0.125,-0.416667,-0.083333,-0.5,-0.458333,-0.208333
4,1964-05-31,-0.083333,0.333333,0.166667,0.416667,-0.25,-0.041667,-0.166667,-0.333333,-0.416667,...,0.5,0.125,-0.125,0.083333,-0.458333,0.458333,0.375,0.208333,-0.208333,0.25


This matrix can be denoted as $R_{t-1}$

In [5]:
demeaned_return_df = cross_sectional_demean(df_25_ff_size_value_sorted_monthly)
demeaned_return_df = demeaned_return_df[(demeaned_return_df['date'].dt.year > starting_year_to_filter) & (demeaned_return_df['date'].dt.year < end_year_to_filter)].reset_index(drop=True)
demeaned_return_df.head()


Unnamed: 0,date,SMALL LoOP,ME1 OP2,ME1 OP3,ME1 OP4,SMALL HiOP,ME2 OP1,ME2 OP2,ME2 OP3,ME2 OP4,...,ME4 OP1,ME4 OP2,ME4 OP3,ME4 OP4,ME4 OP5,BIG LoOP,ME5 OP2,ME5 OP3,ME5 OP4,BIG HiOP
0,1964-01-31,2.71524,2.02904,0.39704,2.12194,0.22004,-1.45036,0.01864,-0.24306,-0.84836,...,-0.13046,-1.33746,-0.69816,-1.71046,-1.69576,-0.35956,2.26944,-0.01116,-0.62756,0.77444
1,1964-02-29,1.675488,-1.273612,-2.555812,-0.255212,0.649388,1.433588,-0.435612,-1.117912,-2.476512,...,-0.371912,-0.080712,1.982588,0.230688,1.248788,0.348188,-0.306012,-1.117912,-0.435612,0.282888
2,1964-03-31,-0.528612,-0.383212,-1.506112,-0.438212,-1.337712,1.897788,3.127288,-0.121712,0.445388,...,1.962688,0.417888,0.583988,-0.608012,0.116688,-1.588112,-0.439612,-3.122512,-1.631612,-1.022312
3,1964-04-30,-0.615292,1.492508,0.425008,2.779008,-1.359492,-0.489192,-0.929892,-1.514492,-2.019392,...,5.947508,0.304008,-0.871492,-0.128392,-2.759592,3.172308,2.461408,0.474808,-1.064392,0.935508
4,1964-05-31,0.502476,-1.446124,0.083876,-1.160624,-2.721724,3.347476,-0.007224,-0.061624,-1.869124,...,0.386276,0.519276,0.840676,0.389976,1.304876,-1.812524,1.559776,-0.610524,-0.711324,1.728976


This gives: $R_{t}S'_{t}$

In [6]:
rs_matrix = compute_rs_product(demeaned_return_df, normalized_signal_df)

Prediction matrix for date T+1, used returns data up to month T and signals data up to month T-1. In the function get_prediction_matrix, I start the calculations from the previous month. Note that although the input date is the current data, but the in the function that month is excluded.

Note that in calculating realized returns, I am using the current month(the month of rearlized returns) as index. But remember that the matrix was $S_{t-1}$. So, the index actually retreives the value of the previous month. I formed the matrix this way in order to make the calculations easier.

In [7]:
# I leave out the first 120 observations to compute the prediction matrix.


realized_returns_df = pd.DataFrame(columns=[
    "return_of_simple_factor", 
    "realized_return_of_first_three_PP", 
    "expected_return_of_first_three_PP",
    "realized_return_of_first_three_PEP",
    "expected_return_of_first_three_PEP",
    "realized_return_of_first_three_PAP",
    "expected_return_of_first_three_PAP"
])

for year_month_index in demeaned_return_df.iloc[number_of_lookback_periods:]['date']:
    date_to_consider = pd.Timestamp(year_month_index)
    
    #for PP's
    prediction_matrix = get_prediction_matrix(date_to_consider, rs_matrix, number_of_lookback_periods)
    U, S, VT = np.linalg.svd(prediction_matrix)

    #for PEP's
    Symmetric_prediction_matrix = (prediction_matrix + prediction_matrix.T)/2
    eigenvalues, eigenvectors = np.linalg.eig(Symmetric_prediction_matrix)
    idx = eigenvalues.argsort()[::-1]  # Sort in descending order
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # for PAP'S
    assymetric_prediction_matrix = 0.5 * (prediction_matrix - prediction_matrix.T)
    transposed_assymetric_prediction_matrix = assymetric_prediction_matrix.T
    eigenvalues_ta, eigenvectors_ta = np.linalg.eig(transposed_assymetric_prediction_matrix)
    sorted_indices_ta = np.argsort(-eigenvalues_ta.imag)
    sorted_eigenvalues_ta = eigenvalues_ta[sorted_indices_ta].imag
    sorted_eigenvectors_ta = eigenvectors_ta[:, sorted_indices_ta] * math.sqrt(2)  #sqrt(2) is to make the size of the vectors equal to 1.
    positive_indices = np.where(sorted_eigenvalues_ta > 0)
    filtered_eigenvalues_ta = sorted_eigenvalues_ta[positive_indices]
    filtered_eigenvectors_ta = sorted_eigenvectors_ta[:, positive_indices].squeeze()
    sorted_eigenvectors_ta_imaginary_part = filtered_eigenvectors_ta.imag
    sorted_eigenvectors_ta_real_part = filtered_eigenvectors_ta.real

    #to calculate realized returns
    signal_vector = normalized_signal_df[normalized_signal_df.date == date_to_consider].values[0, 1:].reshape(1, -1)  # 1*n matrix
    return_vector = df_25_ff_size_value_sorted_monthly[df_25_ff_size_value_sorted_monthly.date == date_to_consider].values[0, 1:].reshape(-1, 1)  # n*1  # there is not much difference between using demeaned returns or not_demeaned ones. I can replace df_25_ff_size_value_sorted_monthly with demeaned_return_df.
    

    # Compute realized returns
    return_of_simple_factor = (signal_vector @ return_vector)[0][0]
    realized_return_of_first_three_PP = (signal_vector @ first_n_PPs_position_matrix(U, VT, number_of_PPs_to_consider) @ return_vector)[0][0]
    expected_return_of_first_three_PP = first_n_PPs_expected_return(S, number_of_PPs_to_consider)
    realized_return_of_first_three_PEP = (signal_vector @ first_n_PEPs_position_matrix(eigenvectors,number_of_PEPs_to_consider) @ return_vector)[0][0]
    expected_return_of_first_three_PEP = first_n_PEPs_expected_return(eigenvalues, number_of_PEPs_to_consider)
    realized_return_of_first_three_PAP = (signal_vector @ first_n_PAPs_position_matrix(sorted_eigenvectors_ta_real_part,sorted_eigenvectors_ta_imaginary_part,number_of_PAPs_to_consider) @ return_vector)[0][0]
    expected_return_of_first_three_PAP = first_n_PAPs_expected_return(filtered_eigenvalues_ta, number_of_PAPs_to_consider)
    

    # Prepare a list for the current row values
    row_values = [
        return_of_simple_factor,  
        realized_return_of_first_three_PP, 
        expected_return_of_first_three_PP,
        realized_return_of_first_three_PEP,
        expected_return_of_first_three_PEP,
        realized_return_of_first_three_PAP,
        expected_return_of_first_three_PAP
    ]

    # Iterate over all Principal Portfolios (up to len(S)) and calculate realized/expected returns for each
    for i in range(len(S)):

        # for PP's
        realized_return_ith_PP = (signal_vector @ get_ith_position_matrix(U, VT, i) @ return_vector)[0][0]
        expected_return_ith_PP = get_ith_PPs_expected_return(S, i)
        # Add the values for realized and expected returns of the ith PP to the row
        row_values.append(realized_return_ith_PP)
        row_values.append(expected_return_ith_PP)

        # for PEP's
        realized_return_ith_PEP = (signal_vector @ get_ith_symmetric_position_matrix(eigenvectors, i) @ return_vector)[0][0]
        expected_return_ith_PEP = get_ith_PEPs_expected_return(eigenvalues, i)
        # Add the values for realized and expected returns of the ith PEP to the row
        row_values.append(realized_return_ith_PEP)
        row_values.append(expected_return_ith_PEP)


        # Dynamically add columns if they don't exist. for PP's.
        realized_col_name_pp = f"realized_return_of_{i+1}_PP"
        expected_col_name_pp = f"expected_return_of_{i+1}_PP"

        # Dynamically add columns if they don't exist. for PEP's.
        realized_col_name_pep = f"realized_return_of_{i+1}_PEP"
        expected_col_name_pep = f"expected_return_of_{i+1}_PEP"
        
        # for PP's
        if realized_col_name_pp not in realized_returns_df.columns:
            realized_returns_df[realized_col_name_pp] = None
        if expected_col_name_pp not in realized_returns_df.columns:
            realized_returns_df[expected_col_name_pp] = None

        #for PEP'S
        if realized_col_name_pep not in realized_returns_df.columns:
            realized_returns_df[realized_col_name_pep] = None
        if expected_col_name_pep not in realized_returns_df.columns:
            realized_returns_df[expected_col_name_pep] = None

    for i in range(sorted_eigenvectors_ta_imaginary_part.shape[1]):
        # for PAP's
        realized_return_ith_PAP = (signal_vector @ get_ith_asymmetric_position_matrix(sorted_eigenvectors_ta_real_part,sorted_eigenvectors_ta_imaginary_part,i) @ return_vector)[0][0]
        expected_return_ith_PAP = get_ith_PAPs_expected_return(filtered_eigenvalues_ta,i)
        # Add the values for realized and expected returns of the ith PEP to the row
        row_values.append(realized_return_ith_PAP)
        row_values.append(expected_return_ith_PAP)

        # Dynamically add columns if they don't exist. for PEP's.
        realized_col_name_pap = f"realized_return_of_{i+1}_PAP"
        expected_col_name_pap = f"expected_return_of_{i+1}_PAP"

        #for PAP'S
        if realized_col_name_pap not in realized_returns_df.columns:
            realized_returns_df[realized_col_name_pap] = None
        if expected_col_name_pap not in realized_returns_df.columns:
            realized_returns_df[expected_col_name_pap] = None

    # Append the row to the dataframe
    realized_returns_df.loc[len(realized_returns_df)] = row_values


  realized_returns_df[expected_col_name_pep] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_returns_df[realized_col_name_pap] = None
  realized_returns_df[expected_col_name_pap] = None
  realized_r

In [8]:
pap_std = realized_returns_df['realized_return_of_first_three_PAP'].std()
pep_std = realized_returns_df['realized_return_of_first_three_PEP'].std()

realized_returns_df['adjusted_PAP'] = realized_returns_df['realized_return_of_first_three_PAP'] * (pep_std / pap_std)

# Step 2: Take the average of the adjusted "PAP" and "PEP"
realized_returns_df['PEP and PAP 1-3'] = (realized_returns_df['adjusted_PAP'] + realized_returns_df['realized_return_of_first_three_PEP']) / 2

# Optional: drop the adjusted column if you don't need it anymore
realized_returns_df.drop(columns='adjusted_PAP', inplace=True)


sharpe_df = realized_returns_df.drop(realized_returns_df.filter(like="expected").columns, axis=1).apply(lambda col: calculate_sharpe_ratio(col)) * math.sqrt(12)

pp_columns = realized_returns_df.filter(like="PP")
pp_realized_mean_df = pp_columns.filter(like="realized").mean(axis=0)
pp_expected_mean_df = pp_columns.filter(like="expected").mean(axis=0)

pep_columns = realized_returns_df.filter(like="PEP")
pep_realized_mean_df = pep_columns.filter(like="realized").mean(axis=0)
pep_expected_mean_df = pep_columns.filter(like="expected").mean(axis=0)

pap_columns = realized_returns_df.filter(like="PAP")
pap_realized_mean_df = pap_columns.filter(like="realized").mean(axis=0)
pap_expected_mean_df = pap_columns.filter(like="expected").mean(axis=0)

realized_returns_df.to_csv("temp/realized_returns.csv")
sharpe_df.to_csv("temp/sharpe.csv")

pp_columns.to_csv("temp/pp_columns.csv")
pp_realized_mean_df.to_csv("temp/pp_realized_mean_df.csv")
pp_expected_mean_df.to_csv("temp/pp_expected_mean_df.csv")

pep_columns.to_csv("temp/pep_columns.csv")
pep_realized_mean_df.to_csv("temp/pep_realized_mean_df.csv")
pep_expected_mean_df.to_csv("temp/pep_expected_mean_df.csv")

pap_columns.to_csv("temp/pap_columns.csv")
pap_realized_mean_df.to_csv("temp/pap_realized_mean_df.csv")
pap_expected_mean_df.to_csv("temp/pap_expected_mean_df.csv")