In [19]:
import pandas as pd
import h5py
import os
import numpy as np

In [20]:
def getShortRate(f, alpha, pars):
    """Evaluate the short-rate formula at the state (f, alpha).

    Args:
        f: weight given to the type-A quantities (rhoA and 1/(rhoA+nu));
            the complement 1-f goes to the type-B quantities.
        alpha: weight given to the type-A beta when blending the two betas.
        pars: iterable of exactly nine values, unpacked in the order
            (rhoA, rhoB, nu, DEL, kap, lbar, sig_l, muY, sigY). Only rhoA,
            rhoB, nu, muY and sigY enter this formula.

    Returns:
        rho_mix + muY - sigY**2 + nu*(1 - beta_mix), where rho_mix is the
        f-weighted rho and beta_mix the alpha-weighted beta.

    Raises:
        ValueError: if ``pars`` does not hold exactly nine values.
    """
    # All nine entries are unpacked so that a malformed parameter vector is
    # rejected here; the underscore-prefixed ones are not needed below.
    rhoA, rhoB, nu, _DEL, _kap, _lbar, _sig_l, muY, sigY = pars

    inv_a = 1/(rhoA + nu)
    inv_b = 1/(rhoB + nu)
    phi_mix = f*inv_a + (1-f)*inv_b

    beta_a = (rhoA+nu)*phi_mix
    beta_b = (rhoB+nu)*phi_mix
    beta_mix = alpha*beta_a+(1-alpha)*beta_b

    rho_mix = f*rhoA+(1-f)*rhoB
    return rho_mix + muY-sigY**2 + nu*(1-beta_mix)

In [21]:
def apply_getShortRate(row, pars):
    """Row-wise adapter around ``getShortRate``.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with 'f' and 'alpha'.
        pars: parameter vector forwarded unchanged to ``getShortRate``.

    Returns:
        Whatever ``getShortRate(row['f'], row['alpha'], pars)`` returns.
    """
    f_value = row['f']
    alpha_value = row['alpha']
    return getShortRate(f_value, alpha_value, pars)

In [22]:
def getSlope(df):
    """Return element 6 minus element 0 of every row's 'yield' entry.

    Each cell of ``df['yield']`` must be indexable with at least seven
    elements. Elsewhere in this notebook index 0 is labelled y1 and index 6
    is labelled y10, so this is the y10 - y1 spread.

    Returns:
        pandas Series aligned with ``df``.
    """
    curve = df['yield']
    long_end = curve.apply(lambda y: y[6])
    short_end = curve.apply(lambda y: y[0])
    return long_end - short_end

In [23]:
def optimized_calcHPR(df, periods=12):
    """Compute 5*y5(t) - 4*y4(t+periods) - y1(t) for every row of ``df``.

    y1, y4 and y5 are elements 0, 3 and 4 of each row's 'yield' entry
    (presumably a holding-period return on the 5-year bond in excess of the
    1-year yield, with 12 rows per year -- confirm against the simulation).

    Args:
        df: DataFrame whose 'yield' column holds one indexable yield vector
            (at least five elements) per row.
        periods: number of rows to look ahead for the y4 term. Defaults to
            12, the value that was previously hard-coded.

    Returns:
        pandas Series aligned with ``df``. The last ``periods`` rows are NaN
        because no look-ahead observation exists for them.
    """
    def _element(curve, idx):
        # Rows shifted in from beyond the end of the sample carry a missing
        # marker instead of a yield vector. Depending on the pandas version
        # that marker may be None or a float NaN, so treat any scalar as
        # missing rather than testing for None only (indexing a float NaN
        # would raise TypeError).
        if curve is None or np.ndim(curve) == 0:
            return np.nan
        return curve[idx]

    y5_t = df['yield'].apply(lambda x: x[4])
    y4_ahead = df['yield'].shift(-periods).apply(lambda x: _element(x, 3))
    y1_t = df['yield'].apply(lambda x: x[0])  # contemporaneous, not shifted

    return 5 * y5_t - 4 * y4_ahead - y1_t


In [24]:
def optimized_calcHPRSR(df, periods=12):
    """Compute 5*y5(t) - 4*y4(t+periods) - shortRate(t) for every row.

    y4 and y5 are elements 3 and 4 of each row's 'yield' entry (presumably a
    holding-period return on the 5-year bond in excess of the short rate,
    with 12 rows per year -- confirm against the simulation).

    Args:
        df: DataFrame with a 'yield' column holding one indexable yield
            vector (at least five elements) per row and a numeric
            'shortRate' column.
        periods: number of rows to look ahead for the y4 term. Defaults to
            12, the value that was previously hard-coded.

    Returns:
        pandas Series aligned with ``df``. The last ``periods`` rows are NaN
        because no look-ahead observation exists for them.
    """
    def _element(curve, idx):
        # Rows shifted in from beyond the end of the sample carry a missing
        # marker instead of a yield vector. Depending on the pandas version
        # that marker may be None or a float NaN, so treat any scalar as
        # missing rather than testing for None only (indexing a float NaN
        # would raise TypeError).
        if curve is None or np.ndim(curve) == 0:
            return np.nan
        return curve[idx]

    y5_t = df['yield'].apply(lambda x: x[4])
    y4_ahead = df['yield'].shift(-periods).apply(lambda x: _element(x, 3))
    r_t = df['shortRate']  # contemporaneous, not shifted

    return 5 * y5_t - 4 * y4_ahead - r_t

In [25]:
def getSharpeRatio(df, pars):
    """Return DEL * (0.5 - f) for every row of ``df``.

    Args:
        df: DataFrame (or mapping) providing the 'f' column.
        pars: parameter vector; only ``pars[3]`` (DEL) is read.

    Returns:
        Same type as ``df['f']`` (a pandas Series for a DataFrame input).
    """
    delta = pars[3]
    return delta*(0.5 - df['f'])

In [26]:
def getPD(df, pars):
    """Return the f-weighted average of 1/(rhoA+nu) and 1/(rhoB+nu).

    Args:
        df: DataFrame (or mapping) providing the 'f' column.
        pars: parameter vector; entries 0, 1, 2 are read as rhoA, rhoB, nu.

    Returns:
        phiA*f + phiB*(1-f), with phiX = 1/(rhoX + nu).
    """
    rho_a, rho_b, nu = pars[0], pars[1], pars[2]
    inv_a = 1/(rho_a + nu)
    inv_b = 1/(rho_b + nu)
    weight = df['f']
    return inv_a*weight + inv_b*(1-weight)

In [27]:
def getStdR(df, pars):
    """Return sqrt(sigR**2 + sigY**2) for every row of ``df``.

    sigR = (phiA - phiB)/PD * f*(1-f)*DEL, where phiX = 1/(rhoX + nu) and
    PD = phiA*f + phiB*(1-f).

    Args:
        df: DataFrame (or mapping) providing the 'f' column.
        pars: parameter vector; entries 0, 1, 2, 3 and 8 are read as
            rhoA, rhoB, nu, DEL and sigY.
    """
    rho_a, rho_b, nu, delta = pars[0], pars[1], pars[2], pars[3]
    sig_y = pars[8]

    inv_a = 1/(rho_a + nu)
    inv_b = 1/(rho_b + nu)

    weight = df['f']
    pd_level = inv_a*weight + inv_b*(1-weight)
    sig_r = (inv_a - inv_b)/pd_level * weight*(1-weight)*delta

    return np.sqrt(sig_r**2 + sig_y**2)

In [28]:
def getExR(df, pars):
    """Return sigY**2 + SR*sigR for every row of ``df``.

    SR = DEL*(0.5 - f) and sigR = (phiA - phiB)/PD * f*(1-f)*DEL, where
    phiX = 1/(rhoX + nu) and PD = phiA*f + phiB*(1-f).

    Args:
        df: DataFrame (or mapping) providing the 'f' column.
        pars: parameter vector; entries 0, 1, 2, 3 and 8 are read as
            rhoA, rhoB, nu, DEL and sigY.
    """
    rho_a, rho_b, nu, delta = pars[0], pars[1], pars[2], pars[3]
    sig_y = pars[8]

    inv_a = 1/(rho_a + nu)
    inv_b = 1/(rho_b + nu)

    weight = df['f']
    pd_level = inv_a*weight + inv_b*(1-weight)
    sig_r = (inv_a - inv_b)/pd_level * weight*(1-weight)*delta
    sharpe = delta*(0.5 - weight)

    return sig_y**2 + sharpe*sig_r

In [29]:
def getBondExR(df, pars):
    """Return SR * sigB for the seven entries of each row's 'sigY' vector.

    SR = DEL*(0.5 - f). For the entry at position i, paired with maturity m
    from (1, 2, 3, 4, 5, 7, 10), sigB = -sigY[i] * m.

    Args:
        df: DataFrame with an 'f' column and a 'sigY' column whose cells are
            indexable with at least seven elements.
        pars: parameter vector; only ``pars[3]`` (DEL) is read.

    Returns:
        Tuple of seven pandas Series, ordered by maturity 1, 2, 3, 4, 5, 7, 10.
    """
    maturities = (1, 2, 3, 4, 5, 7, 10)

    delta = pars[3]
    sharpe = delta*(0.5 - df['f'])

    excess = []
    for position, maturity in enumerate(maturities):
        sig_y = df['sigY'].apply(lambda vec, i=position: vec[i])
        # The one-period loading is just the negated yield volatility; longer
        # maturities scale it by the maturity.
        sig_b = -sig_y if maturity == 1 else -sig_y*maturity
        excess.append(sharpe*sig_b)
    return tuple(excess)

In [30]:
def getBondStd(df, pars):
    """Return sqrt(sigB**2) for the seven entries of each row's 'sigY' vector.

    For the entry at position i, paired with maturity m from
    (1, 2, 3, 4, 5, 7, 10), sigB = -sigY[i] * m; the result is its magnitude.

    Args:
        df: DataFrame with a 'sigY' column whose cells are indexable with at
            least seven elements. No other column is needed.
        pars: accepted for signature compatibility with the other helpers;
            not used by this calculation.

    Returns:
        Tuple of seven pandas Series, ordered by maturity 1, 2, 3, 4, 5, 7, 10.
    """
    maturities = (1, 2, 3, 4, 5, 7, 10)

    stds = []
    for position, maturity in enumerate(maturities):
        sig_y = df['sigY'].apply(lambda vec, i=position: vec[i])
        sig_b = -sig_y*maturity
        # sqrt(x**2) is the absolute value; kept in this form so the result
        # dtype stays floating point as before.
        stds.append(np.sqrt(sig_b**2))
    return tuple(stds)
    

In [31]:

def process_file_and_get_stats(filePath):
    """Load one simulation file and summarise it as a single-row DataFrame.

    The HDF5 file is read with key 'df'. The first nine entries of its 'pars'
    column are taken as the parameter vector (entry 3 is DEL). Derived columns
    are added to the loaded frame using the helper functions of this notebook,
    and the mean and standard deviation of each quantity are collected.

    Two estimators are used, matching the column labels below:
      * pandas ``.mean()`` / ``.std()`` for alpha, f, SR, PD, stdR, ExR,
        ExR<m> and stdB<m>;
      * ``np.nanmean`` / ``np.nanstd`` along axis 0 for shortRate, yield,
        slope, sigY, HPRmSR and HPRm1. For 'yield' the seven elements are
        reported separately (y1..y10); for 'sigY' the whole per-element
        array is stored in one cell.

    Args:
        filePath: path of the HDF5 file to read.

    Returns:
        One-row pandas DataFrame whose columns are "DEL" followed by
        "Mean ..." / "Std ..." pairs.
    """
    maturities = (1, 2, 3, 4, 5, 7, 10)

    # --- load ---------------------------------------------------------------
    df = pd.read_hdf(filePath, key='df')
    pars = df['pars'].head(9).tolist()
    DEL = pars[3]

    # --- derived columns (shortRate must exist before HPRmSR is computed) ----
    df['SR'] = getSharpeRatio(df, pars)
    df['shortRate'] = df.apply(lambda row: apply_getShortRate(row, pars), axis=1)
    df['HPRm1'] = optimized_calcHPR(df)
    df['HPRmSR'] = optimized_calcHPRSR(df)
    df['slope'] = getSlope(df)
    df['PD'] = getPD(df, pars)
    df['stdR'] = getStdR(df, pars)
    df['ExR'] = getExR(df, pars)
    for maturity, series in zip(maturities, getBondExR(df, pars)):
        df[f'ExR{maturity}'] = series
    for maturity, series in zip(maturities, getBondStd(df, pars)):
        df[f'stdB{maturity}'] = series

    # --- the two estimator flavours -----------------------------------------
    def pandas_stats(column):
        # pandas mean / std of one column
        return df[column].mean(), df[column].std()

    def nan_stats(column):
        # NaN-ignoring numpy mean / std along axis 0 of the column's values
        return (np.nanmean(df[column].tolist(), axis=0),
                np.nanstd(df[column].tolist(), axis=0))

    stats = {"DEL": DEL}

    def record(label, mean_and_std):
        # Store one "Mean <label>" / "Std <label>" pair, in that order
        stats[f"Mean {label}"], stats[f"Std {label}"] = mean_and_std

    # --- assemble the row; insertion order defines the CSV column order -----
    record("Alpha", pandas_stats('alpha'))
    record("f", pandas_stats('f'))
    record("Short Rate", nan_stats('shortRate'))

    # Yields: all means first (lower-case y), then all stds (upper-case Y)
    mean_yield, std_yield = nan_stats('yield')
    for position, maturity in enumerate(maturities):
        stats[f"Mean y{maturity}"] = mean_yield[position]
    for position, maturity in enumerate(maturities):
        stats[f"Std Y{maturity}"] = std_yield[position]

    record("Slope", nan_stats('slope'))
    record("SigY", nan_stats('sigY'))
    record("HPRmSR", nan_stats('HPRmSR'))
    record("HPRm1", nan_stats('HPRm1'))

    record("SR", pandas_stats('SR'))
    record("PD", pandas_stats('PD'))
    record("StdR", pandas_stats('stdR'))
    record("ExR", pandas_stats('ExR'))
    for maturity in maturities:
        record(f"ExR{maturity}", pandas_stats(f'ExR{maturity}'))
    for maturity in maturities:
        record(f"StdB{maturity}", pandas_stats(f'stdB{maturity}'))

    # One dict -> one DataFrame row
    return pd.DataFrame([stats])

In [32]:
#result_row = process_file_and_get_stats('results_20240726_173905.h5')
#print(result_row)


In [33]:
def process_multiple_files(file_list, output_path='aggregate_resultsLongV2.csv'):
    """Summarise several simulation files and write the stacked result to CSV.

    Each file is passed to ``process_file_and_get_stats``; the resulting
    one-row frames are stacked in the order of ``file_list`` with a fresh
    0..n-1 index.

    Args:
        file_list: iterable of file paths to process.
        output_path: destination CSV path. Defaults to
            'aggregate_resultsLongV2.csv', the name that was previously
            hard-coded.

    Returns:
        pandas DataFrame with one row per input file (empty if ``file_list``
        is empty). The same frame is written to ``output_path`` without the
        index column.
    """
    # Collect the rows first and concatenate once: growing a frame with
    # pd.concat inside the loop copies all previous rows on every iteration
    # and relies on concatenating with an empty seed DataFrame.
    rows = [process_file_and_get_stats(file_name) for file_name in file_list]

    if rows:
        results_df = pd.concat(rows, ignore_index=True)
    else:
        # pd.concat rejects an empty list; keep the old "empty frame" result
        results_df = pd.DataFrame()

    results_df.to_csv(output_path, index=False)
    print(f"Results saved to '{output_path}'.")
    return results_df


In [None]:
# Case 1 (rhoA = -0.015, rhoB = 0.025): one simulation output per DEL value.
# The runs are listed in the order in which their rows are written to the
# aggregate CSV (note that this is not sorted by DEL).
_CASE1_DIR = 'Data/Model Disagreement/'
_CASE1_RUNS = (
    'results_20240726_173905.h5',      # DEL = 0.1
    'results_20240726_201358.h5',      # DEL = 0.2
    'results_20240726_222104.h5',      # DEL = 0.3
    'resultsLong_20240726_211654.h5',  # DEL = 0.4
    'resultsLong_20240726_232846.h5',  # DEL = 0.6
    'results_20240726_225143.h5',      # DEL = 0.7
    'results_20240726_232958.h5',      # DEL = 0.8
    'results_20240727_095712.h5',      # DEL = 0.5
    'resultsLong_20240727_100833.h5',  # DEL = 0.0
    'resultsLong_20240728_131427.h5',  # DEL = 0.9
)
file_names1 = [_CASE1_DIR + run_name for run_name in _CASE1_RUNS]

# To run the false consensus bias case instead, comment out the definition of
# file_names1 above and uncomment the list below:
# file_names1 = [
#     'resultsLong_20240727_100833.h5', #DEL = 0.0
#     'results_20250121_211741.h5',
#     'results_20250124_161548.h5',
#     'results_20250124_165259.h5',
#     'results_20250124_175244.h5',
#     'results_20250125_155203.h5',
#     'results_20250127_052904.h5',
#     'results_20250127_083607.h5',
#     'results_20250127_150049.h5',
# ]

# Summarise every file and write the aggregate CSV
results_df = process_multiple_files(file_names1)

Results saved to 'aggregate_resultsLongV2.csv'.
