In [2]:
import pandas as pd
import numpy as np

In [3]:
def mai_interim_to_processed(input_file, output_file):
    """Average each ``<name>_ni`` / ``<name>_wi`` column pair and save the result.

    Reads ``input_file`` as CSV, computes one row-wise mean column per
    indicator from its ``_ni`` and ``_wi`` variants, and writes ``date``
    followed by the averaged columns to ``output_file`` (CSV, no index).

    Parameters
    ----------
    input_file : str or path-like
        CSV containing a ``date`` column plus ``<name>_ni`` and ``<name>_wi``
        columns for every indicator listed below.
    output_file : str or path-like
        Destination CSV path.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any of the required columns is absent from the input.
    """
    indicators = ['credit_rating', 'gdp', 'house_mkt', 'inflation',
                  'monetary', 'oil', 'unemp', 'usd']

    interim = pd.read_csv(input_file)

    # NOTE: DataFrame.mean skips NaN by default, so a row with only one of
    # the two variants present yields that single value rather than NaN.
    averaged = {}
    for name in indicators:
        pair = [name + '_ni', name + '_wi']
        averaged[name] = interim[pair].mean(axis=1)

    # Keep only the date and the averaged indicators, in a fixed order.
    processed = interim.assign(**averaged)[['date'] + indicators]
    processed.to_csv(output_file, index=False)

In [4]:
def mef_interim_to_processed(input_file, output_file):
    """Derive log-ratio and spread columns from an interim CSV and save them.

    Reads ``input_file`` as CSV and computes:

    * ``dp``   = log(d12) - log(index)
    * ``dy``   = log(d12) - log(lag_index_1)
    * ``ep``   = log(e12) - log(index)
    * ``de``   = log(d12) - log(e12)
    * ``rvol`` = svar (copied unchanged)
    * ``tms``  = lty - tbl
    * ``dfy``  = baa - aaa
    * ``dfr``  = corpr - ltr

    The output CSV (no index) holds ``date``, the derived columns and the
    pass-through columns ``bm``, ``ntis``, ``tbl``, ``lty``, ``ltr``, ``infl``.

    Parameters
    ----------
    input_file : str or path-like
        CSV containing every column referenced above.
    output_file : str or path-like
        Destination CSV path.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any of the required columns is absent from the input.
    """
    output_columns = ['date', 'dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis',
                      'tbl', 'lty', 'ltr', 'tms', 'dfy', 'dfr', 'infl']

    interim = pd.read_csv(input_file)

    # Natural logs shared by several of the ratios below.
    log_d12 = np.log(interim['d12'])
    log_index = np.log(interim['index'])
    log_lag_index = np.log(interim['lag_index_1'])
    log_e12 = np.log(interim['e12'])

    enriched = interim.assign(
        dp=log_d12 - log_index,
        dy=log_d12 - log_lag_index,
        ep=log_e12 - log_index,
        de=log_d12 - log_e12,
        rvol=interim['svar'],
        tms=interim['lty'] - interim['tbl'],
        dfy=interim['baa'] - interim['aaa'],
        dfr=interim['corpr'] - interim['ltr'],
    )

    enriched[output_columns].to_csv(output_file, index=False)

In [5]:
def mkt_interim_to_processed(input_file, output_file):
    """Compute trailing and leading ``GSPC`` premium columns and save them.

    Reads ``input_file`` as CSV, parses the three date columns, and for each
    row computes:

    * ``GSPCprem`` = (GSPC / lag_GSPC_1 - 1) * 252 / days(date - lag_date_1)
      * 100 - lag_rfr_1
    * ``lead_GSPCprem_1`` = (lead_GSPC_1 / GSPC - 1) * 252
      / days(lead_date_1 - date) * 100 - rfr

    The output CSV (no index) holds ``date``, ``GSPCprem`` and
    ``lead_GSPCprem_1``.

    Parameters
    ----------
    input_file : str or path-like
        CSV containing ``date``, ``lag_date_1``, ``lead_date_1``, ``GSPC``,
        ``lag_GSPC_1``, ``lead_GSPC_1``, ``rfr`` and ``lag_rfr_1``.
    output_file : str or path-like
        Destination CSV path.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any of the required columns is absent from the input.
    """
    prices = pd.read_csv(input_file)

    # Parse the date columns so that subtracting them yields timedeltas.
    today = pd.to_datetime(prices['date'])
    previous = pd.to_datetime(prices['lag_date_1'])
    following = pd.to_datetime(prices['lead_date_1'])

    # Whole calendar days between consecutive observations.
    days_since_previous = (today - previous).dt.days
    days_until_following = (following - today).dt.days

    # Simple return scaled by 252 / elapsed days and by 100, minus the rate
    # column. NOTE(review): 252 is presumably trading days per year and the
    # rfr columns presumably annualised percentages -- confirm with the data.
    trailing_premium = (
        (prices['GSPC'] / prices['lag_GSPC_1'] - 1) * 252 / days_since_previous * 100
        - prices['lag_rfr_1']
    )
    leading_premium = (
        (prices['lead_GSPC_1'] / prices['GSPC'] - 1) * 252 / days_until_following * 100
        - prices['rfr']
    )

    processed = pd.DataFrame({
        'date': today,
        'GSPCprem': trailing_premium,
        'lead_GSPCprem_1': leading_premium,
    })
    processed.to_csv(output_file, index=False)