In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the local Data_handler module) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
from Data_handler import Data

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import os

# 2 Loading the data

In [None]:
# Prepare data
# `Data` comes from the local Data_handler module; what get_data() returns is not
# visible in this notebook (presumably a pandas DataFrame, given the .head()/.shape
# calls further down) -- TODO confirm against Data_handler.py.
Data_instance = Data()               # create instance of the class Data()
data = Data_instance.get_data()      # store the data of Data_instance

In [None]:
# Show the frame returned by get_data()
display(data)

# 3 Betting Against Beta

## Question a)

In [None]:
# Overview of the data: first rows of the frame loaded above
display(data.head())

In [None]:
# NOTE: this rebinds `data`. From here on `data` is the (NaN-free) frame returned by
# get_rolling_beta(), not the output of get_data(); later sections rely on that.
data = Data_instance.get_rolling_beta().copy().dropna() # Get the rolling betas from the data. This is a method from the class Data, in Data_handler.py
display(data)

In [None]:
# Working copy for question b).
# NOTE(review): `data` was already passed through .dropna() when it was built, so the
# second .dropna() here is not expected to remove anything and the two counts printed
# below should be identical.
data_Qb = data.copy().dropna()
print("Initial number of observations: ", data.shape[0])
print("Final number of observations: \t", data_Qb.shape[0])

## Question b)

### Equally weighted portfolios

In [None]:
# Bucket the stocks into beta deciles separately within each month.
# labels=False yields integer bin codes (0 = lowest betas); duplicates='drop'
# merges bins whose edges coincide, so a month can end up with fewer than 10 bins.
data_Qb["EW_monthly_decile"] = (
    data_Qb
    .groupby("date")["beta"]
    .transform(lambda month_betas: pd.qcut(month_betas, 10, labels=False, duplicates='drop'))
)

# Equal-weighted decile return: plain average of the member returns, broadcast
# back onto every stock row of that (month, decile) group.
data_Qb["EW_monthly_return"] = (
    data_Qb
    .groupby(["date", "EW_monthly_decile"])["ret"]
    .transform("mean")
)

display(data_Qb)

### Value weighted portfolios

In [None]:
# Market capitalisation = |price| * shares outstanding
# (the absolute value guards against negative entries in `prc`).
data_Qb['MktCap'] = data_Qb['prc'].abs() * data_Qb['shrout']

# The value-weighted portfolios reuse the beta deciles of the equal-weighted ones;
# only the weighting scheme inside each decile differs.
data_Qb["VW_monthly_decile"] = data_Qb["EW_monthly_decile"]

# Weight of each stock inside its (month, decile) portfolio:
# own market cap divided by the total market cap of that portfolio.
data_Qb["VW_monthly_weight"] = (
    data_Qb["MktCap"]
    / data_Qb.groupby(["date", "VW_monthly_decile"])["MktCap"].transform("sum")
)

# Contribution of each stock to the value-weighted return of its portfolio;
# summing this column per (month, decile) gives the portfolio return.
data_Qb["VW_monthly_return"] = data_Qb["ret"] * data_Qb["VW_monthly_weight"]

display(data_Qb.head(5)) # to get an overview of the dataframe

### Auxiliary functions for plots

In [None]:
def plot_from_lists(mean, std, sharpe, plot_color = 'blue'):
    """Draw three side-by-side bar charts (mean, volatility, Sharpe ratio) per decile.

    Parameters
    ----------
    mean, std, sharpe : sequences of equal length
        One value per decile portfolio, in decile order. The x positions are
        simply 0..len(mean)-1.
    plot_color : str
        Bar colour passed to matplotlib.

    Returns
    -------
    The ``matplotlib.pyplot`` module itself, so the caller can chain
    ``suptitle`` / ``savefig`` / ``show`` on the current figure.
    """
    deciles = list(range(len(mean)))

    # (values, panel title, y-axis label) for each of the three panels
    panels = (
        (mean, "Average portolio mean return", "Annualized return"),
        (std, "Average portolio annualized standard deviation", "Annualized standard deviation"),
        (sharpe, "Average portolio annualized sharpe ratio", "Annualized sharpe ratio"),
    )

    _, axs = plt.subplots(1, 3, figsize=(25, 7), sharey=False)

    for ax, (values, title, y_label) in zip(axs, panels):
        ax.bar(deciles, values, color=plot_color)
        ax.set_title(title)
        ax.set_xticks(deciles)
        ax.set_xlabel("Decile")
        ax.set_ylabel(y_label)

    return plt

In [None]:
def get_mean_std_sr_EW(data, decile_col_name, decile_return_col_name):
    """Annualised mean, volatility and Sharpe ratio of equal-weighted decile portfolios.

    The input is a stock-month panel in which ``decile_return_col_name`` holds the
    decile portfolio return repeated on every member row. The statistics must be
    taken over the *time series* of monthly portfolio returns, so each decile is
    first collapsed to one observation per month. Computing them directly on the
    stock-level rows would weight every month by its number of stocks and
    understate/overstate the volatility.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``date``, ``tmytm`` (risk-free rate, treated as
        monthly like the returns), ``decile_col_name`` and ``decile_return_col_name``.
    decile_col_name : str
        Column with the decile label of each row.
    decile_return_col_name : str
        Column with the (monthly) return to aggregate.

    Returns
    -------
    (MEAN, STD, SR) : three lists, one entry per decile in ascending decile order.
        Mean is scaled by 12, standard deviation by sqrt(12).
    """
    MEAN, STD, SR = [], [], []

    # Rows without a decile label cannot belong to any portfolio.
    for decile in sorted(data[decile_col_name].dropna().unique()):
        by_month = data[data[decile_col_name] == decile].groupby("date")

        # One portfolio return and one risk-free rate per month.
        monthly_ret = by_month[decile_return_col_name].mean()
        monthly_rf = by_month["tmytm"].first()

        rf = monthly_rf.mean() * 12
        mean = monthly_ret.mean() * 12
        std = monthly_ret.std() * np.sqrt(12)

        MEAN.append(mean)
        STD.append(std)
        SR.append((mean - rf) / std)

    return MEAN, STD, SR

def get_mean_std_sr_VW(data, decile_col_name, decile_return_col_name):
    """Annualised mean, volatility and Sharpe ratio of value-weighted decile portfolios.

    ``decile_return_col_name`` is expected to hold each stock's weighted return
    contribution (weight * return); summing it per (month, decile) gives the
    monthly portfolio return. The risk-free rate is likewise reduced to one value
    per (decile, month) before averaging, so that months with many stocks do not
    dominate the average.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``date``, ``tmytm``, ``decile_col_name`` and
        ``decile_return_col_name``.
    decile_col_name : str
        Column with the decile label of each row.
    decile_return_col_name : str
        Column with the weighted return contributions.

    Returns
    -------
    MEAN, STD : pandas.Series indexed by decile (annualised with 12 and sqrt(12)).
    SR : list of Sharpe ratios in ascending decile order.
    """
    # Monthly portfolio return = sum of the weighted contributions of its members.
    ret_vw = data.groupby(['date', decile_col_name])[decile_return_col_name].sum().reset_index()

    per_decile = ret_vw.groupby(decile_col_name)[decile_return_col_name]
    MEAN = per_decile.mean() * 12
    STD = per_decile.std() * np.sqrt(12)

    # One risk-free observation per (decile, month), then the average per decile.
    rf_monthly = data.groupby([decile_col_name, 'date'])['tmytm'].first()
    RF = rf_monthly.groupby(level=0).mean() * 12

    # All three Series share the decile index, so the arithmetic aligns by decile.
    SR = list((MEAN - RF) / STD)

    return MEAN, STD, SR

# def plot_mean_std_sr(data, EW = False, VW = False, decile_col='' question = None, plot_name = None, show=True):

#     if not os.path.exists("Figures"):
#             os.makedirs("Figures")

#     if EW and VW:
#         raise ValueError("You cannot specify both 'EW=True' and 'VW=True'.")
#     elif EW:
#         mean, std, sharpe = get_mean_std_sr_EW(data, 'EW_monthly_decile', 'EW_monthly_return')
#         plot = plot_from_lists(mean, std, sharpe, plot_color = 'blue')
#         plot.suptitle(f'Average portolio annualized mean return, standard deviation and sharpe ratio ({plot_name})')
#         plot.savefig(f"Figures/question_{question}_plot_{plot_name}")
#         if show: 
#             plot.show()
#     elif VW:
#         mean, std, sharpe = get_mean_std_sr_VW(data_Qb, 'VW_monthly_decile', 'VW_monthly_return')
#         plot = plot_from_lists(mean, std, sharpe, plot_color = 'blue')
#         plot.suptitle(f'Average portolio annualized mean return, standard deviation and sharpe ratio ({plot_name})')
#         plot.savefig(f"Figures/question_{question}_plot_{plot_name}")
#         if show: 
#             plot.show()
#     else:
#         raise ValueError("You need to specify either 'EW=True' or 'VW=True'.")
    

def plot_mean_std_sr(data, EW = False, VW = False, EW_decile_col='EW_monthly_decile', EW_return_col = 'EW_monthly_return', VW_decile_col='VW_monthly_decile', VW_return_col = 'VW_monthly_return', question = None, plot_name = None, show=True):
    """Plot and save the annualised mean / volatility / Sharpe ratio per decile.

    Exactly one of ``EW`` and ``VW`` must be true; it selects whether the
    equal-weighted or the value-weighted statistics are computed (with the
    matching ``*_decile_col`` / ``*_return_col`` pair).

    The figure is written to ``Figures/question_{question}_plot_{plot_name}``
    (the directory is created when missing) and shown when ``show`` is true.

    Raises
    ------
    ValueError
        If both or neither of ``EW`` / ``VW`` are set. The arguments are checked
        before anything is computed or written to disk.
    """
    # Validate first so an invalid call has no side effects.
    if EW and VW:
        raise ValueError("You cannot specify both 'EW=True' and 'VW=True'.")
    if not (EW or VW):
        raise ValueError("You need to specify either 'EW=True' or 'VW=True'.")

    if EW:
        mean, std, sharpe = get_mean_std_sr_EW(data, EW_decile_col, EW_return_col)
    else:
        mean, std, sharpe = get_mean_std_sr_VW(data, VW_decile_col, VW_return_col)

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs("Figures", exist_ok=True)

    plot = plot_from_lists(mean, std, sharpe, plot_color = 'blue')
    plot.suptitle(f'Average portolio annualized mean return, standard deviation and sharpe ratio ({plot_name})')
    plot.savefig(f"Figures/question_{question}_plot_{plot_name}")
    if show:
        plot.show()

### Plot the mean returns, volatility, and sharpe ratios for EW and VW portfolios

In [None]:
# Beta-sorted deciles, equal-weighted (VW and show keep their defaults: False / True)
plot_mean_std_sr(
    data_Qb,
    EW=True,
    EW_decile_col='EW_monthly_decile',
    EW_return_col='EW_monthly_return',
    question='3b',
    plot_name='equal_weight',
)

# Beta-sorted deciles, value-weighted (EW and show keep their defaults: False / True)
plot_mean_std_sr(
    data_Qb,
    VW=True,
    VW_decile_col='VW_monthly_decile',
    VW_return_col='VW_monthly_return',
    question='3b',
    plot_name='value_weight',
)

## Question c) and d)

### Prepare data for BAB

In [None]:
# Fresh, NaN-free copy of the rolling-beta frame so the BAB computations do not
# touch the frames used in the previous questions.
data_bab = Data_instance.get_rolling_beta().copy().dropna() # Create a separate dataset for the BAB question
data_bab.head()

### Build the function for determining BAB weights

In [None]:
def get_bab_weights(data):
    """Build the monthly Betting-Against-Beta (BAB) return series.

    For every month the stocks are ranked on ``beta``. With ``z`` the rank and
    ``z_mean`` its monthly average, the two legs use rank-based weights

        wH = k * max(0, z - z_mean)      (high-beta leg)
        wL = k * max(0, z_mean - z)      (low-beta leg)
        k  = 2 / sum(|z - z_mean|)

    so that *both* weight vectors are non-negative and each sums to one. The
    low-beta weights must not be negative: with negative weights the leg return
    and the leg beta both flip sign and the risk-free rate ends up being added
    instead of subtracted in the low-beta term of rBAB.

    Parameters
    ----------
    data : pandas.DataFrame
        Stock-month panel with the columns ``date``, ``beta``, ``ret``,
        ``tmytm`` (risk-free rate) and ``Rm_e`` (market excess return).

    Returns
    -------
    pandas.DataFrame with one row per date and the columns
    ``date, rH, rL, bH, bL, tmytm, Rm_e, rBAB`` where ``rH``/``rL`` are the leg
    returns, ``bH``/``bL`` the leg betas and

        rBAB = (rL - tmytm) / bL - (rH - tmytm) / bH
    """
    df = data.copy()

    # Cross-sectional beta rank and its deviation from the monthly mean rank
    df['z'] = df.groupby('date')['beta'].rank()
    df['dev'] = df['z'] - df.groupby('date')['z'].transform('mean')

    # Normalising constant: the positive and the negative deviations each sum to
    # half of the total absolute deviation, so k = 2 / sum|dev| scales each leg to 1.
    df['abs_dev'] = df['dev'].abs()
    df['k'] = 2 / df.groupby('date')['abs_dev'].transform('sum')

    # Non-negative weights of the two legs
    df['wH'] = df['k'] * np.maximum(0, df['dev'])
    df['wL'] = df['k'] * np.maximum(0, -df['dev'])

    # Drop the helper columns
    df = df.drop(columns=['z', 'dev', 'abs_dev', 'k'])

    # Per-stock contributions to the leg betas and leg returns
    df['bH'] = df['wH'] * df['beta']
    df['bL'] = df['wL'] * df['beta']
    df['rH'] = df['wH'] * df['ret']
    df['rL'] = df['wL'] * df['ret']

    # Aggregate to one row per month; the rates are identical across the stocks
    # of a month, so the first value per date is taken.
    legs = df.groupby('date')[['rH', 'rL', 'bH', 'bL']].sum()
    rates = df.groupby('date')[['tmytm', 'Rm_e']].first()
    df_ = legs.join(rates).reset_index()

    # Long the low-beta leg levered to beta 1, short the high-beta leg de-levered to beta 1
    df_['rBAB'] = (df_['rL'] - df_['tmytm']) / df_['bL'] - (df_['rH'] - df_['tmytm']) / df_['bH']

    return df_

In [None]:
# Build the monthly BAB series: get_bab_weights returns one row per date with the
# leg returns/betas (rH, rL, bH, bL), tmytm, Rm_e and the BAB return rBAB.
data_BAB = get_bab_weights(data_bab)
display(data_BAB)

### Get the return, std and sharpe ratio of the BAB strategy

In [None]:
# Annualised performance of the BAB strategy (monthly data: x12 for the mean, x sqrt(12) for the std)
BAB_ret = data_BAB.rBAB.mean() * 12
BAB_std = data_BAB.rBAB.std() * np.sqrt(12)
BAB_rfr = data_BAB.tmytm.mean() * 12   # average annualised risk-free rate, for reference only

# rBAB is already an excess return: both legs have the risk-free rate subtracted
# when rBAB is built. Subtracting it a second time would understate the Sharpe
# ratio, so the ratio is simply mean / std.
BAB_shr = BAB_ret / BAB_std

# Compute the CAPM alpha: regress rBAB on a constant and the market excess return
data_BAB['one'] = 1 # Create the column for the constant
model = sm.OLS(data_BAB['rBAB'], data_BAB[['one', 'Rm_e']]).fit() # Fit CAPM

print("Betting-against-beta strategy")
print(" - Mean return: {:.2f}%".format(BAB_ret * 100))
print(" - Standard deviation: {:.2f}%".format(BAB_std * 100))
print(" - Sharpe ratio: {:.2f}".format(BAB_shr))
print(" - CAPM alpha: {:.2f}".format(model.params.iloc[0] * 12))  # intercept, annualised (x12)


# 4 Momentum Strategy

## Question a)

### Create the deciles based on the 12-month cumulative return, excluding short term reversal

In [None]:
# `data` is the NaN-free rolling-beta frame assigned in section 3, question a)
display(data.head())

In [None]:
# Order the panel stock by stock, chronologically within each stock
data = data.sort_values(by=['permno', 'date'])
display(data.head())

In [None]:
# Independent copy of the panel, ordered by stock and then by date so that the
# rolling window below runs over consecutive observations of the same stock.
data_mom = data.sort_values(by=['permno', 'date'])

# Momentum signal: sum of the 11 monthly returns that precede the current row.
# closed='left' leaves the current observation out of the window, i.e. the window
# covers rows t-11 .. t-1 of each stock.
# NOTE(review): if the intent is to also skip the most recent month (t-1) to avoid
# short-term reversal, the window would have to cover t-12 .. t-2 instead -- confirm.
data_mom['roll_ret'] = (
    data_mom
    .groupby('permno')['ret']
    .transform(lambda stock_ret: stock_ret.rolling(11, closed='left').sum())
)
display(data_mom.head())

In [None]:
# Create deciles for the momentum returns: within each month, bucket the stocks by
# roll_ret into integer-coded quantile bins (labels=False); duplicates='drop' can
# leave fewer than 10 bins when bin edges coincide.
data_mom['decile_mom'] = data_mom.groupby('date')['roll_ret'].transform(lambda x: pd.qcut(x, 10, labels=False, duplicates='drop'))
display(data_mom.head())
# Write the frame to data_mom.csv in the current working directory
data_mom.to_csv('data_mom.csv')

### Equally weighted portfolios

In [None]:
# Equal-weighted return of each momentum decile: the average of the member stocks'
# monthly returns, broadcast onto every row of the (month, decile) group.
data_mom['EW_monthly_return'] = (
    data_mom
    .groupby(['date', 'decile_mom'])['ret']
    .transform('mean')
)

# Remove every row that still has a missing value in any column (e.g. no momentum
# signal yet), then order the panel by month and decile.
data_mom = data_mom.dropna(axis=0).sort_values(by=['date', 'decile_mom'])
display(data_mom.head(5))

# # Based on data_mom, only keep one observation per decile per month
# data_mom_clean = data_mom.groupby(['date', 'decile_mom', 'EW_monthly_return', 'tmytm']).first().reset_index()
# data_mom_clean = data_mom_clean[['date', 'decile_mom', 'EW_monthly_return', 'tmytm']]
# display(data_mom_clean.sample(5)) # get overview of a random sample of 5 observations of the dataframe

### Value weighted portfolios

In [None]:
# Market capitalisation = |price| * shares outstanding
data_mom['MktCap'] = data_mom['prc'].abs() * data_mom['shrout']

# Weight of each stock inside its (month, momentum decile) portfolio:
# own market cap over the total market cap of that portfolio.
data_mom["VW_monthly_weight"] = (
    data_mom["MktCap"]
    / data_mom.groupby(["date", "decile_mom"])["MktCap"].transform("sum")
)

# Contribution of each stock to the value-weighted return of its portfolio;
# the portfolio return is the sum of this column per (month, decile).
data_mom["VW_monthly_return"] = data_mom["ret"] * data_mom["VW_monthly_weight"]

display(data_mom) # to get an overview of the dataframe

### Plotting the graphs

In [None]:
# Momentum deciles, equal-weighted: annualised mean, std and Sharpe ratio
# (VW and show keep their defaults: False / True)
plot_mean_std_sr(
    data_mom,
    EW=True,
    EW_decile_col='decile_mom',
    EW_return_col='EW_monthly_return',
    question='4a',
    plot_name='momentum_equal_weight',
)

# Momentum deciles, value-weighted: annualised mean, std and Sharpe ratio
# (EW and show keep their defaults: False / True)
plot_mean_std_sr(
    data_mom,
    VW=True,
    VW_decile_col='decile_mom',
    VW_return_col='VW_monthly_return',
    question='4a',
    plot_name='momentum_value_weight',
)

## Question b)

### Prepare data

In [None]:
# Tag the two legs of the long-short strategy:
#   -1 for the three lowest momentum deciles (0, 1, 2),
#   +1 for the three highest ones (7, 8, 9),
#   NaN for everything in between.
data_mom['leg'] = np.select(
    [data_mom['decile_mom'] <= 2, data_mom['decile_mom'] >= 7],
    [-1, 1],
    default=np.nan,
)

# Keep only the stocks that belong to one of the two legs (rows with a NaN are
# removed) and discard the decile-level columns that are not needed any more.
data_mom_b = (
    data_mom
    .dropna()
    .drop(columns=['EW_monthly_return', 'VW_monthly_weight', 'VW_monthly_return'])
)
display(data_mom_b)

### Equally weighted portfolios

In [None]:
# Average return of each leg in each month (equal weights inside a leg); the
# risk-free rate of the month is carried along.
EW_data_mom = data_mom_b.groupby(['date', 'leg']).agg({
    'ret': 'mean',
    'tmytm': 'first',
    }).reset_index()

# One row per month, one column per leg (-1 = losers, 1 = winners)
EW_data_mom_piv = EW_data_mom.pivot(index='date', columns='leg', values='ret')
# Long-short return: winners minus losers
EW_data_mom_piv['EW_return'] = EW_data_mom_piv[1] - EW_data_mom_piv[-1]
# Risk-free rate per month (aligned on the date index)
EW_data_mom_piv['tmytm'] = EW_data_mom.groupby('date')['tmytm'].first()
EW_data_mom_piv = EW_data_mom_piv[['EW_return', 'tmytm']]   # Keep only the relevant columns

# Annualised mean and standard deviation of the monthly long-short return
mean = EW_data_mom_piv['EW_return'].mean() * 12
std = EW_data_mom_piv['EW_return'].std() * np.sqrt(12)
rf = EW_data_mom_piv['tmytm'].mean() * 12   # average annualised risk-free rate, for reference only

# Display the results.
# - The statistics are decimal returns, so they are scaled by 100 before being
#   printed with a '%' sign (as done for the BAB strategy).
# - The long-short return is self-financing: the risk-free rate cancels between
#   the two legs, so the Sharpe ratio is mean / std without subtracting rf again.
print("Momentum strategy based on equally weighted portfolios")
print(" - Expected return:\t {:.2f}%".format(mean * 100))
print(" - Standard deviation:\t {:.2f}%".format(std * 100))
print(" - Sharpe ratio:\t {:.2f}".format(mean / std))

###  Value weighted portfolios

In [None]:
VW_data_mom = data_mom_b.copy()

# Market cap of each stock, counted only in the leg it belongs to (0 in the other leg)
VW_data_mom['VW_wL'] = (VW_data_mom['leg'] == -1) * VW_data_mom['MktCap']
VW_data_mom['VW_wL_sum'] = VW_data_mom.groupby('date')['VW_wL'].transform('sum')
VW_data_mom['VW_wH'] = (VW_data_mom['leg'] == 1) * VW_data_mom['MktCap']
VW_data_mom['VW_wH_sum'] = VW_data_mom.groupby('date')['VW_wH'].transform('sum')

# Normalise by the total market cap of the leg in that month: value weights inside each leg
VW_data_mom['VW_wL'] = VW_data_mom['VW_wL'] / VW_data_mom['VW_wL_sum']
VW_data_mom['VW_wH'] = VW_data_mom['VW_wH'] / VW_data_mom['VW_wH_sum']
VW_data_mom = VW_data_mom.drop(columns=['VW_wL_sum', 'VW_wH_sum'])

# Signed weight: +weight for the winners (leg = 1), -weight for the losers (leg = -1)
VW_data_mom['VW_w'] = VW_data_mom['VW_wL'] * VW_data_mom['leg'] + VW_data_mom['VW_wH'] * VW_data_mom['leg']
VW_data_mom['VW_ret'] = VW_data_mom['VW_w'] * VW_data_mom['ret']


# Monthly long-short return = sum of the signed contributions; keep the risk-free rate
VW_data_mom_ = VW_data_mom.groupby(['date']).agg({
    'VW_ret': 'sum',
    'tmytm': 'first',
    }).reset_index()

display(VW_data_mom_)

# Annualised mean and standard deviation of the monthly long-short return
mean = VW_data_mom_['VW_ret'].mean() * 12
std = VW_data_mom_['VW_ret'].std() * np.sqrt(12)
rf = VW_data_mom_['tmytm'].mean() * 12   # average annualised risk-free rate, for reference only

# Display the results.
# - The headline names the value-weighted strategy computed in this cell.
# - The statistics are decimal returns, so they are scaled by 100 before being
#   printed with a '%' sign (as done for the BAB strategy).
# - The long-short return is self-financing: the risk-free rate cancels between
#   the two legs, so the Sharpe ratio is mean / std without subtracting rf again.
print("Momentum strategy based on value weighted portfolios")
print(" - Expected return:\t {:.2f}%".format(mean * 100))
print(" - Standard deviation:\t {:.2f}%".format(std * 100))
print(" - Sharpe ratio:\t {:.2f}".format(mean / std))