<a href="https://colab.research.google.com/github/bbcx-investments/notebooks/blob/main/factor_investing/quintiles_boxes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import plotly.express as px
try :
  from pandas_datareader import DataReader as pdr
except :
  !pip install pandas-datareader
  from pandas_datareader import DataReader as pdr

# associate names of French files with characteristic names
# (display name -> dataset name passed to the 'famafrench' reader)
files = {
    'Earnings to price ratio': 'Portfolios_Formed_on_E-P',
    'Variance': 'Portfolios_Formed_on_VAR',
    'Accruals': 'Portfolios_Formed_on_AC',
    'Residual variance': 'Portfolios_Formed_on_RESVAR',
    'Net equity issuance': 'Portfolios_Formed_on_NI',
    'Beta': 'Portfolios_Formed_on_BETA',
    'Cash flow to price': 'Portfolios_Formed_on_CF-P',
    'Market equity': 'Portfolios_Formed_on_ME',
    'Book to market ratio': 'Portfolios_Formed_on_BE-ME',
    'Dividend to price ratio': 'Portfolios_Formed_on_D-P',
    'Investment rate': 'Portfolios_Formed_on_INV',
    'Momentum': '10_Portfolios_Prior_12_2',
    'Short term reversal': '10_Portfolios_Prior_1_0',
    'Long term reversal': '10_Portfolios_Prior_60_13',
}

# sort characteristics in alphabetical order
keys = np.sort(list(files.keys()))

# quintile column labels, lowest to highest value of the characteristic
quintiles = ['Lo 20', 'Qnt 2', 'Qnt 3', 'Qnt 4', 'Hi 20']

# first year of the combined frame; rows with no data stay NaN
first_year = 1926

# download one table of quintile returns per characteristic
tables = {}
for key in keys:

    # read file; table 2 of the download is used and divided by 100
    # (presumably annual returns quoted in percent - confirm against the
    # 'DESCR' entry of the downloaded dict)
    f = files[key]
    d = pdr(f, 'famafrench', start=1920)[2] / 100

    if 'Portfolios_Formed_on' not in f:
        # momentum and reversal files hold deciles only: average each
        # adjacent pair of decile columns to form a quintile
        cols = d.columns.to_list()
        d = pd.DataFrame(
            {q: d[cols[2 * i:2 * i + 2]].mean(axis=1)
             for i, q in enumerate(quintiles)}
        )
    else:
        # other files already contain the quintile columns
        d = d[quintiles].copy()

    # convert dates to integer years so they align with the index of df
    d.index = d.index.astype(str).astype(int)
    tables[key] = d

# initialize data frame; the last year comes from the downloaded data
# rather than a hard-coded bound, so newly published years are not
# silently dropped when the tables are aligned on the index
last_year = max(int(t.index.max()) for t in tables.values())
df = pd.DataFrame(
    dtype=float,
    index=range(first_year, last_year + 1),
    columns=pd.MultiIndex.from_product((keys, quintiles)),
)

# add each characteristic's quintiles under its top-level column key
for key in keys:
    df[key] = tables[key]

# create figure for single characteristic with given start date   
def fig(key, start):
    """Return a box plot of returns by quintile for one characteristic.

    key   -- top-level column label of the module-level ``df``
    start -- first date (inclusive) to include; compared against the
             index values of ``df``

    The plot has one box per quintile, a percent-formatted y axis and
    the 'plotly_dark' template.
    """
    # reshape to long format: one row per (Date, Quintile) pair
    long = (
        df[key]
        .stack()
        .rename_axis(['Date', 'Quintile'])
        .reset_index(name='Return')
    )
    long = long.loc[long['Date'] >= start]

    chart = px.box(long, x='Quintile', y='Return')

    # axis titles and tick format
    chart.update_xaxes(title_text='', title_font_size=16)
    chart.update_yaxes(title_text='Return', title_font_size=16, tickformat='.0%')

    # overall look
    chart.update_layout(
        margin=dict(l=60, r=20, t=20, b=60),
        font_size=14,
        template='plotly_dark',
    )
    return chart

# example: box plot of the momentum quintiles from 1970 onward
# (the call is the last expression so the notebook displays the figure)
key, start = 'Momentum', 1970
fig(key, start)