# Factors calculation for Fama French Model

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm
#from sklearn.linear_model import LinearRegression
import scipy, scipy.stats
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
cd F:\fama n french\

F:\fama n french


In [3]:
#Set years of data to look at: 2007-2018
start = 2007
end = 2018

In [35]:
df=pd.read_csv('data_smb_yip.csv')

In [36]:
df.head()

Unnamed: 0.1,Unnamed: 0,Symbol,Year,Return,TRI,TB90,MKTCAP,PB
0,0,TCM,2007,394.1667,34542.6016,3.9,2009.5763,4.8272
1,1,BB,2007,196.5454,34542.6016,3.9,50392.207,17.3708
2,2,QUX,2007,52.4324,34542.6016,3.9,914.2384,2.0376
3,3,TRE,2007,300.0,34542.6016,3.9,2908.6021,2.6323
4,4,VT,2007,46.1039,34542.6016,3.9,2296.7551,2.1184


In [38]:
df.drop('Unnamed: 0',axis=1,inplace=True)# drop column 

In [39]:
df.head()

Unnamed: 0,Symbol,Year,Return,TRI,TB90,MKTCAP,PB
0,TCM,2007,394.1667,34542.6016,3.9,2009.5763,4.8272
1,BB,2007,196.5454,34542.6016,3.9,50392.207,17.3708
2,QUX,2007,52.4324,34542.6016,3.9,914.2384,2.0376
3,TRE,2007,300.0,34542.6016,3.9,2908.6021,2.6323
4,VT,2007,46.1039,34542.6016,3.9,2296.7551,2.1184


In [17]:
def dfResetIndex (df):
    """
    Resets index each time a df is made
    """
    
    return df.reset_index(drop = True)

In [18]:
def calcRf (df):
    """
    Returns Rf for FF
    """
    df = dfResetIndex (df)
    return round(df.TB90[1], 4)

In [23]:
def fillMktPrem (df, s, e):
    """
    Fills in MktPrem to DF
    """
    dfNew = df
    dfNew['MKtReturn'] = ""
    dfNew['MktPrem'] = ""
    fileRange = np.arange(s, e)    
    for i in fileRange:
        dfNew['MKtReturn'].loc[dfNew['Year'] == i+1] = (dfNew['TRI'].loc[dfNew['Year'] == i+1].iloc[0] / dfNew['TRI'].loc[dfNew['Year'] == i].iloc[0] - 1)*100
        dfNew['MktPrem'].loc[dfNew['Year'] == i+1] = dfNew['MKtReturn'].loc[dfNew['Year'] == i+1].iloc[0] - dfNew['TB90'].loc[dfNew['Year'] == i+1].iloc[0]
    return dfNew

In [24]:
def calcMktPrem (df):
    """
    Returns Mkt Premium for FF
    """
    df = dfResetIndex(df)
    return round(df.MktPrem[1], 4)

In [8]:
def calcReturn(df):
    """
    Gives return of individual security from portfolio
    """
   
    return round(df.Return.mean(), 4)

In [9]:
def calcSMB(df):
    """
    Returns SMB for FF
    """
    #Define Quantile
    SQuantile = 0.3
    LQuantile = 0.7
    df["SMB"] = ""
    
    #Assigns stock size based on market cap
    df.SMB[df.MKTCAP <= df.MKTCAP.quantile(SQuantile)] = "SCap"
    df.SMB[(df.MKTCAP > df.MKTCAP.quantile(SQuantile)) & (df.MKTCAP < df.MKTCAP.quantile(LQuantile))] = "MCap"
    df.SMB[df.MKTCAP >= df.MKTCAP.quantile(LQuantile)] = "LCap"
    
    #Calculates average return of stocks in portfolio subset based on size
    SmallCapReturn = df.Return.loc[df["SMB"] == "SCap"].mean()
    LargeCapReturn = df.Return.loc[df["SMB"] == "LCap"].mean()
    
    #Returns SMB based on definition
    SMB = SmallCapReturn - LargeCapReturn
    return round(SMB, 4)


In [None]:
def calcHML (df):
    """
    Returns HML for FF
    Uses inverse of P/B as proxy for Book/Mkt
    """
    #Define Quantile
    SQuantile = 0.3
    LQuantile = 0.7
    df["HML"] = ""
    df["BP"] = df.PB**(-1) #Create Book/MktValue Proxy
    
    #Assigns stock size based on market cap
    df.HML[df.BP <= df.BP.quantile(SQuantile)] = "SValue"
    df.HML[(df.BP > df.BP.quantile(SQuantile)) & (df.BP < df.BP.quantile(LQuantile))] = "MValue"
    df.HML[df.BP >= df.BP.quantile(LQuantile)] = "LValue"
    
    #Calculates average return of stocks in portfolio subset based on size
    SmallValueReturn = df.Return.loc[df["HML"] == "SValue"].mean()
    LargeValueReturn = df.Return.loc[df["HML"] == "LValue"].mean()
    
    #Returns SMB based on definition
    HML = SmallValueReturn - LargeValueReturn
    return round(HML, 4)

In [26]:
###Fill in MktPrem
#This part should only be done once
df = fillMktPrem (df, start, end)

In [27]:
###Continue Cleanup
#Drop First Year
df = df.loc[df['Year'] != start]
df = df.reset_index(drop = True)

In [31]:
#Convert all inputs used to numeric

#df.iloc[:, 2:] = df.iloc[:, 2:].convert_objects(convert_numeric=True)

In [32]:
#Create Fama French 3 factor model for Aggressive Strategy
FFA = pd.DataFrame(columns =
                  ["Year",
                   "Return",
                   "Rf",
                   "MktPrem",
                   "HML",
                   "SMB"                    
                  ])
FFAIndex = 0 
for i in range(start+1, end+1):
    FFA.loc[FFAIndex] = [i, 
                    calcReturn(df.loc[df['Year'] == i]), 
                    calcRf(df.loc[df['Year'] == i]), 
                    calcMktPrem(df.loc[df['Year'] == i]), 
                    calcHML(df.loc[df['Year'] == i]),
                     calcSMB(df.loc[df['Year'] == i])
                   ]
    FFAIndex += 1
FFA['Year'] = FFA['Year'].astype(int)

In [33]:
FFA

Unnamed: 0,Year,Return,Rf,MktPrem,HML,SMB
0,2008,36.8584,2.4,-1.0967,43.6863,18.7416
1,2009,12.8186,0.2,-17.5444,36.1373,12.4716
2,2010,68.1621,0.67,10.8205,60.3519,14.7011
3,2011,52.0113,0.89,8.217,61.1273,24.8434
4,2012,29.9158,1.03,-5.2575,46.9944,5.493
5,2013,38.025,0.99,9.0373,10.3304,13.0665
6,2014,54.0386,0.94,26.1894,66.1083,25.7608
7,2015,20.5506,0.37,-9.0489,61.2438,-17.1839
8,2016,74.2083,0.5,8.1886,18.0131,26.1487
9,2017,31.4535,0.71,6.5242,21.5734,-0.7788
