In [2]:
import pandas as pd
import numpy as np
import jdatetime

# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the
# whole session: assigning into a slice of a DataFrame is no longer reported.
pd.options.mode.chained_assignment = None

## Calculate daily and monthly returns and market caps for the SMB and HML calculation

In [None]:
# Extract book value
# For each of the 75 balance-sheet workbooks, take the first row whose
# 'دوره مالی' cell equals 'جمع حقوق صاحبان سهام' (total shareholders' equity),
# multiply its values by 1,000,000 and collect one row per workbook in BE.xlsx.
# NOTE(review): the scaling suggests the sheets report in millions - confirm.
book_value_rows = []
for file_number in range(1, 76):
    print(file_number)
    balance_sheet = pd.read_excel(
        (
            f'E:/Thesis/New Sampling/Factor Model'
            f'/Balance Sheet/{file_number}.xlsx'
        ),
        skiprows=7,
        usecols=[1, 5, 6, 7, 8, 9],
    )
    is_equity_row = balance_sheet['دوره مالی'] == 'جمع حقوق صاحبان سهام'
    # First matching row as a plain list; the leading cell is skipped
    # (presumably the row label - confirm against the workbook layout).
    equity_values = balance_sheet.loc[is_equity_row].values[0].tolist()[1:]
    book_value_rows.append([value * 1000000 for value in equity_values])
pd.DataFrame(book_value_rows).to_excel('BE.xlsx')

In [None]:
# Extract market value
# For every ticker workbook, record the largest market cap observed on a
# traded day within each Jalali year listed below, then save one row per
# ticker (one column per year, in the listed order) to ME.xlsx.
years_list = ['1397', '1396', '1395', '1394', '1393']
final_list = []
for file_number in range(1, 76):
    print(file_number)
    path = f'E:/Thesis/New Sampling/Daily Data - Bourseview/{file_number}.xlsx'
    df = pd.read_excel(
        path,
        skiprows=7,
        usecols=[1, 2, 3, 7, 8, 11, 17],
        names=[
            'date', 'j_date', 'open', 'adj_close',
            'close', 'market_cap', 'P/B'
        ],
        na_values='-'
    )
    # Rows without an opening price are discarded; 'open' is only needed
    # for that filter.
    df = df.dropna(subset=['open']).drop(columns='open')
    ticker_list = []
    for year in years_list:
        # na=False keeps the mask strictly boolean even if a Jalali date is
        # missing; a NaN entry in the mask would make .loc raise.
        condition = df['j_date'].str.startswith(year, na=False)
        # Reduce only the market_cap column. Calling .max() on the whole
        # frame also reduces the string date columns, which is wasted work
        # and can raise TypeError on mixed-type object columns.
        market_cap = df.loc[condition, 'market_cap'].max()
        ticker_list.append(market_cap)
    final_list.append(ticker_list)
df_1 = pd.DataFrame(final_list)
df_1.to_excel('ME.xlsx')

In [5]:
# Read market cap from excel file after a few adjustments
# Only the first seven columns of the workbook are loaded.
me_path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Factor Model\ME_final.xlsx'
me_columns = list(range(7))
me_df = pd.read_excel(me_path, usecols=me_columns)

In [6]:
# Read book value to market value ratio from excel file
# Column 1 of the workbook is skipped; the seven columns that are read are
# renamed to the ticker identifiers followed by one column per year.
bm_path = r'C:\Users\behnood\Desktop\Thesis\TSETMC\Factor Model\BM_final.xlsx'
bm_columns = ['ticker_num', 'ticker', '97', '96', '95', '94', '93']
bm_df = pd.read_excel(bm_path, usecols=[0, 2, 3, 4, 5, 6, 7], names=bm_columns)

In [7]:
# Read "Bourseview" data for market cap
# Build one month-end market-cap table per ticker, then stack all 75 tickers.
monthly_caps = []
for file_number in range(1, 76):
    daily_caps = pd.read_excel(
        (
            f'E:/Thesis/New Sampling/Daily Data - Bourseview/'
            f'{file_number}.xlsx'
        ),
        skiprows=7,
        usecols=[2, 3, 11],
        names=['date', 'open', 'market_cap'],
        na_values='-'
    )
    # Reverse the rows so that they run from old to new dates
    daily_caps = daily_caps.iloc[::-1].reset_index(drop=True)
    # Strip the dashes from the date strings
    daily_caps['date'] = daily_caps['date'].str.replace('-', '')
    # Non-traded days have no opening price: drop them, then drop the
    # helper column itself
    daily_caps = daily_caps.dropna(subset=['open']).drop(columns='open')
    # One row per month (first six characters of the date), holding the
    # last available market cap of that month
    month_key = daily_caps['date'].str[:6]
    month_end_caps = (
        daily_caps.groupby(month_key)['market_cap'].last().reset_index()
    )
    month_end_caps.insert(1, 'ticker_num', file_number)
    monthly_caps.append(month_end_caps)
bv_df = pd.concat(monthly_caps, ignore_index=True)
# Keep the months between '139400' and '139900' (string comparison)
in_sample = (bv_df['date'] >= '139400') & (bv_df['date'] <= '139900')
bv_df = bv_df.loc[in_sample].reset_index(drop=True)

In [8]:
# Read "rahavard 365" data for calculating returns
# Build one month-end close / monthly-return table per ticker, then stack
# all 75 tickers.


def _to_shamsi(gregorian_date):
    """Format a Gregorian date as a Jalali (shamsi) 'YYYYMMDD' string."""
    return jdatetime.date.fromgregorian(date=gregorian_date).strftime('%Y%m%d')


close_list = []
for file_number in range(1, 76):
    rahavard_path = (
        f'E:/Thesis/New Sampling/Daily Data - Rahavard 365/'
        f'{file_number}.txt'
    )
    daily_close = pd.read_csv(
        rahavard_path,
        usecols=[2, 7],
        names=['date', 'close'],
        header=0,
        dtype={'date': str},
        parse_dates=[0]
    )
    # Per the original note, pandas ends up with two extra index levels when
    # reading these files; replace them with a plain positional index.
    daily_close = daily_close.reset_index(drop=True)
    # Convert to shamsi dates
    daily_close['date'] = daily_close['date'].apply(_to_shamsi)
    # One row per month (first six characters of the date), holding the
    # last available close of that month
    month_key = daily_close['date'].str[:6]
    monthly_close = (
        daily_close.groupby(month_key)['close'].last().reset_index()
    )
    monthly_close.insert(1, 'ticker_num', file_number)
    # Month-over-month change, computed per ticker before stacking so a
    # return never spans two tickers
    monthly_close['monthly_return'] = monthly_close['close'].pct_change()
    close_list.append(monthly_close)
df = pd.concat(close_list, ignore_index=True)
# Keep the months between '139400' and '139900' (string comparison)
in_sample = (df['date'] >= '139400') & (df['date'] <= '139900')
df = df.loc[in_sample]

In [9]:
# Merge market cap and price dfs
# Inner join: only (ticker, month) pairs present in both sources survive.
merge_keys = ['ticker_num', 'date']
merged_df = df.merge(bv_df, how='inner', on=merge_keys)

In [10]:
# Extract all months from index df
# The second column of the index workbook is read as text dates; the distinct
# six-character prefixes of those dates form the list of months.
index_path = r'E:\Thesis\New Sampling\TEDPIX\شاخص كل6.xls'
index_df = pd.read_excel(
    index_path,
    usecols=[1],
    names=['date'],
    dtype={'date': str}
).dropna()
month_codes = index_df['date'].str[:6].unique().tolist()
all_months = pd.Series(month_codes, name='date')

In [11]:
# Create a list of years
# Every column of me_df after the first two, taken in reverse column order.
year_columns = me_df.columns[2:].tolist()
years_list = list(reversed(year_columns))

## Calculating SMB and HML factors

In [12]:
# Calculating SMB and HML
# For every formation year the tickers are sorted 2x3 on market equity (ME)
# and book-to-market (B/M). The six resulting portfolios are then followed
# over the months of the next year, and their value-weighted monthly returns
# are combined into one SMB and one HML value per month.
BM_QUANTILES = [0, .3, .7, 1]
BM_LABELS = ['L', 'M', 'H']
PORTFOLIO_CODES = ['BH', 'BM', 'BL', 'SH', 'SM', 'SL']


def _value_weighted_return(portfolio):
    """Return the market-cap-weighted mean of ``monthly_return``.

    Parameters
    ----------
    portfolio : pandas.DataFrame
        Rows of one portfolio for one month; must contain the columns
        ``monthly_return`` and ``market_cap``.

    Returns
    -------
    float
        The weighted average, or ``np.nan`` when no row has both a return
        and a market cap, or when the weights sum to zero (``np.average``
        would raise ``ZeroDivisionError`` in that case).
    """
    # A single missing return or weight would turn the whole average into
    # NaN, so such rows are left out of the portfolio for that month.
    usable = portfolio.dropna(subset=['monthly_return', 'market_cap'])
    if usable.empty or usable['market_cap'].sum() == 0:
        return np.nan
    return np.average(usable['monthly_return'], weights=usable['market_cap'])


smb_list = []
hml_list = []
for year in years_list:
    # Keep tickers whose B/M for this year is non-negative; negative and
    # missing ratios both fail the comparison and are excluded.
    year_bm = (
        bm_df.loc[bm_df[year] >= 0, ['ticker_num', year]]
        .rename(columns={year: 'bm_ratio'})
    )
    # Tickers without an ME value cannot be assigned to a size group.
    year_me = (
        me_df[['ticker_num', year]]
        .rename(columns={year: 'me'})
        .dropna(subset=['me'])
    )
    # Pair ME and B/M through the ticker number itself, so the result does
    # not depend on me_df and bm_df sharing the same index and row order.
    sorts = year_bm.merge(year_me, on='ticker_num', how='inner')
    # Split each year ME into two groups: above the median is big ('B'),
    # at or below the median is small ('S')
    sorts['size'] = np.where(sorts['me'] > sorts['me'].median(), 'B', 'S')
    # Split each ME group into three B/M groups (30% / 40% / 30%), writing
    # the labels straight into the frame instead of updating a column slice
    sorts['bm'] = ''
    for size_label in ('B', 'S'):
        in_size = sorts['size'] == size_label
        if in_size.any():
            sorts.loc[in_size, 'bm'] = pd.qcut(
                x=sorts.loc[in_size, 'bm_ratio'],
                q=BM_QUANTILES,
                labels=BM_LABELS
            ).astype(str)
    # Extract six portfolio ticker numbers
    sorts['portfolio'] = sorts['size'] + sorts['bm']
    members = {
        code: sorts.loc[sorts['portfolio'] == code, 'ticker_num'].tolist()
        for code in PORTFOLIO_CODES
    }
    # Portfolios formed on `year` are held during the months whose
    # two-digit year (characters 2-3 of the month code) is year + 1
    next_year = str(1 + int(year))
    next_year_months = all_months[all_months.str[2:4] == next_year]
    for month in next_year_months:
        month_rows = merged_df.loc[merged_df['date'] == month]
        # Calculate value-weighted returns.
        # NOTE(review): the weights are the market caps stored on the same
        # month's rows as the returns - confirm this is the intended
        # weighting date.
        returns = {
            code: _value_weighted_return(
                month_rows.loc[month_rows['ticker_num'].isin(tickers)]
            )
            for code, tickers in members.items()
        }
        # Calculate SMB and HML. Then, add them to lists
        smb = (
            ((returns['SH'] + returns['SM'] + returns['SL']) / 3)
            - ((returns['BH'] + returns['BM'] + returns['BL']) / 3)
        )
        smb_list.append(smb)
        hml = (
            ((returns['SH'] + returns['BH']) / 2)
            - ((returns['SL'] + returns['BL']) / 2)
        )
        hml_list.append(hml)

In [13]:
# Write calculated SMB and HML factors to an Excel file
# The two lists are stacked as rows and then flipped, so the sheet gets one
# row per month with SMB in the first column and HML in the second.
factor_rows = [smb_list, hml_list]
ff_df = pd.DataFrame(factor_rows).T
ff_df.to_excel('smb_hml.xlsx', index=False)