In [None]:
import glob
import os

import pandas as pd
import numpy as np
import jdatetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Turn off pandas' chained-assignment warning for the whole notebook; later
# cells add columns to frames that were obtained by filtering other frames.
pd.options.mode.chained_assignment = None

# Preparation (calculate MKT, SMB, and HML for estimating LIQ)

----

## 1. Calculate ME and BE, and write them to Excel files

In [None]:
# Check financial year for all tickers:
# gather the column headers (after the first selected column) of every
# balance-sheet file and dump them to one spreadsheet for inspection.
balance_sheet_dir = 'E:/Thesis/New Sampling/Factor Model/Balance Sheet 2'
financial_years_list = [
    pd.read_excel(
        f'{balance_sheet_dir}/{file_number}.xlsx',
        skiprows=7,
        usecols=[1, *range(5, 15)]
    ).columns.tolist()[1:]
    for file_number in range(1, 76)
]
pd.DataFrame(financial_years_list).to_excel('financial_years.xlsx')

In [None]:
# Extract BE
# For each balance sheet, take the first row whose label column matches the
# equity row label, drop the label cell and scale the remaining values.
label_column = 'دوره مالی'
equity_row_label = 'جمع حقوق صاحبان سهام'
final_list = []
for file_number in range(1, 76):
    print(file_number)
    balance_sheet = pd.read_excel(
        f'E:/Thesis/New Sampling/Factor Model'
        f'/Balance Sheet 2/{file_number}.xlsx',
        skiprows=7,
        usecols=[1, *range(5, 15)]
    )
    equity_rows = balance_sheet.loc[
        balance_sheet[label_column] == equity_row_label
    ]
    raw_values = equity_rows.values[0].tolist()[1:]
    # Presumably the statements are reported in millions -- TODO confirm
    final_list.append([value * 1000000 for value in raw_values])
pd.DataFrame(final_list).to_excel('BE_raw_new.xlsx')

In [None]:
# Extract ME
# Read the Bourseview daily file of each of the 75 tickers and keep, per
# ticker, the last traded market cap of every year from 1388 to 1397.
me_list = []
for file_number in range(1, 76):
    ticker_daily = pd.read_excel(
        f'E:/Thesis/New Sampling/Daily Data - Bourseview/'
        f'{file_number}.xlsx',
        skiprows=7,
        usecols=[2, 3, 11],
        names=['date', 'open', 'market_cap'],
        na_values='-'
    )
    # Reverse the row order so dates run from old to new
    ticker_daily = ticker_daily[::-1].reset_index(drop=True)
    ticker_daily['date'] = ticker_daily['date'].str.replace('-', '')
    # Non-traded days have no open price; drop them, then drop 'open' itself
    ticker_daily = (
        ticker_daily
        .dropna(subset=['open'])
        .drop(columns='open')
    )
    # Yearly frame: last row of each year (first four characters of date)
    year_key = ticker_daily['date'].str[:4]
    ticker_yearly = (
        ticker_daily
        .groupby(year_key)
        .last()
        .drop(columns=['date'])
        .reset_index()
    )
    ticker_yearly.insert(1, 'ticker_num', file_number)
    me_list.append(ticker_yearly)

me_df = pd.concat(me_list, ignore_index=True)
me_df = (
    me_df
    .loc[me_df['date'].between('1388', '1397')]
    .sort_values(['ticker_num', 'date'], ascending=[True, False])
    .reset_index(drop=True)
)
# One row per ticker, one column per year
me_df = pd.pivot_table(
    me_df,
    values='market_cap',
    index=['ticker_num'],
    columns=['date']
)
me_df.columns.name = None
# Shorten the year labels to two digits ('1388' -> '88', ...)
short_year_names = {str(full): str(full)[2:] for full in range(1388, 1398)}
me_df = me_df.rename(columns=short_year_names).reset_index()
# Newest year first
me_df = me_df[['ticker_num', *[str(short) for short in range(97, 87, -1)]]]
me_df.to_excel('ME_final.xlsx', index=False)

## 2. Read ME and BM

In [None]:
# Load the yearly market-cap (ME) table.
# NOTE(review): the SMB/HML cell selects a 'ticker' column from me_df, which
# the ME_final.xlsx written by the previous section does not contain --
# presumably this copy of the file was edited by hand; confirm.
me_path = (
    r'C:\Users\behnood\Desktop\Thesis\TSETMC'
    r'\Factor Model\ME_final.xlsx'
)
me_df = pd.read_excel(me_path)

In [None]:
# Load the modified B/M table: spreadsheet column 0 plus columns 2-12,
# renamed to ticker_num, ticker and the two-digit years 97 down to 88.
bm_path = (
    r'C:\Users\behnood\Desktop\Thesis\TSETMC'
    r'\Factor Model\BM_final.xlsx'
)
bm_column_names = [
    'ticker_num',
    'ticker',
    *[str(short_year) for short_year in range(97, 87, -1)]
]
bm_df = pd.read_excel(
    bm_path,
    usecols=[0, *range(2, 13)],
    names=bm_column_names
)

## 3. Calculate daily and monthly returns and market caps for the SMB and HML calculation

In [None]:
# Bourseview data: build a daily frame (market cap and d_vol on traded days)
# and a month-end market-cap frame covering all 75 tickers.
daily_bv_list = []
monthly_bv_list = []
for file_number in range(1, 76):
    bv_path = (
        f'E:/Thesis/New Sampling/Daily Data - Bourseview/'
        f'{file_number}.xlsx'
    )
    ticker_bv = pd.read_excel(
        bv_path,
        skiprows=7,
        usecols=[2, 3, 11, 14],
        names=['date', 'open', 'market_cap', 'd_vol'],
        na_values='-'
    )
    ticker_bv.insert(1, 'ticker_num', file_number)
    # Reverse the row order so dates run from old to new
    ticker_bv = ticker_bv[::-1].reset_index(drop=True)
    ticker_bv['date'] = ticker_bv['date'].str.replace('-', '')
    # Rows without an open price are non-traded days
    ticker_bv = ticker_bv.dropna(subset=['open']).drop(columns='open')
    daily_bv_list.append(ticker_bv)
    # Month-end rows: last traded day of each YYYYMM prefix
    month_key = ticker_bv['date'].str[:6]
    ticker_month_end = (
        ticker_bv
        .groupby(month_key)
        .last()
        .drop(columns=['date', 'd_vol'])
        .reset_index()
    )
    monthly_bv_list.append(ticker_month_end)

# Monthly frame
bv_df = pd.concat(monthly_bv_list, ignore_index=True)
# Daily frame
daily_bv_df = pd.concat(daily_bv_list, ignore_index=True)
# The window starts at 138812 because equation 3 needs the month before 1389
bv_df = (
    bv_df
    .loc[bv_df['date'].between('138812', '139900')]
    .reset_index(drop=True)
)

In [None]:
# Rahavard 365 data: adjusted close prices of all 75 tickers, turned into
# daily returns (for the PS factor) and monthly returns (for Fama-French).
def _to_shamsi(gregorian_date):
    """Format a Gregorian date as a Jalali 'YYYYMMDD' string."""
    return jdatetime.date.fromgregorian(
        date=gregorian_date
    ).strftime('%Y%m%d')


daily_list = []
monthly_list = []
for file_number in range(1, 76):
    rahavard_path = (
        f'E:/Thesis/New Sampling/Daily Data - Rahavard 365/'
        f'{file_number}.txt'
    )
    ticker_prices = pd.read_csv(
        rahavard_path,
        usecols=[2, 7],
        names=['date', 'adj_close'],
        header=0,
        dtype={'date': str},
        parse_dates=[0]
    )
    # Replace whatever index pandas produced while reading with 0..n-1
    ticker_prices = ticker_prices.reset_index(drop=True)
    ticker_prices['date'] = ticker_prices['date'].apply(_to_shamsi)
    ticker_prices.insert(1, 'ticker_num', file_number)
    ticker_prices['i_return'] = ticker_prices['adj_close'].pct_change()
    daily_list.append(ticker_prices)
    # Month-end rows and the return between consecutive month-ends
    month_key = ticker_prices['date'].str[:6]
    ticker_monthly = (
        ticker_prices
        .groupby(month_key)
        .last()
        .drop(columns=['date'])
        .reset_index()
    )
    ticker_monthly['monthly_return'] = (
        ticker_monthly['adj_close'].pct_change()
    )
    ticker_monthly = ticker_monthly.drop(columns=['i_return'])
    monthly_list.append(ticker_monthly)

# Monthly frame for calculating the Fama-French factors
monthly_df = pd.concat(monthly_list, ignore_index=True)
monthly_df = (
    monthly_df
    .loc[monthly_df['date'].between('138900', '139900')]
    .dropna()
    .reset_index(drop=True)
)
# Daily frame for calculating the PS factor
df = (
    pd.concat(daily_list, ignore_index=True)
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Rahavard 365 has no d_vol column, so take it from the Bourseview daily
# frame; market_cap comes along with the merge and is dropped again.
daily_df = (
    df
    .merge(daily_bv_df, on=['ticker_num', 'date'])
    .drop(columns=['market_cap'])
)

In [None]:
# Attach month-end market caps to the monthly returns (inner join on
# ticker and month).
monthly_merged_df = monthly_df.merge(bv_df, on=['ticker_num', 'date'])

In [None]:
# Read the index file, compute its row-to-row return, and list every
# month (YYYYMM prefix of the date) that appears in it.
index_path = r'E:\Thesis\New Sampling\TEDPIX\شاخص كل6.xls'
index_df = pd.read_excel(
    index_path,
    usecols=[1, 3],
    names=['date', 'close'],
    dtype={'date': str}
).dropna()
index_df['m_return'] = index_df['close'].pct_change()
all_months = pd.Series(
    index_df['date'].str[:6].unique().tolist(),
    name='date'
)

In [None]:
# Target years: every me_df column from the third one on, in reverse order
years_list = list(reversed(me_df.columns[2:].tolist()))

## 4. Calculating MKT, SMB, and HML

In [None]:
# Calculating SMB and HML
def _value_weighted_return(portfolio):
    """Return the market-cap-weighted mean of a portfolio's monthly returns.

    `portfolio` is a row subset of `monthly_merged_df`; its `monthly_return`
    column is averaged with `market_cap` as weights. `np.average` raises
    ZeroDivisionError when the subset is empty (the weights sum to zero).
    """
    return np.average(
        portfolio['monthly_return'],
        weights=portfolio['market_cap']
    )


# Breakpoints and labels of the three B/M groups inside each size group
q = [0, .3, .7, 1]
labels = ['L', 'M', 'H']
portfolio_codes = ('BH', 'BM', 'BL', 'SH', 'SM', 'SL')

smb_list = []
hml_list = []
for year in years_list:
    # Keep tickers whose B/M for this year is non-negative; NaN fails the
    # comparison as well, so missing values are excluded too.
    applicable_tickers = bm_df.loc[bm_df[year] >= 0, 'ticker_num'].tolist()
    # Each frame is filtered with its own mask, so the selection no longer
    # depends on me_df and bm_df sharing the same row order and index.
    temp_me = me_df.loc[
        me_df['ticker_num'].isin(applicable_tickers),
        ['ticker_num', year]
    ]
    temp_bm = bm_df.loc[
        bm_df['ticker_num'].isin(applicable_tickers),
        ['ticker_num', 'ticker', year]
    ].copy()
    # Split each year's ME at the median into Big and Small.
    # An explicit string default keeps np.select from mixing the string
    # choices with its integer default 0; tickers with missing ME get ''
    # and therefore end up in none of the six portfolios.
    me_median = temp_me[year].median()
    size_labels = np.select(
        [temp_me[year] > me_median, temp_me[year] <= me_median],
        ['B', 'S'],
        default=''
    ).tolist()
    # Attach the size label by ticker number rather than by row position
    size_by_ticker = dict(zip(temp_me['ticker_num'], size_labels))
    temp_bm['size'] = temp_bm['ticker_num'].map(size_by_ticker)
    # Split each size group into three B/M groups. The two qcut results are
    # concatenated and assigned once, instead of updating the column through
    # a chained `temp_bm['bm'].update(...)`, which does not write back to the
    # frame when pandas Copy-on-Write is active.
    bm_labels = pd.concat([
        pd.qcut(
            x=temp_bm.loc[temp_bm['size'] == size_label, year],
            q=q,
            labels=labels
        )
        for size_label in ('B', 'S')
    ])
    temp_bm['bm'] = bm_labels.astype(object)
    # Extract the ticker numbers of the six size/BM portfolios
    temp_bm['res'] = temp_bm['size'] + temp_bm['bm']
    portfolio_tickers = {
        code: temp_bm.loc[temp_bm['res'] == code, 'ticker_num'].tolist()
        for code in portfolio_codes
    }
    # Portfolio membership does not change within the year, so the masks
    # are built once here rather than once per month.
    membership = {
        code: monthly_merged_df['ticker_num'].isin(tickers)
        for code, tickers in portfolio_tickers.items()
    }
    # Portfolios formed on `year` are held during the following year
    next_year = str(1 + int(year))
    next_year_months = all_months[all_months.str[2:4] == next_year]
    for month in next_year_months:
        month_condition = (monthly_merged_df['date'] == month)
        # Value-weighted return of each of the six portfolios
        vw_return = {
            code: _value_weighted_return(
                monthly_merged_df.loc[month_condition & mask]
            )
            for code, mask in membership.items()
        }
        # Calculate SMB and HML. Then, add them to lists
        smb = (
            ((vw_return['SH'] + vw_return['SM'] + vw_return['SL']) / 3)
            - ((vw_return['BH'] + vw_return['BM'] + vw_return['BL']) / 3)
        )
        smb_list.append(smb)
        hml = (
            ((vw_return['SH'] + vw_return['BH']) / 2)
            - ((vw_return['SL'] + vw_return['BL']) / 2)
        )
        hml_list.append(hml)

In [None]:
# Put SMB and HML side by side (one row per month, columns 0 and 1) and
# write the table to Excel.
ff_df = pd.DataFrame([smb_list, hml_list]).T
ff_df.to_excel('smb_hml_for_ps.xlsx', index=False)

In [None]:
# Calculating MKT
# Calculate index monthly return
index_path = r'E:\Thesis\New Sampling\TEDPIX\شاخص كل6.xls'
index_df = pd.read_excel(
    index_path,
    usecols=[1, 3],
    names=['date', 'close'],
    dtype={'date': str}
)
index_df.dropna(inplace=True)
# This cell rebinds index_df. The liquidity section further down merges
# index_df into the daily data and reads its 'm_return' column, so the
# row-to-row index return has to be present here as well; without it a
# top-to-bottom run fails there with a KeyError.
index_df['m_return'] = index_df['close'].pct_change()
# Last close of every month (YYYYMM prefix) and the month-over-month return
month_labels = index_df['date'].str[:6]
index_monthly_df = (
    index_df.groupby(month_labels)[['close']].last().reset_index()
)
index_monthly_df['m_return'] = index_monthly_df['close'].pct_change()
# Read the monthly risk free rate
rf_path = r'C:\Users\behnood\Desktop\Thesis\TSETMC'\
    r'\Risk Free Rate\monthly_rf_ps.xlsx'
rf_df = pd.read_excel(
    rf_path,
    usecols=[0, 2],
    names=['date', 'rf_return'],
    dtype={'date': str}
)
# MKT = monthly index return minus the risk free rate; write it to excel
mkt_df = pd.merge(index_monthly_df, rf_df, on='date')
mkt_df['mkt'] = mkt_df['m_return'] - mkt_df['rf_return']
mkt_df.to_excel('mkt_for_PS.xlsx', index=False)

# Calculating LIQ

---

## Equation 1: Liquidity Calculation

<center>
    <img src="https://docs.google.com/uc?export=download&id=1DsiUSR-yoJ3TLLuisiKnU5dZRnYtlQmY" width="500" height="50"/>
</center>

In [None]:
# Join the daily ticker data with the index data on date, drop incomplete
# rows, and order the result by ticker and date.
merged_df = (
    daily_df
    .merge(index_df, on='date')
    .dropna()
    .reset_index(drop=True)
    .sort_values(['ticker_num', 'date'])
)

In [None]:
# Equation 1: for every ticker and every month from 1389 to 1397 with at
# least 15 rows, regress next-day excess return on the day's return and on
# signed volume, and keep the signed-volume coefficient.
liq_list = []
for file_number in range(1, 76):
    print(file_number)
    # Filter the ticker once and group its rows by month, instead of
    # rebuilding a full-length month mask for every ticker-month pair.
    ticker_df = merged_df.loc[merged_df['ticker_num'] == file_number]
    month_key = ticker_df['date'].str[:6]
    for month, month_df in ticker_df.groupby(month_key):
        if not ('138900' <= month <= '139800'):
            continue
        counted_days = month_df['date'].count()
        # Check for 15 days condition
        if counted_days >= 15:
            # Work on a copy so the new columns are not written to a slice
            # of merged_df.
            liq_reg_df = month_df.copy()
            liq_reg_df['r_diff'] = (
                liq_reg_df['i_return']
                - liq_reg_df['m_return']
            )
            liq_reg_df['r_diff (d+1)'] = liq_reg_df['r_diff'].shift(-1)
            liq_reg_df['sign(r_diff).vol'] = (
                liq_reg_df['d_vol']
                * np.sign(liq_reg_df['r_diff'])
            )
            # The last day of the month has no next-day value and is dropped
            liq_reg_df = liq_reg_df.dropna()
            # Estimate regression
            y = liq_reg_df['r_diff (d+1)']
            x = liq_reg_df[['i_return', 'sign(r_diff).vol']]
            model = LinearRegression()
            model.fit(x, y)
            # Coefficient of the signed-volume regressor
            liq = model.coef_[1]
            liq_list.append([month, file_number, liq])

In [None]:
# One row per (month, ticker) pair with its estimated liquidity coefficient
liq_columns = ['date', 'ticker_num', 'liq']
liq_df = pd.DataFrame(liq_list, columns=liq_columns)

## Equation 2: Aggregate Liquidity Calculation

<center>
    <img src="https://docs.google.com/uc?export=download&id=1vQWTanYX_Ezn520I9TlLB7AsLZ6GlCf-" width="300" height="100"/>
</center>

## Equation 3: Innovations to Aggregate Liquidity Calculation

<center>
    <img src="https://docs.google.com/uc?export=download&id=1iFL-bC2e1G3lKjt2Q2ehOLyhqjk6PtGm" width="300" height="100">
</center>

<center>
    <img src="https://docs.google.com/uc?export=download&id=1m3irjSVqV0XgNw5R1t_aQbfyvoSkHdm7" width="300" height="100"/>
</center>

In [None]:
# Every month that has at least one liquidity estimate, in ascending order
months = sorted(liq_df['date'].unique().tolist())

In [None]:
# Equation 3: month-by-month innovation in aggregate liquidity.
eqq_3_list = []
# Built once; it does not change inside the loop
all_months_list = all_months.tolist()
for idx, month in enumerate(months):
    month_position = all_months_list.index(month)
    if month_position == 0:
        # Index -1 would silently wrap around to the last month of the list
        raise ValueError(
            f'No month precedes {month} in all_months; the market cap at '
            f'the end of month t-1 cannot be looked up'
        )
    previous_month = all_months_list[month_position - 1]
    # A list of eligible tickers in current month
    cur_month_condition = (liq_df['date'] == month)
    current_elig = liq_df.loc[cur_month_condition, 'ticker_num'].tolist()
    month_condition = (bv_df['date'] == previous_month)
    if idx == 0:
        # M is the market capitalization of eligible stocks
        # at the end of month t-1
        # Calculate first M
        ticker_condition = bv_df['ticker_num'].isin(current_elig)
        first_m = bv_df.loc[
            month_condition & ticker_condition, 'market_cap'
        ].sum()
        continue
    # A list of eligible tickers in previous month
    prev_month_condition = (liq_df['date'] == previous_month)
    previous_elig = liq_df.loc[prev_month_condition, 'ticker_num'].tolist()
    # Calculate M
    ticker_condition = bv_df['ticker_num'].isin(previous_elig)
    m = bv_df.loc[month_condition & ticker_condition, 'market_cap'].sum()
    # Tickers that are eligible in both current and previous months,
    # and their number N
    both_elig = list(set(current_elig).intersection(previous_elig))
    n = len(both_elig)
    both_elig_condition = liq_df['ticker_num'].isin(both_elig)
    cur_elig_condition = liq_df['ticker_num'].isin(current_elig)
    # Sum of previous-month liq over the tickers eligible in the current month
    prev_agg_liq = liq_df.loc[
        cur_elig_condition & prev_month_condition, 'liq'
    ].sum()
    # Calculate delta agg liq (innov)
    scaled_cur_agg_liq = liq_df.loc[
        both_elig_condition & cur_month_condition, 'liq'
    ].sum()
    scaled_prev_agg_liq = liq_df.loc[
        both_elig_condition & prev_month_condition, 'liq'
    ].sum()
    if n == 0:
        # No ticker is eligible in both months, so the average change is
        # undefined. Record NaN instead of dividing by zero; the dropna()
        # applied when inv_df is built removes this month and the month
        # that would use it as a lag.
        innov = np.nan
    else:
        innov = (
            (m / first_m)
            * (1 / n)
            * (scaled_cur_agg_liq - scaled_prev_agg_liq)
        )
    eqq_3_list.append([month, m, n, prev_agg_liq, innov])

In [None]:
# Table of monthly innovations plus one-month lags of m and innov; rows
# with a missing value (including the first, which has no lag) are removed.
inv_columns = ['date', 'm', 'n', 'prev_agg_liq', 'innov']
inv_df = pd.DataFrame(eqq_3_list, columns=inv_columns)
lagged_columns = {
    'm (t-1)': inv_df['m'].shift(),
    'innov (t-1)': inv_df['innov'].shift(),
}
inv_df = (
    inv_df
    .assign(**lagged_columns)
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Load the factor table used in the liquidity-beta regressions; dates are
# kept as strings. Note that this rebinds ff_df.
ff_path = (
    r'C:\Users\behnood\Desktop\Thesis\TSETMC'
    r'\Factor Model\Pastor Stambaugh\FF_for_PS.xlsx'
)
ff_df = pd.read_excel(ff_path, dtype={'date': str})

In [None]:
# Excess return of every ticker-month: monthly return minus the risk free
# rate of the same month (inner join on date).
new_monthly_df = monthly_df.merge(rf_df, on='date')
new_monthly_df['excess_return'] = new_monthly_df['monthly_return'].sub(
    new_monthly_df['rf_return']
)

In [None]:
# For each target year, estimate every ticker's liquidity beta over the
# five preceding years.
all_years = [
    '1389', '1390', '1391', '1392', '1393',
    '1394', '1395', '1396', '1397', '1398'
]
target_years = ['1394', '1395', '1396', '1397', '1398']
liq_betas = []
for year in target_years:
    # The window runs from the start of the fifth year before `year` up to
    # (not including) the first month of `year`.
    five_years_ago = all_years[:all_years.index(year)][-5]
    end_point = f'{year}00'
    start_point = f'{five_years_ago}00'
    # .copy() so the extra regressor is not written to a slice of inv_df
    selected_period = inv_df.loc[
        (inv_df['date'] >= start_point)
        & (inv_df['date'] <= end_point)
    ].copy()
    selected_period['(mt-1/mt).innov(t-1)'] = (
        selected_period['m (t-1)']
        / selected_period['m']
        * selected_period['innov (t-1)']
    )
    # Regress the innovation on its lag terms; the scaled residuals are the
    # liquidity series L used below.
    y = selected_period['innov']
    x = selected_period[['innov (t-1)', '(mt-1/mt).innov(t-1)']]
    model = LinearRegression()
    model.fit(x, y)
    residuals = y - model.predict(x)
    gammas = residuals / 100
    selected_ff = ff_df.loc[
        (ff_df['date'] >= start_point)
        & (ff_df['date'] <= end_point)
    ].copy()
    # Attach L by month. Inserting the `gammas` Series directly would align
    # it on row index labels, and inv_df and ff_df are unrelated frames whose
    # labels need not refer to the same months. A month with no innovation
    # gets NaN here, which LinearRegression.fit then rejects.
    gamma_by_month = dict(zip(selected_period['date'], gammas))
    selected_ff.insert(1, 'L', selected_ff['date'].map(gamma_by_month))
    full_five_years = selected_ff.shape[0]
    window_dates = selected_ff['date'].tolist()
    first_month, last_month = window_dates[0], window_dates[-1]
    for ticker_number in range(1, 76):
        ticker_returns = new_monthly_df.loc[
            (new_monthly_df['ticker_num'] == ticker_number)
            & (new_monthly_df['date'] >= first_month)
            & (new_monthly_df['date'] <= last_month),
            ['date', 'excess_return']
        ]
        # Only tickers with a return in every month of the window are used
        if len(ticker_returns) != full_five_years:
            continue
        # Join on date rather than pasting the returns in by position, so a
        # ticker with the right number of months but different dates is not
        # matched against the wrong factor rows.
        temp_selected_ff = selected_ff.merge(ticker_returns, on='date')
        if len(temp_selected_ff) != full_five_years:
            continue
        y = temp_selected_ff['excess_return']
        x = temp_selected_ff[['L', 'mkt', 'smb', 'hml']]
        model = LinearRegression()
        model.fit(x, y)
        # Coefficient on L is the ticker's liquidity beta
        ticker_liq_beta = model.coef_[0]
        liq_betas.append([year, ticker_number, ticker_liq_beta])

In [None]:
# One row per (year, ticker) pair with its estimated liquidity beta
beta_columns = ['year', 'ticker_num', 'beta']
beta_df = pd.DataFrame(liq_betas, columns=beta_columns)

## Equation 4: Tradable Liquidity Risk Factor

<center>
    <img src="https://docs.google.com/uc?export=download&id=1dPPrNJnL9D_VMAWyi2c1P0lOwvyuUZWO" width="500" height="50"/>
</center>

In [None]:
# Equation 4: monthly spread between the value-weighted returns of the two
# extreme liquidity-beta decile portfolios.
final_list = []
for year in beta_df['year'].unique().tolist():
    # .copy() so the decile column is not written to a slice of beta_df
    selected_year = beta_df.loc[beta_df['year'] == year].copy()
    # NOTE(review): qcut hands out labels from the lowest bin upwards, so
    # with the reversed label list decile 10 holds the LOWEST betas and
    # decile 1 the highest. `liq` below is therefore low-beta minus
    # high-beta -- confirm this is the intended sign of the factor.
    selected_year['decile'] = pd.qcut(
        selected_year['beta'],
        q=10,
        labels=list(range(1, 11)[::-1])
    )
    decile_1 = selected_year.loc[
        selected_year['decile'] == 1, 'ticker_num'
    ].tolist()
    decile_10 = selected_year.loc[
        selected_year['decile'] == 10, 'ticker_num'
    ].tolist()
    twelve_months = index_monthly_df.loc[
        index_monthly_df['date'].str.startswith(year), 'date'
    ].tolist()
    # Decile membership is fixed for the year; build the masks once instead
    # of once per month.
    decile_1_condition = monthly_merged_df['ticker_num'].isin(decile_1)
    decile_10_condition = monthly_merged_df['ticker_num'].isin(decile_10)
    for month in twelve_months:
        month_condition = (monthly_merged_df['date'] == month)
        decile_1_portfolio = monthly_merged_df.loc[
            decile_1_condition & month_condition
        ]
        decile_10_portfolio = monthly_merged_df.loc[
            decile_10_condition & month_condition
        ]
        # Calculate value-weighted returns
        decile_1_return = np.average(
            decile_1_portfolio['monthly_return'],
            weights=decile_1_portfolio['market_cap']
        )
        decile_10_return = np.average(
            decile_10_portfolio['monthly_return'],
            weights=decile_10_portfolio['market_cap']
        )
        liq = decile_10_return - decile_1_return
        final_list.append(liq)

In [None]:
# Save the calculated LIQ factor (one value per month) to Excel
ps_df = pd.DataFrame(data=final_list, columns=['LIQ'])
ps_df.to_excel('PS_factor_2.xlsx', index=False)