Universe: 142 most liquid HOSE stocks

Monthly Rebalancing

Input Data: 
* Financial Data: 'total equity','net profit after tax', 'total liabilities'
* Price Data: close, outstanding shares

Step 1: Calculate 3 factors: risk reward, leverage, book value per share to price
* Risk reward = 6-month return / standard deviation of the close price over the trailing 131 trading days
* Leverage = Total liabilities / Total equity
* Book value per share to price = (Total equity / Outstanding shares) / Price

Step 2: Standardize these 3 factors => Calculate z-score of these 3 factors
* z-score = (X-mean(X))/standard deviation(X)

Step 3: Calculate average z-score of these 3 factors

Step 4: Rank the stocks by their average z-score within each date

Step 5: Choose the 8 stocks with the highest average z-score on each date



In [None]:
import pandas as pd
import numpy as np
import sqlalchemy
from urllib.parse import urlencode
import requests
from requests import Response
import datetime as dt
from datetime import time, date, timedelta
from dateutil.relativedelta import relativedelta
import itertools
from math import ceil
import scipy.stats as stats

## Universe: 142 most liquid HOSE stocks

In [None]:
# Trading universe: HOSE tickers, kept in alphabetical order.
# NOTE(review): the notebook header says 142 stocks but this list holds 143
# entries - confirm which of the two is intended.
list_stock = [
    'AAA', 'AMD', 'ANV', 'APG', 'ASM', 'AST', 'BCG', 'BID', 'BMP', 'BWE',
    'CCL', 'CII', 'CMG', 'CRE', 'CSV', 'CTD', 'CTF', 'CTG', 'CTI', 'CVT',
    'D2D', 'DAG', 'DBC', 'DBD', 'DCL', 'DCM', 'DGW', 'DHC', 'DHG', 'DIG',
    'DLG', 'DPG', 'DPM', 'DRC', 'DRH', 'DXG', 'EIB', 'FCN', 'FIR', 'FIT',
    'FLC', 'FPT', 'FRT', 'FTS', 'GAS', 'GEG', 'GEX', 'GIL', 'GMD', 'HAG',
    'HAI', 'HAX', 'HBC', 'HCM', 'HDB', 'HDC', 'HDG', 'HHS', 'HII', 'HNG',
    'HPG', 'HPX', 'HQC', 'HSG', 'HT1', 'HVH', 'HVN', 'IBC', 'IDI', 'IJC',
    'IMP', 'ITA', 'JVC', 'KBC', 'KDH', 'KOS', 'KSB', 'LCG', 'LDG', 'LHG',
    'MBB', 'MSH', 'MSN', 'MWG', 'NCT', 'NKG', 'NLG', 'NT2', 'NTL', 'NVL',
    'OGC', 'PC1', 'PDR', 'PHC', 'PHR', 'PLX', 'PME', 'PNJ', 'POW', 'PPC',
    'PTB', 'PVD', 'PVT', 'REE', 'ROS', 'SAB', 'SAM', 'SBT', 'SCR', 'SHI',
    'SKG', 'SMB', 'SSI', 'STB', 'STK', 'SZC', 'SZL', 'TCB', 'TCH', 'TCM',
    'TDH', 'TDM', 'TLG', 'TNA', 'TNI', 'TPB', 'TTB', 'TVB', 'VCB', 'VCI',
    'VGC', 'VHC', 'VHM', 'VIC', 'VJC', 'VND', 'VNG', 'VNM', 'VPB', 'VPG',
    'VPI', 'VRE', 'VSC',
]

## Query financial data: 'total equity','net profit after tax', 'total liabilities'

In [None]:
# Base address of the REST API, plus the login and financial-query endpoints
# derived from it.
base_url = 'http://172.31.240.7:3000/api/v1'
login_url = f'{base_url}/login'
query_url = f'{base_url}/queryFinancialInfo'


def get_rest(url, body, headers=None, print_log=False):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Address to request.
        body: Mapping sent as the query-string parameters.
        headers: Optional HTTP headers.
        print_log: When true, print the request and the response summary.

    Returns:
        The parsed JSON payload, or ``None`` when the status code is not 200
        or the body is not valid JSON (the raw response text is printed in
        both of those cases).
    """
    if print_log:
        print("GET REQUEST - url={}, parameters={}".format(url, body))

    response: Response = requests.get(url, params=body, headers=headers)

    # Stays None unless the call succeeded AND the body parsed as JSON.
    content = None
    if response.status_code == 200:
        try:
            content = response.json()
        except ValueError:
            print(response.text)
    else:
        print(response.text)

    if print_log:
        print("{} - GET RESPONSE - url={}, data={}".format(response.status_code, url, content))

    return content


def post_rest(url, body, headers, print_log=False, encode_require=False):
    """Send a POST request and return the decoded JSON body.

    Args:
        url: Address to request.
        body: Mapping to send as the request payload.
        headers: HTTP headers for the request.
        print_log: When true, print the request and the response summary.
        encode_require: When true, send ``body`` URL-encoded as form data;
            otherwise send it as a JSON document.

    Returns:
        The parsed JSON payload, or ``None`` when the status code is not 200
        or the body is not valid JSON (the raw response text is printed in
        both of those cases).
    """
    if print_log:
        print("POST REQUEST - url={}, body={}".format(url, body))

    # Pick the payload keyword for requests.post: form data or JSON.
    payload = {"data": urlencode(body)} if encode_require else {"json": body}
    response: Response = requests.post(url, headers=headers, **payload)

    # Stays None unless the call succeeded AND the body parsed as JSON.
    content = None
    if response.status_code == 200:
        try:
            content = response.json()
        except ValueError:
            print(response.text)
    else:
        print(response.text)

    if print_log:
        print("{} - POST RESPONSE - url={}, data={}".format(response.status_code, url, content))

    return content


# Log in to get the access token used by the financial-data queries.
# SECURITY(review): the client secret, username and password are hard-coded in
# the notebook; move them to environment variables or a secret store and
# rotate them.
login_response = post_rest(url=login_url, body={
    "grant_type": "password_tradex",
    "client_id": "tradex-admin",
    "client_secret": "tradex-admin",
    "username": "vinh.do@techx.vn",
    "password": "123456",
}, headers={
    "Content-Type": "application/x-www-form-urlencoded"
}, encode_require=True)

# post_rest returns None for a non-200 status or a non-JSON body; fail with a
# clear message instead of an AttributeError on `None.get`.
if login_response is None:
    raise RuntimeError('Login failed: no valid JSON response from {}'.format(login_url))

access_token = login_response.get('accessToken')


def query_financial_data(list_stock=None, year='', quarter='', type=''):
    """Query one financial item for a list of stocks in a single quarter.

    Args:
        list_stock: Stock codes sent as the ``code`` query parameter.
            Defaults to an empty list.
        year: Reporting year, as a string.
        quarter: Reporting quarter, as a string.
        type: Name of the financial item to request; also the column that is
            melted into long format. (The name shadows the builtin but is
            kept because callers pass it by keyword.)

    Returns:
        A long-format DataFrame with ``code``, ``quarter``, ``year``,
        ``variable`` and ``value`` columns. When the response lacks the
        expected columns (for example because the request failed and the
        frame is empty), the un-melted DataFrame is returned instead.
    """
    # A None default avoids sharing one mutable list between calls.
    if list_stock is None:
        list_stock = []

    query_content = {
        "code": list_stock,
        "year": year,
        "quarter": quarter,
        "type": type,
    }
    data = get_rest(query_url, body=query_content, headers={
        'Authorization': 'jwt {}'.format(access_token),
    })
    data = pd.DataFrame(data)
    try:
        return pd.melt(data, id_vars=['code', 'quarter', 'year'], value_vars=[type])
    except (KeyError, ValueError):
        # Expected columns are missing: hand back whatever was received
        # rather than hiding unrelated errors behind a bare except.
        return data
def fin_data(factor, peers, years=range(2019, 2022)):
    """Download several financial items and merge them into one table.

    Args:
        factor: Iterable of financial item names to query, e.g.
            ``['net profit after tax']``.
        peers: List of stock codes to query.
        years: Years to download; all four quarters of each year are
            requested. Defaults to 2019-2021, the range that used to be
            hard-coded.

    Returns:
        A DataFrame with one row per code/year/quarter, one column per
        requested item, and an ``id`` column equal to code + year + quarter.
        Items after the first are left-joined onto the first item's rows.
    """
    merged = pd.DataFrame()
    for keyword in factor:
        print(keyword)
        # Collect every quarter first and concatenate once, instead of
        # re-copying the growing frame on each iteration.
        frames = [
            query_financial_data(peers, year=str(year), quarter=str(quarter), type=keyword)
            for year in years
            for quarter in range(1, 5)
        ]
        result = pd.concat(frames)
        print('Done: Keyword')
        result = result.rename(columns={'value': keyword})
        result = result.drop(columns='variable')
        result['id'] = result['code'] + result['year'] + result['quarter']
        if merged.empty:
            merged = result
        else:
            merged = pd.merge(merged, result, on=['id', 'quarter', 'year', 'code'], how='left')
        print('Financial Data: Done')
    return merged

In [None]:
# Download the three financial items for every ticker in `list_stock`.
data = fin_data(
    factor=['total equity', 'net profit after tax', 'total liabilities'],
    peers=list_stock,
)

## Query price data: Close, Outstanding shares, Date, Month

In [None]:
# Load daily close prices and outstanding shares (`oi`) for the universe.
# SECURITY(review): the database password is hard-coded in the notebook; move
# it to an environment variable or secret store and rotate it.
user = 'admin'
pwd = 'mB17VfhA9gBaWXFaaYSFda2La4ULD12DaZTapt'
host = 'vinance-prod.coo1pelwmlwz.ap-southeast-1.rds.amazonaws.com'
port = '3306'
db = 'vinance'
db_engine = sqlalchemy.engine.create_engine(
    'mysql://{0}:{1}@{2}:{3}/{4}'.format(user, pwd, host, port, db))

# NOTE(review): the IN (...) list is produced by formatting a Python tuple, so
# a one-element `list_stock` would render as "('AAA',)" and break the query.
query_string = "SELECT id, code, date, close, oi FROM price where code in {} and date > '2018-10-31' "

# The context manager releases the connection even when the query raises.
with db_engine.connect() as cursor:
    price = pd.read_sql_query(query_string.format(tuple(list_stock)), cursor)
price = price.sort_values(by=['code', 'date'])

price['date'] = price['date'].apply(str)
# Drop two specific dates. The query above already restricts rows to dates
# after 2018-10-31, so this filter is only a safeguard.
price = price.loc[~price['date'].isin(['2018-01-24', '2018-01-23'])]
# NOTE(review): the forward-fill runs over the whole frame (sorted by code,
# then date), so a gap at the start of one stock is filled from the last row
# of the previous stock - confirm this is intended.
price = price.ffill()
price['date'] = pd.to_datetime(price['date'])
price['month'] = price['date'].dt.month

## Calculate 3 factors: risk reward, leverage, book value per share to price

In [None]:
# Build `data3`: one row per stock and rebalancing date, holding the three raw
# factors (risk reward, leverage, book value per share to price).
pri_std = price.copy()

# Standard deviation of `close` over a rolling window of 131 rows.
# It is stored only on rows whose month differs from the previous row's month
# and whose code equals the code 131 rows earlier. Assuming the rows are
# sorted by code and then date, that keeps the window inside a single stock.
pri_std.loc[(pri_std['code']==pri_std['code'].shift(131))&(pri_std['month']!=pri_std['month'].shift(1)),'std'] = pri_std['close'].rolling(131).std()
# Also store the std on the hard-coded date 2021-07-30.
# NOTE(review): there is no same-code check on this line, so for a stock with
# fewer than 131 rows the window reaches into the previous stock's prices; the
# date also has to be edited by hand for every new run - confirm.
pri_std.loc[pri_std['date']=='2021-07-30','std'] = pri_std['close'].rolling(131).std()
# Keep only rows without any missing value, i.e. the rows that received a std
# above (rows with a NaN in any other column are dropped as well).
pri_std = pri_std.dropna()

# Return versus the close 6 rows earlier of the same stock. This is a 6-month
# return as long as the remaining rows are one per month.
# NOTE(review): the extra 2021-07-30 row is counted as one of those 6 rows.
pri_std.loc[pri_std['code']==pri_std['code'].shift(6),'pri_mmt'] = pri_std['close']/pri_std['close'].shift(6)-1
# Risk reward = return divided by the price standard deviation.
pri_std['reward']= pri_std['pri_mmt']/pri_std['std']

# Lagged financial data: `query` is the year followed by the quarter number of
# the date five months before the price date.
pri_std['query'] = (pri_std['date'].dt.date - relativedelta(months=5)).apply(lambda x: str(x.year) + str(ceil(x.month/3))) 
# Overwrites the `id` column loaded from the price table with code + year +
# quarter, the same layout as the `id` column of the financial data.
pri_std['id'] = pri_std['code'] + pri_std['query']
pri_std['date']=pri_std['date'].apply(lambda x:x.strftime("%Y-%m-%d"))
# Label of the form '<YYYY-MM-DD>_zscore', used later as a join key.
pri_std['index'] = pri_std['date'] + '_zscore'
# Left join: price rows without a matching financial record keep NaN values.
data3 = pd.merge(pri_std, data, on = ['id','code'], how = 'left')

# Leverage and book value per share to price.
data3['lever'] = data3['total liabilities']/data3['total equity']
# NOTE(review): the factor 1000 presumably converts `close` to the currency
# unit of `total equity` - confirm against the data source.
data3['bvpsp']=(data3['total equity']/data3['oi'])/(data3['close']*1000)
# Bare expression: shows the table as the notebook cell output.
data3

##  Standardize these 3 factors => Calculate z-score of these 3 factors

In [None]:
# Standardise each factor across stocks, separately for every date, and gather
# the results in the long table `tt_zscore` keyed by ('index', 'code').
factors = ['lever', 'bvpsp', 'reward']
tt_zscore = pd.DataFrame()
for factor in factors:
    # Wide layout: one row per stock, one column per date.
    df = pd.pivot(data3, columns='date', index='code', values=factor)

    # z-score of every date column (population std, ddof=0); each column is
    # renamed to '<date>_zscore'.
    zscore = pd.DataFrame(
        {
            col + '_zscore': (df[col] - df[col].mean()) / df[col].std(ddof=0)
            for col in df.columns
        },
        index=df.index,
    )

    # Back to long layout. The transpose + melt order (stock by stock, dates
    # ascending inside each stock) is kept because later cells rely on it.
    zscore = zscore.transpose().reset_index()
    code_columns = zscore.columns.tolist()[1:]
    zscore = pd.melt(zscore, id_vars=['index'], value_vars=code_columns, value_name=factor)

    if not tt_zscore.empty:
        tt_zscore = pd.merge(tt_zscore, zscore, on=['index', 'code'], how='left')
    else:
        tt_zscore = zscore

## Exclude financial institutions, then rank the stocks by average z-score within each date

In [None]:
# Average the three factor z-scores per row. skipna=False leaves the average
# NaN as soon as one factor is missing.
# NOTE(review): leverage enters the average with a positive sign, so a higher
# debt-to-equity ratio raises the score - confirm this is intended.
tt_zscore['avg'] = tt_zscore[['lever', 'bvpsp', 'reward']].mean(axis=1, skipna=False)
data4 = tt_zscore.copy()

# Financial institutions are excluded by blanking their score.
fin_stock = ['APG', 'BID', 'CTG', 'EIB', 'FTS', 'HCM', 'HDB', 'MBB', 'SSI', 'STB', 'TCB', 'TPB', 'VCB', 'VCI', 'VPB']
# isin() is vectorised; the previous lambda rebuilt set(fin_stock) on every row.
data4.loc[data4['code'].isin(fin_stock), 'avg'] = np.nan

# Rank within each date label: 1 = highest average, NaN scores go last.
data4['robvre_rank'] = data4.groupby(['index'])['avg'].rank(ascending=False, na_option='bottom')
# Worst rank per date; the string alias replaces the builtin `max`, whose use
# in transform() is deprecated in recent pandas.
data4['robvre_max'] = data4.groupby(['index'])['robvre_rank'].transform('max')

## Choose stocks

In [None]:
# Number of stocks to select on each date.
TOP_N = 8

# Rank 1 is the highest average z-score, so `<=` keeps the top TOP_N ranks
# (the previous `< 8` kept only ranks 1-7).
data4.loc[data4['robvre_rank'] <= TOP_N, 'choose'] = 1
data4 = pd.merge(data4, pri_std, on=['index', 'code'], how='left')

# Return from this row's close to the next row's close of the SAME stock.
# Shifting inside each code stops the last row of one stock from picking up
# the first close of the next stock in the table.
next_close = data4.groupby('code')['close'].shift(-1)
data4.loc[data4['choose'] == 1, 'return'] = next_close / data4['close'] - 1

selected = data4[data4['choose'] == 1]
# Returns of the selected stocks: one row per date, one column per code.
choose = pd.pivot_table(selected, values='return', index=['date'], columns='code')

# The current picks are read from the selection flag rather than from the
# return table: the most recent date has no following close yet, so its
# return is NaN. The 'index' labels start with an ISO date, so the largest
# label is the latest date.
latest_index = selected['index'].max()
latest_picks = selected.loc[selected['index'] == latest_index, ['code', 'robvre_rank']]
print('BUY THESE STOCKS!!!!!!')
latest_picks.sort_values('robvre_rank')