In [7]:
import pandas as pd
import numpy as np
import sqlalchemy
from urllib.parse import urlencode
import requests
from requests import Response
import datetime as dt
from datetime import time, date, timedelta
from dateutil.relativedelta import relativedelta
import itertools
from math import ceil
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [8]:
list_stock = ['AAA','AMD','ANV','APG','ASM','AST','BCG','BID','BMP','BWE','CCL','CII','CMG','CRE','CSV','CTD','CTF','CTG','CTI','CVT','D2D','DAG','DBC','DBD','DCL','DCM','DGW','DHC','DHG','DIG','DLG','DPG','DPM','DRC','DRH','DXG','EIB','FCN','FIR','FIT','FLC','FPT','FRT','FTS','GAS','GEG','GEX','GIL','GMD','HAG','HAI','HAX','HBC','HCM','HDB','HDC','HDG','HHS','HII','HNG','HPG','HPX','HQC','HSG','HT1','HVH','HVN','IBC','IDI','IJC','IMP','ITA','JVC','KBC','KDH','KOS','KSB','LCG','LDG','LHG','MBB','MSH','MSN','MWG','NCT','NKG','NLG','NT2','NTL','NVL','OGC','PC1','PDR','PHC','PHR','PLX','PME','PNJ','POW','PPC','PTB','PVD','PVT','REE','ROS','SAB','SAM','SBT','SCR','SHI','SKG','SMB','SSI','STB','STK','SZC','SZL','TCB','TCH','TCM','TDH','TDM','TLG','TNA','TNI','TPB','TTB','TVB','VCB','VCI','VGC','VHC','VHM','VIC','VJC','VND','VNG','VNM','VPB','VPG','VPI','VRE','VSC'
]

In [9]:
base_url = 'http://172.31.240.7:3000/api/v1'
login_url = base_url + '/login'
query_url = base_url + '/queryFinancialInfo'


def get_rest(url, body, headers=None, print_log=False):
    if print_log:
        print("GET REQUEST - url={}, parameters={}".format(url, body))

    response: Response = requests.get(url, params=body, headers=headers)
    if response.status_code != 200:
        print(response.text)
        content = None
    else:
        try:
            content = response.json()
        except ValueError:
            print(response.text)
            content = None

    if print_log:
        print("{} - GET RESPONSE - url={}, data={}".format(response.status_code, url, content))

    return content


def post_rest(url, body, headers, print_log=False, encode_require=False):
    if print_log:
        print("POST REQUEST - url={}, body={}".format(url, body))
    if encode_require:
        response: Response = requests.post(url, data=urlencode(body), headers=headers)
    else:
        response: Response = requests.post(url, json=body, headers=headers)
    if response.status_code != 200:
        print(response.text)
        content = None
    else:
        try:
            content = response.json()
        except ValueError:
            print(response.text)
            content = None
    if print_log:
        print("{} - POST RESPONSE - url={}, data={}".format(response.status_code, url, content))

    return content


# Login to get acccess token
access_token = post_rest(url=login_url, body={
    "grant_type": "password_tradex",
    "client_id": "tradex-admin",
    "client_secret": "tradex-admin",
    "username": "vinh.do@techx.vn",
    "password": "123456",
}, headers={
    "Content-Type": "application/x-www-form-urlencoded"
}, encode_require=True).get('accessToken')


def query_financial_data(list_stock=[], year='', quarter='', type=''): # year and quarter is string
    query_content = {"code": list_stock,
                     "year": year,
                     "quarter": quarter,
                     "type": type
                     }
    data = get_rest(query_url, body=query_content, headers={
        'Authorization': 'jwt {}'.format(access_token),
    })
    data = pd.DataFrame(data)
    try:
        data = pd.melt(data, id_vars = ['code', 'quarter', 'year'], value_vars=[type])
        return data
    except:
        return data
def fin_data(factor, peers):
    # these keywords are for type of financial data we need
    query_keyword = factor # query_keyword = ['net profit after tax']
    result1=pd.DataFrame() 
    for m in query_keyword:
        print(m)
        result = pd.DataFrame()
        for j in range (2018, 2022):
            for i in range(1,5):
                example = query_financial_data(peers, year = str(j), quarter = str(i),
                               type=m)
                result = pd.concat([result,example])
        result = pd.DataFrame(result)
        print('Done: Keyword')
        result = result.rename(columns={'value':m})
        result = result.drop(columns='variable')
        result['id'] = result['code']+result['year'] + result['quarter']
        if result1.empty:
            result1  = result
        else:
            result1 = pd.merge(result1, result, on = ['id','quarter','year','code'], how = 'left')
        print('Financial Data: Done')
    result1 = result1.sort_values(by=['code','year','quarter'])
    result1 = result1.reset_index(drop=True)
    return result1

In [10]:
def choose_stock(date, backtest):
    user = 'admin'
    pwd = 'mB17VfhA9gBaWXFaaYSFda2La4ULD12DaZTapt'
    host = 'vinance-prod.coo1pelwmlwz.ap-southeast-1.rds.amazonaws.com'
    port = '3306'
    db = 'vinance'
    db_engine = sqlalchemy.engine.create_engine(
        'mysql://{0}:{1}@{2}:{3}/{4}'.format(user, pwd, host, port, db))
    cursor = db_engine.connect()
    query_string = "SELECT id, code, date, close, oi FROM price where code in {} and date > '2018-10-31' "
    price = pd.read_sql_query(query_string.format(tuple(list_stock)), cursor)
    price = price.sort_values(by=['code', 'date'])
    print('Price Query: Done')
    cursor.close()

    price.date=price.date.apply(lambda x: str(x))
    price=price.loc[price['date']!='2018-01-24']
    price=price.loc[price['date']!='2018-01-23']
    price=price.fillna(method='ffill')
    price_fil = price.dropna()
    price_fil['date'] = pd.to_datetime(price_fil['date'])
    price_fil['query'] = (price_fil['date'].dt.date - relativedelta(months=4)).apply(lambda x: str(x.year) + str(ceil(x.month/3))) 
    price_fil['id'] = price_fil['code'] + price_fil['query']
    price_fil['month']=price_fil['date'].dt.month
    price_fil.loc[(price_fil['code']==price_fil['code'].shift(1))&(price_fil['month']!=price_fil['month'].shift(1)),'month_fil'] = 1
    price_fil.loc[price_fil['date']==date,'month_fil'] = 1 #change date to yesterday
    price_fil = price_fil.dropna()
    price_fil = price_fil.drop(columns = ['month','month_fil'])
    
    data1 = fin_data(['net profit after tax'], list_stock)
    data1.loc[data1['code']==data1['code'].shift(3), 'npat_trailing'] = data1['net profit after tax'].rolling(4).sum()
    data = pd.merge(price_fil, data1, on=['id','code'], how='left')
    data.loc[data['code'] == data['code'].shift(1),'growth'] = (data['npat_trailing']- data['npat_trailing'].shift(3))/abs(data['npat_trailing'].shift(3))
    data.loc[data['code'] == data['code'].shift(-1),'return'] = data['close'].shift(-1)/data['close'] - 1

    df_choose = data.copy(deep = True)
    df_choose.loc[(df_choose['growth'] - df_choose['growth'].shift(3) > 0.2) & (df_choose['code'] == df_choose['code'].shift(3)) ,'choose' ] = 1
    df_choose = df_choose[df_choose['choose']==1]
    df_choose['pe'] = (df_choose['npat_trailing']/df_choose['oi'])/df_choose['close']
    df_choose['pe_rank'] = df_choose.groupby(['date'])['pe'].rank(ascending=False, na_option='bottom')
    df_choose.loc[df_choose['pe_rank']<9, 'choose_pe'] = 1
    choose =df_choose[(df_choose['choose_pe']==1)]
    if backtest:
        choose = pd.pivot_table(choose, values = 'return', index = ['date'], columns = 'code')
        choose = choose.reset_index()
        day_return = choose.copy(deep=True)
        day_return['count'] = day_return.iloc[:,1:len(choose.columns.to_list())].count(axis = 1) #exclude date column
        day_return.loc[day_return['count']<5,'profit'] = day_return.iloc[:,1:len(choose.columns.to_list())].mean(axis = 1,skipna=True)*(day_return['count']/5)
        day_return.loc[day_return['count']==0,'profit'] = 0
        day_return.loc[day_return['count']>=5,'profit'] = day_return.iloc[:,1:len(choose.columns.to_list())].mean(axis = 1,skipna=True)
        day_return['cumulative'] = (day_return['profit'] +1).cumprod()
        profit = day_return['cumulative'][:len(day_return['cumulative'])-1].tolist()
        # ind = (choose.set_index('date').mean(axis = 1)+1).cumprod()
        import plotly.graph_objects as go
        fig = go.Figure()

        fig.add_trace(go.Scatter(x=choose['date'].tolist(), y=profit,
                                         mode='lines',
                                         name='cumulative'))
        # fig.add_trace(go.Scatter(x=choose['date'].tolist(), y=ind.tolist(),
        #                                  mode='lines',
        # #                                  name='benchmark'))
        mean_trade = day_return['count'][0:len(day_return)-1].mean()
        print("Fee included, ", "holding stocks/period: ", mean_trade,', cumulative profit: ', profit[-1])
        fig.show()
    else:
        choose = pd.pivot_table(choose, values = 'choose_pe', index = ['date'], columns = 'code')
    return choose

In [11]:
choose = choose_stock('2021-08-02', backtest=False)
print('======== Buy these stocks =========')
choose.iloc[-1,:].dropna()

Price Query: Done
net profit after tax
Done: Keyword
Financial Data: Done


code
DPM    1.0
LHG    1.0
NKG    1.0
PHC    1.0
SCR    1.0
TDH    1.0
TNA    1.0
VHM    1.0
Name: 2021-08-02 00:00:00, dtype: float64

In [12]:
choose = choose.reset_index()
day_return = choose.copy(deep=True)
day_return['count'] = day_return.iloc[:,1:len(choose.columns.to_list())].count(axis = 1) #exclude date column
day_return.loc[day_return['count']<5,'profit'] = day_return.iloc[:,1:len(choose.columns.to_list())].mean(axis = 1,skipna=True)*(day_return['count']/5)
day_return.loc[day_return['count']==0,'profit'] = 0
day_return.loc[day_return['count']>=5,'profit'] = day_return.iloc[:,1:len(choose.columns.to_list())].mean(axis = 1,skipna=True)
day_return['cumulative'] = (day_return['profit'] +1).cumprod()
profit = day_return['cumulative'][:len(day_return['cumulative'])-1].tolist()
# ind = (choose.set_index('date').mean(axis = 1)+1).cumprod()
import plotly.graph_objects as go
fig = go.Figure()

fig.add_trace(go.Scatter(x=choose['date'].tolist(), y=profit,
                                 mode='lines',
                                 name='cumulative'))
# fig.add_trace(go.Scatter(x=choose['date'].tolist(), y=ind.tolist(),
#                                  mode='lines',
# #                                  name='benchmark'))
mean_trade = day_return['count'][0:len(day_return)-1].mean()
print("Fee included, ", "holding stocks/period: ", mean_trade,', cumulative profit: ', profit[-1])
fig.show()        

Fee included,  holding stocks/period:  7.481481481481482 , cumulative profit:  48318382.08
