In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from urllib.parse import urlencode
import requests
from requests import Response
import plotly.graph_objects as go
import sqlalchemy

import datetime as dt

def get_rest(url, body={}, headers=None, print_log=True):
    if print_log:
        print("GET REQUEST - url={}, parameters={}".format(url, body))

    response: Response = requests.get(url, params=body, headers=headers)
    if response.status_code != 200:
        print(response.text)
        content = None
    else:
        try:
            content = response.json()
        except ValueError:
            print(response.text)
            content = None

    if print_log:
        print("{} - GET RESPONSE - url={}, data={}".format(response.status_code, url, content))

    return content

def post_rest(url, body, headers, print_log=True, encode_require=False):
    if print_log:
        print("POST REQUEST - url={}, body={}".format(url, body))
    if encode_require:
        response: Response = requests.post(url, data=urlencode(body), headers=headers)
    else:
        response: Response = requests.post(url, json=body, headers=headers)
    if response.status_code != 200:
        print(response.text)
        content = None
    else:
        try:
            content = response.json()
        except ValueError:
            print(response.text)
            content = None
    if print_log:
        print("{} - POST RESPONSE - url={}, data={}".format(response.status_code, url, content))

    return content

def get_stock_price(stock_arr: list, to_date: str, from_date='2010-01-01', pivot_type=True, price=True):
    user_v = 'admin'
    pwd_v = 'mB17VfhA9gBaWXFaaYSFda2La4ULD12DaZTapt'
    host_v = 'vinance-prod.coo1pelwmlwz.ap-southeast-1.rds.amazonaws.com'

    port_v = '3306'
    db_v = 'vinance'
    db_engine_v = sqlalchemy.engine.create_engine(
        'mysql://{0}:{1}@{2}:{3}/{4}?charset=utf8mb4'.format(user_v, pwd_v, host_v, port_v, db_v))
    cursor_v = db_engine_v.connect()
    query_price = 'SELECT date,code, close FROM price where code in {} and date >= \'{}\' and date<= \'{}\''.format(
        tuple(stock_arr), from_date, to_date)
    price_stock = pd.read_sql(query_price, con=cursor_v)
    price_stock.date = price_stock.date.apply(lambda x: str(x))
    price_stock = price_stock.loc[price_stock['date'] != '2018-01-24']
    price_stock = price_stock.loc[price_stock['date'] != '2018-01-23']
    price_stock = price_stock.pivot(index='date', columns='code', values='close').fillna(method='ffill')
    cursor_v.close()
    if pivot_type:
        return price_stock
    else:
        price_stock = pd.melt(price_stock.reset_index(), id_vars=['date'], value_vars=price_stock.columns.tolist()[:],
                              var_name='code', value_name='close')
        return price_stock.dropna()

def get_stock_oi(stock_arr: list, to_date: str, from_date='2010-01-01', pivot_type=True):
    user_v = 'admin'
    pwd_v = 'mB17VfhA9gBaWXFaaYSFda2La4ULD12DaZTapt'
    host_v = 'vinance-prod.coo1pelwmlwz.ap-southeast-1.rds.amazonaws.com'

    port_v = '3306'
    db_v = 'vinance'
    db_engine_v = sqlalchemy.engine.create_engine(
        'mysql://{0}:{1}@{2}:{3}/{4}?charset=utf8mb4'.format(user_v, pwd_v, host_v, port_v, db_v))
    cursor_v = db_engine_v.connect()
    query_price = 'SELECT date,code, oi FROM price where code in {} and date >= \'{}\' and date<= \'{}\''.format(
        tuple(stock_arr), from_date, to_date)
    price_stock = pd.read_sql(query_price, con=cursor_v)
    price_stock.date = price_stock.date.apply(lambda x: str(x))
    price_stock = price_stock.loc[price_stock['date'] != '2018-01-24']
    price_stock = price_stock.loc[price_stock['date'] != '2018-01-23']
    price_stock = price_stock.pivot(index='date', columns='code', values='oi').fillna(method='ffill')
    cursor_v.close()
    if pivot_type:
        return price_stock
    else:
        price_stock = pd.melt(price_stock.reset_index(), id_vars=['date'], value_vars=price_stock.columns.tolist()[:],
                              var_name='code', value_name='oi')
        return price_stock.dropna()


In [2]:

# base_url = 'http://172.31.255.143:3000/api/v1'
base_url = 'http://172.31.240.7:3000/api/v1'

# base_url = 'http://172.31.253.91:3000/api/v1'
login_url = base_url + '/login'
query_url = base_url + '/queryFinancialInfo'

# Login to get acccess token
access_token = post_rest(url=login_url, body={
    "grant_type": "password_tradex",
    "client_id": "tradex-admin",
    "client_secret": "tradex-admin",
    "username": "vinh.do@techx.vn",
    "password": "123456",
}, headers={
    "Content-Type": "application/x-www-form-urlencoded"
}, encode_require=True).get('accessToken')

def query_financial_data(list_stock=[], year='2021', quarter='2', type='',print_log=False):
    query_content = {"code": list_stock,
                     "year": year,
                     "quarter": quarter,
                     "type": type,
                     }
    data = get_rest(query_url, body=query_content, headers={
        'Authorization': 'jwt {}'.format(access_token),
    }, print_log=print_log)
    return pd.DataFrame(data).dropna()

def get_quarter(month:int):
    if month in [1,2,3]:
        return 1
    if month in [4,5,6]:
        return 2
    if month in [7,8,9]:
        return 3
    else:
        return 4
    
month_in_yr=range(1,13)

def get_lag(yr: int, mth: int, lag: int):
#     print(month_in_yr.index(mth)-lag)
    month_lag=month_in_yr[month_in_yr.index(mth)-lag]
    quarter = get_quarter(month=month_lag)
    if mth<=lag:
        year=yr-1
    else:
        year=yr
    return str(year)+str(quarter)

def get_growth_data(start_year: int, end_year: int, universe: list, type='net profit after tax' ):
    data = pd.DataFrame()
    for yr in range(start_year, end_year):
        for quar in range(1, 5):
            each_quar = query_financial_data(list_stock=universe, year=str(yr), quarter=str(quar),
                                             type=type)
            data = pd.concat([data, each_quar])
    data['yr_quar'] = data['year'].apply(lambda x: str(x)) + data['quarter'].apply(
        lambda x: str(x))

    data_value = data[['code', type, 'yr_quar']].pivot(index='yr_quar', columns='code',
                                                       values=type)
    #     print(data_value)
    data_sum4q = data_value.copy(deep=True)
    for col in data_sum4q.columns:
        data_sum4q[col] = data_sum4q[col].rolling(4).sum()
    data_sum4q = data_sum4q[data_sum4q > 0]
    data_growth = data_sum4q.copy(deep=True)
    for col in data_growth.columns:
        data_growth[col] = data_growth[col] / data_growth[col].shift(1) - 1
    # missing_code
    return data_growth

POST REQUEST - url=http://172.31.240.7:3000/api/v1/login, body={'grant_type': 'password_tradex', 'client_id': 'tradex-admin', 'client_secret': 'tradex-admin', 'username': 'vinh.do@techx.vn', 'password': '123456'}
200 - POST RESPONSE - url=http://172.31.240.7:3000/api/v1/login, data={'accessToken': 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJkbSI6InRyYWRleCIsImNvbklkIjpudWxsLCJjSWQiOjcsInNnSWRzIjpbMTEsMTZdLCJsbSI6MTUsInN1SWQiOm51bGwsInVJZCI6NjcsInJJZCI6MzI4NzcsInNjIjpudWxsLCJzdSI6bnVsbCwiZ3QiOiJwYXNzd29yZF90cmFkZXgiLCJzSWQiOm51bGwsImlhdCI6MTYyOTY5MTM0NiwiZXhwIjoxNjI5NzIxMzQ2fQ.A5hNwwwDWnYEbFjyAvSWaPlFrWe2WYM39hCBJqOXjMLpgOadYFB2HN5hbg4fhpf5UXPQotG3vR-rw9CbpzMkGs3p-SXufD6tNiiJSaKALjVF7FvX78UA1YmBMZKrkPxorSvKnx83cbk-tCT0NaD9cq7Y2v8U5RPC0yJhjdIYaaw', 'refreshToken': '635afd3a-62c7-4a75-8353-39928b0dbda1', 'userInfo': {'id': 67, 'username': 'vinh.do@techx.vn', 'displayName': 'duyvinh', 'createdAt': '20180914', 'adminRoleIds': [1, 2], 'avatar': 'https://s3-ap-southeast-1.amazonaws.com/tradex-vn

In [3]:
hose_stocks = pd.read_csv("code_grossProfit.csv", header=None)[0].tolist()
len(hose_stocks)

343

In [4]:
# TO TEST DIFFERENT DAYS OF REBALANCE, NOT JUST END OF MONTH PRICE
def get_delay_price(daily_data: pd.DataFrame, date_delay: int):

    daily_data=daily_data.set_index('date').shift(-date_delay).dropna(how='all', axis=0).reset_index()
    daily_data['yr_month']=daily_data['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
    monthly_data=daily_data.groupby(['yr_month']).agg(['last']).stack().reset_index().set_index('yr_month').drop(columns=['level_1','date'])
    return monthly_data

# This is the function for forward test and rebalance at the end of month

PARAMETER EXPLAINATION:
- strategy_name: name of factor to use
    Eg: strategy_list = ['net profit after tax', 'revenue', 'operating profit', 'gross profit',
                 'net cash flow from operating activities']
- start_date and end_date: duration of backtest
- pct_stock: % top stocks with lowest valuation
- lag: number of month lag for getting finance data. Eg: if it's Nov and lag = 5 --> get financial data of 2nd quarter this year
- universe: list stock for backtest
- chart: True --> draw cumulative chart, False --> not drawing cumulative chart
- growth_filter: True --> filter stocks by growth y-o-y or not
- filter_type: type of financial data using as filter


In [10]:
def fwd_test(strategy_name: str, start_date: str, end_date:str, pct_stock: float, lag: int, universe: list, chart = True, growth_filter=True, filter_type='net profit after tax'):
    
    #Process to get number of stocks, get the last value of the month
    oi = get_stock_oi(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
    oi['yr_month']=oi['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
    monthly_oi = oi.groupby('yr_month').agg(['last']).stack().reset_index().set_index('yr_month').drop(columns=['level_1','date'])
    
    #Process to get price data
    price = get_stock_price(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
    price['yr_month']=price['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
    monthly_p = price.groupby('yr_month').agg(['last']).stack().reset_index().set_index('yr_month').drop(columns=['level_1','date'])
    
    #Calculation of monthly return and market cap 
    monthly_ret=monthly_p/monthly_p.shift(1)-1
    market_cap=(monthly_oi*monthly_p).reset_index()
    market_cap=pd.melt(market_cap, id_vars=['yr_month'],value_vars=market_cap.columns.tolist()[1:],var_name='code',value_name='market_cap')
    market_cap['yr_quar']=market_cap['yr_month'].apply(lambda x: get_lag(yr=int(x[:4]), mth=int(x[-2:]), lag=lag))
    market_cap.to_csv('market_cap.csv')
    start_yr = int(start_date[:4]) - 1
    end_yr = int(end_date[:4]) + 1
    monthly_ret.to_csv("monthly_ret.csv")
    #Get financial data 
    financial_data=pd.DataFrame()
    for yr in range(start_yr,end_yr):
        for quar in range(1,5):
            each_quar = query_financial_data(list_stock=universe, year=yr, quarter=quar, type=strategy_name)
            financial_data=pd.concat([financial_data, each_quar])
    financial_data['yr_quar']=financial_data['year']+financial_data['quarter']
    financial_data.to_csv('data_origin.csv')
    #To calculate 4 quarter sum data, we make a dataframe which has code on columns and period on rows 
    data_4q = financial_data[['code', strategy_name, 'yr_quar']].pivot(index='yr_quar', columns='code',
                                                   values=strategy_name)
    #Sum of 4 quarters 
    data_4q = data_4q.rolling(4).sum()
    data_4q.to_csv('data_4q.csv')
    
    #switch back to financial_data format
    financial_data=pd.melt(data_4q.reset_index(),id_vars=['yr_quar'],value_vars=data_4q.columns.tolist()[:],var_name='code',value_name=strategy_name)
 
    #attach market cap data on financial data to value_df 
    value_df = financial_data[['code', 'yr_quar',strategy_name]].merge(market_cap, on =['yr_quar','code'], how='right')
    
    #calculating valuation like E/P by dividing with market cap
    value_df['value']=value_df[strategy_name]/value_df['market_cap']
    
    #ex of pivot: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pivot.html
    monthly_data=value_df[['code','value','yr_month']].pivot(index='yr_month', columns='code', values='value')
    monthly_data.to_csv('net_increase_monthly.csv')
    value_df.to_csv('value_df.csv')
    #ranking by value 
    monthly_rank=monthly_data.rank(axis=1, ascending=False, pct=True)
    monthly_rank.to_csv(strategy_name+'_rank.csv')

    signal=monthly_rank[monthly_rank<=pct_stock].dropna(axis=1, how='all')
    signal[signal>0]=1
    signal.to_csv('raw_signal.csv')
    
    #Growth filter
    if growth_filter:
        growth_data=get_growth_data(start_year=start_yr-1, end_year=end_yr, universe=universe, type=filter_type)
        growth_data= growth_data[growth_data>0]
        growth_data[growth_data>0]=1
        growth_data=growth_data.reset_index()
        growth_data=pd.melt(growth_data, id_vars=['yr_quar'], value_vars=growth_data.columns.tolist()[1:], var_name='code', value_name='growth')
    
        value_df=value_df.merge(growth_data,on=['code','yr_quar'], how='left')
        growth_signal = value_df[['code','yr_month','growth']].pivot(index='yr_month', columns='code', values='growth')
        signal=(signal*growth_signal).dropna(axis=1, how='all')

    #Calculation of return
    signal=signal.shift(1)
    mm_hold=signal.count(axis=1).max()
    profit_arr=(signal*monthly_ret[signal.columns.tolist()]).dropna(axis=1, how='all')
    print(signal)
    print(signal.iloc[-1,:].dropna())
    profit_arr['daily_ret']=profit_arr.mean(axis=1)

    profit_arr['profit']=(profit_arr['daily_ret']+1).cumprod()
    daily_profit=profit_arr['daily_ret'].tolist()
    cumulative=profit_arr['profit'].dropna().tolist()
    
    win_arr = [x for x in daily_profit if x > 0]
    loss_arr=[x for x in daily_profit if x < 0]
    win_rate = len(win_arr) / (len(loss_arr)+len(win_arr))
    dd_arr=[]
    for i in range(len(cumulative)):
        if i>0:
            dd_arr.append((max(cumulative[:i])- cumulative[i-1])/max(cumulative[:i]))
    max_dd=max(dd_arr)
    print('CUMULATIVE PROFIT: {}'.format(round(cumulative[-1],4)))
    print('MDD:{}'.format(max_dd))
    print("mm_holding:{}------winrate:{}-----profit/MDD:{}".format(mm_hold, round(win_rate,4), round(cumulative[-1]/max_dd,4)))
    profit_arr=profit_arr.reset_index()
    if chart:    
        # benchmark=(daily_ret.mean(axis=1)+ 1).cumprod().tolist()
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=profit_arr.yr_month.apply(lambda x: dt.datetime.strptime(x,'%Y%m')).tolist(), y=cumulative,
                                 mode='lines',
                                 name='cumulative'))
        # fig.add_trace(go.Scatter(x=profit_arr.index.tolist(), y=benchmark[99:],
        #                          mode='lines',
        #                          name='benchmark'))
        if growth_filter:
            fig.update_layout(title_text=strategy_name + "filter by" + filter_type)
        else:
            fig.update_layout(title_text=strategy_name + " no filter")
            
        fig.show()
    return profit_arr

In [11]:
a = fwd_test(strategy_name= 'cash', start_date= '2015-01-01', end_date='2021-08-23', pct_stock= 0.1, lag=4, universe= hose_stocks,chart=True, growth_filter=False)
#b = fwd_test(strategy_name= 'net increase in cash and cash equivalents', start_date= '2015-01-01', end_date='2021-08-05', pct_stock= 0.05, lag=4, universe= hose_stocks,chart=True, growth_filter=False)


code      AAA  AAM  ACC  ACL  ADS  ASM  ASP  ATG  BCE  BCG  ...  TVT  TYA  \
yr_month                                                    ...             
201501    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  ...  NaN  NaN   
201502    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  ...  NaN  NaN   
201503    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  ...  NaN  NaN   
201504    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  ...  NaN  NaN   
201505    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  ...  NaN  NaN   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
202104    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  1.0  ...  1.0  1.0   
202105    NaN  NaN  1.0  NaN  NaN  NaN  NaN  NaN  NaN  1.0  ...  1.0  1.0   
202106    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  1.0  ...  1.0  1.0   
202107    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  1.0  ...  1.0  1.0   
202108    NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  1.0  ...  1.0  1.0   

# This is the function for forward test and rebalance with options to delay date to rebalance

PARAMETER EXPLAINATION:
- strategy_name: name of factor to use
    Eg: strategy_list = ['net profit after tax', 'revenue', 'operating profit', 'gross profit',
                 'net cash flow from operating activities']
- start_date and end_date: duration of backtest
- pct_stock: % top stocks with lowest valuation
- lag: number of month lag for getting finance data. Eg: if it's Nov and lag = 5 --> get financial data of 2nd quarter this year
- universe: list stock for backtest
- chart: True --> draw cumulative chart, False --> not drawing cumulative chart
- delay: number of days delayed to rebalance compared with end of month date
- growth_filter: True --> filter stocks by growth y-o-y or not
- filter_type: type of financial data using as filter


In [34]:
def fwd_test_delay_buy(strategy_name: str, start_date: str, end_date:str, pct_stock: float, lag: int, universe: list, delay: int,chart = True, growth_filter=True, filter_type='net profit after tax'):
    oi = get_stock_oi(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
    
    monthly_oi = get_delay_price(daily_data=oi, date_delay=delay)  # GET OUTSTANDING SHARES AT THE END OF MONTH
        
    
    price = get_stock_price(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
    monthly_p = get_delay_price(daily_data=price, date_delay=delay) # GET PRICE AT THE END OF MONTH
    
    monthly_ret=monthly_p/monthly_p.shift(1)-1
    market_cap=(monthly_oi*monthly_p).reset_index()
    market_cap=pd.melt(market_cap, id_vars=['yr_month'],value_vars=market_cap.columns.tolist()[1:],var_name='code',value_name='market_cap')
    market_cap['yr_quar']=market_cap['yr_month'].apply(lambda x: get_lag(yr=int(x[:4]), mth=int(x[-2:]), lag=lag))
    
    start_yr = int(start_date[:4]) - 1
    end_yr = int(end_date[:4]) + 1
    financial_data=pd.DataFrame()
    for yr in range(start_yr,end_yr):
        for quar in range(1,5):
            each_quar = query_financial_data(list_stock=universe, year=yr, quarter=quar, type=strategy_name)
            financial_data=pd.concat([financial_data, each_quar])
    financial_data['yr_quar']=financial_data['year']+financial_data['quarter']
    
    value_df = financial_data[['code', 'yr_quar',strategy_name]].merge(market_cap, on =['yr_quar','code'], how='right')
    
    value_df['value']=value_df[strategy_name]/value_df['market_cap']
    
    monthly_data=value_df[['code','value','yr_month']].pivot(index='yr_month', columns='code', values='value')


    monthly_rank=monthly_data.rank(axis=1, ascending=False, pct=True)


    signal=monthly_rank[monthly_rank<=pct_stock].dropna(axis=1, how='all')
    signal[signal>0]=1
    
    if growth_filter:
        growth_data=get_growth_data(start_year=start_yr-1, end_year=end_yr, universe=universe, type=filter_type)
        growth_data= growth_data[growth_data>0]
        growth_data[growth_data>0]=1
        growth_data=growth_data.reset_index()
        growth_data=pd.melt(growth_data, id_vars=['yr_quar'], value_vars=growth_data.columns.tolist()[1:], var_name='code', value_name='growth')
        
        value_df=value_df.merge(growth_data,on=['code','yr_quar'], how='left')
        growth_signal = value_df[['code','yr_month','growth']].pivot(index='yr_month', columns='code', values='growth')
        signal=(signal*growth_signal).dropna(axis=1, how='all')
    
    signal=signal.shift(1)
    mm_hold=signal.count(axis=1).max()
    profit_arr=signal*monthly_ret[signal.columns.tolist()].dropna(axis=1, how='all')

    profit_arr['daily_ret']=profit_arr.mean(axis=1)

    profit_arr['profit']=(profit_arr['daily_ret']+1).cumprod().fillna(method='ffill')
    daily_profit=profit_arr['daily_ret'].tolist()
    cumulative=profit_arr['profit'].tolist()
    
    win_arr = [x for x in daily_profit if x > 0]
    loss_arr=[x for x in daily_profit if x < 0]
    win_rate = len(win_arr) / (len(loss_arr)+len(win_arr))
    dd_arr=[]
    for i in range(len(cumulative)):
        if i>0:
            dd_arr.append((max(cumulative[:i])- cumulative[i-1])/max(cumulative[:i]))
    max_dd=max(dd_arr)
    print('CUMULATIVE PROFIT: {}'.format(round(cumulative[-1],4)))
    print("mm_holding:{}------winrate:{}-----profit/MDD:{}".format(mm_hold, round(win_rate,4), round(cumulative[-1]/max_dd,4)))
#     profit_arr.to_csv('profit_valuation.csv')
#     profit_arr=profit_arr.reset_index()
    if chart:    
        # benchmark=(daily_ret.mean(axis=1)+ 1).cumprod().tolist()
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=profit_arr.reset_index().yr_month.apply(lambda x: dt.datetime.strptime(x,'%Y%m')).tolist(), y=cumulative,
                                 mode='lines',
                                 name='cumulative'))
        # fig.add_trace(go.Scatter(x=profit_arr.index.tolist(), y=benchmark[99:],
        #                          mode='lines',
        #                          name='benchmark'))
        if growth_filter:
            fig.update_layout(title_text=strategy_name + "filter by" + filter_type)
        else:
            fig.update_layout(title_text=strategy_name + " no filter")
            
        fig.show()
            
        fig.show()
    return profit_arr


In [43]:
a=fwd_test_delay_buy(strategy_name= 'gross profit', start_date= '2015-01-01', end_date='2021-07-31', pct_stock= 0.1, lag=4, universe= hose_stocks,delay=15, growth_filter=True)

CUMULATIVE PROFIT: 15.1451
mm_holding:22------winrate:0.7564-----profit/MDD:nan


# MOMENTUM SIMULATION


PARAMETER EXPLAINATION:
- strategy_name: name of factor to use
    Eg: strategy_list = ['net profit after tax', 'revenue', 'operating profit', 'gross profit',
                 'net cash flow from operating activities']
- start_date and end_date: duration of backtest
- delay_month: number of months for return calculation to decide momentum 
    Eg: delay_month = 6 --> calculate return of 6 months to get momentum
- pct_stock: % top stocks with strongest momentum
- lag: number of month lag for getting finance data. Eg: if it's Nov and lag = 5 --> get financial data of 2nd quarter this year
- universe: list stock for backtest
- chart: True --> draw cumulative chart, False --> not drawing cumulative chart
- delay: number of days delayed to rebalance compared with end of month date
- stock_hold: number of stocks holding


#    # MOMENTUM --> VALUATION

In [47]:
def fwd_test_moment_val(strategy_name: str, start_date: str, end_date:str,delay_month: int,pct_stock=0.1, lag=4, universe=hose_stocks, delay=0,chart = True,stock_hold=10):
    oi = get_stock_oi(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
#     oi['yr_month']=oi['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
    monthly_oi = get_delay_price(daily_data=oi, date_delay=delay)
        
    
    price = get_stock_price(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
#     price['yr_month']=price['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
    monthly_p = get_delay_price(daily_data=price, date_delay=delay)
    
    monthly_ret=monthly_p/monthly_p.shift(1)-1
    market_cap=(monthly_oi*monthly_p).reset_index()
    market_cap=pd.melt(market_cap, id_vars=['yr_month'],value_vars=market_cap.columns.tolist()[1:],var_name='code',value_name='market_cap')
    market_cap['yr_quar']=market_cap['yr_month'].apply(lambda x: get_lag(yr=int(x[:4]), mth=int(x[-2:]), lag=lag))
    
    start_yr = int(start_date[:4]) - 1
    end_yr = int(end_date[:4]) + 1
    financial_data=pd.DataFrame()
    for yr in range(start_yr,end_yr):
        for quar in range(1,5):
            each_quar = query_financial_data(list_stock=universe, year=yr, quarter=quar, type=strategy_name)
            financial_data=pd.concat([financial_data, each_quar])
    financial_data['yr_quar']=financial_data['year']+financial_data['quarter']
    
    value_df = financial_data[['code', 'yr_quar',strategy_name]].merge(market_cap, on =['yr_quar','code'], how='right')
    
    value_df['value']=value_df[strategy_name]/value_df['market_cap']
    
    monthly_data=value_df[['code','value','yr_month']].pivot(index='yr_month', columns='code', values='value')

    momentum_ret = (monthly_p/monthly_p.shift(delay_month)-1).shift(1)
#         momentum_ret = momentum_ret[signal.columns.tolist()]
#     momentum_ret = momentum_ret*signal
    momentum_ret = momentum_ret.rank(axis=1,ascending=False, pct=True)

#         print(momentum_ret.iloc[-1,:].dropna())
    momentum_ret = momentum_ret[momentum_ret<=pct_stock].dropna(axis=1, how='all')
    momentum_ret[momentum_ret>0]=1
    
    monthly_rank = (monthly_data[momentum_ret.columns.tolist()]*momentum_ret).rank(axis=1, ascending=False).shift(1)
    
    monthly_rank = monthly_rank[monthly_rank<=stock_hold].dropna(axis=1, how='all')
    monthly_rank[monthly_rank>0]=1
#     signal = (monthly_rank*momentum_ret).dropna(axis=1, how='all')

    profit_arr=(monthly_rank*monthly_ret[monthly_rank.columns.tolist()]).dropna(axis=1, how='all')


    profit_arr['monthly_ret']=profit_arr.mean(axis=1)

    profit_arr['profit']=(profit_arr['monthly_ret']+1).cumprod().fillna(method='ffill')
#     daily_profit=profit_arr['daily_ret'].tolist()
    cumulative=profit_arr['profit'].dropna().tolist()
    
    daily_profit=profit_arr['monthly_ret'].tolist()
#     cumulative=profit_arr['profit'].dropna().tolist()
    
    win_arr = [x for x in daily_profit if x > 0]
    loss_arr=[x for x in daily_profit if x < 0]
    win_rate = len(win_arr) / (len(loss_arr)+len(win_arr))
    dd_arr=[]
    
    for i in range(len(cumulative)):
        if i>0:
            dd_arr.append((max(cumulative[:i])- cumulative[i-1])/max(cumulative[:i]))
    max_dd=max(dd_arr)
    
    print('CUMULATIVE PROFIT: {}'.format(round(cumulative[-1],4)))
    print('winrate:{}-----profit/MDD:{}'.format(round(win_rate,4), round(cumulative[-1]/max_dd,4)))

    if chart:    

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=profit_arr.reset_index().yr_month.apply(lambda x: dt.datetime.strptime(x,'%Y%m')).tolist(), y=cumulative,
                                 mode='lines',
                                 name='cumulative'))
        fig.show()
    return profit_arr
# financial_data

In [48]:
a = fwd_test_moment_val(strategy_name='revenue',start_date='2015-01-01', end_date='2021-05-31', delay_month=6, delay=0, chart = True)

CUMULATIVE PROFIT: 16.7004
winrate:0.7246-----profit/MDD:83.4389


# VALUATION_4Q_MOMENTUM

In [49]:
strategy_name='revenue'

In [50]:
def fwd_test_moment_val_short_4q(monthly_p: pd.DataFrame,market_cap: pd.DataFrame,strategy_name: str, start_date: str, end_date:str,delay_month: int,pct_stock=0.1, lag=4, chart = True,stock_hold=10,universe=hose_stocks):
#     oi = get_stock_oi(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
# #     oi['yr_month']=oi['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
#     monthly_oi = get_delay_price(daily_data=oi, date_delay=delay)
        
    
#     price = get_stock_price(stock_arr=universe, from_date=start_date, to_date=end_date, pivot_type=True).reset_index()
# #     price['yr_month']=price['date'].apply(lambda x: x.split('-')[0]+x.split('-')[1])
#     monthly_p = get_delay_price(daily_data=price, date_delay=delay)
    
    monthly_ret=monthly_p/monthly_p.shift(1)-1
#     market_cap=(monthly_oi*monthly_p).reset_index()
#     market_cap=pd.melt(market_cap, id_vars=['yr_month'],value_vars=market_cap.columns.tolist()[1:],var_name='code',value_name='market_cap')
#     market_cap['yr_quar']=market_cap['yr_month'].apply(lambda x: get_lag(yr=int(x[:4]), mth=int(x[-2:]), lag=lag))
    
    start_yr = int(start_date[:4]) - 1
    end_yr = int(end_date[:4]) + 1
    financial_data=pd.DataFrame()
    
    for yr in range(start_yr,end_yr):
        for quar in range(1,5):
            each_quar = query_financial_data(list_stock=universe, year=yr, quarter=quar, type=strategy_name)
            financial_data=pd.concat([financial_data, each_quar])
    financial_data['yr_quar']=financial_data['year']+financial_data['quarter']
    data_4q = financial_data[['code', strategy_name, 'yr_quar']].pivot(index='yr_quar', columns='code',
                                                   values=strategy_name)
    data_4q = data_4q.rolling(4).sum()
    
    
    financial_data=pd.melt(data_4q.reset_index(),id_vars=['yr_quar'],value_vars=data_4q.columns.tolist()[:],var_name='code',value_name=strategy_name)
    value_df = financial_data[['code', 'yr_quar',strategy_name]].merge(market_cap, on =['yr_quar','code'], how='right')
    
    value_df['value']=value_df[strategy_name]/value_df['market_cap']
    
    monthly_data=value_df[['code','value','yr_month']].pivot(index='yr_month', columns='code', values='value')
    monthly_data.to_csv('monthly_data_{}.csv'.format(strategy_name))
    
    momentum_ret = (monthly_p/monthly_p.shift(delay_month)-1)#.shift(1)
#     print(momentum_ret.head(10))
#         momentum_ret = momentum_ret[signal.columns.tolist()]
#     momentum_ret = momentum_ret*signal
    momentum_ret = momentum_ret.rank(axis=1,ascending=False, pct=True)

#         print(momentum_ret.iloc[-1,:].dropna())
    momentum_ret = momentum_ret[momentum_ret<=pct_stock].dropna(axis=1, how='all')
    momentum_ret[momentum_ret>0]=1
    
    monthly_rank = (monthly_data[momentum_ret.columns.tolist()]*momentum_ret).rank(axis=1, ascending=False)
    
    monthly_rank = monthly_rank[monthly_rank<=stock_hold].dropna(axis=1, how='all').shift(1)
    
    monthly_rank[monthly_rank>0]=1
#     signal = (monthly_rank*momentum_ret).dropna(axis=1, how='all')

    profit_arr=(monthly_rank*monthly_ret[monthly_rank.columns.tolist()]).dropna(axis=1, how='all')

    profit_arr['monthly_ret']=profit_arr.mean(axis=1)

    profit_arr['profit']=(profit_arr['monthly_ret']+1).cumprod().fillna(method='ffill')
#     daily_profit=profit_arr['daily_ret'].tolist()
    cumulative=profit_arr['profit'].dropna().tolist()
    
    daily_profit=profit_arr['monthly_ret'].tolist()
#     cumulative=profit_arr['profit'].dropna().tolist()
    
    win_arr = [x for x in daily_profit if x > 0]
    loss_arr=[x for x in daily_profit if x < 0]
    win_rate = len(win_arr) / (len(loss_arr)+len(win_arr))
    dd_arr=[]
    
    for i in range(len(cumulative)):
        if i>0:
            dd_arr.append((max(cumulative[:i])- cumulative[i-1])/max(cumulative[:i]))
    
    max_dd=max(dd_arr)
    
    print('CUMULATIVE PROFIT: {}'.format(round(cumulative[-1],4)))
    print('winrate:{}-----MDD:{}'.format(round(win_rate,4), round(max_dd,4)))

    if chart:    
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=profit_arr.reset_index().yr_month.apply(lambda x: dt.datetime.strptime(x,'%Y%m')).tolist(), y=profit_arr['profit'],
                                 mode='lines',
                                 name='cumulative'))
        fig.show()
    return profit_arr

In [51]:
for hold in range(7,15):
    print('NUMBER OF HOLDING: {}'.format(hold))
    a=fwd_test_moment_val_short_4q(strategy_name='revenue',monthly_p=monthly_p,market_cap=market_cap, start_date='2014-05-01',end_date='2020-12-31',delay_month=7,pct_stock=0.2,stock_hold=hold, chart = True)

NUMBER OF HOLDING: 7


NameError: name 'monthly_p' is not defined