In [1]:
# Setting the basic parameters
T = 12 # Using 12 months to forecast the 13th month
I = 6 # Invetsment cycle
K = 6 # Optimal number of factors provided by the literature
N = 4000 # Using part of the valid stocks
S = 30 # Actual investment willingness
long_margin = 100
short_margin = 110
industry = ['能源', '石油', '发电'] # The key words of the industry selected (e.g. '能源', '石油', '发电'), [] if no industry is selected
method = 2 # Alpha extraction method used in the estimation, 1 for the first method provided by the literature, 2 for the second

financial_metrics_list = [
    "ROE",
    "ROA",
    "ROIC",
    "Gross profit to revenue",
    "Operating profit to revenue",
    "Operating cost to revenue",
    "EBITDA to revenue",
    "NI to revenue",
    "Net operating CF to revenue",
    "CapEx to depreciation",
    "Current asset ratio",
    "Asset liability ratio",
    "Current liability ratio",
    "Tangible asset ratio",
    "EPS basic",
    "EPS diluted",
    "Net asset per share",
    "Net operating CF per share",
    "Revenue per share",
    "Capital reserve per share",
    "Retained earnings per share",
    "FCF per share",
    "Equity multiplier"
]

In [2]:
import pandas as pd
import numpy as np
import datetime, calendar
from scipy.sparse.linalg import eigsh

In [3]:
# Loading data
panel = pd.read_csv('characteristics-2024-02-01.csv', index_col=[0,1], low_memory=False).apply(pd.to_numeric, errors='coerce')
panel.index = pd.MultiIndex.from_arrays([panel.index.get_level_values(0), pd.to_datetime(panel.index.get_level_values(1))])
panel

Unnamed: 0_level_0,Unnamed: 1_level_0,Alpha,Asset liability ratio,Beta,CapEx to depreciation,Capital reserve per share,Current asset ratio,Current liability ratio,EBITDA to revenue,EPS basic,EPS diluted,...,PS TTM,ROA,ROE,ROIC,Retained earnings per share,Return,Revenue per share,Sharpe,Tangible asset ratio,Volatility
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAMC.A,2016-03-31,-2.180497,,2.079191,,,,,,,,...,,,,,,-30.588235,,-0.463858,,54.318978
AAMC.A,2016-04-30,1.836316,,2.704243,,,,,,,,...,,,,,,41.949153,,0.241664,,120.662657
AAMC.A,2016-05-31,1.182486,,-0.143233,,,,,,,,...,,,,,,13.910448,,0.104237,,180.929240
AAMC.A,2016-06-30,-0.758921,,2.846175,,,,,,,,...,,,,,,-23.637317,,-0.119184,,99.311304
AAMC.A,2016-07-31,-1.060616,,3.023535,,,,,,,,...,,,,,,-11.736445,,-0.139862,,62.544578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI.O,2023-09-30,0.668884,61.3139,1.850362,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.473115,8.4929,14.5386,8.828606,0.74706,3.896104,4.0045,0.058251,0.7898,54.736060
ZYXI.O,2023-10-31,0.680420,61.3139,1.187454,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.620008,8.4929,14.5386,8.828606,0.74706,11.000000,4.0045,0.123905,0.7898,71.913219
ZYXI.O,2023-11-30,-0.386956,61.3139,1.700408,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.671089,8.4929,14.5386,8.828606,0.74706,3.153153,4.0045,0.127347,0.7898,39.986014
ZYXI.O,2023-12-31,0.466776,61.3139,2.020288,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.986699,8.4929,14.5386,8.828606,0.74706,18.886463,4.0045,0.327627,0.7898,43.388208


In [4]:
# Column names of the characteristics and time
characteristics = panel.columns.drop('Return')
# Convert date values to Timestamp
dates = panel.index.get_level_values(1).unique()
# Fill any N.A. with previous observations except return (considering the delisting circumstance)
panel_fill = panel.copy()
panel_fill[characteristics] = panel_fill[characteristics].groupby(level=0).transform(lambda group: group.ffill())
panel_fill

Unnamed: 0_level_0,Unnamed: 1_level_0,Alpha,Asset liability ratio,Beta,CapEx to depreciation,Capital reserve per share,Current asset ratio,Current liability ratio,EBITDA to revenue,EPS basic,EPS diluted,...,PS TTM,ROA,ROE,ROIC,Retained earnings per share,Return,Revenue per share,Sharpe,Tangible asset ratio,Volatility
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAMC.A,2016-03-31,-2.180497,,2.079191,,,,,,,,...,,,,,,-30.588235,,-0.463858,,54.318978
AAMC.A,2016-04-30,1.836316,,2.704243,,,,,,,,...,,,,,,41.949153,,0.241664,,120.662657
AAMC.A,2016-05-31,1.182486,,-0.143233,,,,,,,,...,,,,,,13.910448,,0.104237,,180.929240
AAMC.A,2016-06-30,-0.758921,,2.846175,,,,,,,,...,,,,,,-23.637317,,-0.119184,,99.311304
AAMC.A,2016-07-31,-1.060616,,3.023535,,,,,,,,...,,,,,,-11.736445,,-0.139862,,62.544578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI.O,2023-09-30,0.668884,61.3139,1.850362,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.473115,8.4929,14.5386,8.828606,0.74706,3.896104,4.0045,0.058251,0.7898,54.736060
ZYXI.O,2023-10-31,0.680420,61.3139,1.187454,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.620008,8.4929,14.5386,8.828606,0.74706,11.000000,4.0045,0.123905,0.7898,71.913219
ZYXI.O,2023-11-30,-0.386956,61.3139,1.700408,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.671089,8.4929,14.5386,8.828606,0.74706,3.153153,4.0045,0.127347,0.7898,39.986014
ZYXI.O,2023-12-31,0.466776,61.3139,2.020288,-0.31754,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.986699,8.4929,14.5386,8.828606,0.74706,18.886463,4.0045,0.327627,0.7898,43.388208


In [5]:
# Only keep the panel needed for current month and fill the NaNs linearly
period = panel_fill[panel_fill.index.get_level_values(1).isin(dates[-1-T:])]

# Data cleaning
na_tickers = period[period.isna().any(axis=1)].index.get_level_values(0).unique()
period = period.drop(na_tickers, axis=0, level=0)

# Selection stocks based on alpha
period['Alpha_Average'] = period['Alpha'].groupby('Ticker').transform('mean')
top_alpha_tickers = period.sort_values(by='Alpha_Average', ascending=False).index.get_level_values(0).unique()[:N]
period = period[period.index.get_level_values(0).isin(top_alpha_tickers)].drop('Alpha_Average', axis=1)
tickers = period.index.get_level_values(0).unique()

period

Unnamed: 0_level_0,Unnamed: 1_level_0,Alpha,Asset liability ratio,Beta,CapEx to depreciation,Capital reserve per share,Current asset ratio,Current liability ratio,EBITDA to revenue,EPS basic,EPS diluted,...,PS TTM,ROA,ROE,ROIC,Retained earnings per share,Return,Revenue per share,Sharpe,Tangible asset ratio,Volatility
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ACU.A,2023-01-31,0.224970,51.9214,0.572586,-1.648840,3.8008,64.2425,25.9255,4.546930,0.86,0.82,...,0.430501,3.9032,3.8400,2.418728,20.057558,8.453027,54.8198,0.179034,0.8237,34.516618
ACU.A,2023-02-28,0.281927,51.9214,0.541587,-1.648840,3.8008,64.2425,25.9255,4.546930,0.86,0.82,...,0.445459,3.9032,3.8400,2.418728,20.057558,3.474576,54.8198,0.113427,0.8237,29.882842
ACU.A,2023-03-31,-0.297117,49.1224,0.405839,-0.961591,3.9242,62.9868,31.0697,6.413892,0.28,0.28,...,0.415088,1.3740,1.2357,0.791980,20.153847,-5.242601,12.9277,-0.106403,0.8193,35.863369
ACU.A,2023-04-30,0.762540,49.1224,-0.171431,-0.961591,3.9242,62.9868,31.0697,6.413892,0.28,0.28,...,0.472839,1.3740,1.2357,0.791980,20.153847,13.913043,12.9277,0.216493,0.8193,53.391817
ACU.A,2023-05-31,-0.198533,49.1224,0.557062,-0.961591,3.9242,62.9868,31.0697,6.413892,0.28,0.28,...,0.450822,1.3740,1.2357,0.791980,20.153847,-4.656489,12.9277,-0.083921,0.8193,37.003020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI.O,2023-09-30,0.668884,61.3139,1.850362,-0.317540,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.473115,8.4929,14.5386,8.828606,0.747060,3.896104,4.0045,0.058251,0.7898,54.736060
ZYXI.O,2023-10-31,0.680420,61.3139,1.187454,-0.317540,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.620008,8.4929,14.5386,8.828606,0.747060,11.000000,4.0045,0.123905,0.7898,71.913219
ZYXI.O,2023-11-30,-0.386956,61.3139,1.700408,-0.317540,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.671089,8.4929,14.5386,8.828606,0.747060,3.153153,4.0045,0.127347,0.7898,39.986014
ZYXI.O,2023-12-31,0.466776,61.3139,2.020288,-0.317540,2.6458,67.9678,21.3717,9.749192,0.24,0.23,...,1.986699,8.4929,14.5386,8.828606,0.747060,18.886463,4.0045,0.327627,0.7898,43.388208


In [6]:
# Choose the companies in the a specific industry
if industry == []:
    selected_industry = tickers
else:
    industries = pd.read_excel('industry_companies.xlsx', index_col=0)
    selected_industry = [x for x in industries.columns if any(y for y in industry if y in str(industries[x][0])) and x in tickers]

In [7]:
# Rank transform characteristics to [0,1]
period[characteristics] = period.groupby(['Date']).transform(lambda x: (1/(len(x)+1))*(np.argsort(np.argsort(np.array(x), kind='mergesort'), kind='mergesort')+1))[characteristics]
period

Unnamed: 0_level_0,Unnamed: 1_level_0,Alpha,Asset liability ratio,Beta,CapEx to depreciation,Capital reserve per share,Current asset ratio,Current liability ratio,EBITDA to revenue,EPS basic,EPS diluted,...,PS TTM,ROA,ROE,ROIC,Retained earnings per share,Return,Revenue per share,Sharpe,Tangible asset ratio,Volatility
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ACU.A,2023-01-31,0.429709,0.449792,0.273546,0.232687,0.274584,0.729571,0.259695,0.401662,0.611842,0.609418,...,0.148199,0.555748,0.529778,0.529086,0.811288,8.453027,0.833102,0.391274,0.461565,0.360111
ACU.A,2023-02-28,0.706371,0.450139,0.259349,0.231648,0.274931,0.730609,0.260734,0.400970,0.612881,0.610457,...,0.170360,0.554363,0.529778,0.529778,0.811288,3.474576,0.832064,0.792936,0.461911,0.238573
ACU.A,2023-03-31,0.491343,0.406856,0.139889,0.414820,0.275970,0.722992,0.357687,0.455679,0.614958,0.614612,...,0.168283,0.568906,0.534626,0.536357,0.808864,-5.242601,0.742036,0.352839,0.457756,0.337604
ACU.A,2023-04-30,0.933518,0.406856,0.073753,0.414127,0.275623,0.722299,0.358726,0.457410,0.618421,0.618075,...,0.201524,0.567521,0.536011,0.539474,0.808864,13.913043,0.747576,0.868075,0.458449,0.687673
ACU.A,2023-05-31,0.450485,0.406856,0.353186,0.414820,0.275623,0.722992,0.358380,0.457410,0.617729,0.617382,...,0.198753,0.566482,0.534626,0.538435,0.808518,-4.656489,0.746884,0.444945,0.458102,0.368421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI.O,2023-09-30,0.910665,0.590028,0.871884,0.781856,0.188366,0.776662,0.197715,0.521468,0.532548,0.532895,...,0.517659,0.799861,0.808172,0.800554,0.545014,3.896104,0.348684,0.836565,0.428670,0.742729
ZYXI.O,2023-10-31,0.936981,0.588643,0.603878,0.783587,0.188019,0.777008,0.197715,0.521814,0.533241,0.533241,...,0.575831,0.799861,0.808172,0.799515,0.545014,11.000000,0.346953,0.905471,0.428670,0.799169
ZYXI.O,2023-11-30,0.387812,0.588643,0.615651,0.782548,0.187673,0.776316,0.196676,0.521468,0.531163,0.530817,...,0.560596,0.799861,0.809903,0.801247,0.545014,3.153153,0.346953,0.474723,0.428670,0.397853
ZYXI.O,2023-12-31,0.710180,0.588296,0.740997,0.780817,0.187673,0.776316,0.195637,0.521468,0.530817,0.530471,...,0.587950,0.799861,0.810942,0.802632,0.545014,18.886463,0.346953,0.770429,0.428670,0.533587


In [8]:
# Rolling accumulated returns in the given investment cycle without knowing the future returns
R = period[period.index.get_level_values(1).isin(dates[-T:])][['Return']]
R_rolling = R.groupby(level=0, group_keys=False).apply(lambda x: (x/100+1).rolling(window=I).apply(lambda y: y.prod())-1).dropna()
R_estimation = R_rolling[R_rolling.index.get_level_values(1).isin(dates[-T+I-1:])]
R_estimation.unstack(level=1).droplevel(0, axis=1)

Date,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ACU.A,0.304114,0.222621,0.306879,0.291892,0.456105,0.726636,0.586995
AE.A,-0.270587,-0.365828,-0.111372,-0.153115,-0.175155,-0.243776,-0.262033
AGE.A,-0.026667,0.288136,0.025641,0.034483,-0.266000,-0.568869,-0.438356
AIM.A,0.196154,0.198077,0.058140,-0.039583,0.012931,-0.343284,-0.332797
AINC.A,-0.304734,-0.344098,-0.514578,-0.443269,-0.473953,-0.621399,-0.700000
...,...,...,...,...,...,...,...
ZKIN.O,-0.244172,-0.205674,0.026201,-0.290780,0.318841,0.340629,0.240260
ZLAB.O,-0.286664,-0.314401,-0.269092,-0.279588,-0.160049,-0.014425,-0.282435
ZS.O,0.291720,0.189859,0.331764,0.761265,0.458001,0.514422,0.469448
ZVRA.O,-0.106227,-0.127367,-0.123636,-0.170370,-0.055556,0.284314,0.186475


In [9]:
# De-mean returns
R_demean = R_estimation.groupby(level=1).transform(lambda x: x-x.mean(axis=0))
R_demean.unstack(level=1).droplevel(0, axis=1)

Date,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ACU.A,0.294072,0.225681,0.330379,0.362523,0.447942,0.696557,0.636375
AE.A,-0.280628,-0.362769,-0.087873,-0.082484,-0.183318,-0.273855,-0.212653
AGE.A,-0.036708,0.291195,0.049140,0.105114,-0.274162,-0.598948,-0.388976
AIM.A,0.186112,0.201137,0.081639,0.031048,0.004769,-0.373363,-0.283418
AINC.A,-0.314775,-0.341038,-0.491079,-0.372638,-0.482115,-0.651479,-0.650620
...,...,...,...,...,...,...,...
ZKIN.O,-0.254214,-0.202614,0.049700,-0.220149,0.310678,0.310550,0.289639
ZLAB.O,-0.296705,-0.311341,-0.245593,-0.208957,-0.168211,-0.044504,-0.233055
ZS.O,0.281679,0.192919,0.355263,0.831896,0.449839,0.484343,0.518827
ZVRA.O,-0.116269,-0.124307,-0.100137,-0.099739,-0.063718,0.254234,0.235855


In [10]:
# Setting characteristics matrix
X = period[period.index.get_level_values(1).isin(dates[-1-T:])][characteristics]
X_shifted = X.groupby(['Ticker']).shift(I).dropna()
X_estimation = X_shifted[X_shifted.index.get_level_values(1).isin(dates[-1-T+I:])]
X_holding = X_shifted[X_shifted.index.get_level_values(1)==dates[-1]].droplevel(1)
X_estimation

Unnamed: 0_level_0,Unnamed: 1_level_0,Alpha,Asset liability ratio,Beta,CapEx to depreciation,Capital reserve per share,Current asset ratio,Current liability ratio,EBITDA to revenue,EPS basic,EPS diluted,...,PEG,PS TTM,ROA,ROE,ROIC,Retained earnings per share,Revenue per share,Sharpe,Tangible asset ratio,Volatility
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ACU.A,2023-07-31,0.429709,0.449792,0.273546,0.232687,0.274584,0.729571,0.259695,0.401662,0.611842,0.609418,...,0.313712,0.148199,0.555748,0.529778,0.529086,0.811288,0.833102,0.391274,0.461565,0.360111
ACU.A,2023-08-31,0.706371,0.450139,0.259349,0.231648,0.274931,0.730609,0.260734,0.400970,0.612881,0.610457,...,0.306094,0.170360,0.554363,0.529778,0.529778,0.811288,0.832064,0.792936,0.461911,0.238573
ACU.A,2023-09-30,0.491343,0.406856,0.139889,0.414820,0.275970,0.722992,0.357687,0.455679,0.614958,0.614612,...,0.306787,0.168283,0.568906,0.534626,0.536357,0.808864,0.742036,0.352839,0.457756,0.337604
ACU.A,2023-10-31,0.933518,0.406856,0.073753,0.414127,0.275623,0.722299,0.358726,0.457410,0.618421,0.618075,...,0.294321,0.201524,0.567521,0.536011,0.539474,0.808864,0.747576,0.868075,0.458449,0.687673
ACU.A,2023-11-30,0.450485,0.406856,0.353186,0.414820,0.275623,0.722992,0.358380,0.457410,0.617729,0.617382,...,0.296053,0.198753,0.566482,0.534626,0.538435,0.808518,0.746884,0.444945,0.458102,0.368421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI.O,2023-09-30,0.386427,0.304017,0.896122,0.779086,0.173476,0.686288,0.559903,0.426939,0.505886,0.509349,...,0.003463,0.665859,0.587950,0.619460,0.678324,0.537050,0.296053,0.552978,0.364958,0.894044
ZYXI.O,2023-10-31,0.431094,0.305055,0.760042,0.777355,0.173823,0.686981,0.560596,0.430055,0.509349,0.512812,...,0.003463,0.655471,0.586219,0.619806,0.682479,0.537050,0.296745,0.514197,0.364612,0.453947
ZYXI.O,2023-11-30,0.260388,0.305402,0.811981,0.777355,0.173823,0.687327,0.562327,0.429709,0.508657,0.512119,...,0.004501,0.600416,0.584834,0.618767,0.681787,0.537050,0.297438,0.501731,0.364266,0.949100
ZYXI.O,2023-12-31,0.769044,0.563366,0.046745,0.785665,0.168975,0.793629,0.112188,0.492036,0.508657,0.511427,...,0.004501,0.578601,0.700485,0.732341,0.744806,0.538781,0.319252,0.387465,0.437327,0.521122


In [11]:
# Extracting the explained R
R_hat = X_estimation @ np.linalg.solve(X_estimation.T @ X_estimation, X_estimation.T @ R_demean)
R_hat = R_hat.unstack(level=1).droplevel(0,axis=1)
R_hat

Date,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ACU.A,0.112079,0.106416,0.164077,0.145866,0.162950,0.159639,0.138748
AE.A,0.019496,0.028651,-0.008132,-0.015904,-0.010368,-0.104078,-0.097837
AGE.A,-0.238178,-0.221903,-0.355368,-0.356171,-0.337375,-0.376345,-0.360105
AIM.A,-0.097369,-0.083633,-0.071148,-0.091847,-0.079837,-0.106447,-0.088815
AINC.A,0.068563,0.068919,-0.039148,-0.041350,-0.043018,0.031317,0.022633
...,...,...,...,...,...,...,...
ZKIN.O,-0.055700,-0.035081,-0.046960,-0.041692,-0.045938,-0.034397,-0.034195
ZLAB.O,-0.074406,-0.042837,-0.004283,-0.007777,-0.008766,-0.019170,-0.025644
ZS.O,0.035633,0.020861,0.022518,0.029728,0.007511,0.026833,0.007217
ZVRA.O,-0.002432,0.008971,0.027830,0.028770,0.017354,0.003230,0.007480


In [12]:
# Eigendecomposition
RR_N = (R_hat @ R_hat.T) / N # Just like the expression of covariance between N stocks
eigen_values, eigen_vectors = eigsh(RR_N.to_numpy(), k=K, which='LM')
Gb_hat = np.sqrt(N) * pd.DataFrame(eigen_vectors, index=tickers, columns=[f'Factor {x}' for x in range(K, 0, -1)])
Gb_hat

# Calculation of the projection from X to G_beta
coef_beta = np.linalg.solve(X_estimation.T @ X_estimation, X_estimation.droplevel(1).T @ Gb_hat.loc[Gb_hat.index.repeat(T-I+1)])
coef_beta = pd.DataFrame(coef_beta @ eigen_values, index=characteristics, columns=[dates[-1]])
Gb_hat

Unnamed: 0_level_0,Factor 6,Factor 5,Factor 4,Factor 3,Factor 2,Factor 1
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ACU.A,-0.328447,1.538056,0.119286,-0.434069,1.133900,-1.771704
AE.A,0.946601,-0.467957,0.074082,-2.476120,-2.027201,0.337098
AGE.A,1.416514,-0.910757,1.035859,-0.065733,-3.304208,4.018604
AIM.A,0.932101,-0.425946,-1.006217,-0.502339,0.020335,1.108969
AINC.A,0.264777,0.831835,-0.134468,2.339593,-1.941651,-0.123310
...,...,...,...,...,...,...
ZKIN.O,1.347353,0.169672,-0.099608,0.302707,0.151437,0.526858
ZLAB.O,1.894501,0.856016,-0.413947,-0.639126,1.128779,0.328999
ZS.O,-1.544307,0.987998,-0.175368,-0.081265,-0.253395,-0.269679
ZVRA.O,0.621914,-0.138220,-0.712610,-0.648524,0.306712,-0.162393


In [13]:
# Using the original returns for data fit
R_estimation = R_estimation.unstack(level=1).droplevel(0, axis=1).loc[selected_industry]
R_estimation

Date,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AE.A,-0.270587,-0.365828,-0.111372,-0.153115,-0.175155,-0.243776,-0.262033
BRN.A,0.003860,0.120511,0.211379,-0.089025,-0.039936,-0.041715,-0.021997
CEI.A,-0.568528,-0.761628,-0.854430,-0.748571,-0.771963,-0.642633,-0.800000
ELLO.A,-0.067039,0.163200,0.293333,-0.239130,-0.217262,-0.169444,-0.060479
ENSV.A,-0.678740,-0.452542,-0.197895,-0.090909,-0.120787,-0.217391,-0.480392
...,...,...,...,...,...,...,...
VIA.O,-0.749972,-0.700000,-0.600109,-0.453140,-0.134981,0.350575,0.298654
VTNR.O,-0.276099,-0.552270,-0.549595,-0.454430,-0.477589,-0.457600,-0.722960
VVPR.O,-0.345494,-0.256866,-0.136364,-0.405714,-0.676439,-0.710210,-0.777049
WAVE.O,-0.322456,-0.374826,0.017857,-0.165187,-0.472828,-0.504000,-0.408163


In [14]:
theta = pd.DataFrame()
Gb_hat = Gb_hat.loc[selected_industry]
X_holding = X_holding.loc[selected_industry]
for i in range(T-I+1):

    # Return in the previous month
    R_previous = R_estimation[[dates[i-T+I-1]]]

    # Characteristics in the previous month
    X_previous = X_estimation[X_estimation.index.get_level_values(1)==dates[i-T+I-1]].droplevel(1,axis=0).loc[selected_industry]
    
    if method == 1:
        # First method - theta
        X_E = X_previous.to_numpy() - Gb_hat @ np.linalg.solve(Gb_hat.T @ Gb_hat, Gb_hat.T @ X_previous)
        X_E.columns = characteristics
        theta[dates[i-T+I-1]] = pd.DataFrame(np.linalg.solve(X_E.T @ X_E, X_E.T @ R_previous), index=characteristics)
    else:
        # Second method - theta
        # F_previous is the coefficient of Gb_hat with respect to R_previous, regardless of the effect of intercept Ga
        F_previous = pd.DataFrame(np.linalg.solve(Gb_hat.T @ Gb_hat, Gb_hat.T @ R_previous), index=Gb_hat.columns, columns=[dates[i-T+I-1]])
        # Final regression to derive the linear estimation of Ga on X, which is denoted as theta
        Y = R_previous - Gb_hat @ F_previous
        theta[dates[i-T+I-1]] = pd.DataFrame(np.linalg.solve(X_previous.T @ X_previous, X_previous.T @ Y), index=characteristics)

# Forcast the effect of characteristics by regressing in the estimation period
time_labels = pd.DataFrame([list(range(T-I+1))], index=['Order'], columns=[dates[-T+I-1:]]).T
holding_time_label = [[T]]
theta = pd.DataFrame(holding_time_label @ np.linalg.solve(time_labels.T @ time_labels, time_labels.T @ theta.T.values), index=[dates[-1]], columns=characteristics).T

if method == 1:
    # First method - G_alpha
    X_E_holding = X_holding.to_numpy() - Gb_hat @ np.linalg.solve(Gb_hat.T @ Gb_hat, Gb_hat.T @ X_holding)
    X_E_holding.columns = characteristics
    Ga_hat = X_E_holding @ theta
else:
    # Second method - G_alpha
    # Weight of my portfolio
    Ga_hat = X_holding @ theta
    
Ga_hat

Unnamed: 0_level_0,2024-01-31
Ticker,Unnamed: 1_level_1
AE.A,0.616065
BRN.A,-0.135207
CEI.A,-0.483793
ELLO.A,0.628595
ENSV.A,0.071583
...,...
VIA.O,-0.151896
VTNR.O,-0.125581
VVPR.O,-0.171010
WAVE.O,-0.279754


In [15]:
# Scale towards a target vol
R_alpha = Ga_hat.T @ R_estimation
sd_scale_factor = 1 / (np.sqrt(T) * np.std(R_alpha, axis=1, ddof=1))
sd_scale_factor

2024-01-31    0.175426
dtype: float64

In [16]:
# Weight of my portfolio
omega = Ga_hat * sd_scale_factor
omega

Unnamed: 0_level_0,2024-01-31
Ticker,Unnamed: 1_level_1
AE.A,0.108074
BRN.A,-0.023719
CEI.A,-0.084870
ELLO.A,0.110272
ENSV.A,0.012558
...,...
VIA.O,-0.026646
VTNR.O,-0.022030
VVPR.O,-0.030000
WAVE.O,-0.049076


In [17]:
# Absolute weight of the portfolio
omega['Abs'] = omega.iloc[:,0].abs()
omega = omega.sort_values(by='Abs',ascending=False).head(min(S, len(selected_industry))).drop('Abs', axis=1)
omega

Unnamed: 0_level_0,2024-01-31 00:00:00
Ticker,Unnamed: 1_level_1
HUSA.A,-0.189621
EP.A,-0.156489
SMR.N,-0.148417
CEPU.N,0.128284
BORR.N,-0.126083
ELLO.A,0.110272
AE.A,0.108074
TNP.N,0.105244
TUSK.O,0.10057
CPE.N,-0.098503
