In [77]:
import pandas as pd
import glob
import numpy as np

# 导入股票收益率数据
file_names = glob.glob('../data/raw_data/日个股回报率/*.csv')
stock_prices = pd.concat((pd.read_csv(file) for file in file_names), ignore_index=True)

# 保留A股、创业板、科创板股票
stock_prices = stock_prices[stock_prices['Markettype'].isin([1, 4, 16, 32])]
stock_prices.Trddt = pd.to_datetime(stock_prices.Trddt)

# 收益率
stock_returns = stock_prices.pivot_table(index='Trddt', columns='Stkcd', values='Dretwd')

In [78]:
# 导入所有标准化因子数据和行业因子
factors_dict = {}
factors_dict['beta'] = pd.read_csv('../data/standardized_risk_factors/beta_standardized.csv')
factors_dict['momentum'] = pd.read_csv('../data/standardized_risk_factors/RSTR_standardized.csv')
factors_dict['size'] = pd.read_csv('../data/standardized_risk_factors/LNCAP_standardized.csv')
factors_dict['earnings'] = pd.read_csv('../data/standardized_risk_factors/earnings_yield_factor_standardized.csv')
factors_dict['volatility'] = pd.read_csv('../data/standardized_risk_factors/volatility_factor_standardized.csv')
factors_dict['growth'] = pd.read_csv('../data/standardized_risk_factors/growth_factor_standardized.csv')
factors_dict['value'] = pd.read_csv('../data/standardized_risk_factors/BTOP_standardized.csv')
factors_dict['leverage'] = pd.read_csv('../data/standardized_risk_factors/leverage_factor_standardized.csv')
factors_dict['liquidity'] = pd.read_csv('../data/standardized_risk_factors/liquidity_factor_standardized.csv')


for factor in factors_dict.keys():
    factors_dict[factor] = factors_dict[factor].rename(columns={'Unnamed: 0' : 'Trddt'})
    factors_dict[factor] = factors_dict[factor].set_index('Trddt')
    factors_dict[factor].index = pd.to_datetime(factors_dict[factor].index)


trade_date = factors_dict['growth'].index
stock_list = factors_dict['growth'].columns.values

for factor in factors_dict.keys():
    factors_dict[factor] = factors_dict[factor].loc[trade_date, stock_list]
    factors_dict[factor] = factors_dict[factor].fillna(0)
    # factors_dict[factor] = factors_dict[factor].dropna(axis = 0, how = 'all')

file_names = glob.glob('../data/industry_factors/*.csv')
for file_name in file_names:
    key = file_name.split('/')[-1].split('.')[0]
    factors_dict[key] = pd.read_csv(file_name)
    factors_dict[key] = factors_dict[key].rename(columns={'Unnamed: 0' : 'Trddt'})
    factors_dict[key] = factors_dict[key].set_index('Trddt')
    factors_dict[key].index = pd.to_datetime(factors_dict[key].index)
    factors_dict[key] = factors_dict[key].loc[trade_date, stock_list]

In [79]:
stock_returns = stock_returns.loc[trade_date, stock_list.astype(int)]
print(stock_returns)

Stkcd         1         2         5         6         8         9       \
Trddt                                                                    
2019-03-25 -0.038125 -0.035286  0.010283  0.008850 -0.032258 -0.044974   
2019-03-26 -0.000826  0.004616  0.005089 -0.048246  0.002222 -0.051247   
2019-03-27  0.023140  0.023330 -0.027848  0.004608  0.004435 -0.013139   
2019-03-28 -0.012924 -0.005181 -0.041667  0.001529 -0.006623 -0.036982   
2019-03-29  0.049100  0.066667  0.024457  0.030534  0.026667  0.030722   
...              ...       ...       ...       ...       ...       ...   
2023-03-07 -0.011552 -0.019453  0.000000 -0.024779 -0.023166 -0.011272   
2023-03-08 -0.011687 -0.002480  0.016760  0.009074  0.011858  0.009772   
2023-03-09 -0.024390  0.001865  0.000000 -0.003597 -0.015625 -0.008065   
2023-03-10 -0.004545 -0.017990  0.000000  0.019856 -0.011905 -0.017073   
2023-03-13 -0.006849  0.000632 -0.005495 -0.033628  0.004016 -0.011580   

Stkcd         12        16        20 

In [161]:
# 目标函数
def expReturn(weights, returns):
    # 1*N weights, 1*N returns
    return -np.dot(weights, returns)

In [162]:
# w_bench 
hs300weights = pd.read_csv('../data/hs300_weights.csv')
hs300weights = hs300weights.loc[:,['Stkcd','Enddt','Weight']]

hs300weights = hs300weights.pivot(index='Enddt', columns='Stkcd', values='Weight').fillna(0)
hs300weights.index = pd.to_datetime(hs300weights.index)
hs300weights = hs300weights.loc[trade_date,:]
new_hs300weights = hs300weights.reindex(columns=stock_list.astype(int), fill_value=0)

# 将数据框重新索引为trade_date, stock_list
new_hs300weights = new_hs300weights.loc[trade_date, stock_list.astype(int)]
print(new_hs300weights)

Stkcd       1       2       5       6       8       9       12      16      \
Trddt                                                                        
2019-03-25   0.890   1.173       0       0     0.0     0.0       0       0   
2019-03-26   0.900   1.191       0       0     0.0     0.0       0       0   
2019-03-27   0.910   1.205       0       0     0.0     0.0       0       0   
2019-03-28   0.902   1.204       0       0     0.0     0.0       0       0   
2019-03-29   0.911   1.236       0       0     0.0     0.0       0       0   
...            ...     ...     ...     ...     ...     ...     ...     ...   
2023-03-07   0.681   0.563       0       0     0.0     0.0       0       0   
2023-03-08   0.675   0.563       0       0     0.0     0.0       0       0   
2023-03-09   0.661   0.566       0       0     0.0     0.0       0       0   
2023-03-10   0.667   0.564       0       0     0.0     0.0       0       0   
2023-03-13   0.656   0.558       0       0     0.0     0.0      

In [163]:
# 权重约束
def weightConstraint_size(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_beta(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_momentum(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_earnings(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_volatility(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_growth(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_value(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_leverage(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

def weightConstraint_liquidity(weight, weight_benchmark, risk_factor_exposure):
    return (np.matrix(weight).T - np.matrix(weight_benchmark).T) @ np.matrix(risk_factor_exposure)

In [164]:
# 月末行业矩阵 >> H on page 18

factors_dict.keys()
industry_factors = {}

endOfMonth = pd.DataFrame(factors_dict['growth'].index)
endOfMonth.reset_index(inplace=True)
endOfMonth['year'] = pd.to_datetime(endOfMonth.Trddt).dt.year
endOfMonth['month'] = pd.to_datetime(endOfMonth.Trddt).dt.month

endOfMonth = endOfMonth.groupby(['year', 'month']).last()['Trddt']
# groupByYear = endOfMonth.groupby('year')

for day in endOfMonth:
    industry_factors[day] = pd.DataFrame()
    for key in factors_dict.keys():
        if key not in ['beta', 'momentum', 'size', 'earnings', 'volatility', 'growth', 'value', 'leverage', 'liquidity']:
            industry_factors[day][key] = factors_dict[key].loc[day,:].transpose()


In [165]:
industry_factors[endOfMonth.iloc[0]]

Unnamed: 0,industry_factors\industry_factors110000,industry_factors\industry_factors210000,industry_factors\industry_factors220000,industry_factors\industry_factors230000,industry_factors\industry_factors240000,industry_factors\industry_factors270000,industry_factors\industry_factors280000,industry_factors\industry_factors330000,industry_factors\industry_factors340000,industry_factors\industry_factors350000,...,industry_factors\industry_factors630000,industry_factors\industry_factors640000,industry_factors\industry_factors650000,industry_factors\industry_factors710000,industry_factors\industry_factors720000,industry_factors\industry_factors730000,industry_factors\industry_factors740000,industry_factors\industry_factors750000,industry_factors\industry_factors760000,industry_factors\industry_factors770000
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
688798,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
688800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
688819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
688981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [166]:
# 行业约束

# industry_factors[date] = (3770, 32)
# (964, 3770)

def industryConstraint(weights, weights_benchmark, industry_factors):
    # 1*N weights, 1*N weights_benchmark
    # return (np.matrix(weights).T - np.matrix(weights_benchmark).T) @ np.matrix(industry_factors)
    print(np.matrix(weights).shape)
    print(np.matrix(weights_benchmark).shape)
    LHS = np.matrix(weights) @ np.matrix(industry_factors)
    RHS = np.matrix(weights_benchmark) @ np.matrix(industry_factors)
    return LHS - RHS

In [167]:
# w >= 0
def nonNegativeConstraint(weights):
    return weights

# sum(w) = 1
def sumConstraint(weights):
    return np.sum(weights) - 1

In [168]:
cons_dict = {'size':{'type': 'eq', 'fun': "weightConstraint_size"},
                'beta': {'type': 'eq', 'fun': "weightConstraint_beta"},
                'momentum': {'type': 'eq', 'fun': "weightConstraint_momentum"},
                'earnings':{'type': 'eq', 'fun': "weightConstraint_earnings"},
                'volatility':{'type': 'eq', 'fun': "weightConstraint_volatility"},
                'growth':{'type': 'eq', 'fun': "weightConstraint_growth"},
                'value':{'type': 'eq', 'fun': "weightConstraint_value"},
                'leverage':{'type': 'eq', 'fun': "weightConstraint_leverage"},
                'liquidity':{'type': 'eq', 'fun': "weightConstraint_liquidity"}}

factor = 'size'

# cons_dict[key for key in cons_dict.keys() if key != factor]
new_dict = {key: cons_dict[key] for key in cons_dict.keys() if key != factor}

new_dict.values()

# cons_dict['size', 'beta']

dict_values([{'type': 'eq', 'fun': 'weightConstraint_beta'}, {'type': 'eq', 'fun': 'weightConstraint_momentum'}, {'type': 'eq', 'fun': 'weightConstraint_earnings'}, {'type': 'eq', 'fun': 'weightConstraint_volatility'}, {'type': 'eq', 'fun': 'weightConstraint_growth'}, {'type': 'eq', 'fun': 'weightConstraint_value'}, {'type': 'eq', 'fun': 'weightConstraint_leverage'}, {'type': 'eq', 'fun': 'weightConstraint_liquidity'}])

In [169]:
# 优化
from scipy.optimize import minimize
from scipy.optimize import NonlinearConstraint

def optimizePortfolio(returns, date, factors_dict, factor, industry_factors, weight_benchmark):
    N = len(returns)
    x0 = [1/N]*N
    
    beta = factors_dict['beta'].loc[date,:]
    momentum = factors_dict['momentum'].loc[date,:]
    size = factors_dict['size'].loc[date,:]
    earnings = factors_dict['earnings'].loc[date,:]
    volatility = factors_dict['volatility'].loc[date,:]
    growth = factors_dict['growth'].loc[date,:]
    value = factors_dict['value'].loc[date,:]
    leverage = factors_dict['leverage'].loc[date,:]
    liquidity = factors_dict['liquidity'].loc[date,:]
    
    cons_dict = {'size':{'type': 'eq', 'fun': weightConstraint_size, 'args':(weight_benchmark, size)},
                'beta': {'type': 'eq', 'fun': weightConstraint_beta, 'args':(weight_benchmark, beta)},
                'momentum': {'type': 'eq', 'fun': weightConstraint_momentum, 'args':(weight_benchmark, momentum)},
                'earnings':{'type': 'eq', 'fun': weightConstraint_earnings, 'args':(weight_benchmark, earnings)},
                'volatility':{'type': 'eq', 'fun': weightConstraint_volatility, 'args':(weight_benchmark, volatility)},
                'growth':{'type': 'eq', 'fun': weightConstraint_growth, 'args':(weight_benchmark, growth)},
                'value':{'type': 'eq', 'fun': weightConstraint_value, 'args':(weight_benchmark, value)},
                'leverage':{'type': 'eq', 'fun': weightConstraint_leverage, 'args':(weight_benchmark, leverage)},
                'liquidity':{'type': 'eq', 'fun': weightConstraint_liquidity, 'args':(weight_benchmark, liquidity)}}
    
    new_dict = {key: cons_dict[key] for key in cons_dict.keys() if key != factor}
    constraint_list = [{'type': 'eq', 'fun': industryConstraint, 'args':(weight_benchmark, industry_factors[date])},
                    {'type': 'ineq', 'fun': nonNegativeConstraint},
                    {'type': 'eq', 'fun': sumConstraint}]

    for key in new_dict.keys():
        constraint_list.append(new_dict[key])

    constraints = tuple(constraint_list)
    res = minimize(fun=expReturn,
                   x0=x0,                # init_w,
                   args=returns,
                   bounds=None,
                   constraints=constraints,
                   method='SLSQP',
                   tol=1e-6,
                   options={'maxiter': 100000})
    
    return res.x

In [170]:
weight = {}
for factor in ['beta', 'momentum', 'size', 'earnings', 'volatility', 'growth', 'value', 'leverage', 'liquidity']:
    weight[factor] = pd.DataFrame(index = endOfMonth)
    for date in endOfMonth:
        weight[factor].loc[date, :] = optimizePortfolio(stock_returns.loc[date,:], date, factors_dict, factor, industry_factors, new_hs300weights.loc[date,:])

print('Done!')

(1, 3770)
(1, 3770)


ValueError: negative dimensions are not allowed