A portfolio manager submitted an alpha portfolio of ZZ500, but some of its industry exposures exceed the limit. Please provide a solution and program to implement the corresponding algorithm to control the industry exposures of the portfolio so that none of any industry exposure exceeds the limit. Mainwhile, please reasonably keep as much as possible the excess return of the portfolio.

Data: industry classification of each stock on the day, ZZ500 weights, alpha portfolio

The industry exposure limit is 7%.

In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

In [2]:
# Part 1: read the data
df_industry = pd.read_csv('stock_industry.csv')[['SecurityID','ZXF']]
df_alpha = pd.read_csv('alpha.txt', delimiter = "|")

In [3]:
df_industry.head()

Unnamed: 0,SecurityID,ZXF
0,000001.SZE,40
1,000002.SZE,42
2,000004.SZE,35
3,000005.SZE,20
4,000006.SZE,42


In [4]:
df_alpha.head()

Unnamed: 0,[1]SecuCode,[2]TradingDay,[3]Weight
0,601318.SSE,20191231,6.715
1,600519.SSE,20191231,4.312
2,600036.SSE,20191231,2.812
3,000651.SZE,20191231,2.289
4,601166.SSE,20191231,2.088


In [5]:
# Part 2: count industry exposures
df_alpha = df_alpha.merge(df_industry, left_on='[1]SecuCode', right_on='SecurityID')
stock_weight_list = list(df_alpha['[3]Weight'])
stock_industry_list = list(df_alpha['ZXF'])
d = {}
for i in range(len(stock_weight_list)):
    d[stock_industry_list[i]] = d.get(stock_industry_list[i], 0) + stock_weight_list[i]
ind_list = list(d.keys())
ind_list.sort()
n_ind = len(ind_list)
weight_list = [d[ind] for ind in ind_list]

In [6]:
print(ind_list)
print(weight_list)

['10', '11', '12', '20', '21', '22', '23', '24', '25', '26', '27', '28', '30', '31', '32', '33', '34', '35', '36', '37', '40', '41', '42', '50', '60', '61', '62', '63', '70']
[1.6809999999999998, 0.9169999999999998, 2.3649999999999998, 2.4930000000000003, 0.817, 1.228, 2.553, 1.4309999999999998, 0.237, 2.012, 1.775, 1.282, 2.5829999999999997, 0.603, 0.825, 5.243, 0.074, 7.133000000000002, 9.733000000000002, 1.974, 16.314, 17.129, 4.753, 3.0460000000000003, 5.767, 1.9459999999999997, 2.252, 1.6520000000000001, 0.178]


In [7]:
# Part 3: my exposure control plan
# To find an intrustry exposure distribution satisfying the bound that no exposure is greater than 7
# and being closest to the old distribution in the sense of being measured by the KL-divergence.
def minimum_KL_divergence(old_weights):
    init_guess = np.repeat(100/n_ind, n_ind)
    bounds = ((0.0001, 7.0000),) * n_ind
    weights_sum_to_100 = {'type': 'eq', 'fun': lambda weights: np.sum(weights) - 100.0}
    def KL_divergence(weights, old_weights):
        the_old_weights = np.divide(old_weights, np.sum(old_weights))
        the_new_wrights = np.divide(weights, np.sum(weights))
        return np.sum(np.multiply(the_old_weights, np.log(np.divide(the_old_weights, the_new_wrights))))
    weights = minimize(KL_divergence, init_guess,
                       args=(old_weights,), method='SLSQP',
                       options={'disp': False},
                       constraints=(weights_sum_to_100,),
                       bounds=bounds)
    return weights.x
my_new_weight_list = minimum_KL_divergence(weight_list)

In [8]:
print(my_new_weight_list)

[2.51599696 1.3589545  3.58336235 3.7780598  1.22825423 1.80867947
 3.86864776 2.12229479 0.34873018 3.03709943 2.66444674 1.89115136
 3.91377703 0.97619156 1.23839112 7.         0.1118097  7.
 7.         2.97759427 7.         7.         6.89330516 4.59602356
 7.         2.93367786 3.4099082  2.47018772 0.27345626]


In [9]:
# Part 4: find the ratios for adjustment for each industry
ratio_list = np.divide(my_new_weight_list, weight_list)
ratio_dic = {}
for i in range(n_ind):
    ratio_dic[ind_list[i]] = ratio_list[i]

In [10]:
print(ratio_dic)

{'10': 1.496726328583671, '11': 1.481956928254611, '12': 1.5151637824307413, '20': 1.5154672276905856, '21': 1.5033711558405778, '22': 1.47286601850625, '23': 1.5153340210663104, '24': 1.4830851058379446, '25': 1.471435340549872, '26': 1.509492759999593, '27': 1.5010967568747724, '28': 1.4751570654169204, '30': 1.5152059755201537, '31': 1.6188914720189376, '32': 1.5010801419898139, '33': 1.335113484646195, '34': 1.5109418704517248, '35': 0.9813542688910695, '36': 0.7192027124216581, '37': 1.5084064161166868, '40': 0.42907931837685426, '41': 0.40866366979975477, '42': 1.4503061569478772, '50': 1.508871818348909, '60': 1.2138026703658746, '61': 1.507542579246179, '62': 1.5141688266185647, '63': 1.4952710180871807, '70': 1.5362711363036265}


In [11]:
# Part 5: adjust weights according to the ratios and save the result
ratios = [ratio_dic[ind] for ind in stock_industry_list]
new_weights = np.multiply(stock_weight_list, ratios)
df_alpha['NewWeight'] = new_weights
df_alpha = df_alpha[['[1]SecuCode','[2]TradingDay','[3]Weight','ZXF','NewWeight']]
df_alpha.to_csv('my_industry_compression_scheme.csv')

In [12]:
df_alpha.head()

Unnamed: 0,[1]SecuCode,[2]TradingDay,[3]Weight,ZXF,NewWeight
0,601318.SSE,20191231,6.715,41,2.744177
1,600519.SSE,20191231,4.312,36,3.101202
2,600036.SSE,20191231,2.812,40,1.206571
3,000651.SZE,20191231,2.289,33,3.056075
4,601166.SSE,20191231,2.088,40,0.895918
