In [117]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
import numpy as np
import pandas as pd
import torch

In [118]:
df = pd.read_csv("stock_prices.csv").drop("date", axis=1)

In [119]:
sector_mapper = {
        "GOOG": "tech",
        "AAPL": "tech",
        "FB": "tech",
        "AMZN": "tech",
        "BABA": "tech",
        "GE": "utility",
        "AMD": "tech",
        "WMT": "retail",
        "BAC": "fig",
        "GM": "auto",
        "T": "auto",
        "UAA": "airline",
        "SHLD": "retail",
        "XOM": "energy",
        "RRC": "energy",
        "BBY": "retail",
        "MA": "fig",
        "PFE": "pharma",
        "JPM": "fig",
        "SBUX": "retail",
    }


In [120]:
#df = df.iloc[:, :20]

In [121]:
mu = mean_historical_return(df)
S = CovarianceShrinkage(df).ledoit_wolf()

In [122]:
tech = mu.iloc[[0,1,2,3,4,6]]
utility = mu.iloc[[5]]
retail = mu.iloc[[7, 12, 15, 19]]
fig = mu.iloc[[8, 16, 18]]
auto = mu.iloc[[9,10]]
airline = mu.iloc[[11]]
energy = mu.iloc[[13, 14]]
pharma = mu.iloc[[17]]

In [123]:
mu

GOOG    0.247967
AAPL    0.294305
FB      0.284037
BABA    0.192316
AMZN    0.371328
GE      0.136009
AMD     0.032850
WMT     0.120012
BAC     0.105540
GM      0.042346
T       0.100256
UAA     0.144224
SHLD   -0.079260
XOM     0.143051
RRC     0.073636
BBY     0.238835
MA      0.388666
PFE     0.226718
JPM     0.156170
SBUX    0.231815
dtype: float64

In [124]:
S

Unnamed: 0,GOOG,AAPL,FB,BABA,AMZN,GE,AMD,WMT,BAC,GM,T,UAA,SHLD,XOM,RRC,BBY,MA,PFE,JPM,SBUX
GOOG,0.045529,0.022143,0.006389,0.00372,0.026085,0.015815,0.021761,0.008238,0.026957,0.006163,0.011058,0.021371,0.016418,0.013362,0.018714,0.016089,0.019593,0.010683,0.023249,0.018075
AAPL,0.022143,0.207037,0.004334,0.002954,0.0582,0.038102,0.084053,0.026429,0.045966,0.005791,0.019846,0.020659,0.022127,0.020236,0.029179,0.048719,0.019587,0.021324,0.046814,0.039789
FB,0.006389,0.004334,0.029233,0.00377,0.007619,0.003008,0.005804,0.001243,0.004766,0.003499,0.001395,0.009407,0.002191,0.002214,0.002741,0.003813,0.004727,0.00255,0.003842,0.004501
BABA,0.00372,0.002954,0.00377,0.013438,0.004176,0.002011,0.006332,0.001081,0.003077,0.002573,0.000891,0.003461,0.003114,0.001751,0.002973,0.002441,0.003182,0.001624,0.002654,0.002162
AMZN,0.026085,0.0582,0.007619,0.004176,0.276365,0.038169,0.075657,0.027457,0.049062,0.006527,0.01872,0.027889,0.027333,0.019637,0.029789,0.059925,0.023092,0.02417,0.053367,0.048322
GE,0.015815,0.038102,0.003008,0.002011,0.038169,0.083405,0.04858,0.029076,0.062871,0.007533,0.026464,0.025245,0.025666,0.025815,0.029488,0.041375,0.019308,0.029891,0.059131,0.034449
AMD,0.021761,0.084053,0.005804,0.006332,0.075657,0.04858,0.388916,0.027113,0.068272,0.011758,0.026781,0.030291,0.035381,0.027946,0.049354,0.066146,0.026956,0.027965,0.069592,0.051764
WMT,0.008238,0.026429,0.001243,0.001081,0.027457,0.029076,0.027113,0.069478,0.03056,0.003595,0.020985,0.012137,0.015605,0.017036,0.012011,0.037385,0.009599,0.02316,0.033017,0.025482
BAC,0.026957,0.045966,0.004766,0.003077,0.049062,0.062871,0.068272,0.03056,0.179868,0.012581,0.033848,0.040611,0.039452,0.032159,0.045077,0.052162,0.035707,0.034984,0.116165,0.047582
GM,0.006163,0.005791,0.003499,0.002573,0.006527,0.007533,0.011758,0.003595,0.012581,0.021594,0.003942,0.00972,0.011321,0.005865,0.007549,0.007759,0.007317,0.004823,0.010176,0.006414


In [125]:
from pypfopt.efficient_frontier import EfficientFrontier
import time
ds = []
for i in range(1000):
	max_tech = np.random.rand()
	max_retail = np.random.rand()
	max_fig = np.random.rand()
	max_energy = np.random.rand()
	utility_all = 0.2*np.random.rand()
	airline_all = 0.2*np.random.rand()
	rho = (1 - 1e-12)*np.random.rand() + 1e-12
	sector_upper = {
			"tech": max_tech,
			"retail":max_retail,
			"fig": max_fig,
			"energy": max_energy}

	ef = EfficientFrontier(mu, S)
	ef.add_sector_constraints(sector_mapper,  {}, sector_upper)
	ef.add_constraint(lambda x: x[5] == utility_all)
	ef.add_constraint(lambda x: x[11] == airline_all)
	
	weights = ef.max_quadratic_utility(rho)
	X = np.array([2*max_tech - 1, 2*max_retail - 1, 2*max_fig - 1, 2*max_energy - 1, 2*(utility_all/0.2) - 1, 2*(airline_all/0.2) - 1, 2*rho - 1])
	y = np.array([w for w in list(weights.values())])
	ds.append((X, y))
	

In [144]:
from pickle import dump
dump(ds, open("portfolio.pkl", "wb"))

In [127]:
max_tech, max_retail, max_fig, max_energy, utility_all, airline_all, rho

(0.749940274183224,
 0.7477612662035066,
 0.008066729025971386,
 0.8307800583266979,
 0.08004545290900518,
 0.12272165456902623,
 0.7124262547745772)

In [128]:
print(end-start)

-104.54088163375854


In [129]:
ds[0]

(array([ 0.55232835,  0.1317124 , -0.58406406, -0.22894384, -0.59037486,
        -0.15850001, -0.16822062]),
 array([0.        , 0.        , 0.        , 0.        , 0.66691952,
        0.04096251, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.08415   , 0.        , 0.        , 0.        ,
        0.        , 0.20796797, 0.        , 0.        , 0.        ]))

In [130]:
from pickle import dump
dump(ds, open("portfolio.pkl", "wb"))

In [131]:
sol = np.array([w/1 for w in list(weights.values())])

In [132]:
S.to_numpy().max()

0.38891555945324197

In [133]:
pred = np.array([sample[-1] for sample in ds])

In [134]:
pred[..., 0]

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
      

In [135]:
pred.std(0)

array([2.87918736e-08, 3.57051707e-05, 6.43589772e-02, 2.78996420e-08,
       1.93863729e-01, 5.79094785e-02, 2.99923880e-08, 5.50539739e-08,
       2.18523523e-08, 4.76484260e-08, 4.77003083e-08, 5.79683829e-02,
       5.41676720e-08, 5.11936418e-08, 5.05069284e-08, 5.39463542e-02,
       2.62792818e-01, 1.04622059e-01, 2.13252323e-08, 4.17238970e-02])

In [136]:
np.linalg.cond(S.to_numpy())

61.968954265636754

In [137]:
matrixSize = 20 
A = np.random.rand(matrixSize, matrixSize)*0.2
B = np.dot(A, A.transpose())

In [138]:
B

array([[0.2543818 , 0.21359635, 0.21811447, 0.22308082, 0.17379906,
        0.21736575, 0.18067375, 0.16253288, 0.22023188, 0.2217319 ,
        0.16829948, 0.21064062, 0.21170862, 0.18447682, 0.16665777,
        0.1591098 , 0.20874406, 0.16787502, 0.22376798, 0.22716848],
       [0.21359635, 0.26924243, 0.24306383, 0.22947549, 0.1815414 ,
        0.19938378, 0.18686412, 0.15982721, 0.22761787, 0.18862034,
        0.17564223, 0.22710405, 0.21060038, 0.1940839 , 0.16880566,
        0.15586278, 0.2191011 , 0.17386384, 0.23191305, 0.24902467],
       [0.21811447, 0.24306383, 0.33609054, 0.24387896, 0.22086221,
        0.22809963, 0.20314905, 0.19555713, 0.2315555 , 0.18661029,
        0.22421354, 0.25794096, 0.27199683, 0.20008491, 0.19785092,
        0.16235014, 0.26742766, 0.18278474, 0.2496471 , 0.27643217],
       [0.22308082, 0.22947549, 0.24387896, 0.3243086 , 0.20792034,
        0.22133062, 0.18840377, 0.19425444, 0.23268724, 0.249488  ,
        0.22902942, 0.23094017, 0.22585018, 0

In [139]:
np.linalg.cond(B)

38634.74896508912

In [140]:
S.max()

GOOG    0.045529
AAPL    0.207037
FB      0.029233
BABA    0.013438
AMZN    0.276365
GE      0.083405
AMD     0.388916
WMT     0.069478
BAC     0.179868
GM      0.021594
T       0.065969
UAA     0.107597
SHLD    0.169301
XOM     0.054884
RRC     0.242590
BBY     0.268653
MA      0.051108
PFE     0.085783
JPM     0.146781
SBUX    0.139059
dtype: float64

In [141]:
B.min()

0.11905201858401432

In [142]:
A = torch.rand((512, 20, 20))*0.2

In [143]:
torch.bmm(torch.transpose(A, 1, 2), A).flatten(1).shape

torch.Size([512, 400])