In [1]:
# Part 1

# Set-up, data import, and check

import numpy as np
import pandas as pd
import scipy.optimize as sco

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns, plotting

# Load the mutual-fund returns CSV straight from the course GitHub repository.
mf = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/aaiken1/fin-data-analysis-python/main/data/mutual_funds.csv'
)

# Print the column summary to check dtypes and non-null counts before cleaning.
mf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 529 entries, 0 to 528
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   ticker       529 non-null    object
 1   caldt        529 non-null    int64 
 2   crsp_fundno  529 non-null    int64 
 3   mret         528 non-null    object
dtypes: int64(2), object(2)
memory usage: 16.7+ KB


VMVFX forms portfolios that try to minimize the variance of a portfolio of stocks. This is related to the **low-risk** style factor. Essentially, lower-risk portfolios do better than you would expect based on models like the CAPM. In other words, you get more return than you'd think.

In [2]:
# Part 2

# `mret` is read in as object dtype; convert it to numbers, turning any value
# that cannot be parsed into NaN. `downcast='float'` yields float32 here.
mf = mf.assign(
    mret=pd.to_numeric(mf['mret'], errors='coerce', downcast='float')
)


In [3]:
# Part 3

# Average per-period return for each fund (NaN returns are skipped by mean()).
mf['mret'].groupby(mf['ticker']).mean()

ticker
VBMFX    0.002848
VMVFX    0.007720
VTMGX    0.006345
VTSMX    0.012486
Name: mret, dtype: float32

In [4]:
# The CRSP fund number is not used below; keep ticker, caldt and mret only.
mf = mf.drop(columns='crsp_fundno')

In [5]:
# Reshape from long to wide: one row per date (caldt), one column per ticker.
# dropna() keeps only the dates on which every fund has a return.
mf = mf.pivot(index='caldt', columns='ticker', values='mret').dropna()


In [6]:
# Annualize the mean return of each fund by scaling with 12
# (presumably 12 monthly periods per year -- confirm the data frequency).
ann_rets = mf.mean().mul(12)
ann_rets

ticker
VBMFX    0.030438
VMVFX    0.092639
VTMGX    0.072681
VTSMX    0.148794
dtype: float32

In [7]:
# Annualize each fund's standard deviation: volatility scales with the square
# root of the number of periods, hence sqrt(12) rather than 12.
ann_stdev = mf.std().mul(np.sqrt(12))
ann_stdev

ticker
VBMFX    0.031601
VMVFX    0.101537
VTMGX    0.142306
VTSMX    0.142804
dtype: float32

In [8]:
# Annualized covariance matrix of fund returns (covariances scale linearly
# with the number of periods, so multiply by 12).
ann_cov = mf.cov().mul(12)
ann_cov

ticker,VBMFX,VMVFX,VTMGX,VTSMX
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
VBMFX,0.000999,0.000571,0.000195,1.1e-05
VMVFX,0.000571,0.01031,0.011837,0.01286
VTMGX,0.000195,0.011837,0.020251,0.017865
VTSMX,1.1e-05,0.01286,0.017865,0.020393


In [9]:
# Pairwise Pearson correlations between the funds. Correlation is unit-free,
# so no annualization factor is applied.
ann_cor = mf.corr(method='pearson')
ann_cor

ticker,VBMFX,VMVFX,VTMGX,VTSMX
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
VBMFX,1.0,0.177897,0.043343,0.002341
VMVFX,0.177897,1.0,0.819231,0.88688
VTMGX,0.043343,0.819231,1.0,0.879125
VTSMX,0.002341,0.88688,0.879125,1.0


In [10]:
# Part 5

# Starting guess for the optimizer: equal weight in each of the four funds.
weights = np.full(4, 0.25)

def port_ret(weights):
    """Return the portfolio's annualized return for the given weights.

    Multiplies the notebook-level ``ann_rets`` (annualized mean return per
    fund) element-wise by ``weights`` and sums the products.

    Parameters
    ----------
    weights : array-like
        One weight per fund, assumed to be in the same order as ``ann_rets``.

    Returns
    -------
    float
        Weighted sum of the annualized fund returns.
    """
    weighted_rets = ann_rets * weights
    return np.sum(weighted_rets)

def port_vol(weights):
    """Return the portfolio's annualized volatility for the given weights.

    Computes ``sqrt(w' * Sigma * w)`` where ``Sigma`` is the notebook-level
    annualized covariance matrix ``ann_cov``.

    Parameters
    ----------
    weights : numpy.ndarray
        One weight per fund, assumed to be in the same order as the rows and
        columns of ``ann_cov``.

    Returns
    -------
    float
        Square root of the portfolio variance.
    """
    cov_times_w = np.dot(ann_cov, weights)
    variance = np.dot(weights.T, cov_times_w)
    return np.sqrt(variance)

def min_func_sharpe(weights, risk_free_rate=0.0):
    """Negative Sharpe ratio of the portfolio, for use as a minimization objective.

    SciPy only minimizes, so maximizing the Sharpe ratio is expressed as
    minimizing its negative.

    Parameters
    ----------
    weights : numpy.ndarray
        One weight per fund, passed through to ``port_ret`` and ``port_vol``.
    risk_free_rate : float, optional
        Annualized risk-free rate subtracted from the portfolio return.
        Defaults to 0.0, which reproduces the plain return-to-volatility
        ratio. Pass a different value to ``sco.minimize`` via ``args=(rate,)``.

    Returns
    -------
    float
        ``-(port_ret(weights) - risk_free_rate) / port_vol(weights)``.
    """
    excess_ret = port_ret(weights) - risk_free_rate
    return -excess_ret / port_vol(weights)

# Fully-invested constraint: the weights must sum to one.
cons = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}

# Long-only with a 50% cap per fund. One (min, max) pair per asset, sized from
# the starting weights so the two cannot drift out of sync.
bnds = tuple((0, 0.5) for _ in range(len(weights)))

# Maximize the Sharpe ratio by minimizing its negative with SLSQP, which
# supports both the bounds and the equality constraint.
opts = sco.minimize(min_func_sharpe, weights,
                    method='SLSQP', bounds=bnds,
                    constraints=cons)

opts


     fun: -1.2255838704333357
     jac: array([-0.29969248,  0.29983251,  1.10460117,  0.2996778 ])
 message: 'Optimization terminated successfully'
    nfev: 30
     nit: 6
    njev: 6
  status: 0
 success: True
       x: array([0.5       , 0.04741914, 0.        , 0.45258086])

In [11]:
# Annualized return of the optimal (max-Sharpe) portfolio found by SciPy.
port_ret(opts.x)

0.08695346399395425

In [12]:
# Annualized volatility of the optimal (max-Sharpe) portfolio found by SciPy.
port_vol(opts.x)

0.07094860343030598

In [13]:
# Part 6

# Same problem solved with PyPortfolioOpt: identical inputs and the same
# 0-50% per-fund weight bounds as the SciPy optimization.
ef = EfficientFrontier(
    expected_returns=ann_rets,
    cov_matrix=ann_cov,
    weight_bounds=(0, 0.5),
)

# A risk-free rate of 0 matches the SciPy objective, which uses the raw
# return-to-volatility ratio.
raw_weights = ef.max_sharpe(risk_free_rate=0)
raw_weights


OrderedDict([('VBMFX', 0.5),
             ('VMVFX', 0.0471988505521005),
             ('VTMGX', 0.0),
             ('VTSMX', 0.4528011494478995)])