### Using the [PyPortfolioOpt library](https://github.com/robertmartin8/PyPortfolioOpt) library to generate an optimzied portfolio from the S&P 500 list of companies
Reference: [Build A Killer Stock Portfolio Using Python](https://www.youtube.com/watch?v=bvDkel5whUY&t=2s&ab_channel=ComputerScience)

<b><font color="red">Click <a href="https://colab.research.google.com/github/ebharucha/Portfolio-Optimization/blob/master/PortfolioOpt.ipynb" target="#">here</a> to open/run the notebook in Google Colab</font></b>

@ebharucha 12/31/2020

### Install & import dependencies

In [216]:
!pip install PyPortfolioOpt
!pip install pulp

In [20]:
import pandas as pd
import pandas_datareader as web
import datetime
import pickle
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
from pypfopt import risk_models, expected_returns
import warnings
warnings.filterwarnings("ignore")

### Get & initialize data

#### <font color="Red">Specify value of overall portfolio in USD</font>

In [4]:
portfolio_val = 50000

#### Get S&P 500 companies

In [25]:
DATADIR = './data'

table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
df_SP500_ = table[0]
df_SP500_.Symbol = df_SP500_.Symbol.replace('\.','-', regex=True)
df_SP500_.to_csv(f'{DATADIR}/S&P500-Info.csv')
df_SP500_.to_csv(f'{DATADIR}/S&P500-Symbols.csv', columns=['Symbol'])

#### Load daily closing prices for S&P 500 components over past 10 years in a DataFrame

<i><font color="Red">Only run this once per day to get updates.
    If already run earlier in the day, then load data from pickle file</font></i>

In [153]:
symbols = df_SP500_.Symbol
df_SP500 = pd.DataFrame(columns=symbols)

current_year = datetime.datetime.now().year
today = str(datetime.date.today())

f = lambda sym: web.get_data_yahoo(sym,
                            start = (f'{current_year-10}-01-01'),
                            end = today)['Adj Close']

for sym in symbols:
    df_SP500[sym] = f(sym)

#### Pickle the data

In [55]:
DATADIR = './data'

# with open (f'{DATADIR}/SP500.pkl', 'wb') as pklfile:
#     pickle.dump(df_SP500, pklfile)

with open(f'{DATADIR}/SP500.pkl', 'rb') as pklfile:
    df_SP500 = pickle.load(pklfile)

#### <font color="red">Specify how many years of past data to use  default is 10</font>

In [60]:
# Specify how many yers of past data to use 
no_of_years = 5  # default value is 10, which is the furthest back you can go
current_year = datetime.datetime.now().year
start_date = f'{current_year-no_of_years-1}-12-31'

df_SP500 = df_SP500.loc[start_date:]

#### Get latest prices

In [62]:
latest_prices = get_latest_prices(df_SP500)

### Portfolio optimization

#### Expected annualized returns & annualized covariance matrix of the daily asset returns

In [63]:
mu = expected_returns.mean_historical_return(df_SP500)
S = risk_models.sample_cov(df_SP500)

#### Optimzie for the maximal Sharpe ratio

In [64]:
ef = EfficientFrontier(mu, S)
weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()
weights = cleaned_weights
# print (cleaned_weights)
ef.portfolio_performance(verbose=True)

Expected annual return: 139.9%
Annual volatility: 27.4%
Sharpe Ratio: 5.04


(1.3988281706206545, 0.27377081505157264, 5.036432281362469)

#### Get stock allocations

In [65]:
da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=portfolio_val)
allocation, leftover = da.lp_portfolio()
print (f'Discrete share allocations: {allocation}')
print (f'Funds remaining: ${leftover:.2f}')

Discrete share allocations: {'AMD': 19, 'AMZN': 1, 'CARR': 399, 'CLX': 14, 'DPZ': 11, 'LUMN': 1, 'NFLX': 2, 'NEM': 58, 'NVDA': 3, 'POOL': 14, 'TTWO': 5, 'VNT': 321}
Funds remaining: $0.25


In [66]:
# Get company name & sector
name = lambda sym: df_SP500_[df_SP500_.Symbol == sym].Security.values[0]
sector_ = lambda sym: df_SP500_[df_SP500_.Symbol == sym]['GICS Sector'].values[0]

name('NFLX'), sector_('NFLX')

('Netflix Inc.', 'Communication Services')

#### Construct portfolio DataFrame

In [27]:
port_df = pd.DataFrame(columns=['Symbol', 'Company Name', "Sector", "No. of Shares", "Allocation($)"])

In [67]:
symbols = allocation.keys()
company_name = []
sector = []
no_of_shares = []
allocation_dollar = []
total_allocation = 0

for sym in symbols:
    company_name.append(name(sym))
    sector.append(sector_(sym))
    no_of_shares.append(allocation.get(sym))
    allocation_dollar.append(f'{allocation.get(sym) * latest_prices[sym]:,.2f}')
    total_allocation = total_allocation + allocation.get(sym) * latest_prices[sym]

In [68]:
port_df['Symbol'] = symbols
port_df['Company Name'] = company_name
port_df['Sector'] = sector
port_df['No. of Shares'] = no_of_shares
port_df['Allocation($)'] = allocation_dollar

ValueError: Length of values (12) does not match length of index (11)

### <font color="red">Display portfolio allocations & expected performance</dont>

In [30]:
port_df

Unnamed: 0,Symbol,Company Name,Sector,No. of Shares,Allocation($)
0,AMD,Advanced Micro Devices Inc,Information Technology,20,1845.8
1,AMZN,Amazon.com Inc.,Consumer Discretionary,1,3285.85
2,CARR,Carrier Global,Industrials,399,15054.27
3,CLX,The Clorox Company,Consumer Staples,14,2814.56
4,DPZ,Domino's Pizza,Consumer Discretionary,11,4230.82
5,NFLX,Netflix Inc.,Communication Services,2,1049.18
6,NEM,Newmont Corporation,Materials,56,3381.84
7,NVDA,Nvidia Corporation,Information Technology,3,1577.49
8,POOL,Pool Corporation,Consumer Discretionary,14,5141.92
9,TTWO,Take-Two Interactive,Communication Services,5,1031.75


In [31]:
print (f'Total allocated amount = ${total_allocation:,.2f}')
print (f'Amount remaining: ${leftover:,.2f}')
ef.portfolio_performance(verbose=True)

Total allocated amount = $49,961.54
Amount remaining: $38.46
Expected annual return: 139.9%
Annual volatility: 27.4%
Sharpe Ratio: 5.04


(1.3986258505291957, 0.2737419087413329, 5.036225022570078)