This program uses annual returns and annual risks to create or build a portfolio

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import math

In [2]:
#Load data
df_sp500 = pd.read_csv(r"Data\sp500_raw_data.csv", index_col = 0)

In [3]:
df_sp500

Unnamed: 0,MMM,AOS,ABT,ABBV,ACN,ADBE,AMD,AES,AFL,A,...,WY,WMB,WTW,WYNN,XEL,XYL,YUM,ZBRA,ZBH,ZTS
2019-08-30,110.107895,42.483967,78.250870,52.705837,184.090759,284.510010,31.450001,13.118478,44.396896,68.663673,...,21.432566,17.169605,184.098465,106.604218,54.882168,72.184723,106.459702,205.029999,129.832581,121.904976
2019-09-03,108.010849,41.643795,77.095268,52.433250,181.842712,282.450012,30.900000,13.127034,44.573845,67.987762,...,21.505873,17.133224,181.950317,101.939384,56.001675,70.328506,106.395866,198.979996,130.009766,122.502861
2019-09-04,108.984505,42.283066,77.242012,53.218948,183.663452,284.600006,30.950001,13.084249,44.936600,68.422272,...,21.685089,17.242353,182.750092,105.413811,56.249519,70.959831,107.471581,200.600006,128.722656,123.033195
2019-09-05,111.816849,43.488544,78.782837,53.740070,185.316971,287.750000,31.500000,13.212610,45.237396,70.990776,...,21.940887,17.511545,183.838104,107.746231,55.676937,73.258865,107.717735,205.850006,130.149658,123.496078
2019-09-06,111.619385,43.662064,78.911224,54.213097,186.831177,284.940002,30.559999,13.101366,45.626701,71.724625,...,22.039907,17.547915,183.893921,107.852699,55.377831,73.861900,108.674957,202.729996,130.336227,123.843185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-23,129.850006,82.540001,112.690002,197.550003,333.269989,558.299988,154.979996,18.080000,107.339996,140.869995,...,31.154293,44.812897,280.614716,77.370003,59.604336,136.557083,134.847168,351.619995,114.794853,180.899994
2024-08-26,131.850006,82.360001,112.699997,197.440002,336.779999,559.440002,149.990005,17.900000,107.510002,140.490005,...,30.876129,45.010788,284.542877,77.239998,60.932842,135.888855,134.279999,347.690002,114.375778,181.559998
2024-08-27,131.399994,81.760002,113.099998,195.919998,340.380005,567.820007,150.500000,17.370001,107.709999,141.100006,...,30.399279,44.901947,287.174988,77.040001,60.367733,135.061066,134.500000,346.040009,114.415695,182.100006
2024-08-28,131.610001,81.839996,112.900002,195.399994,337.390015,560.539978,146.360001,17.200001,108.830002,141.059998,...,30.439014,44.456688,288.600677,75.300003,60.159534,135.460007,135.649994,343.829987,113.936760,182.910004


In [4]:
#calculate the daily return column rise, over each day using log return()
# log return account for compounding and can be used statistically
df_sp500_log_returns = np.log(df_sp500 / df_sp500.shift(1))
print(df_sp500_log_returns)

                 MMM       AOS       ABT      ABBV       ACN      ADBE  \
2019-08-30       NaN       NaN       NaN       NaN       NaN       NaN   
2019-09-03 -0.019229 -0.019974 -0.014878 -0.005185 -0.012287 -0.007267   
2019-09-04  0.008974  0.015234  0.001902  0.014874  0.009963  0.007583   
2019-09-05  0.025657  0.028111  0.019752  0.009744  0.008963  0.011007   
2019-09-06 -0.001768  0.003982  0.001628  0.008764  0.008138 -0.009813   
...              ...       ...       ...       ...       ...       ...   
2024-08-23  0.015049  0.017230  0.005249  0.005991  0.008134  0.001542   
2024-08-26  0.015285 -0.002183  0.000089 -0.000557  0.010477  0.002040   
2024-08-27 -0.003419 -0.007312  0.003543 -0.007728  0.010633  0.014868   
2024-08-28  0.001597  0.000978 -0.001770 -0.002658 -0.008823 -0.012904   
2024-08-29  0.009829  0.003172 -0.001241 -0.001127  0.008323  0.016086   

                 AMD       AES       AFL         A  ...        WY       WMB  \
2019-08-30       NaN       NaN  

In [5]:
#using the apply function to get the expected return
#this is based off a function
def exp_log(x):
    return np.exp(x)-1

df_sp500_returns = df_sp500_log_returns.mean().apply(exp_log).to_frame()

df_sp500_returns.columns=['Return']
df_sp500_returns.index.name='Tickers'

df_sp500_returns

Unnamed: 0_level_0,Return
Tickers,Unnamed: 1_level_1
MMM,0.000150
AOS,0.000524
ABT,0.000291
ABBV,0.001042
ACN,0.000489
...,...
XYL,0.000503
YUM,0.000190
ZBRA,0.000408
ZBH,-0.000101


In [6]:
#daily risk
df_sp500_risk = np.std(df_sp500_log_returns, axis = 0).to_frame()

In [7]:
#cleaning data frame
df_sp500_risk.columns= ['Risk']
df_sp500_risk.index.name = 'Tickers'
df_sp500_risk

Unnamed: 0_level_0,Risk
Tickers,Unnamed: 1_level_1
MMM,0.018460
AOS,0.018989
ABT,0.016467
ABBV,0.015228
ACN,0.018202
...,...
XYL,0.019678
YUM,0.016233
ZBRA,0.025612
ZBH,0.019533


In [8]:
#we join the two data frame together, note that we have to join on the indexes of each since that is the tickers
df_sp500_temp = pd.merge(df_sp500_returns, df_sp500_risk, left_index = True, right_index= True)

In [9]:
#getting the covariance matrix

df_sp500_covariance_matrix = df_sp500_log_returns.cov()

In [10]:

df_sp500_temp.to_csv(r'Data\df_sp500_price.csv')

In [11]:

df_sp500_covariance_matrix.to_csv(r'Data\df_sp500_cov.csv')