## Technology Stocks in the S&P 500 Portfolio using Markowitz's modern portfolio theory

The Historical dataset

#### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
from scipy import optimize
import warnings
warnings.filterwarnings('ignore')

**The historical stock datasets were downloaded as CSV files; the cell below reads them into a single table**

**A much faster approach is an API call, one of which is the use of a library such as yfinance to get the stock data from Yahoo finance**

In [None]:
# Get a list of all CSV files in the data directory
csv_files = glob.glob('data/*.csv')
if not csv_files:
    # Fail early with a clear message instead of an IndexError further down
    raise FileNotFoundError("No CSV files matched 'data/*.csv'")

# Collect one single-column frame of closing prices per file and concatenate
# once at the end (growing a DataFrame inside the loop is quadratic).
close_frames = []

for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    # NOTE: this parsing assumes Windows-style '\\' separators and a file name
    # without extra dots; it is kept as-is so existing column names do not change.
    file_name = str(csv_file.split('.')[0].split('\\')[-1])
    # Build a new frame rather than assigning into a slice of df, which
    # triggers SettingWithCopyWarning (silenced by the warnings filter above).
    updated_df = (
        df[['Date', 'Close']]
        .assign(Date=lambda frame: pd.to_datetime(arg=frame['Date']))
        .set_index('Date')
        .rename(columns={'Close': file_name})
    )
    close_frames.append(updated_df)

# Outer-join on Date; sort so that "the last two rows" really are the two latest dates
combined_df = pd.concat(close_frames, axis=1).sort_index()

# While downloading the dataset, the ending date considered for the stocks was the 30th of August,
# but some stocks had end dates beyond that. Dropping the two latest dates makes the end dates uniform.
combined_df = combined_df.drop(combined_df.index[-2:])

<br>
<br>

**Viewing the dataset**

In [None]:
# Display the combined closing-price table (one column per CSV file, indexed by Date)
combined_df

<br>
<br>
<br>

**The historical stock data considered spans more than ten years of daily data - from 01-01-2012 to 30-08-2023**

**The start date for stock was 03-01-2012 as the first two days of 2012 were public holidays.**

**Some Technology companies on the S&P 500 do not have their stock data extending back to 2012, and they were therefore dropped from the dataset used, leaving us with 52 portfolios**

In [None]:
# Keep only the tickers that have a price for every date in the table;
# any column containing at least one missing value is removed.
combined_df = combined_df.dropna(axis='columns', how='any')

In [None]:
# Show the prices newest-first (display only; combined_df itself is not modified)
combined_df.sort_index(ascending=False)

<br>
<br>

**Viewing the info of the dataset to ensure that the available data is consistent**

In [None]:
# Print the index range, column dtypes and non-null counts of the price table
combined_df.info()

## Include at least one plot, particularly, a line chart

# Daily Returns for Individual Assets

In [None]:
# Daily fractional change of each retained ticker's closing price.
# combined_df already contains exactly the tickers that survived the dropna step,
# so the returns are computed directly from it instead of re-reading every CSV
# file only to recover the column names.
daily_returns = combined_df.pct_change()

In [None]:
# Display the daily returns table
daily_returns

In [None]:
# Express the daily returns in percent and order them newest-first.
# NOTE: this rebinds daily_returns, so running the cell again scales by 100 a second time.
daily_returns = daily_returns.sort_index(ascending=False) * 100
daily_returns

In [None]:
# Display the daily returns table (now in percent, newest date first)
daily_returns

# Monthly returns for individual asset

In [None]:
# Month-end fractional change of each retained ticker's closing price.
# Prices are resampled to calendar month ends (forward-filling the latest
# available close) and then differenced month over month. combined_df already
# holds exactly the retained tickers, so no CSV file needs to be re-read.
monthly_returns = combined_df.resample('M').ffill().pct_change()


In [None]:
# Express the monthly returns in percent and order them newest-first.
# NOTE: this rebinds monthly_returns, so running the cell again scales by 100 a second time.
monthly_returns = monthly_returns.sort_index(ascending=False) * 100
monthly_returns

## Averages and Variance

In [None]:
# Mean return per ticker, as one-column frames
daily_avg = daily_returns.mean().to_frame('Average_Daily_Return')
monthly_avg = monthly_returns.mean().to_frame('Average_Monthly_Return')

# Sample variance of the returns per ticker, as one-column frames
daily_variance = daily_returns.var().to_frame('Daily_Return_Variance')
monthly_variance = monthly_returns.var().to_frame('Monthly_Return_Variance')

In [None]:
# Side-by-side summary: one row per ticker, one column per statistic
averages = pd.concat([daily_avg, monthly_avg, daily_variance, monthly_variance], axis=1)

# Annualise the monthly figures by scaling with the 12 months in a year
averages['Average_Annual_Return'] = monthly_avg['Average_Monthly_Return'] * 12
averages['Annual_Variance'] = monthly_variance['Monthly_Return_Variance'] * 12

In [None]:
# Display the per-ticker summary of average returns and variances
averages

## Returns

In [None]:
# Annualised return per ticker: mean daily return scaled by 252 trading days
returns = (daily_returns.mean() * 252).to_frame('Returns')

returns

In [None]:
# Use the annualised monthly averages as the return vector.
# Keep it as a one-column DataFrame named 'Returns': the cells below read
# expected_return['Returns'] and expected_return.values[0][0], which fail
# when `returns` is a plain Series.
returns = averages[['Average_Annual_Return']].rename(
    columns={'Average_Annual_Return': 'Returns'}
)

returns

## Variance-Covariance Matrix

In [None]:
# Variance-covariance matrix of the monthly returns (tickers x tickers)

covariance = monthly_returns.cov()
covariance

In [None]:
# Variance-covariance matrix of the daily returns (tickers x tickers)

covariance2 = daily_returns.cov()
covariance2

## Equally-Weighted Portfolio

The sum of the individual weights is expected to be equal to 1

In [None]:
def equal_weights(df):
    """Build an equally-weighted allocation over the columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are the assets to weight.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df``, indexed by 'Ticker', with a single
        'Weights' column in which every entry equals 1 / (number of columns).

    Raises
    ------
    ZeroDivisionError
        If ``df`` has no columns.
    """
    tickers = list(df.columns)
    share = 1 / len(tickers)
    allocation = pd.DataFrame(
        {'Ticker': tickers, 'Weights': [share] * len(tickers)}
    )
    return allocation.set_index('Ticker')

In [None]:
# Equal weight for every ticker column that remains in combined_df
weight_df = equal_weights(combined_df)
weight_df

### Expected Returns

Expected return is the product of Weights and returns

In [None]:
# Portfolio expected return: weights (as a 1 x n row) times the per-ticker returns
weights_row = weight_df.T
expected_return = weights_row.dot(returns)

# Pull the single number out of the 1 x 1 result
expected_return.values[0][0]

### Expected Return in percentage

In [None]:
# The returns are already expressed in percent (they were multiplied by 100
# upstream), so only append the '%' sign. The '{:.2%}' format would multiply by
# 100 a second time. Use .iloc for the positional lookup because the index
# label is 'Weights', not 0.
expected_return_perc = expected_return['Returns'].map('{:.2f}%'.format).iloc[0]

print(expected_return_perc)

## Standard Deviation

In [None]:
# Portfolio standard deviation: sqrt(w' * Cov * w), using the monthly covariance matrix
portfolio_variance = weight_df.T.dot(covariance).dot(weight_df)
std_dev = np.sqrt(portfolio_variance)

std_dev['Weights'].values[0]

### Sharpe Ratio

**Using a Risk Free Rate of `3.80%`**

In [None]:
# Annual risk-free rate in percent. The returns above are percentages
# (multiplied by 100), so the rate must be 3.8, not the fraction 0.038.
risk_free_rate = 3.8

# expected_return is annual (monthly mean x 12) while std_dev comes from the
# monthly covariance matrix; scale by sqrt(12) so both share the same horizon.
annual_std_dev = std_dev['Weights'].values[0] * np.sqrt(12)

sharpe_ratio = (expected_return['Returns'].values[0] - risk_free_rate) / annual_std_dev

sharpe_ratio

### Optimal Risky Portfolio Weight

In [None]:
def equal_weights(df):
    """Return an equal allocation across the columns of ``df``.

    NOTE: this repeats the ``equal_weights`` definition given earlier in the
    notebook; the later definition shadows the earlier one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are the assets to weight.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Ticker' with one 'Weights' column; each weight is
        1 / (number of columns of ``df``).
    """
    n_assets = len(list(df.columns))
    share = 1 / n_assets
    rows = [[ticker, share] for ticker in df.columns]
    return pd.DataFrame(rows, columns=['Ticker', 'Weights']).set_index('Ticker')

In [None]:
# Starting allocation for the "optimal" section: equal weights over the retained tickers
Optimal_weight_df = equal_weights(combined_df)

Optimal_weight_df

### Optimal Expected Returns

In [None]:
# Expected return of the portfolio described by Optimal_weight_df
# (the original used weight_df here, ignoring the weights defined for this section).
optimal_expected_return = Optimal_weight_df.transpose().dot(returns)

optimal_expected_return.values[0][0]

#### Optimal Expected Return in percentage

In [None]:
# The returns are already in percent, so only append the '%' sign; '{:.2%}'
# would multiply by 100 a second time. Use .iloc for the positional lookup
# because the index label is 'Weights', not 0.
optimal_expected_return_perc = optimal_expected_return['Returns'].map('{:.2f}%'.format).iloc[0]

print(optimal_expected_return_perc)

#### Standard Deviation

In [None]:
# Standard deviation sqrt(w' * Cov * w) of the portfolio described by Optimal_weight_df
# (the original used weight_df here, ignoring the weights defined for this section).
optimal_std_dev = (
    Optimal_weight_df.transpose()
    .dot(covariance)
    .dot(Optimal_weight_df)
    .apply(np.sqrt, axis=1)
)

optimal_std_dev['Weights'].values[0]

In [None]:
# The covariance matrix is built from returns in percent, so the standard
# deviation is already a percentage; '{:.2%}' would multiply by 100 again.
# Use .iloc for the positional lookup because the index label is 'Weights', not 0.
optimal_std_dev_perc = optimal_std_dev['Weights'].map('{:.2f}%'.format).iloc[0]

print(optimal_std_dev_perc)

### Sharpe Ratio

**Using a Risk Free Rate of `3.80%`**

In [None]:
# Annual risk-free rate in percent (August 2023 US average, approx 3.8%)
Rf = 3.8
annRiskFreeRate = Rf / 100

# Convert to the equivalent daily rate, in percent, by compounding over 360 days
daily_growth_factor = np.power(1 + annRiskFreeRate, 1.0 / 360.0)
risk_free_rate = (daily_growth_factor - 1.0) * 100
print(f'\nRisk free rate (daily %): {risk_free_rate:.3f}')

In [None]:
# Annual risk-free rate in percent. The returns are percentages (multiplied by
# 100 upstream), so the rate must be 3.8, not the fraction 0.038.
risk_free_rate = 3.8

# Use the figures computed for this section (optimal_*), and bring the monthly
# standard deviation to the annual horizon of the expected return with sqrt(12).
optimal_annual_std_dev = optimal_std_dev['Weights'].values[0] * np.sqrt(12)

sharpe_ratio = (optimal_expected_return['Returns'].values[0] - risk_free_rate) / optimal_annual_std_dev

sharpe_ratio

In [None]:
# Display the risk-free rate currently in use
risk_free_rate

## Sharpe Ratio based Portfolio Optimization

### The Principle of duality

Since the SciPy optimization library only minimizes, the principle of duality from optimization theory is used to turn the maximization into a minimization: the negative of the Sharpe ratio is minimized.

In [None]:
# Function to undertake Sharpe Ratio maximization subject to basic constraints of the portfolio

def MaximizeSharpeRatioOptmzn(AverageReturns, VarianceCovariancce, RiskFreeRate, PortfolioSize):
    """Find long-only, fully invested weights that maximise the Sharpe ratio.

    The Sharpe ratio ``(w . mu - rf) / sqrt(w' Sigma w)`` is maximised by
    minimising its negative with SciPy's SLSQP solver.

    Parameters
    ----------
    AverageReturns : array-like
        Mean return per asset. A 1-D array, a pandas Series or a single-column
        DataFrame are all accepted; the values are flattened to length n.
    VarianceCovariancce : array-like
        n x n variance-covariance matrix of the asset returns, in the same
        asset order as ``AverageReturns``.
    RiskFreeRate : float
        Risk-free rate, in the same units and horizon as ``AverageReturns``.
    PortfolioSize : int
        Number of assets n.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Solver result; ``.x`` holds the weights and ``.fun`` the negative of
        the Sharpe ratio at those weights.

    Raises
    ------
    ValueError
        If ``PortfolioSize`` is not positive or the inputs do not have
        ``PortfolioSize`` entries / ``PortfolioSize x PortfolioSize`` shape.
    """
    if PortfolioSize < 1:
        raise ValueError("PortfolioSize must be a positive integer")

    # Convert once to plain arrays so Series, DataFrames and ndarrays behave alike
    # and the objective always returns a scalar.
    mean_returns = np.asarray(AverageReturns, dtype=float).ravel()
    cov_matrix = np.asarray(VarianceCovariancce, dtype=float)
    if mean_returns.size != PortfolioSize or cov_matrix.shape != (PortfolioSize, PortfolioSize):
        raise ValueError(
            "AverageReturns must have PortfolioSize entries and "
            "VarianceCovariancce must be PortfolioSize x PortfolioSize"
        )

    # define maximization of Sharpe Ratio using principle of duality
    def ObjectiveFunction(weights):
        Expected_Return = weights.dot(mean_returns)
        Standard_Deviation = np.sqrt(weights.dot(cov_matrix).dot(weights))
        # negative sign: minimising -Sharpe is maximising Sharpe
        return -(Expected_Return - RiskFreeRate) / Standard_Deviation

    # define equality constraint representing fully invested portfolio
    def constraintEq(weights):
        return np.sum(weights) - 1

    # Start from the equally weighted portfolio, which satisfies the
    # sum-to-one constraint for any number of assets (a fixed 0.33 per asset
    # only does so for roughly three assets).
    xinit = np.full(PortfolioSize, 1.0 / PortfolioSize)
    cons = ({'type': 'eq', 'fun': constraintEq})
    # each weight is bounded to [0, 1]: no short selling, no leverage
    bnds = tuple((0, 1) for _ in range(PortfolioSize))

    #invoke minimize solver
    OptimizedSharpeRatio = optimize.minimize(ObjectiveFunction, x0=xinit, method='SLSQP',
                                             bounds=bnds, constraints=cons, tol=10**-3)

    return OptimizedSharpeRatio

In [None]:
# Number of assets = number of entries in the returns vector
portfolioSize = len(returns)

# Maximise the Sharpe ratio using the returns vector, the monthly covariance
# matrix and whatever value risk_free_rate currently holds
sharpeRatio = MaximizeSharpeRatioOptmzn(returns, covariance, risk_free_rate, portfolioSize)

In [None]:
# Display the raw result object returned by the solver
sharpeRatio

In [None]:
# Show the fields of the solver result as a table
pd.DataFrame(sharpeRatio)

In [None]:
# Obtain the maximal Sharpe Ratio portfolio from the daily return statistics

#set risk free asset rate of return
Rf=3.8  # August 2023 average risk  free rate of return in USA approx 3.8%
annRiskFreeRate = Rf/100

#compute daily risk free rate in percentage
risk_free_rate = (np.power((1 + annRiskFreeRate),  (1.0 / 360.0)) - 1.0) * 100
print('\nRisk free rate (daily %): ', end="")
print ("{0:.3f}".format(risk_free_rate))

# Daily inputs, all in percent, so that returns, covariance and the risk-free
# rate share the same horizon. The original mixed annual returns with the
# daily covariance and referenced undefined meanReturns / covReturns.
meanReturns = daily_returns.mean().to_numpy()
covReturns = covariance2.to_numpy()
portfolioSize = len(meanReturns)

#compute maximal Sharpe Ratio and optimal weights
result = MaximizeSharpeRatioOptmzn(meanReturns, covReturns, risk_free_rate, portfolioSize)
if not result.success:
    print('Warning: optimizer did not converge -', result.message)

# The solver stores the weights in `.x` (there is no `.weights` attribute);
# keep them as a 1 x n row so the matrix products below are 2-D.
xOptimal = [result.x]
xOptimalArray = np.array(xOptimal)

#compute risk returns and max Sharpe Ratio of the optimal portfolio
Risk = np.matmul((np.matmul(xOptimalArray,covReturns)), np.transpose(xOptimalArray))
expReturn = np.matmul(np.array(meanReturns),xOptimalArray.T)
annRisk =   np.sqrt(Risk*251)      # annualise daily variance over 251 trading days
annRet = 251*np.array(expReturn)   # annualise daily mean return over 251 trading days
maxSharpeRatio = (annRet-Rf)/annRisk

#set precision for printing results
np.set_printoptions(precision=3, suppress = True)


#display results
print('Maximal Sharpe Ratio: ', maxSharpeRatio, '\nAnnualized Risk (%):  ',
      annRisk, '\nAnnualized Expected Portfolio Return(%):  ', annRet)
print('\nOptimal weights (%):\n',  xOptimalArray.T*100 )