In [4]:
# SSMIF Quant Coding Challenge
# Author: Cavin Gada

""" IMPORTS """
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import datetime
from scipy import stats
import matplotlib.pyplot as plt


In [5]:
class Portfolio:
    """Adjusted-close price history for a set of tickers plus return/risk analytics.

    To make this class usable for a broader purpose, the constructor accepts the date
    range, the tickers and the risk-free rate. The defaults are the specifics the
    assignment asks for.
    """

    def __init__(self, start = datetime.datetime(2021, 1, 1), end = datetime.datetime(2021, 12, 31), tickers = None, risk_free_rate = .0412):
        """Download the 'Adj Close' prices for `tickers` between `start` and `end`.

        Parameters
        ----------
        start, end : datetime.datetime
            Bounds of the requested price history.
        tickers : list of str, optional
            Symbols to download. Defaults to ['KO', 'TSLA', 'SPY'].
        risk_free_rate : float, optional
            Annual risk-free rate, stored as `treasury_yield_10_year`.
            NOTE(review): the default of 4.12% is described as the 10-year treasury
            yield, but an earlier comment in this code cited 1.59% -- confirm the
            value intended for the analysed period.
        """
        self.start = start
        self.end = end
        # A None sentinel replaces the former mutable list default, which would be
        # shared between every instance created without an explicit `tickers`.
        self.tickers = ['KO', 'TSLA', 'SPY'] if tickers is None else tickers
        self.treasury_yield_10_year = risk_free_rate # we'll use this as the risk free rate
        # Rows where any ticker is missing a price are dropped so all columns stay aligned.
        # NOTE(review): confirm the 'yahoo' source works with the installed pandas_datareader.
        self.df = (web.DataReader(self.tickers, 'yahoo', start = self.start, end = self.end)['Adj Close']).dropna()

    def getDailyReturns(self, ticker):
        """Return the simple day-over-day returns of `ticker` (first row dropped)."""
        prices = self.df[ticker]
        # shift(1) lines each price up with the previous row's price; the first row has
        # no predecessor, so its NaN is removed.
        return (prices / prices.shift(1) - 1).dropna()

    @staticmethod
    def getDailyVolatility(returns, window = 2):
        """Return the rolling population standard deviation (ddof=0) of `returns`.

        This is a running volatility over `window` consecutive observations, NOT the
        total volatility of the series. The first `window - 1` entries are NaN.
        """
        return returns.rolling(window=window).std(ddof=0)

    def statistics(self, a = 0.05):
        """Compare KO and TSLA daily returns and rolling volatilities.

        Parameters
        ----------
        a : float
            Significance level. Currently unused by the computation; kept so existing
            callers keep working.

        Returns
        -------
        (returns_p_value, volatility_p_value)
            p-value of a two-sided independent-samples t-test on the daily returns,
            and p-value of a two-sided F-test on the variances of the rolling
            volatility series.
        """
        ko_returns = self.getDailyReturns('KO')
        tsla_returns = self.getDailyReturns('TSLA')

        # comparing means of daily returns using two-sided T-test
        _, returns_p_value = stats.ttest_ind(ko_returns, tsla_returns)

        # The rolling window leaves leading NaNs; drop them so they are not counted
        # in the degrees of freedom below.
        ko_volatilities = self.getDailyVolatility(ko_returns).dropna()
        tsla_volatilities = self.getDailyVolatility(tsla_returns).dropna()

        # NOTE(review): the F-test is applied to the variance of the rolling-volatility
        # series (as in the original code), not to the variance of the returns
        # themselves -- confirm this is the intended comparison.
        f_statistic = tsla_volatilities.var(ddof=1) / ko_volatilities.var(ddof=1)
        df_numerator = tsla_volatilities.size - 1
        df_denominator = ko_volatilities.size - 1

        # Two-tailed p-value: double the smaller tail. Doubling only the upper tail is
        # valid solely when the numerator variance is the larger one and otherwise
        # yields "p-values" above 1.
        lower_tail = stats.f.cdf(f_statistic, df_numerator, df_denominator)
        upper_tail = stats.f.sf(f_statistic, df_numerator, df_denominator)
        volatility_p_value = float(min(1.0, 2 * min(lower_tail, upper_tail)))

        return returns_p_value, volatility_p_value

    def valueAtRisk(self, meanDailyReturn, stdDailyReturn, tolerance = 0.95):
        """Return the parametric (normal) value at risk for the given confidence level.

        This is the inverse CDF of Normal(meanDailyReturn, stdDailyReturn) evaluated at
        `1 - tolerance`, i.e. the return below which a fraction `1 - tolerance` of the
        fitted distribution lies.
        """
        return stats.norm.ppf(1 - tolerance, meanDailyReturn, stdDailyReturn)

    def annualizedSharpeRatio(self, meanDailyReturn, stdDailyReturn):
        """Return the annualized Sharpe ratio from daily mean and standard deviation.

        The annual risk-free rate is divided by 252 to obtain a daily rate, the daily
        excess return is divided by the daily standard deviation, and the ratio is
        scaled by sqrt(252) (volatility is assumed to scale with the square root of time).
        reference: https://medium.datadriveninvestor.com/the-sharpe-ratio-with-python-from-scratch-fbb1d5e490b9
        """
        dailyRiskFreeRate = self.treasury_yield_10_year / 252
        return (meanDailyReturn - dailyRiskFreeRate) / stdDailyReturn * np.sqrt(252)

    def downsideDeviation(self, returns):
        """Return the downside deviation of `returns` relative to the daily risk-free rate.

        Only returns below the minimum acceptable rate contribute to the sum of squares,
        but the sum is divided by the TOTAL number of returns.
        guide: https://www.investopedia.com/terms/d/downside-deviation.asp
        """
        # The annual rate is spread evenly over 252 trading days.
        minimumAcceptableRate = self.treasury_yield_10_year / 252
        excess = np.asarray(returns, dtype=float) - minimumAcceptableRate
        # Keep only the shortfalls; everything else (including NaN) contributes zero.
        shortfall = np.where(excess < 0, excess, 0.0)
        return np.sqrt(np.sum(np.square(shortfall)) / excess.size)

    def maximumDrawdown(self, prices, window=252):
        """Return the worst drawdown of `prices` as a non-positive fraction.

        Each price is compared with the highest price seen in the trailing `window`
        observations; the most negative of those relative losses is returned.
        reference: https://medium.com/cloudcraftz/measuring-maximum-drawdown-and-its-python-implementation-99a3963e158f
        """
        rollingPeak = prices.rolling(window, min_periods=1).max()
        dailyDrawdown = prices / rollingPeak - 1.0 # loss at each point relative to its rolling peak
        # Take the minimum over the whole series: the last rolling-min value only covers
        # the final `window` observations, which misses earlier drawdowns on longer series.
        return dailyDrawdown.min()

    def metrics(self, ticker = "KO", tolerance = .95):
        """Return the risk metrics for `ticker`.

        Returns
        -------
        (averageDailyVolatility, valueAtRisk, annualizedSharpeRatio, downsideDeviation, maxDrawdown)
        """
        dailyReturns = self.getDailyReturns(ticker)
        meanDailyReturn = dailyReturns.mean()
        stdDailyReturn = dailyReturns.std(ddof=0) # population standard deviation

        averageDailyVolatility = self.getDailyVolatility(dailyReturns).mean()
        # Keyword arguments: the previous positional call passed `tolerance` first, so
        # it was used as the mean and the VaR came out wrong.
        valueAtRisk = self.valueAtRisk(meanDailyReturn=meanDailyReturn, stdDailyReturn=stdDailyReturn, tolerance=tolerance)
        annualizedSharpeRatio = self.annualizedSharpeRatio(meanDailyReturn, stdDailyReturn)
        downsideDeviation = self.downsideDeviation(dailyReturns)
        maxDrawdown = self.maximumDrawdown(self.df[ticker])

        return averageDailyVolatility, valueAtRisk, annualizedSharpeRatio, downsideDeviation, maxDrawdown

    def capm(self, ticker = "TSLA", market = "SPY"):
        """Return (beta, alpha) of `ticker` against `market`.

        Beta and alpha are the slope and intercept of a degree-1 least-squares fit of
        the ticker's daily returns on the market's daily returns (raw returns; the
        risk-free rate is not subtracted).
        resource used to find polyfit: https://www.mlq.ai/capital-asset-pricing-model-python/
        """
        beta, alpha = np.polyfit(self.getDailyReturns(market), self.getDailyReturns(ticker), 1)
        return beta, alpha



In [6]:
# Driver cell: build the default portfolio and report every statistic with an
# interpretation derived from the computed value (nothing below is hard-coded to one
# particular data download).
SIGNIFICANCE_LEVEL = 0.05  # threshold used to interpret both hypothesis tests


def describe_test_result(p_value, subject, alpha=SIGNIFICANCE_LEVEL):
    """Return the conclusion sentence of a two-sided test, chosen from the actual p-value."""
    if p_value < alpha:
        return ("result: because the p-value is less than the significance level of %g, we reject the null hypothesis, "
                "meaning that there is a significant difference between the %s of the two equities" % (alpha, subject))
    return ("result: because the p-value is greater than the significance level of %g, we fail to reject the null hypothesis, "
            "meaning that there is no significant difference between the %s of the two equities" % (alpha, subject))


p1 = Portfolio()

print("Part 1: Statistics()")

returns_p_value, volatility_p_value = p1.statistics(a=SIGNIFICANCE_LEVEL)
volatility_p_value_scientific="{:e}".format(volatility_p_value)

print("\n")

print("Is there a statistically significant difference in the mean daily returns?")
print("To test for a significant difference in mean daily returns, I used a Student's t-test. To perform this test, I am assuming that the observations in each sample are: independent and identically distributed, randomly sampled, normally distributed, and have the same variance.")
print("p-value: %f" % (returns_p_value))
print(describe_test_result(returns_p_value, "mean daily returns"))

print("\n")

print("Is there a statistically significant difference in their volatilities?")
print("To test for a significant difference in volatilities, I used an F-test. To perform this test, I am assuming that the observations in each sample are: independent and identically distributed, approximately normally distributed, and have the same population variance. I'm also comparing standard deviations and the data may be skewed.")
print("p-value: %s" % (volatility_p_value_scientific))
print(describe_test_result(volatility_p_value, "volatilities"))

print("\n")

print("metrics()")
averageDailyVolatility, valueAtRisk, annualizedSharpeRatio, downsideDeviation, maxDrawdown = p1.metrics()

# Pick the interpretation that matches the computed values.
if annualizedSharpeRatio >= 1:
    sharpe_note = "A ratio of 1 or more indicates that the return is compensating for the risk of holding the equity."
else:
    sharpe_note = "A ratio below 1 indicates that the return is not fully compensating for the risk of holding the equity."

print("Volatility: %f (this value represents the average amount by which the returns may swing from the mean on a daily basis, it measures risk)" % (averageDailyVolatility))
print("95 Percent Value at Risk (VaR): %f (this value represents the max amount expected (with 95 percent confidence) to be lost over the next day)" % (valueAtRisk))
print("Sharpe Ratio: %f (this value measures the performance of an investment compared to a risk-free asset, after adjusting for its risk. %s)" % (annualizedSharpeRatio, sharpe_note))
print("Downside Deviation: %f (this value measures how risky an investment may be if the upside deviation is 'safer' or less risky than it seems. It only looks at the downside risk)" % (downsideDeviation))
print("Maximum Drawdown: %f (this value measures the most by which a stock dropped between two chronological points on the timeline, in this case the largest drop in value was %.1f percent)" % (maxDrawdown, abs(maxDrawdown) * 100))

print("\n")

print("capm()")
beta, alpha= p1.capm()

if beta > 1:
    beta_note = "A beta above 1 indicates that the equity moves more than the market, i.e. it is more volatile and risky"
else:
    beta_note = "A beta at or below 1 indicates that the equity moves no more than the market"

print("Beta: %f (this value refers to relative volatility of the investment. %s)" % (beta, beta_note))
print("Alpha: %f (this value refers to the amount returned in comparison to the market (SPY). A value near 0 means the return was on par with the benchmark, a positive value means it beat the benchmark and a negative value means it lagged it)" % (alpha))

Part 1: Statistics()


Is there a statistically significant difference in the mean daily returns?
To test for a significant difference in mean daily returns, I used a Student's t-test. To perform this test, I am assuming that the observations in each sample are: independent and identically distributed, randomly sampled, normally distributed, and have the same variance.
p-value: 0.525039
result: because the p-value is greater than the significance level of 0.05, we fail to reject the null hypothesis, meaning that there is no significant difference between the mean daily returns of the two equities


Is there a statistically significant difference in their volatilities?
To test for a significant difference in volatilities, I used an F-test. To perform this test, I am assuming that the observations in each sample are: independent and identically distributed, approximately normally distributed, and have the same population variance. I'm also comparing standard deviations and the data may b