# Financial Engineering - Instrument Analysis

###### Andrew Cachia, Dec 2018

In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt  
from pandas_datareader import data as pdr
import fix_yahoo_finance as yf
from scipy.optimize import minimize
from scipy.special import ndtri

### Data
Retrieving daily closing price data for the following stocks:
- S&P500 Index [SPY]
- FTSE 100 Index [UKX]
- Gold SPDY [GLD]
- Google [GOOG]
- Amazon [AMZN]

In [2]:
ftse = pd.read_csv('Assignment/ftse-100.csv', parse_dates=True,index_col=0, date_parser=lambda x: pd.to_datetime(x).strftime("%d/%m/%Y"))
ftse = ftse['FTSE 100']
ftse.head()

Date
2017-12-29    7687.77
2017-12-28    7622.88
2017-12-27    7620.68
2017-12-22    7592.66
2017-12-21    7603.98
Name: FTSE 100, dtype: float64

In [3]:
date_start = "2014-01-01"
date_end = "2017-12-31"

symbols = ['^GSPC','GLD','GOOG','AMZN']

data = pd.DataFrame()
yf.pdr_override() 
data = pdr.get_data_yahoo(symbols, start=date_start, end=date_end)
data.head()

[*********************100%***********************]  4 of 4 downloaded


Unnamed: 0_level_0,Open,Open,Open,Open,High,High,High,High,Low,Low,...,Close,Close,Adj Close,Adj Close,Adj Close,Adj Close,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AMZN,GLD,GOOG,^GSPC,AMZN,GLD,GOOG,^GSPC,AMZN,GLD,...,GOOG,^GSPC,AMZN,GLD,GOOG,^GSPC,AMZN,GLD,GOOG,^GSPC
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2013-12-31,394.579987,114.610001,552.526367,1842.609985,398.829987,117.129997,556.878052,1849.439941,393.799988,114.459999,...,556.734009,1848.359985,398.790009,116.120003,556.734009,1848.359985,1996500,12835900,2733400,2312840000
2014-01-02,398.799988,117.93,554.125916,1845.859985,399.359985,118.730003,555.26355,1845.859985,394.019989,117.75,...,552.963501,1831.97998,397.970001,118.0,552.963501,1831.97998,2137800,7551000,3666400,3080600000
2014-01-03,398.290009,118.639999,553.897461,1833.209961,402.709991,119.620003,554.856201,1838.23999,396.220001,118.589996,...,548.929749,1831.369995,396.440002,119.290001,548.929749,1831.369995,2210200,5874400,3355000,2774270000
2014-01-06,395.850006,119.760002,552.908875,1832.310059,397.0,120.389999,555.814941,1837.160034,388.420013,117.110001,...,555.049927,1826.77002,393.630005,119.5,555.049927,1826.77002,3170600,10106500,3561600,3294850000
2014-01-07,395.040009,118.459999,558.865112,1828.709961,398.470001,118.919998,566.162659,1840.099976,394.290009,118.129997,...,565.750366,1837.880005,398.029999,118.82,565.750366,1837.880005,1916000,6433700,5138400,3511750000


In [4]:
data = data['Adj Close']
data['FTSE'] = ftse
data.head()

Unnamed: 0_level_0,AMZN,GLD,GOOG,^GSPC,FTSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-12-31,398.790009,116.120003,556.734009,1848.359985,
2014-01-02,397.970001,118.0,552.963501,1831.97998,6717.91
2014-01-03,396.440002,119.290001,548.929749,1831.369995,6730.67
2014-01-06,393.630005,119.5,555.049927,1826.77002,6730.73
2014-01-07,398.029999,118.82,565.750366,1837.880005,6755.45


## Calculating logarithmic returns
We use logarithmic returns as this will allow us to assume that prices are distributed log normally. This is convenient since much of classic statistics assumes log normality. 

In [5]:
returns = data / data.shift(1)
returns = np.log(returns)
returns = returns.dropna()
returns.head()

Unnamed: 0_level_0,AMZN,GLD,GOOG,^GSPC,FTSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-03,-0.003852,0.010873,-0.007322,-0.000333,0.001898
2014-01-06,-0.007113,0.001759,0.011088,-0.002515,9e-06
2014-01-07,0.011116,-0.005707,0.019095,0.006063,0.003666
2014-01-08,0.009726,-0.005909,0.002079,-0.000212,-0.004997
2014-01-09,-0.002267,0.002874,-0.009677,0.000348,-0.004539


##### Generic Functions

In [6]:
def mean(list):
    return list.sum() / len(list)

def generic_function(X,Y,exp):
    results = {}
    for i in X:
        sum = 0
        for j in X[i]:
            sum += (j - Y[i])**exp
        results[i] = sum / len(X[i])
    
    return results

def variance(list,mean_list):
    return generic_function(list, mean_list, 2)

##### Standard Deviation
Standard Deviation is used to describe the hisotrical volatility of an instrument. 
The greater the standard deviation, the larger the price range and fluctuations.
\begin{equation*}
\sqrt{\frac{\sum(X - \overline{X}) ^2} {N}}
\end{equation*}

In [7]:
def std_dev(list,mean_list):
    var = variance(list,mean_list)
    for i in var:
        var[i] = np.sqrt(var[i])
    return var

##### Skewness
Skewness is used to measure the differnce in the length of the tails, i.e. how symmetrical or not a distribution is.
Negative skewness means there is a substantial probability of a large negative return. 
Positive skewness means that there is a greater-than-normal probability of a large positive return.
Fisher-Pearson coefficient of skewness:
\begin{equation*}
\frac
{\sum\frac{(X - \overline{X})^3}{N} } 
{s^3}
\end{equation*}

In [8]:
def skewness(list,mean_list):
    results = {}
    num = generic_function(list, mean_list, 3)
    list_std_dev = std_dev(list,mean_list)
    for i in list_std_dev:
        list_std_dev[i] = list_std_dev[i]**3
        results[i] = num[i] / list_std_dev[i]
    return results

##### Kurtosis
Kurtosis is used to measure the extreme values within either tail of the distribution.
It is used in finance to determine the likelihood of the investor experiencing extreme positive or negative returns.
\begin{equation*}
\frac
{\sum\frac{(X - \overline{X})^4}{N} } 
{s^4}
\end{equation*}

In [9]:
def kurtosis(list,mean_list):
    results = {}
    num = generic_function(list, mean_list, 4)
    var = variance(list,mean_list)
    for i in var:
        var[i] = var[i]**2
        results[i] = num[i] / var[i]
    return results

##### Annual Volatility

In [10]:
def annualVolatility(volatility_list):
    results = {}
    for i in volatility_list:
        results[i] = (volatility_list[i] * np.sqrt(250))# * 100
        
    return results

### Calculating Distribution Moments

In [11]:
avg_return = mean(returns)
volatility = std_dev(returns, avg_return)
dkurtosis = kurtosis(returns, avg_return)
dskewness = skewness(returns, avg_return)

#annual_return = (avg_return * 250) * 100
annual_return = (((1 + avg_return)**250 - 1)) # * 100)
annual_volatility = annualVolatility(volatility)

I = pd.Index(["Average Return", "Volatility", "Kurtosis", "Skewness", "Annual Return", "Annual Volatility"])
data = [avg_return.to_dict(), volatility, dkurtosis, dskewness, annual_return.to_dict(), annual_volatility]
pd.DataFrame(data, index=I)

Unnamed: 0,AMZN,FTSE,GLD,GOOG,^GSPC
Average Return,0.000944,0.000164,-1e-05,0.000686,0.000397
Volatility,0.018579,0.008867,0.008811,0.013846,0.00762
Kurtosis,15.184563,5.645759,5.330956,18.595021,6.054978
Skewness,0.414527,-0.11316,0.218417,1.307668,-0.365058
Annual Return,0.265922,0.041971,-0.002429,0.18704,0.104328
Annual Volatility,0.293759,0.140198,0.139309,0.218928,0.120475


From the results shown above, it seems that the FTSE was not only the worst perfoming index, but also that with the highest risk. This is probably due to a number of economic factors, including Brexit, which created a lot of uncertainty and instability within the market.

The S&P500 index was the highest perfoming in terms of average return, and also managed to retain stable with the lowest volatility.

An interesting factor is that of the Kurtosis. The incredibly high kurtosis displayed by the FTSE index indicates the likelihood of extremes being present in the market. Whilst here the kurtosis is positive, meaning that the investor has a good chance of experiencing high positive extremes, it also further indicates the amount of instability within this market.

The S&P 500 had a solid 8% annual return, solidfying it's status as the industry benchmark. It also kept a low and stable volatility, as did Gold, whereas the FTSE was way too high.



##### Covariance
Covariance is a measure of the directional relationship between the returns on two risky assets. A positive covariance means that asset returns move together while a negative covariance means returns move inversely.
\begin{equation*}
\frac{1}{N}\sum_{i=1}^N(X - \overline{X})(Y - \overline{Y})
\end{equation*}

In [12]:
def covariance(X,Y):
    meanX = mean(X)
    meanY = mean(Y)

    sum = 0

    for i in range(0, len(X)):
        sum += ((X[i] - meanX) * (Y[i] - meanY))

    return sum/(len(X)-1)

##### Beta
Beta is used to determine the sensitivity of a stock's volatility in relation to the market.

\begin{equation*}
\frac{Cov(r_p, r_b)}{Var(r_b)}
\end{equation*}

In [13]:
def beta(X,Y):
    meanX = mean(X)
    covarianceXY = covariance(X,Y)
    varianceY = sum([(i-meanX)**2 for i in Y]) / len(Y)
    
    return covarianceXY / varianceY

In [14]:
beta_google = beta(returns['GOOG'],returns['^GSPC'])
beta_amazon = beta(returns['AMZN'],returns['^GSPC'])
print(beta_google)
print(beta_amazon)

1.094521461356741
1.192641501288301
