In [None]:
# Deviations From Normality

In [9]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import edhec_risk_kit as erk

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
hfi = erk.get_hfi_returns()

In [5]:
hfi.head()

Unnamed: 0_level_0,Convertible Arbitrage,CTA Global,Distressed Securities,Emerging Markets,Equity Market Neutral,Event Driven,Fixed Income Arbitrage,Global Macro,Long/Short Equity,Merger Arbitrage,Relative Value,Short Selling,Funds Of Funds
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1997-01,0.0119,0.0393,0.0178,0.0791,0.0189,0.0213,0.0191,0.0573,0.0281,0.015,0.018,-0.0166,0.0317
1997-02,0.0123,0.0298,0.0122,0.0525,0.0101,0.0084,0.0122,0.0175,-0.0006,0.0034,0.0118,0.0426,0.0106
1997-03,0.0078,-0.0021,-0.0012,-0.012,0.0016,-0.0023,0.0109,-0.0119,-0.0084,0.006,0.001,0.0778,-0.0077
1997-04,0.0086,-0.017,0.003,0.0119,0.0119,-0.0005,0.013,0.0172,0.0084,-0.0001,0.0122,-0.0129,0.0009
1997-05,0.0156,-0.0015,0.0233,0.0315,0.0189,0.0346,0.0118,0.0108,0.0394,0.0197,0.0173,-0.0737,0.0275


## We are going to develop the code to measure the skewness and kurtosis...
There already is skewness and kurtosis calculation code in <code>scipy.stats</code> Python module (we will use this later)

Let's revisit **skewness**:
* A negative skew of the returns means that you get more negative returns than you would otherwise expect
* How do you define "otherwise expect"? By the **mean**
* If you have a return series where the **median is less than the mean** (the median is the thing that tends to happen a lot, the mean is the expected value), you have **negatively skewed returns**

In [42]:
# Let's add three columns with the mean, media, and a boolean column for each Hedge Fund index to take a look
pd.concat([hfi.mean(), hfi.median(), hfi.mean()>hfi.median()], axis="columns")

Unnamed: 0,0,1,2
Convertible Arbitrage,0.005508,0.0065,False
CTA Global,0.004074,0.0014,True
Distressed Securities,0.006946,0.0089,False
Emerging Markets,0.006253,0.0096,False
Equity Market Neutral,0.004498,0.0051,False
Event Driven,0.006344,0.0084,False
Fixed Income Arbitrage,0.004365,0.0055,False
Global Macro,0.005403,0.0038,True
Long/Short Equity,0.006331,0.0079,False
Merger Arbitrage,0.005356,0.006,False


In virtually every single case, we see that the mean is greater than the median. That means that all of these have significant **negative skewness**. 

Now how do we measure it? 
In this equation, E(R) is "Expected Value of R" - which is the mean!

$$ S(R) = \frac{E[ (R-E(R))^3 ]}{\sigma_R^3} $$

What does the code for this calculation look like? (Coded in the toolkit)

In [10]:
# Now we want to see the most skewed, so let's calculate skewness with our coded equation, and then sort
erk.skewness(hfi).sort_values()

Fixed Income Arbitrage   -3.940320
Convertible Arbitrage    -2.639592
Equity Market Neutral    -2.124435
Relative Value           -1.815470
Event Driven             -1.409154
Merger Arbitrage         -1.320083
Distressed Securities    -1.300842
Emerging Markets         -1.167067
Long/Short Equity        -0.390227
Funds Of Funds           -0.361783
CTA Global                0.173699
Short Selling             0.767975
Global Macro              0.982922
dtype: float64

In [11]:
# Let's see if we get the same result when we use Python's built-in skewness function
import scipy.stats
scipy.stats.skew(hfi)

array([-2.63959223,  0.17369864, -1.30084204, -1.16706749, -2.12443538,
       -1.40915356, -3.94032029,  0.98292188, -0.39022677, -1.32008333,
       -1.81546975,  0.76797484, -0.36178308])

In [12]:
# Let's compare this to the unsorted skewness 
erk.skewness(hfi)

Convertible Arbitrage    -2.639592
CTA Global                0.173699
Distressed Securities    -1.300842
Emerging Markets         -1.167067
Equity Market Neutral    -2.124435
Event Driven             -1.409154
Fixed Income Arbitrage   -3.940320
Global Macro              0.982922
Long/Short Equity        -0.390227
Merger Arbitrage         -1.320083
Relative Value           -1.815470
Short Selling             0.767975
Funds Of Funds           -0.361783
dtype: float64

In [19]:
# Let's do another check. Let's generate a set of random normal numbers and check the skewness of that (should be zero)
import numpy as np

normal_rets = np.random.normal(0, 0.15, size=(263, 1))  # average of 0, standard deviation of 15%, and exactly same # of returns as we had before (size=263x1 array)

In [20]:
erk.skewness(normal_rets)

-0.14115356057846346

## Kurtosis

$$ K(R) = \frac{E[ (R-E(R))^4 ]}{\sigma_R^4} $$

If you notice, this is the same equation as skewness, just with values raised to the 4th power as opposed to the third!

In [22]:
# For normal returns, we expect a kurtosis of about 3
erk.kurtosis(normal_rets)

2.918858346942901

In [23]:
# Let's calculate the kurtosis of our Hedge Fund Index
erk.kurtosis(hfi)

Convertible Arbitrage     23.280834
CTA Global                 2.952960
Distressed Securities      7.889983
Emerging Markets           9.250788
Equity Market Neutral     17.218555
Event Driven               8.035828
Fixed Income Arbitrage    29.842199
Global Macro               5.741679
Long/Short Equity          4.523893
Merger Arbitrage           8.738950
Relative Value            12.121208
Short Selling              6.117772
Funds Of Funds             7.070153
dtype: float64

In [24]:
# Now let's compare with Python's built-in kurtosis function for our normal returns
scipy.stats.kurtosis(normal_rets)

array([-0.08114165])

This built-in equation gives us the EXCESS kurtosis over the EXPECTED kurtosis of 3, so we'd expect this value to be close to 0 for normal returns 

In [25]:
# The Jarque-Bera test helps us answer the question of whether our returns are considered "normal" or not
scipy.stats.jarque_bera(normal_rets)

(0.9454990104127243, 0.6232861815678508)

Second number returned is the degree of confidence that these numbers are a normal distribution

In [26]:
# This doesn't exactly work, what we want to see is which of our indices are normally distributed and which are not
# This call treats the whole DF as one big set of data
scipy.stats.jarque_bera(hfi)

(25656.585999171326, 0.0)

Let's use our <code>is_normal</code> function from our toolkit

In [27]:
erk.is_normal(normal_rets)

True

In [29]:
erk.is_normal(hfi)

False

In [33]:
# We want to apply this function to just the columns
# This applies the function given (erk.is_normal) on every column within the hfi dataset
hfi.aggregate(erk.is_normal)

Convertible Arbitrage     False
CTA Global                 True
Distressed Securities     False
Emerging Markets          False
Equity Market Neutral     False
Event Driven              False
Fixed Income Arbitrage    False
Global Macro              False
Long/Short Equity         False
Merger Arbitrage          False
Relative Value            False
Short Selling             False
Funds Of Funds            False
dtype: bool

### Let's look at the other FFME data 

In [34]:
ffme = erk.get_ffme_returns()

In [37]:
erk.skewness(ffme)

SmallCap    4.410739
LargeCap    0.233445
dtype: float64

In [38]:
erk.kurtosis(ffme)

SmallCap    46.845008
LargeCap    10.694654
dtype: float64

In [39]:
erk.is_normal(ffme)

False

In [41]:
ffme.aggregate(erk.is_normal)

SmallCap    False
LargeCap    False
dtype: bool