# Gaussian fit
In this notebook we fit Gaussian distributions to the stock returns.


In [5]:
import numpy as np
import pandas as pd
from scipy.stats import norm

# Load the return matrix; the CSV carries no header row.
raw_returns = pd.read_csv('data/DJIA30stockreturns.csv', header=None)

# Transpose, then map each value x to 100 * log(1 + x / 100) to obtain
# percentage log returns (presumably x is a percentage simple return --
# confirm against the data source).
log_returns = np.log1p(raw_returns.T / 100.0) * 100

print(log_returns.head())

# Downstream cells work on the plain ndarray held in `stocks`.
stocks = log_returns.to_numpy()
print(stocks.shape)

# Persist the transformed matrix for reuse.
np.save('data/plr_stocks.npy', stocks)

       0         1         2         3         4         5         6     \
0  0.666672  0.345470  0.317881  0.657899 -0.983274 -4.529309 -4.349182   
1  1.049665 -1.060800 -0.446366 -0.729933 -2.238992 -0.751887  1.481536   
2  2.381179 -0.398407 -1.405993  1.181759 -1.195891  0.386350 -1.420456   
3  1.042036  0.418411 -0.623109 -0.644072 -0.648247  0.221277  0.000000   
4 -1.647133 -1.311122 -1.512459  0.943834 -0.754724  0.000000  0.180805   

       7         8         9     ...      7990      7991      7992      7993  \
0  0.000000 -0.729933  1.806291  ... -2.182906  0.341631  1.659315  0.667443   
1 -2.272922 -0.763366 -1.142705  ... -2.086468  1.653778  2.519822  3.386670   
2 -2.083483 -1.044142  0.818449  ... -1.842586 -0.474799  3.904587  1.863519   
3 -1.731688 -0.434784 -0.349041  ... -4.681493 -0.516226  0.789138  2.680620   
4 -3.300205  0.572581  1.897792  ... -2.089200  0.073801  2.857827  2.418786   

       7994      7995      7996      7997      7998      7999  
0  0

## Fitting the Gaussians

In [6]:
k = 2  # every Gaussian fit has k = 2 free parameters (mu, sigma)

# Fit one Gaussian per stock. Iterating over the 2-D `stocks` array yields
# its rows, and each row is treated as one stock's return series.
results = []
for i, stock in enumerate(stocks, start=1):
    # Location / scale estimates returned by scipy's fit routine.
    mu_fit, sigma_fit = norm.fit(stock)

    # Log-likelihood of the sample under the fitted distribution.
    log_likelihood = norm.logpdf(stock, loc=mu_fit, scale=sigma_fit).sum()

    # Information criteria (lower is better); BIC penalises the parameter
    # count by log(sample size) instead of the constant 2 used by AIC.
    aic = 2 * k - 2 * log_likelihood
    bic = k * np.log(len(stock)) - 2 * log_likelihood

    results.append({'stock': i, 'mu': mu_fit, 'sigma': sigma_fit,
                    'log-likelihood': log_likelihood,
                    'AIC': aic, 'BIC': bic})

results = pd.DataFrame(results)
results.to_csv('results/gaussian.csv', index=False)

# Keep the preview as the cell's last expression so the notebook actually
# renders it; placed before to_csv it was silently discarded.
results.head()

# Value at Risk estimation

In [7]:
alpha = 0.01  # tail probability at which the Value at Risk is evaluated

# Model-implied VaR: negated alpha-quantile of each fitted Gaussian,
# evaluated for all stocks at once (loc / scale broadcast element-wise).
estimated_var = -norm.ppf(alpha,
                          loc=results['mu'].to_numpy(),
                          scale=results['sigma'].to_numpy())

# Empirical VaR: negated alpha-quantile of each row of `stocks`.
# np.percentile expects the level in percent, hence alpha * 100.
# Row j of `stocks` must correspond to row j of `results`.
empirical_var = -np.percentile(stocks, alpha * 100, axis=1)

results_var = pd.DataFrame({
    'stock': range(1, results.shape[0] + 1),  # 1-based stock index
    'estimated-var': estimated_var,
    'empirical-var': empirical_var,
})
results_var.to_csv('results/ValueAtRiskResults/gaussian_var.csv', index=False)

# Last expression of the cell, so the preview is displayed.
results_var.head()
    

Unnamed: 0,stock,estimated-var,empirical-var
0,1,7.09116,7.46091
1,2,5.159916,6.033781
2,3,4.90749,5.821528
3,4,4.69477,5.680409
4,5,3.836294,4.167985
