# Gaussian mixtures
In this notebook we fit Gaussian mixture models to each of the 25 stocks, first with 2 components and then with 3 components.

## Two-component models

In [34]:
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture


stocks = np.load('data/plr_stocks.npy')

# First series as a single column: the (n_samples, 1) layout passed to fit() and aic() below.
first_stock = stocks[0].reshape(-1, 1)

# random_state pins the random restarts so a re-run gives the same printed numbers
# (same seed as the batch fits further down in this notebook).
model1 = GaussianMixture(n_components=2, tol=1e-5, n_init=10, max_iter=500, random_state=42)
model1.fit(first_stock)

weights = model1.weights_                # shape (K,)
means = model1.means_.ravel()            # one mean per component after flattening
variances = model1.covariances_.ravel()  # one variance per component after flattening (single feature)

print("weights:", weights)
print("means:", means)
print("variances:", variances)
print('aic : ', model1.aic(first_stock), "\n")

# Second fit on the same series, started from hand-picked weights and both means at 0,
# with the default n_init / max_iter. Presumably a check that the solution above does not
# depend on the starting point -- compare the two printed parameter sets.
model_test_init = GaussianMixture(n_components=2, tol=1e-5, weights_init=[0.49, 0.51],
                                  means_init=[[0], [0]], random_state=42)
model_test_init.fit(first_stock)
print(model_test_init.weights_)
print(model_test_init.means_)
print(model_test_init.covariances_.ravel())


weights: [0.9362728 0.0637272]
means: [ 0.08383665 -0.67984417]
variances: [ 4.34503436 82.86802793]
aic :  37524.17321622367 

[0.93617087 0.06382913]
[[ 0.08385764]
 [-0.67893258]]
[ 4.34346467 82.76611587]


In [21]:
# Number of EM restarts per stock; used for both the banner and the model so they cannot drift apart.
N_INIT = 75

paramsfit2c = []   # one dict of fitted parameters per stock
resultsfit2c = []  # one dict of fit-quality metrics per stock

print('Fitting 2 component gaussian mixtures to the 25 stocks...')
print(f'Number of initializations : {N_INIT} \n')

# Stocks are numbered from 1 in the saved tables.
for stock_id, stock in enumerate(stocks, start=1):
    # Build the (n_samples, 1) column once and reuse it for fit / score / aic / bic.
    X = stock.reshape(-1, 1)

    gmm2c = GaussianMixture(n_components=2, tol=1e-4, n_init=N_INIT, max_iter=750, random_state=42)
    gmm2c.fit(X)

    weights = gmm2c.weights_
    means = gmm2c.means_
    variances = gmm2c.covariances_.ravel()  # flattened so variances[k] belongs to component k

    # Components are stored in the order the fit returns them; nothing here sorts them,
    # so "component 1" is not guaranteed to be the same regime from one stock to the next.
    paramsfit2c.append({
        'stock': stock_id,
        'pi1': weights[0], 'pi2': weights[1],
        'mu1': means[0, 0], 'mu2': means[1, 0],
        'sigma1': np.sqrt(variances[0]), 'sigma2': np.sqrt(variances[1]),
    })

    # score() is the mean per-sample log-likelihood; scale by the sample count for the total.
    loglikelihood = gmm2c.score(X) * len(X)
    aic = gmm2c.aic(X)
    bic = gmm2c.bic(X)

    # NOTE(review): 'log-likelihoog' is misspelled but becomes the CSV column header;
    # kept unchanged so anything reading the saved file keeps working.
    resultsfit2c.append({'stock': stock_id, 'log-likelihoog': loglikelihood,
                         'aic': aic, 'bic': bic})

# saving results
results = pd.DataFrame(resultsfit2c)
fitted_params = pd.DataFrame(paramsfit2c)

print(results.head())
print(fitted_params.head())

results.to_csv('results/gmm2c_results.csv', index=False)
fitted_params.to_csv('results/gmm2c_fitted_params.csv', index=False)


Fitting 2 component gaussian mixtures to the 25 stocks...
Number of initializations : 75 

   stock  log-likelihoog           aic           bic
0      1   -18759.265524  37528.531048  37563.467032
1      2   -16709.323647  33428.647295  33463.583279
2      3   -16269.033218  32548.066435  32583.002419
3      4   -16480.238682  32970.477364  33005.413348
4      5   -14569.136671  29148.273342  29183.209326
   stock       pi1       pi2       mu1       mu2    sigma1    sigma2
0      1  0.925668  0.074332  0.085707 -0.594187  2.046978  8.581059
1      2  0.271344  0.728656 -0.122873  0.072099  3.790484  1.200544
2      3  0.862367  0.137633  0.060680 -0.267321  1.387063  4.515586
3      4  0.695888  0.304112  0.064194 -0.048433  1.266133  3.144241
4      5  0.117285  0.882715 -0.222450  0.056158  3.573392  1.188633


## Three-component models

In [22]:
# Number of EM restarts per stock; used for both the banner and the model so they cannot drift apart.
N_INIT = 75

paramsfit3c = []   # one dict of fitted parameters per stock
resultsfit3c = []  # one dict of fit-quality metrics per stock

print('Fitting 3 component gaussian mixtures to the 25 stocks...')
print(f'Number of initializations : {N_INIT} \n')

# Stocks are numbered from 1 in the saved tables.
for stock_id, stock in enumerate(stocks, start=1):
    # Build the (n_samples, 1) column once and reuse it for fit / score / aic / bic.
    X = stock.reshape(-1, 1)

    gmm3c = GaussianMixture(n_components=3, tol=1e-4, n_init=N_INIT, max_iter=750, random_state=42)
    gmm3c.fit(X)

    weights = gmm3c.weights_
    means = gmm3c.means_
    variances = gmm3c.covariances_.ravel()  # flattened so variances[k] belongs to component k

    # Components are stored in the order the fit returns them; nothing here sorts them,
    # so "component 1" is not guaranteed to be the same regime from one stock to the next.
    paramsfit3c.append({
        'stock': stock_id,
        'pi1': weights[0], 'pi2': weights[1], 'pi3': weights[2],
        'mu1': means[0, 0], 'mu2': means[1, 0], 'mu3': means[2, 0],
        'sigma1': np.sqrt(variances[0]), 'sigma2': np.sqrt(variances[1]),
        'sigma3': np.sqrt(variances[2]),
    })

    # score() is the mean per-sample log-likelihood; scale by the sample count for the total.
    loglikelihood = gmm3c.score(X) * len(X)
    aic = gmm3c.aic(X)
    bic = gmm3c.bic(X)

    # NOTE(review): 'log-likelihoog' is misspelled but becomes the CSV column header;
    # kept unchanged so anything reading the saved file keeps working.
    resultsfit3c.append({'stock': stock_id, 'log-likelihoog': loglikelihood,
                         'aic': aic, 'bic': bic})

# saving results
results = pd.DataFrame(resultsfit3c)
fitted_params = pd.DataFrame(paramsfit3c)

print(results.head())
print(fitted_params.head())

results.to_csv('results/gmm3c_results.csv', index=False)
fitted_params.to_csv('results/gmm3c_fitted_params.csv', index=False)


Fitting 3 component gaussian mixtures to the 25 stocks...
Number of initializations : 75 

   stock  log-likelihoog           aic           bic
0      1   -18533.050015  37082.100030  37137.997604
1      2   -16589.846504  33195.693007  33251.590582
2      3   -16265.287350  32546.574699  32602.472274
3      4   -16452.275015  32920.550030  32976.447605
4      5   -14450.007474  28916.014949  28971.912523
   stock       pi1       pi2       pi3        mu1       mu2       mu3  \
0      1  0.002343  0.754347  0.243310 -11.469241  0.090938 -0.026957   
1      2  0.570063  0.042996  0.386941   0.084211 -0.410870 -0.028803   
2      3  0.681236  0.196271  0.122493  -0.160798  0.998106 -0.578168   
3      4  0.212284  0.653632  0.134084   0.594432 -0.018518 -0.627528   
4      5  0.421146  0.566943  0.011912  -0.040093  0.086229 -0.715284   

      sigma1    sigma2    sigma3  
0  32.847965  1.699680  4.234796  
1   1.033160  6.535363  2.541931  
2   1.275181  1.539361  4.652653  
3   2.200748