In [1]:
import pandas as pd
import numpy as np
import itertools
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import scipy.stats as stats

In [3]:
import stylised_facts_data_utilities.TukeyGH as tukey

In [4]:
# Candidate ground-truth values for each Tukey parameter (loc, scale, g, h).
# NOTE(review): with num=1, np.linspace returns only its start value, so the
# loc and scale grids each hold a single point (-3.5 and -0.1) — confirm that
# a one-point grid and a negative scale are intended.
locParams = np.linspace(start=-3.5, stop=3.5, num=1)
scaleParams = np.linspace(start=-0.1, stop=3.5, num=1)
# NOTE(review): this is a plain tuple of three g values, not a linspace call —
# confirm it was not meant as a (start, stop, num) triple.
gParams = (-3.0, 3.0, 1.0)
# Three h values evenly spaced in log10: 1e-3, 1e-2, 1e-1.
hParams = 10 ** np.linspace(start=-3, stop=-1, num=3)


In [5]:
def z2gh(z, A, B, g, h, c=0.8):
    """
    Transform standard-normal value(s) ``z`` with the generalised g-and-h map.

        x = A + B * (1 + c * tanh(g * z / 2)) * z * exp(0.5 * h * z**2)

    Where ``g == 0`` the skewness factor is replaced by exactly 1, and where
    ``h == 0`` the tail factor is replaced by exactly ``z``. For finite ``z``
    this equals the formula; for infinite ``z`` it avoids the ``0 * inf = nan``
    the formula would otherwise produce.

    ``g`` and ``h`` may be scalars or arrays that broadcast against ``z``; the
    zero-limit is applied element-wise. (A plain ``if g == 0`` raises
    ``ValueError`` for arrays with more than one element.)

    :param z: standard-normal value(s), scalar or array
    :param A: location parameter(s)
    :param B: scale parameter(s)
    :param g: skewness parameter(s)
    :param h: tail parameter(s)
    :param c: weight on the tanh term, default 0.8
    :return: transformed value(s); scalar inputs give the same result as the
             scalar-only implementation did
    """
    term1 = 1 + c * np.tanh(g * z / 2.0)
    term2 = z * np.exp(0.5 * h * z ** 2)
    # Apply the exact g == 0 / h == 0 limits, element-wise when g/h are arrays.
    term1 = _where_zero(g, 1, term1)
    term2 = _where_zero(h, z, term2)
    return A + B * term1 * term2


def _where_zero(param, replacement, computed):
    """Return ``replacement`` where ``param == 0`` and ``computed`` elsewhere.

    Scalars take a plain branch so scalar calls keep their original return
    value; anything with a dimension is handled with ``np.where``.
    """
    is_zero = np.equal(param, 0)
    if np.ndim(is_zero) == 0:
        return replacement if is_zero else computed
    return np.where(is_zero, replacement, computed)

In [6]:
def qgh(p, A, B, g, h, c=0.8):
    '''
    Quantile function of the generalised g-and-h distribution.

    The probabilities are first mapped to standard-normal quantiles with
    ``stats.norm.ppf`` (the percent point function, i.e. the inverse CDF) and
    those quantiles are then passed through ``z2gh``.

    :param p: vector of probabilities
    :param A: vector of loc params
    :param B: vector of scale params; must be positive
    :param g: vector of g params
    :param h: vector of h params
    :param c: forwarded to ``z2gh``; default is 0.8
    :return: the value(s) returned by ``z2gh`` for the normal quantiles of ``p``
    '''
    normal_quantiles = stats.norm.ppf(p)
    return z2gh(normal_quantiles, A, B, g, h, c)

In [7]:
def rgh(n, A, B, g, h, c=0.8, random_state=None):
    """
    Draw random samples from the g-and-h distribution.

    Standard-normal draws from ``stats.norm.rvs`` are passed through ``z2gh``.

    :param n: number of draws (forwarded to ``stats.norm.rvs`` as ``size``)
    :param A: loc param, forwarded to ``z2gh``
    :param B: scale param, forwarded to ``z2gh``
    :param g: g param, forwarded to ``z2gh``
    :param h: h param, forwarded to ``z2gh``
    :param c: forwarded to ``z2gh``; default is 0.8
    :param random_state: optional seed or generator forwarded to
        ``stats.norm.rvs`` so a draw can be reproduced; the default ``None``
        keeps the previous unseeded behaviour
    :return: the value(s) returned by ``z2gh`` for the normal draws
    """
    normal_draws = stats.norm.rvs(size=n, random_state=random_state)
    return z2gh(normal_draws, A, B, g, h, c)

In [12]:
# Print one rgh sample per sample size as a quick smoke test.
# NOTE(review): the scale argument here is -0.5, while qgh's docstring says
# the scale must be positive — confirm the negative value is deliberate.
for i in (100, 200, 300, 500, 1000):
    sample = rgh(int(i), 0.5, -0.5, 12.5, 0.5)
    print(sample)

[ 7.12244526e-01  4.85271684e-02  5.65545548e-01  1.65816380e-01
  1.12004383e+00  5.33025659e-01  5.31627400e-01  5.10026707e-01
 -1.34469072e-02 -4.54576591e-01  5.38681825e-01  9.36754882e-01
  5.90147133e-01  7.17323610e-01  5.86216632e-01  9.30931000e-02
  9.90536427e-02  5.71260062e-01  8.85083151e-01  2.23332051e-01
 -5.57864784e-02  5.42179960e-01 -4.31378894e-01 -8.50403256e-01
  5.29778064e-01  3.37203574e-01  1.95523990e-01 -5.41026652e-01
 -1.31180798e+00 -4.97468922e-01 -9.01028722e+00  6.47604314e-01
 -2.78434164e+00  5.32027285e-01 -5.49210750e-01  1.09891756e-01
  5.08181865e-01  1.67046231e-01 -3.68543373e-01  7.46285052e-01
  5.55722375e-01  9.36789631e-01  5.44032283e-01  7.08374758e-01
 -5.66489361e-01  5.17256538e-01 -1.69478091e+00 -1.51346491e-01
 -2.72614168e-01  5.50356985e-01 -9.50228653e-01  1.28295684e+00
 -6.53561211e-01  6.34740488e-01  4.73109092e-02  5.26037004e-01
 -1.75418740e-01 -5.98728362e-01  6.05828608e-01  5.34686188e-01
  5.99362354e-01  5.30124

In [None]:
# One row per combination of the four parameter grids (Cartesian product).
param_grid = itertools.product(locParams, scaleParams, gParams, hParams)
paramList = pd.DataFrame(list(param_grid), columns=['loc', 'scale', 'g', 'h'])

In [None]:
def generateTukeyDistribution(loc, scale, g, h, nSamples):
    """
    Generate ``nSamples`` values by feeding uniform draws to ``tukey.qpk``.

    Draws ``nSamples`` values from ``np.random.random`` (uniform on [0, 1))
    and passes them to ``tukey.qpk`` together with the four parameters.
    NOTE(review): presumably ``qpk`` is the Tukey quantile function, which
    would make this inverse-transform sampling — confirm against TukeyGH.

    :param loc: forwarded to ``tukey.qpk``
    :param scale: forwarded to ``tukey.qpk``
    :param g: forwarded to ``tukey.qpk``
    :param h: forwarded to ``tukey.qpk``
    :param nSamples: number of uniform draws
    :return: whatever ``tukey.qpk`` returns for the draws
    """
    uniform_draws = np.random.random(size=(nSamples,))
    return tukey.qpk(uniform_draws, loc, scale, g, h)

In [None]:
# Parameter-recovery sweep: for every row of paramList, generate a sample,
# re-estimate the parameters from it and store true + estimated values.
results = []
for _, params in tqdm(paramList.iterrows()):
    # Convert the row to a plain dict up front; it doubles as the keyword
    # arguments (loc, scale, g, h) and as the start of the result record.
    params = params.to_dict()
    Y = generateTukeyDistribution(nSamples=5000, **params)
    estimatedParams = {
        f'{name}_estimated': value
        for name, value in tukey.estimateTukeyDistribution(Y).items()
    }
    params.update(estimatedParams)
    results.append(pd.DataFrame([params]))
results = pd.concat(results, axis=0, ignore_index=True)
# Estimation error per parameter: estimated minus true.
for param in ('g', 'h', 'loc', 'scale'):
    results[f'{param}_diff'] = results[f'{param}_estimated'] - results[param]

In [None]:
# Show `params` as left behind by the last iteration of the sweep loop
# (hidden state: this only works after that loop cell has run).
params

In [None]:
 # Draw a fresh 5000-point sample using the loc/scale/g/h entries of `params`.
 # NOTE(review): this cell is indented by one space; plain Python would reject
 # that as an unexpected indent — confirm and consider removing it.
 Y = generateTukeyDistribution(
     nSamples=5000,
     **{name: params[name] for name in ('loc', 'scale', 'g', 'h')}
 )

In [None]:
# Display the parameters estimated from the sample Y.
tukey.estimateTukeyDistribution( Y )

## Rayleigh

In [None]:
# Draw the sample from a Rayleigh distribution (scale 5), matching the section
# header and the variable name; the previous call drew from np.random.normal.
Y_rayleigh = np.random.rayleigh(5, size=(10000,))
# Estimate Tukey parameters from the sample; later cells read 'loc', 'scale',
# 'g' and 'h' from the result.
estimatedParams = tukey.estimateTukeyDistribution(Y_rayleigh)

In [None]:
# Draw 50000 values from the Tukey distribution with the estimated parameters.
fit_kwargs = {key: estimatedParams[key] for key in ('loc', 'scale', 'g', 'h')}
Y_sampled = generateTukeyDistribution(nSamples=50000, **fit_kwargs)

In [None]:
# Overlay the original sample and the sample drawn from the fitted Tukey
# distribution on one explicit Axes, so neither call relies on pyplot's
# implicit current axes.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases —
# consider histplot/kdeplot once the minimum seaborn version is confirmed.
fig, ax = plt.subplots(1, 1)
sns.distplot(Y_rayleigh, ax=ax, label='Sample')
sns.distplot(Y_sampled, ax=ax, hist=False, label='Tukey Fit')
ax.set(title='Sample vs. Tukey fit', xlabel='value', ylabel='density')
# Draw the legend explicitly so the labels appear on any seaborn version.
ax.legend();