In [1]:
import numpy as np
import pandas as pd

import scipy
from scipy import stats

In [5]:
# Meaning of the ``inform`` status code reported by the MVNDST integrator.
# It is only used to build the diagnostic printed by `mvstdnormcdf`.
_MVNDST_INFORM = {
    0: 'normal completion with ERROR < EPS',
    1: ('completion with ERROR > EPS and MAXPTS function values used; '
        'increase MAXPTS to decrease ERROR;'),
    2: 'N > 500 or N < 1',
}


def _find_mvndst():
    '''Return SciPy's private ``mvndst`` routine, or None if it cannot be found.'''
    import importlib

    # ``mvndst`` is not public SciPy API, so its home has moved between
    # releases; try the known locations instead of hard-coding a single one.
    for modname in ('scipy.stats._mvn', 'scipy.stats.mvn'):
        try:
            routine = getattr(importlib.import_module(modname), 'mvndst')
        except (ImportError, AttributeError):
            continue
        return routine
    return None


def mvstdnormcdf(lower, upper, corrcoef, **kwds):
    '''standardized multivariate normal cumulative distribution function

    Computes the probability that a standardized multivariate normal vector
    falls inside the rectangle ``lower <= x <= upper``.

    The integration is delegated to SciPy's ``mvndst`` routine when it can be
    located. If this SciPy installation does not ship it, the public
    ``scipy.stats.multivariate_normal.cdf`` (with ``lower_limit``) is used
    instead; in that case no integration diagnostics are available.

    This function assumes standardized scale, that is the variance in each
    dimension is one, but correlation can be arbitrary, so that
    covariance = correlation matrix.

    Parameters
    ----------
    lower, upper : array_like, 1d
       lower and upper integration limits with length equal to the number
       of dimensions of the multivariate normal distribution. It can contain
       -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
       specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
             parameter can be used to limit the time. If it is not given
             and there are more than two dimensions, ``10000 * n`` is used.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral

    Raises
    ------
    ValueError
        if the bounds are not one-dimensional, have different lengths, or
        if `corrcoef` does not match one of the three accepted layouts.

    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways
    If the multivariate normal is two-dimensional, then only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as full square correlation matrix, or as a one-dimensional array of the
    ``n * (n - 1) / 2`` strictly lower triangular coefficients stacked by
    rows, i.e. in the order of ``corr[np.tril_indices(n, -1)]``
    (r10, r20, r21, r30, ...).

    When ``mvndst`` reports a non-zero status code, a line starting with
    'something wrong' is printed together with the estimated error and the
    (possibly inaccurate) value is still returned.

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------
    The trailing digits of the three-dimensional results depend on the
    integrator and may differ between SciPy versions.

    >>> print(mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5))
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0], [0.0,0.0,0.0], corr, abseps=1e-6))
    0.166666399198
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0], [0.0,0.0,0.0], corr,
    ...                    maxpts=100000, abseps=1e-8))
    0.166666588293

    '''
    n = len(lower)
    # Arrays make the ndim/shape checks below possible; float dtype keeps the
    # infinity tests and the integrator input well defined for integer input.
    lower = np.array(lower, dtype=float)
    upper = np.array(upper, dtype=float)
    corrcoef = np.array(corrcoef, dtype=float)
    n_pairs = n * (n - 1) // 2  # number of distinct off-diagonal correlations

    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError('can handle only 1D bounds')
    if len(upper) != n:
        raise ValueError('bounds have different lengths')

    if n == 2 and corrcoef.size == 1:
        # a single rho for the bivariate case
        correl = corrcoef.reshape(1)
    elif corrcoef.ndim == 1 and len(corrcoef) == n_pairs:
        # already flattened in lower-triangular, row-by-row order
        correl = corrcoef
    elif corrcoef.shape == (n, n):
        # full matrix: extract the strictly lower triangle row by row
        correl = corrcoef[np.tril_indices(n, -1)]
    else:
        raise ValueError('corrcoef has incorrect dimension')

    if 'maxpts' not in kwds and n > 2:
        kwds['maxpts'] = 10000 * n

    mvndst = _find_mvndst()
    if mvndst is None:
        # No private integrator available: rebuild the symmetric correlation
        # matrix and use the public API, which takes the lower bound directly.
        corr = np.eye(n)
        rows, cols = np.tril_indices(n, -1)
        corr[rows, cols] = correl
        corr[cols, rows] = correl
        return float(stats.multivariate_normal.cdf(
            upper, mean=np.zeros(n), cov=corr, allow_singular=True,
            lower_limit=lower, **kwds))

    # Integration-limit flags passed to mvndst, one per dimension:
    #   2 -> both bounds finite, 0 -> lower is -inf, 1 -> upper is +inf,
    #  -1 -> both infinite (must be assigned last so it overrides 0 and 1).
    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = np.full(n, 2.0)
    infin[lowinf] = 0
    infin[uppinf] = 1
    infin[lowinf & uppinf] = -1

    error, cdfvalue, inform = mvndst(lower, upper, infin, correl, **kwds)
    if inform:
        message = _MVNDST_INFORM.get(inform,
                                     'unknown inform code %r' % (inform,))
        print('something wrong', message, error)
    return cdfvalue

In [6]:
def mvnormcdf(upper, mu, cov, lower=None,  **kwds):
    '''multivariate normal cumulative distribution function

    Rectangular integral over a multivariate normal distribution with
    arbitrary mean vector and covariance matrix.

    The integration limits are shifted by the mean and scaled by the
    per-dimension standard deviation, the covariance is rescaled to a
    correlation matrix, and the standardized problem is handed to
    `mvstdnormcdf`.

    Parameters
    ----------
    upper : array_like, 1d
       upper integration limits with length equal to the number of
       dimensions of the multivariate normal distribution. It can contain
       np.inf for open integration intervals
    mu : array_like, 1d
       list or array of means
    cov : array_like, 2d
       specifies covariance matrix
    lower : array_like, 1d, optional
       lower integration limits, same length as `upper`. It can contain
       -np.inf. If None, every lower limit is -np.inf
    optional keyword parameters to influence integration
        These are forwarded unchanged to `mvstdnormcdf`.

        * maxpts : int, maximum number of function values allowed.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral

    See Also
    --------
    mvstdnormcdf : location and scale standardized multivariate normal cdf
    '''
    upper = np.array(upper)
    # A missing lower bound means the integral is open towards -inf.
    lower = np.full(upper.shape, -np.inf) if lower is None else np.array(lower)
    cov = np.array(cov)

    # Per-dimension standard deviations are the roots of the diagonal.
    sigma = np.sqrt(np.diag(cov))

    # Standardize the limits: subtract the mean, divide by the scale.
    std_lower = (lower - mu) / sigma
    std_upper = (upper - mu) / sigma

    # Covariance -> correlation: divide column j by sigma_j, then row i by sigma_i.
    sigma_row = np.atleast_2d(sigma)
    corr = cov / sigma_row / sigma_row.T

    return mvstdnormcdf(std_lower, std_upper, corr, **kwds)

In [7]:
# def pdfMarginals(distribution, parameters, values):
#     """It returns an array with the probability density function (pdf) of the given values.
    
#     Inputs:
#     -------
#     distribution: string. Name of the distribution in scipy.stats, such as 'frechet_r', 'genextreme'...
#     parameters:   list of floats. Fitted parameters for the distribution above
#     values:       array of floats. Series values
    
#     Output:
#     ------
#     pdf:          array of floats. Probability density evaluated at each of the input values"""
    
#     if distribution == 'frechet_r':
#         pdf = weibull_min(*parameters).pdf(values)
#     elif distribution == 'genextreme':
#         pdf = genextreme(*parameters).pdf(values)
#     elif distribution == 'gumbel_r':
#         pdf = gumbel_r(*parameters).pdf(values)
#     elif distribution == 'weibull_max':
#         pdf = weibull_max(*parameters).pdf(values)
#     elif distribution == 'weibull_min':
#         pdf = weibull_min(*parameters).pdf(values)
#     elif distribution == 'norm':
#         pdf = norm(*parameters).pdf(values)
#     else:
#         return 'ERROR in the distribution or parameters.'
    
#     return pdf

In [8]:
# def cdfMarginals(distribution, parameters, values):
#     """It returns an array of non-exceedance probabilities for the input values given a distribution and its parameters.
    
#     Inputs:
#     -------
#     distribution: string. Name of the distribution in scipy.stats, such as 'frechet_r', 'genextreme'...
#     parameters:   list of floats. Fitted parameters for the distribution above
#     values:       array of floats. Series of values of the variable of interest for which to calculate the non-exceedance probability
    
#     Output:
#     ------
#     cdf:          array of floats. Non-exceedance probabilities of the input values"""
    
#     if distribution == 'frechet_r':
#         cdf = stats.weibull_min(*parameters).cdf(values)
#     elif distribution == 'genextreme':
#         cdf = stats.genextreme(*parameters).cdf(values)
#     elif distribution == 'gumbel_r':
#         cdf = stats.gumbel_r(*parameters).cdf(values)
#     elif distribution == 'weibull_max':
#         cdf = stats.weibull_max(*parameters).cdf(values)
#     elif distribution == 'weibull_min':
#         cdf = stats.weibull_min(*parameters).cdf(values)
#     elif distribution == 'norm':
#         cdf = stats.norm(*parameters).cdf(values)
#     else:
#         return 'ERROR in the distribution or parameters.'
        
#     return cdf

In [9]:
# def ppfMarginals(distribution, parameters, quantiles):
#     """It returns an array of values corresponding to the input quantiles given a distribution and its parameters.
    
#     Inputs:
#     -------
#     distribution: string. Name of the distribution in scipy.stats, such as 'frechet_r', 'genextreme'...
#     parameters:   list of floats. Fitted parameters for the distribution above
#     quantiles:    array of floats. Series of quantiles for which to calculate the value of the variable of interest
    
#     Output:
#     ------
#     ppf:          array of floats. Values of the variable of interest"""
    
#     if distribution == 'frechet_r':
#         ppf = weibull_min(*parameters).ppf(quantiles)
#     elif distribution == 'genextreme':
#         ppf = genextreme(*parameters).ppf(quantiles)
#     elif distribution == 'gumbel_r':
#         ppf = gumbel_r(*parameters).ppf(quantiles)
#     elif distribution == 'weibull_max':
#         ppf = weibull_max(*parameters).ppf(quantiles)
#     elif distribution == 'weibull_min':
#         ppf = weibull_min(*parameters).ppf(quantiles)
#     elif distribution == 'norm':
#         ppf = norm(*parameters).ppf(quantiles)
#     else:
#         return 'ERROR in the distribution or parameters.'
    
#     return ppf