In [4]:
import math
import operator as op

import numpy as np
import scipy
import scipy.stats
from scipy.stats import beta
from scipy.stats import gamma
from scipy.stats import multivariate_normal
from scipy.stats import norm

In [5]:
class Distribution:
    """Parent Class for all Distributions.

    Each method below is a placeholder: it prints a notice saying the
    operation is not available and returns ``None``. Subclasses override the
    operations they actually support.
    """

    def __init__(self):
        """Create the base object; it holds no state of its own."""

    @staticmethod
    def _announce(notice):
        """Print the given "not available" notice."""
        print(notice)

    def pdfEstimation(self, x):
        """Placeholder for evaluating the density at ``x``."""
        self._announce("PDF doesn't exist for this method.")

    def cdfEstimation(self, x):
        """Placeholder for evaluating the cumulative distribution at ``x``."""
        self._announce("CDF doesn't exist for this method.")

    def sample(self, sampleSize):
        """Placeholder for drawing ``sampleSize`` observations."""
        self._announce("Sampling doesn't exist for this method.")

    def printParameters(self):
        """Placeholder for printing the distribution parameters."""
        self._announce("printParameters doesn't exist for this method.")

    def Mean(self):
        """Placeholder for the mean."""
        self._announce("Mean doesn't exist for this method.")

    def Median(self):
        """Placeholder for the median."""
        self._announce("Median doesn't exist for this method.")

    def Mode(self):
        """Placeholder for the mode."""
        self._announce("Mode doesn't exist for this method.")

    def Variance(self):
        """Placeholder for the variance."""
        self._announce("Variance doesn't exist for this method.")

    def MaxLikelihoodEstimation(self, data):
        """Placeholder for fitting the parameters to ``data``."""
        self._announce("Maximum Likelihood Estimation Method doesn't exist for this method.")

    def BayesianEstimation(self):
        """Placeholder for a Bayesian parameter estimate."""
        self._announce("Bayesian Estimation Method doesn't exist for this method.")

In [6]:
class GaussianDistribution(Distribution):
    """General n-dimensional Gaussian Distribution.

    Attributes:
        dimension: number of features.
        mean: mean of the distribution. Starts as a ``(dimension, 1)`` array
            of zeros; the estimators replace it with whatever shape the
            estimate has, so library calls go through ``_meanVector``.
        covariance: covariance of the distribution. Starts as a
            ``(dimension, dimension)`` array of zeros.
    """

    def __init__(self, dimension):
        """Create a ``dimension``-dimensional Gaussian with zeroed parameters."""
        self.dimension = dimension
        self.mean = np.zeros(shape=(dimension, 1))
        self.covariance = np.zeros(shape=(dimension, dimension))

    def _meanVector(self):
        """Return the mean flattened to 1-D, the shape numpy/scipy require."""
        return np.asarray(self.mean, dtype=float).reshape(-1)

    def _covarianceMatrix(self):
        """Return the covariance as a 2-D array (a scalar becomes 1x1)."""
        return np.atleast_2d(np.asarray(self.covariance, dtype=float))

    def pdfEstimation(self, X):
        """Return the density at ``X`` (one point or an array of points).

        The covariance must be non-singular; the all-zero initial value is
        not, so fit or assign the parameters before calling this.
        """
        return multivariate_normal.pdf(
            X, mean=self._meanVector(), cov=self._covarianceMatrix()
        )

    def cdfEstimation(self, x):
        """Return the CDF at ``x`` for a 1-dimensional Gaussian.

        Returns ``-1`` when ``dimension`` is not 1, because only the
        univariate case is supported.
        """
        if self.dimension != 1:
            return -1
        mu = self._meanVector()[0]
        # norm takes the standard deviation, while the class stores a variance.
        sigma = math.sqrt(self._covarianceMatrix()[0, 0])
        return norm.cdf(x, loc=mu, scale=sigma)

    def sample(self, sampleSize):
        """Draw ``sampleSize`` observations as a ``(sampleSize, dimension)`` array."""
        return np.random.multivariate_normal(
            self._meanVector(), self._covarianceMatrix(), size=sampleSize
        )

    def printParameters(self):
        """Print the dimension, mean and covariance."""
        print ("Gaussian Distribution with dimension: " + str(self.dimension))
        print ("Mean of the Distribution: " + str(self.mean))
        print ("Covariance of the Distribution: " + str(self.covariance))

    def Mean(self):
        """Return the mean."""
        return self.mean

    def Median(self):
        """Return the median, which equals the mean for a Gaussian."""
        return self.mean

    def Mode(self):
        """Return the mode, which equals the mean for a Gaussian."""
        return self.mean

    def Variance(self):
        """Return the covariance."""
        return self.covariance

    def MaxLikelihoodEstimation(self, data):
        """Fit the mean and covariance to ``data`` (one observation per row).

        A 1-D ``data`` array is treated as a single-feature sample. If the
        number of columns does not match ``dimension`` a message is printed
        and the current parameters are left untouched.
        """
        data = np.asarray(data, dtype=float)
        if data.ndim == 1:
            data = data.reshape(-1, 1)
        if data.shape[1] != self.dimension:
            print ("Data's Dimension: " + str(data.shape[1]) + ", doesn't match with Gaussions Distribution Dimension " + str(self.dimension))
            return
        self.mean = np.mean(data, axis=0)
        self.covariance = np.cov(data, rowvar=False)

    def BayesianEstimation(self, data, mu0, sigma0, sigma):
        """Update the mean of a 1-dimensional Gaussian with a normal prior.

        ``mu0`` and ``sigma0`` are the prior mean and standard deviation,
        ``sigma`` is the known standard deviation of the data. ``mean`` gets
        the posterior mean and ``covariance`` the posterior variance of the
        mean. Prints a message and changes nothing when ``dimension`` is not 1.
        """
        if self.dimension != 1:
            print ("Dimension not equal to 1, can't apply Bayesian Estimation Technique.")
            return
        n = data.shape[0]
        sampleMean = np.mean(data, axis=0)
        priorVar = sigma0 * sigma0
        noiseVar = sigma * sigma
        denominator = noiseVar + n * priorVar
        self.covariance = priorVar * noiseVar / denominator
        self.mean = (noiseVar * mu0 + n * priorVar * sampleMean) / denominator

In [7]:
class UniformDistribution(Distribution):
    """General n-dimensional Uniform Distribution with independent axes.

    Attributes:
        dimension: number of features.
        parameters: ``(dimension, 2)`` array; row ``i`` holds the lower and
            upper bound of feature ``i``. Starts as all zeros.
    """

    def __init__(self, dimension):
        """Create a ``dimension``-dimensional distribution with zeroed bounds."""
        self.dimension = dimension
        self.parameters = np.zeros(shape=(dimension, 2))

    def _bounds(self):
        """Return the lower and upper bounds as two 1-D arrays."""
        bounds = np.asarray(self.parameters, dtype=float)
        return bounds[:, 0], bounds[:, 1]

    def _point(self, X):
        """Return the first ``dimension`` coordinates of ``X`` as a 1-D array."""
        return np.asarray(X, dtype=float).reshape(-1)[:self.dimension]

    def pdfEstimation(self, X):
        """Return the density at point ``X``; 0.0 outside the bounding box."""
        low, high = self._bounds()
        point = self._point(X)
        if np.any(point < low) or np.any(point > high):
            return 0.0
        return float(np.prod(1.0 / (high - low)))

    def cdfEstimation(self, X):
        """Return the CDF at point ``X`` as a product of per-axis CDFs.

        Coordinates above the upper bound are clipped to it; any coordinate
        below its lower bound makes the result 0.0.
        """
        low, high = self._bounds()
        point = self._point(X)
        if np.any(point < low):
            return 0.0
        clipped = np.minimum(point, high)
        return float(np.prod((clipped - low) / (high - low)))

    # Original spelling, kept so existing callers continue to work.
    cdfEstimate = cdfEstimation

    def sample(self, sampleSize):
        """Draw ``sampleSize`` observations as a ``(sampleSize, dimension)`` array."""
        low, high = self._bounds()
        return np.random.uniform(low, high, size=(sampleSize, self.dimension))

    def printParameters(self):
        """Print the bounds of every feature."""
        print ("Uniform Distribution in " + str(self.dimension) + " Dimension with paramters: ")
        for low, high in self.parameters:
            print ("( " + str(low) + ", " + str(high) + " )")

    def Mean(self):
        """Return the per-feature midpoints as a ``(dimension, 1)`` array."""
        low, high = self._bounds()
        return ((low + high) / 2).reshape(self.dimension, 1)

    def Median(self):
        """Return the per-feature medians (the midpoints), shape ``(dimension, 1)``."""
        return self.Mean()

    def Variance(self):
        """Return the per-feature variances as a ``(dimension, 1)`` array."""
        low, high = self._bounds()
        return (((high - low) ** 2) / 12).reshape(self.dimension, 1)

    def Covariance(self):
        """Return the diagonal covariance matrix (features are independent)."""
        return np.diag(self.Variance().reshape(-1))

    def MaxLikelihoodEstimation(self, data):
        """Set each feature's bounds to the column minimum and maximum of ``data``.

        ``data`` holds one observation per row; a 1-D array is treated as a
        single-feature sample. Only the first ``dimension`` columns are used.

        Raises:
            ValueError: if ``data`` is empty or has fewer than ``dimension``
                columns.
        """
        data = np.asarray(data, dtype=float)
        if data.ndim == 1:
            data = data.reshape(-1, 1)
        if data.shape[0] == 0:
            raise ValueError("data must contain at least one observation")
        if data.shape[1] < self.dimension:
            raise ValueError("data has fewer columns than the distribution dimension")
        columns = data[:, :self.dimension]
        self.parameters[:, 0] = columns.min(axis=0)
        self.parameters[:, 1] = columns.max(axis=0)

In [8]:
class BernoulliDistribution(Distribution):
    """1-dimensional Bernoulli Distribution.

    Attributes:
        p: probability of success (outcome 1).
    """

    def __init__(self, p=0):
        """Create the distribution with success probability ``p``."""
        self.p = p

    def pdfEstimation(self, x):
        """Return the probability mass at ``x``: ``p`` at 1, ``1 - p`` at 0, else 0."""
        if x == 1:
            return self.p
        if x == 0:
            return (1-self.p)
        # Values other than 0 and 1 are outside the support.
        return 0

    def cdfEstimation(self, x):
        """Return P(X <= x)."""
        if x < 0:
            return 0
        if x >= 1:
            return 1
        return (1-self.p)

    # Original spelling, kept so existing callers continue to work.
    cdfEstimate = cdfEstimation

    def sample(self, sampleSize):
        """Draw ``sampleSize`` 0/1 outcomes."""
        return np.random.binomial(1, self.p, sampleSize)

    def printParameters(self):
        """Print the success probability."""
        print ("Bernoulli Distribution with probability of success(1): " + str(self.p))

    def Mean(self):
        """Return the mean, ``p``."""
        return self.p

    def Median(self):
        """Return 0 or 1, whichever is more likely, or 0.5 on a tie."""
        failure = 1 - self.p
        if failure > self.p:
            return 0
        if self.p > failure:
            return 1
        return 0.5

    def Mode(self):
        """Return the most likely outcome; ties resolve to 0."""
        return 0 if (1-self.p) >= self.p else 1

    def Variance(self):
        """Return the variance, ``p * (1 - p)``."""
        return (self.p*(1-self.p))

    def MaxLikelihoodEstimation(self, data):
        """Set ``p`` to the fraction of successes in ``data`` (array of 0/1).

        Raises:
            ValueError: if ``data`` is empty.
        """
        outcomes = np.asarray(data, dtype=float).reshape(-1)
        if outcomes.size == 0:
            raise ValueError("data must contain at least one observation")
        self.p = float(outcomes.sum() / outcomes.size)

    def BayesianEstimation(self, data, a, b):
        """Set ``p`` to ``(successes + a - 1) / (n + a + b - 2)``.

        ``a`` and ``b`` act as the parameters of a Beta prior on ``p``.

        Raises:
            ValueError: if ``n + a + b - 2`` is zero, where the estimate is
                undefined.
        """
        outcomes = np.asarray(data, dtype=float).reshape(-1)
        denominator = outcomes.size + a + b - 2
        if denominator == 0:
            raise ValueError("n + a + b - 2 must be non-zero")
        self.p = float((outcomes.sum() + a - 1) / denominator)

In [9]:
class BetaDistribution(Distribution):
    """Beta Distribution for 1 dimension on the interval [0, 1].

    Attributes:
        alpha: first shape parameter.
        beta: second shape parameter.
    """

    def __init__(self,alpha,beta):
        """Create the distribution with shape parameters ``alpha`` and ``beta``."""
        self.alpha = alpha
        self.beta = beta

    def pdfEstimation(self, X):
        """Return the density at ``X``."""
        return beta.pdf(X, self.alpha, self.beta, loc=0, scale=1)

    def cdfEstimation(self, x):
        """Return the CDF at ``x``."""
        return beta.cdf(x, self.alpha, self.beta, loc=0, scale=1)

    def sample(self, sampleSize=1):
        """Draw ``sampleSize`` observations (one by default, as before)."""
        return beta.rvs(self.alpha, self.beta, loc=0, scale=1, size=sampleSize, random_state=None)

    def printParameters(self):
        """Print both shape parameters."""
        print ("Beta Distribution with alpha: " + str(self.alpha)+", beta: "+str(self.beta))

    def Mean(self):
        """Return the mean."""
        return beta.mean(self.alpha, self.beta, loc=0, scale=1)

    def Median(self):
        """Return the median."""
        return beta.median(self.alpha, self.beta, loc=0, scale=1)

    def Mode(self):
        """Return ``(alpha - 1) / (alpha + beta - 2)`` when both shapes exceed 1.

        Returns ``-1`` otherwise, where that interior-mode formula does not
        apply.
        """
        if self.alpha > 1 and self.beta > 1:
            return (self.alpha - 1.0) / (self.alpha + self.beta - 2.0)
        return -1

    def Variance(self):
        """Return the variance."""
        return beta.var(self.alpha, self.beta, loc=0, scale=1)

    def MaxLikelihoodEstimation(self, data):
        """Fit ``alpha`` and ``beta`` to ``data`` by maximum likelihood.

        Location and scale are pinned to 0 and 1 so the fitted shapes match
        the fixed [0, 1] support used by the other methods. scipy rejects
        observations outside the open interval (0, 1).

        Returns:
            The tuple ``(alpha, beta, loc, scale)`` returned by ``beta.fit``.
        """
        observations = np.asarray(data, dtype=float).reshape(-1)
        estimate = beta.fit(observations, floc=0, fscale=1)
        self.alpha = float(estimate[0])
        self.beta = float(estimate[1])
        return estimate

In [10]:
def ncr(n, r):
    """Return the binomial coefficient "n choose r" as an exact integer.

    Returns 0 when ``r`` is negative or greater than ``n``, since no such
    selection exists.
    """
    if r < 0 or r > n:
        return 0
    # Use the smaller of r and n - r to keep the loop short.
    r = min(r, n - r)
    result = 1
    for step in range(1, r + 1):
        # After each step result equals C(n - r + step, step), so the floor
        # division is always exact.
        result = result * (n - r + step) // step
    return result

In [11]:
class BinomialDistribution(Distribution):
    """Binomial distribution for 1 dimension.

    Attributes:
        n: number of trials.
        p: probability of success in a single trial.
    """

    def __init__(self,numberOfTrials,probOfSuccess):
        """Create the distribution with the given trial count and success probability."""
        self.n = numberOfTrials
        self.p = probOfSuccess

    def pdfEstimation(self,k):
        """Return the probability of exactly ``k`` successes.

        Delegates to scipy, like ``cdfEstimation``, so values of ``k``
        outside ``0..n`` yield 0.
        """
        return scipy.stats.binom.pmf(k, self.n, self.p)

    def cdfEstimation(self,X):
        """Return the probability of at most ``X`` successes."""
        return scipy.stats.binom.cdf(X, self.n, self.p)

    def Mean(self):
        """Return the mean, ``n * p``."""
        return self.n*self.p

    def Variance(self):
        """Return the variance, ``n * p * (1 - p)``."""
        return self.n*self.p*(1-self.p)

    def Mode(self):
        """Return a mode, ``floor((n + 1) * p)``, capped at ``n``."""
        if self.p >= 1:
            # floor((n + 1) * p) would give n + 1, which is outside the support.
            return self.n
        return int(math.floor((self.n + 1) * self.p))

    def Median(self):
        """Return ``n * p`` truncated to an integer."""
        return int(self.n * self.p)

    def printParameters(self):
        """Print the trial count and success probability."""
        print ("Binomial Distribution with n and p: " + str(self.n)+" , "+str(self.p))

    def MaxLikelihoodEstimation(self,data):
        """Set ``p`` to the mean of ``data`` divided by ``n``.

        ``data`` holds observed success counts, each out of ``n`` trials.

        Raises:
            ValueError: if ``data`` is empty or ``n`` is not positive.
        """
        counts = np.asarray(data, dtype=float).reshape(-1)
        if counts.size == 0:
            raise ValueError("data must contain at least one observation")
        if self.n <= 0:
            raise ValueError("number of trials must be positive")
        self.p = float(counts.mean() / self.n)

In [12]:
class ExponentialDistribution(Distribution):
    """Exponential distribution for 1 dimension.

    Attributes:
        lambdaVar: rate parameter.
        beta: scale parameter, kept equal to ``1 / lambdaVar``.
    """

    def __init__(self,lambdaVariable):
        """Create the distribution with rate ``lambdaVariable``."""
        self.lambdaVar = lambdaVariable
        # 1.0 forces true division even for an integer rate.
        self.beta = 1.0/self.lambdaVar

    def pdfEstimation(self,X):
        """Return the density at scalar ``X``; 0.0 for negative ``X``."""
        if X < 0:
            return 0.0
        return self.lambdaVar*(math.exp(-1*self.lambdaVar*X))

    def cdfEstimation(self,X):
        """Return the CDF at scalar ``X``; 0.0 for negative ``X``."""
        if X < 0:
            return 0.0
        return 1 - math.exp(-1*self.lambdaVar*X)

    def Mean(self):
        """Return the mean, ``1 / lambda``."""
        return self.beta

    def Variance(self):
        """Return the variance, ``1 / lambda**2``."""
        return math.pow(self.beta,2)

    def Mode(self):
        """Return the mode, which is always 0."""
        return 0

    def Median(self):
        """Return the median, ``ln(2) / lambda``."""
        return self.beta*math.log(2)

    def printParameters(self):
        """Print the rate parameter."""
        print ("Exponential Distribution with lambda: " + str(self.lambdaVar))

    def MaxLikelihoodEstimation(self,data):
        """Set ``lambda`` to ``n / sum(data)`` and refresh ``beta``.

        Accepts 1-D data as well as a single-column 2-D array.

        Raises:
            ValueError: if ``data`` is empty or its sum is not positive.
        """
        observations = np.asarray(data, dtype=float).reshape(-1)
        if observations.size == 0:
            raise ValueError("data must contain at least one observation")
        total = float(observations.sum())
        if total <= 0:
            raise ValueError("sum of the observations must be positive")
        self.lambdaVar = observations.size / total
        self.beta = 1.0/self.lambdaVar

In [13]:
class GammaDistribution(Distribution):
    """1-dimensional Gamma Distribution.

    Attributes:
        k: shape parameter.
        t: scale parameter.

    Methods that need valid parameters return an explanatory string, as the
    original code did, when ``k`` or ``t`` is not positive.
    """

    def __init__(self, k, t):
        """Create the distribution with shape ``k`` and scale ``t``."""
        self.k = k
        self.t = t

    def _hasValidParameters(self):
        """Return True when both shape and scale are strictly positive."""
        return self.k > 0 and self.t > 0

    def pdfEstimation(self, x):
        """Return the density at scalar ``x``; 0.0 for negative ``x``."""
        if not self._hasValidParameters():
            return "There is no simple way to find out the pdf of this distribution."
        if x < 0:
            # Negative values lie outside the support.
            return 0.0
        return gamma.pdf(x, self.k, loc=0, scale=self.t)

    def cdfEstimation(self, x):
        """Return the CDF at scalar ``x``; 0.0 for negative ``x``."""
        if not self._hasValidParameters():
            return "There is no simple way to find out the cdf of this distribution."
        if x < 0:
            return 0.0
        return gamma.cdf(x, self.k, loc=0, scale=self.t)

    # Original spelling, kept so existing callers continue to work.
    cdfEstimate = cdfEstimation

    def sample(self, sampleSize):
        """Draw ``sampleSize`` observations."""
        if not self._hasValidParameters():
            return "Assumptions are not upto the mark."
        return gamma.rvs(self.k, loc=0, scale=self.t, size=sampleSize)

    def printParameters(self):
        """Print the shape and scale parameters."""
        print ("Shape parameter of Gamma Distribution is " + str(self.k))
        print ("Scale parameter of Gamma Distribution is " + str(self.t))

    def Mean(self):
        """Return the mean, ``k * t``."""
        return self.k*self.t

    def Median(self):
        """Return the median, computed numerically by scipy."""
        if not self._hasValidParameters():
            return "No simple way to produce median of gamma distribution."
        return gamma.median(self.k, loc=0, scale=self.t)

    def Mode(self):
        """Return ``(k - 1) * t`` when ``k >= 1``, otherwise an explanatory string."""
        if self.k>=1:
            return (self.k-1)*self.t
        return "Shape parameter is less than 1 so we can't fnd the mode."

    def Variance(self):
        """Return the variance, ``k * t**2``."""
        return self.k*(self.t**2)

    def MaxLikelihoodEstimation(self, data):
        """Fit ``k`` and ``t`` to ``data`` by maximum likelihood.

        The location is pinned to 0 so the fit matches the zero-location
        form used by the other methods.

        Raises:
            ValueError: if ``data`` is empty, contains a non-positive value,
                or has no spread (all observations equal).
        """
        observations = np.asarray(data, dtype=float).reshape(-1)
        if observations.size == 0:
            raise ValueError("data must contain at least one observation")
        if np.any(observations <= 0):
            raise ValueError("all observations must be strictly positive")
        if np.ptp(observations) == 0:
            raise ValueError("observations must not all be equal")
        shape, _, scale = gamma.fit(observations, floc=0)
        self.k = float(shape)
        self.t = float(scale)

In [14]:
class GaussianMixtureModel(Distribution):
    """A two Mode one feature GMM.

    Fitting uses hard assignments: each point is labelled with the component
    that gives it the larger weighted density, and each component is then
    re-estimated from its own points only.

    Attributes set by ``initialGuess`` and updated while fitting:
        mean1, mean2: component means.
        sigma1, sigma2: component standard deviations.
        weight1, weight2: component weights.
    """

    # Lower bound on a fitted standard deviation. A one-point or constant
    # cluster has zero spread, which would make norm.pdf return NaN.
    _MIN_SIGMA = 1e-6

    def __init__(self):
        """Create a model with two modes over one feature."""
        self.modes = 2
        self.dimension = 1

    @staticmethod
    def _asScalar(value):
        """Return ``value`` (scalar or one-element array) as a float."""
        return float(np.asarray(value, dtype=float).reshape(-1)[0])

    def initialGuess(self, guess1, guess2):
        """Seed both components: given means, unit sigma, equal weights."""
        self.mean1 = np.array([guess1])
        self.sigma1 = np.array([[1]])
        self.weight1 = 0.5

        self.mean2 = np.array([guess2])
        self.sigma2 = np.array([[1]])
        self.weight2 = 0.5

    def probability(self, x, mean, sigma, weight):
        """Return the weighted normal density of ``x`` for one component."""
        return weight*(norm.pdf(x, mean, sigma))

    def expectation(self, X):
        """Label every observation with its more likely component.

        Args:
            X: observations, shape ``(n, 1)``; a 1-D array is also accepted.

        Returns:
            An ``(n, 2)`` array: the observations plus a label column holding
            0 for the first component and 1 for the second. Ties go to 1.

        Raises:
            ValueError: if ``X`` does not have exactly one feature column.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if X.ndim != 2 or X.shape[1] != 1:
            raise ValueError("X must have exactly one feature column")
        values = X[:, 0]
        p1 = self.probability(values, self._asScalar(self.mean1),
                              self._asScalar(self.sigma1), self.weight1)
        p2 = self.probability(values, self._asScalar(self.mean2),
                              self._asScalar(self.sigma2), self.weight2)
        labels = np.where(p1 > p2, 0.0, 1.0).reshape(-1, 1)
        return np.concatenate((X, labels), axis=1)

    def maximization(self, X, verbose=True):
        """Re-estimate both components from labelled observations.

        Args:
            X: ``(n, 2)`` array as produced by ``expectation``.
            verbose: when True (the default) print the two clusters.

        If one component received no points it is re-seeded one unit below
        the other component's mean with the same sigma, and both weights are
        reset to 0.5. The other component is left unchanged in that case.
        """
        X = np.asarray(X, dtype=float)
        n = X.shape[0]
        inFirst = X[:, 1] == 0
        x1 = X[inFirst, 0]
        x2 = X[~inFirst, 0]
        if verbose:
            print(x1)
            print(x2)
        if x1.size == 0:
            self.weight1 = 0.5
            self.weight2 = 0.5
            self.mean1 = self.mean2 - 1
            # Copy the other sigma; subtracting 1 could reach zero or below.
            self.sigma1 = max(self._asScalar(self.sigma2), self._MIN_SIGMA)
        elif x2.size == 0:
            self.weight1 = 0.5
            self.weight2 = 0.5
            self.mean2 = self.mean1 - 1
            self.sigma2 = max(self._asScalar(self.sigma1), self._MIN_SIGMA)
        else:
            self.weight1 = x1.size / float(n)
            self.weight2 = x2.size / float(n)
            self.mean1 = np.mean(x1)
            self.mean2 = np.mean(x2)
            self.sigma1 = max(float(np.std(x1)), self._MIN_SIGMA)
            self.sigma2 = max(float(np.std(x2)), self._MIN_SIGMA)

    def distance(self, mean1old, mean2old):
        """Return the Euclidean distance between the old and current means."""
        shift1 = self._asScalar(self.mean1) - self._asScalar(mean1old)
        shift2 = self._asScalar(self.mean2) - self._asScalar(mean2old)
        return math.sqrt(shift1 ** 2 + shift2 ** 2)

    def expactationMaximasation(self, epsilon, X, guess1, guess2,
                                maxIterations=1000, verbose=True):
        """Fit the model by alternating ``expectation`` and ``maximization``.

        Args:
            epsilon: stop once the means move by no more than this.
            X: observations, shape ``(n, 1)``.
            guess1, guess2: initial means of the two components.
            maxIterations: upper bound on the number of iterations, so a
                non-converging run still terminates.
            verbose: when True (the default) print the labelled data, the
                means and the error on every iteration.
        """
        error = math.inf
        iters = 0
        self.initialGuess(guess1, guess2)
        while error > epsilon and iters < maxIterations:
            iters += 1
            Xlabelled = self.expectation(X)
            if verbose:
                print(Xlabelled)
            mean1old = self.mean1
            mean2old = self.mean2
            self.maximization(Xlabelled, verbose=verbose)
            error = self.distance(mean1old, mean2old)
            if verbose:
                print(self.mean1)
                print(self.mean2)
                print(str(iters) + ": " + str(error))

    # Correctly spelled alias; the original name stays for existing callers.
    expectationMaximization = expactationMaximasation