In [1]:
import numpy as np
import time
from numba import njit, float64, int64, types
from GMM_with_EM_Multivariate import GMM_EM_multi
from sklearn.mixture import GaussianMixture as GMM

# GMM_EM_multi_njit (eager compilation)

In [2]:
@njit('float64[:](float64[:,:],float64[:],float64[:,:])')
def multi_ll_njit(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit('float64(float64[:])')
def nb_sum(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


r_sig = types.Tuple([float64[:,:],float64[:,:,:],float64[:],int64])
sig = r_sig(float64[:,:],float64[:,:],float64[:,:,:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_multi_njit(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

# GMM_EM_multi_njit (lazy compilation)

In [3]:
@njit
def multi_ll_njit_lazy(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit
def nb_sum_lazy(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


@njit
def GMM_EM_multi_njit_lazy(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit_lazy(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum_lazy(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

## Setting 5

### Seed: Fixed, Size: 600, N_comp: 2

In [4]:
np.random.seed(42)
time_list_naive_multi = []

for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
    X_tot = np.vstack((x1,x2))
    
    mu = np.array([[1.,2.],[6.,8.]])
    cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
    q= np.zeros((len(X_tot),2))
    tau = np.array([1/2,1/2])
    
    t1 = time.time()
    GMM_EM_multi(X_tot, mu, cov, 10000 , tau , q)
    t2 = time.time()
    
    time_list_naive_multi.append(t2-t1)

In [5]:
np.random.seed(42)
time_list_njit_multi = []

for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
    X_tot = np.vstack((x1,x2))
    
    mu = np.array([[1.,2.],[6.,8.]])
    cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
    q= np.zeros((len(X_tot),2))
    tau = np.array([1/2,1/2])
    
    t1 = time.time()
    GMM_EM_multi_njit(X_tot, mu, cov, 10000 , tau , q, 1e-08)
    t2 = time.time()
    
    time_list_njit_multi.append(t2-t1)

In [6]:
np.random.seed(42)
time_list_njit_lazy_multi = []

for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
    X_tot = np.vstack((x1,x2))
    
    mu = np.array([[1.,2.],[6.,8.]])
    cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
    q= np.zeros((len(X_tot),2))
    tau = np.array([1/2,1/2])
    
    t1 = time.time()
    GMM_EM_multi_njit_lazy(X_tot, mu, cov, 10000 , tau , q, 1e-08)
    t2 = time.time()
    
    time_list_njit_lazy_multi.append(t2-t1)

In [7]:
cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
precision = np.array([np.linalg.inv(cov[:,:,0]),np.linalg.inv(cov[:,:,1])])

In [8]:
time_list_sklearn_multi = []

np.random.seed(42)
for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
    X_tot = np.vstack((x1,x2))
    
    t1 = time.time()
    gmm = GMM(n_components= 2, random_state= 42 , covariance_type="full" 
              ,means_init=np.array([[1.5,2.5],[7.3,10.2]]), precisions_init= precision
              ,max_iter=10000)
    gmm.fit(X_tot)
    t2 = time.time()
    
    time_list_sklearn_multi.append(t2-t1)

In [9]:
print("Setting 5")
print("naive time: " + str(np.mean(time_list_naive_multi)))
print("njit time (with compilation): " + str(time_list_njit_multi[0]))
print("njit time (after compilation): " + str(np.mean(time_list_njit_multi[1:])))
print("njit time (with lazy compilation): " + str(time_list_njit_lazy_multi[0]))
print("njit time (after lazy compilation): " + str(np.mean(time_list_njit_lazy_multi[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_multi)))

Setting 5
naive time: 0.07535848617553711
njit time (with compilation): 0.004920482635498047
njit time (after compilation): 0.004645877414279514
njit time (with lazy compilation): 1.9705290794372559
njit time (after lazy compilation): 0.004454639222886827
sklearn time: 0.0033439159393310546


In [10]:
print("naive std: "+ str(np.std(time_list_naive_multi)/np.sqrt(10)))
print("njit std: " + str(np.std(time_list_njit_multi[1:])/np.sqrt(9)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_multi[1:])/np.sqrt(9)))
print("sklearn std: " + str(np.std(time_list_sklearn_multi)/np.sqrt(10)))

naive std: 0.0005278902522552766
njit std: 4.974910424953586e-06
njit lazy std: 3.0129577271252944e-05
sklearn std: 0.0002362506559667699


## Setting 6

### Redefine the function for just-in-time compilation

In [11]:
@njit('float64[:](float64[:,:],float64[:],float64[:,:])')
def multi_ll_njit(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit('float64(float64[:])')
def nb_sum(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


r_sig = types.Tuple([float64[:,:],float64[:,:,:],float64[:],int64])
sig = r_sig(float64[:,:],float64[:,:],float64[:,:,:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_multi_njit(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

In [12]:
@njit
def multi_ll_njit_lazy(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit
def nb_sum_lazy(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


@njit
def GMM_EM_multi_njit_lazy(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit_lazy(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum_lazy(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

### Seed: Changed, Size: 600, N_comp: 2

In [13]:
time_list_naive_multi_seed = []

for j in range(10):
    np.random.seed(42+j)

    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
        X_tot = np.vstack((x1,x2))
        
        mu = np.array([[1.,2.],[6.,8.]])
        cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
        q= np.zeros((len(X_tot),2))
        tau = np.array([1/2,1/2])
        
        t1 = time.time()
        GMM_EM_multi(X_tot, mu, cov, 10000 , tau , q)
        t2 = time.time()
        
        time_list_naive_multi_seed.append(t2-t1)

In [14]:
time_list_njit_multi_seed = []

for j in range(10):
    np.random.seed(42+j)

    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
        X_tot = np.vstack((x1,x2))
        
        mu = np.array([[1.,2.],[6.,8.]])
        cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
        q= np.zeros((len(X_tot),2))
        tau = np.array([1/2,1/2])
        
        t1 = time.time()
        test=GMM_EM_multi_njit(X_tot, mu, cov, 10000 , tau , q, 1e-15)
        t2 = time.time()
        
        time_list_njit_multi_seed.append(t2-t1)

In [15]:
time_list_njit_lazy_multi_seed = []

for j in range(10):
    np.random.seed(42+j)

    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
        X_tot = np.vstack((x1,x2))
        
        mu = np.array([[1.,2.],[6.,8.]])
        cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
        q= np.zeros((len(X_tot),2))
        tau = np.array([1/2,1/2])
        
        t1 = time.time()
        test=GMM_EM_multi_njit_lazy(X_tot, mu, cov, 10000 , tau , q, 1e-08)
        t2 = time.time()
        
        time_list_njit_lazy_multi_seed.append(t2-t1)

In [16]:
time_list_sklearn_multi_seed = []
for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 300)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 300)
        X_tot = np.vstack((x1,x2))
        
        t1 = time.time()
        gmm = GMM(n_components= 2, random_state= 42+j , covariance_type="full" 
                  ,means_init=np.array([[1.5,2.5],[7.3,10.2]]), precisions_init= precision
                  ,max_iter=10000)
        gmm.fit(X_tot)
        t2 = time.time()
        
        time_list_sklearn_multi_seed.append(t2-t1)

In [17]:
print("Setting 6")
print("naive time: "+ str(np.mean(time_list_naive_multi_seed)))
print("njit time (with compilation): " + str(time_list_njit_multi_seed[0]))
print("njit time (after compilation): "+ str(np.mean(time_list_njit_multi_seed[1:])))
print("njit time (with lazy compilation): " + str(time_list_njit_lazy_multi_seed[0]))
print("njit time (after lazy compilation): "+ str(np.mean(time_list_njit_lazy_multi_seed[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_multi_seed)))

Setting 6
naive time: 0.07499917984008789
njit time (with compilation): 0.004672050476074219
njit time (after compilation): 0.004335285437227499
njit time (with lazy compilation): 1.9490966796875
njit time (after lazy compilation): 0.00421130055128926
sklearn time: 0.0027690386772155763


In [18]:
print("naive std: " + str(np.std(time_list_naive_multi_seed)/np.sqrt(100)))
print("njit std: " + str(np.std(time_list_njit_multi_seed[1:])/np.sqrt(99)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_multi_seed[1:])/np.sqrt(99)))
print("sklearn std: " + str(np.std(time_list_sklearn_multi_seed)/np.sqrt(100)))

naive std: 4.6924397757193834e-05
njit std: 1.4149576588299546e-05
njit lazy std: 5.0585211762027565e-06
sklearn std: 4.9050544299818744e-05


## Setting 7

### Redefine the function for just-in-time compilation

In [19]:
@njit('float64[:](float64[:,:],float64[:],float64[:,:])')
def multi_ll_njit(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit('float64(float64[:])')
def nb_sum(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


r_sig = types.Tuple([float64[:,:],float64[:,:,:],float64[:],int64])
sig = r_sig(float64[:,:],float64[:,:],float64[:,:,:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_multi_njit(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

In [20]:
@njit
def multi_ll_njit_lazy(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit
def nb_sum_lazy(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


@njit
def GMM_EM_multi_njit_lazy(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit_lazy(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum_lazy(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

### Seed: Fixed, Size: 40000, N_comp: 2

In [21]:
time_list_naive_multi_large = []
np.random.seed(42)

for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
    X_tot = np.vstack((x1,x2))
    
    mu = np.array([[1.,2.],[6.,8.]])
    cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
    q= np.zeros((len(X_tot),2))
    tau = np.array([1/2,1/2])
    
    t1 = time.time()
    GMM_EM_multi(X_tot, mu, cov, 10000 , tau , q)
    t2 = time.time()
    
    time_list_naive_multi_large.append(t2-t1)

In [22]:
time_list_njit_multi_large = []

np.random.seed(42)

for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
    X_tot = np.vstack((x1,x2))
    
    mu = np.array([[1.,2.],[6.,8.]])
    cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
    q= np.zeros((len(X_tot),2))
    tau = np.array([1/2,1/2])
    
    t1 = time.time()
    GMM_EM_multi_njit(X_tot, mu, cov, 10000 , tau , q, 1e-08)
    t2 = time.time()
    
    time_list_njit_multi_large.append(t2-t1)

In [23]:
time_list_njit_lazy_multi_large = []

np.random.seed(42)

for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
    X_tot = np.vstack((x1,x2))
    
    mu = np.array([[1.,2.],[6.,8.]])
    cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
    q= np.zeros((len(X_tot),2))
    tau = np.array([1/2,1/2])
    
    t1 = time.time()
    GMM_EM_multi_njit_lazy(X_tot, mu, cov, 10000 , tau , q, 1e-08)
    t2 = time.time()
    
    time_list_njit_lazy_multi_large.append(t2-t1)

In [24]:
time_list_sklearn_multi_large = []
np.random.seed(42)
for i in range(10):
    
    x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
    x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
    X_tot = np.vstack((x1,x2))
    
    t1 = time.time()
    gmm = GMM(n_components= 2, random_state= 42 , covariance_type="full" 
              ,means_init=np.array([[1.5,2.5],[7.3,10.2]]), precisions_init= precision
              ,max_iter=10000)
    gmm.fit(X_tot)
    t2 = time.time()
    
    time_list_sklearn_multi_large.append(t2-t1)

In [25]:
print("Setting 7")
print("naive time: " + str(np.mean(time_list_naive_multi_large)))
print("njit time(with compilation): " + str(time_list_njit_multi_large[0]))
print("njit time(after compilation): " + str(np.mean(time_list_njit_multi_large[1:])))
print("njit time(with lazy compilation): " + str(time_list_njit_lazy_multi_large[0]))
print("njit time(after lazy compilation): " + str(np.mean(time_list_njit_lazy_multi_large[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_multi_large)))

Setting 7
naive time: 4.874786710739135
njit time(with compilation): 0.2846231460571289
njit time(after compilation): 0.2788536548614502
njit time(with lazy compilation): 2.0979788303375244
njit time(after lazy compilation): 0.27346844143337673
sklearn time: 0.1545419692993164


In [26]:
print("naive std: " + str(np.std(time_list_naive_multi_large)/np.sqrt(10)))
print("njit std: " + str(np.std(time_list_njit_multi_large[1:])/np.sqrt(9)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_multi_large[1:])/np.sqrt(9)))
print("sklearn std: " + str(np.std(time_list_sklearn_multi_large)/np.sqrt(10)))

naive std: 0.010118283218620879
njit std: 0.00032042138807644374
njit lazy std: 0.0001123278255390958
sklearn std: 0.00476371649181667


## Setting 8

### Redefine the function for just-in-time compilation

In [27]:
@njit('float64[:](float64[:,:],float64[:],float64[:,:])')
def multi_ll_njit(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit('float64(float64[:])')
def nb_sum(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


r_sig = types.Tuple([float64[:,:],float64[:,:,:],float64[:],int64])
sig = r_sig(float64[:,:],float64[:,:],float64[:,:,:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_multi_njit(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

In [28]:
@njit
def multi_ll_njit_lazy(X, mu, cov):
    n = X.shape[0]
    p = X.shape[1]
    res = np.zeros(n)
    
    for i in range(n):
        exp_inter = np.dot(np.dot((X[i, :] - mu).T, np.linalg.inv(cov)), 
                           (X[i, :] - mu)) / 2.
        res[i] = (2*np.pi)**(-p/2) * np.linalg.det(cov)**(-0.5)*np.exp(-exp_inter)
    
    return res



@njit
def nb_sum_lazy(X):
    res = 0.0
    for i in range(X.shape[0]):
        res += X[i]
    
    return res


@njit
def GMM_EM_multi_njit_lazy(X, mu, cov, max_iter, tau, q, tol = 1e-08):
    n = X.shape[0]
    p = X.shape[1]
    K = mu.shape[0]
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = multi_ll_njit_lazy(X, mu[k, :], cov[:, :, k])
            q[:, k] = tau[k] * ll
            
        for i in range(n):
            q[i, :] /=  nb_sum_lazy(q[i, :])
        
        mu_before = mu
        cov_before = cov
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            q = np.ascontiguousarray(q[:, k]).reshape(n,1)
            mu[k, :] = np.sum(q * X, axis = 0) / q_k
            cov[:, :, k] = np.dot((q*(X - mu[k, :])).T, (X - mu[k, :])) / q_k
            tau[k] = q_k / n
        
        mu_diff = np.max(np.abs(mu - mu_before))
        cov_diff = np.max(np.abs(cov - cov_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(cov_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1 ) & (diff < tol)):
            break
        
    return mu, cov, tau, iteration

### Seed: Changed, Size: 40000, N_comp: 2

In [29]:
time_list_naive_multi_seed_large = []

for j in range(10):
    np.random.seed(42+j)

    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
        X_tot = np.vstack((x1,x2))
        
        mu = np.array([[1.,2.],[6.,8.]])
        cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
        q= np.zeros((len(X_tot),2))
        tau = np.array([1/2,1/2])
        
        t1 = time.time()
        GMM_EM_multi(X_tot, mu, cov, 10000 , tau , q)
        t2 = time.time()
        
        time_list_naive_multi_seed_large.append(t2-t1)

In [30]:
time_list_njit_multi_seed_large = []

for j in range(10):
    np.random.seed(42+j)

    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
        X_tot = np.vstack((x1,x2))
        
        mu = np.array([[1.,2.],[6.,8.]])
        cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
        q= np.zeros((len(X_tot),2))
        tau = np.array([1/2,1/2])
        
        t1 = time.time()
        test=GMM_EM_multi_njit(X_tot, mu, cov, 10000 , tau , q, 1e-08)
        t2 = time.time()
        
        time_list_njit_multi_seed_large.append(t2-t1)

In [31]:
time_list_njit_lazy_multi_seed_large = []

for j in range(10):
    np.random.seed(42+j)

    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
        X_tot = np.vstack((x1,x2))
        
        mu = np.array([[1.,2.],[6.,8.]])
        cov = np.array([[[1.,1.2],[0.,0.2]],[[0.,0.2],[1.,1.5]]])
        q= np.zeros((len(X_tot),2))
        tau = np.array([1/2,1/2])
        
        t1 = time.time()
        test=GMM_EM_multi_njit_lazy(X_tot, mu, cov, 10000 , tau , q, 1e-08)
        t2 = time.time()
        
        time_list_njit_lazy_multi_seed_large.append(t2-t1)

In [32]:
time_list_sklearn_multi_seed_large = []
for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        x1 = np.random.multivariate_normal([1.5, 2.5], [[1.2,0.4],[0.4,1.1]],size= 20000)
        x2 = np.random.multivariate_normal([7.3, 10.2], [[1.5,0.5],[0.5,2.1]],size= 20000)
        X_tot = np.vstack((x1,x2))
        
        t1 = time.time()
        gmm = GMM(n_components= 2, random_state= 42+j , covariance_type="full" 
                  ,means_init=np.array([[1.5,2.5],[7.3,10.2]]), precisions_init= precision
                  ,max_iter=10000)
        gmm.fit(X_tot)
        t2 = time.time()
        
        time_list_sklearn_multi_seed_large.append(t2-t1)

In [33]:
print("Setting 8")
print("naive time: " + str(np.mean(time_list_naive_multi_seed_large)))
print("njit time (with compilation): " + str(time_list_njit_multi_seed_large[0]))
print("njit time (after compilation): " + str(np.mean(time_list_njit_multi_seed_large[1:])))
print("njit time (with lazy compilation): " + str(time_list_njit_lazy_multi_seed_large[0]))
print("njit time (after lazy compilation): " + str(np.mean(time_list_njit_lazy_multi_seed_large[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_multi_seed_large)))

Setting 8
naive time: 4.880346529483795
njit time (with compilation): 0.3032546043395996
njit time (after compilation): 0.30201696145414103
njit time (with lazy compilation): 2.4250478744506836
njit time (after lazy compilation): 0.3048559945039075
sklearn time: 0.15682470560073852


In [34]:
print("naive std: " + str(np.std(time_list_naive_multi_seed_large)/np.sqrt(100)))
print("njit std: " + str(np.std(time_list_njit_multi_seed_large[1:])/np.sqrt(99)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_multi_seed_large[1:])/np.sqrt(99)))
print("sklearn std: " + str(np.std(time_list_sklearn_multi_seed_large)/np.sqrt(100)))

naive std: 0.014166308147507554
njit std: 7.435697943341003e-05
njit lazy std: 7.609547305828804e-05
sklearn std: 0.0017178310737391005
