In [1]:
import numpy as np
from numba import njit, float64, int64, types
from sklearn.mixture import GaussianMixture as GMM
from GMM_with_EM_Univariate import GMM_EM
import time

## GMM_EM_njit (eager compilation)

In [2]:
@njit('float64[:](float64[:],float64,float64)')
def normal_ll_njit(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit('float64(float64[:])')
def nb_sum(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res



r_sig = types.Tuple([float64[:],float64[:],float64[:],int64])
sig = r_sig(float64[:],float64[:],float64[:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_njit(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            mu[k] = nb_sum(q[:, k] * X) / q_k
            sigma[k] = nb_sum(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


## GMM_EM_njit (lazy compilation)

In [3]:
@njit
def normal_ll_njit_lazy(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit
def nb_sum_lazy(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res

@njit
def GMM_EM_njit_lazy(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit_lazy(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum_lazy(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            mu[k] = nb_sum_lazy(q[:, k] * X) / q_k
            sigma[k] = nb_sum_lazy(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


## Setting 1

### Seed: Fixed, Size = 600, N_comp = 3

In [4]:
np.random.seed(42)
time_list_naive = []

for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    mu = np.array([15.,6.,-7.])
    tau = np.array([1/3,1/3,1/3])
    sigma = np.array([8.,3.5,1.3])
    q = np.zeros((len(X_tot),3))
    
    t1 = time.time()
    GMM_EM(X_tot, mu, sigma, 10000, tau, q)
    t2 = time.time()
    
    time_list_naive.append(t2-t1)

In [5]:
np.random.seed(42)
time_list_njit = []

for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    mu = np.array([15.,6.,-7.])
    tau = np.array([1/3,1/3,1/3])
    sigma = np.array([8.,3.5,1.3])
    q = np.zeros((len(X_tot),3))
    
    t1 = time.time()
    GMM_EM_njit(X_tot, mu, sigma, 10000, tau, q, 1e-15)
    t2 = time.time()
    
    time_list_njit.append(t2-t1)

In [6]:
np.random.seed(42)
time_list_njit_lazy = []

for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    mu = np.array([15.,6.,-7.])
    tau = np.array([1/3,1/3,1/3])
    sigma = np.array([8.,3.5,1.3])
    q = np.zeros((len(X_tot),3))
    
    t1 = time.time()
    GMM_EM_njit_lazy(X_tot, mu, sigma, 10000, tau, q, 1e-15)
    t2 = time.time()
    
    time_list_njit_lazy.append(t2-t1)

In [7]:
np.random.seed(42)
time_list_sklearn = []

for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    
    t1 = time.time()
    gmm = GMM(n_components= 3, random_state= 42, max_iter=10000,
             means_init=np.array([15.,6.,-7.]).reshape(3,1), precisions_init=np.array([1/8,1/3.5,1/1.3]).reshape(3,1,1) )
    gmm.fit(X_tot.reshape(-1,1))
    t2 = time.time()
    
    time_list_sklearn.append(t2-t1)

In [8]:
print("Setting 1")
print("naive time: " +str(np.mean(time_list_naive)))
print("njit time (with compilation): "+ str(time_list_njit[0]))
print("njit time (after compilation): " + str(np.mean(time_list_njit[1:])))
print("njit time (with lazy compilation): " + str(time_list_njit_lazy[0]))
print("njit time (after lazy compilation): " + str(np.mean(time_list_njit_lazy[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn)))

Setting 1
naive time: 0.010461115837097168
njit time (with compilation): 0.00034427642822265625
njit time (after compilation): 0.00015086597866482206
njit time (with lazy compilation): 0.8721120357513428
njit time (after lazy compilation): 0.00015147527058919272
sklearn time: 0.003230738639831543


In [9]:
print("naive std: " + str(np.std(time_list_naive)/np.sqrt(10)))
print("njit std: " + str(np.std(time_list_njit[1:])/np.sqrt(9)))
print("lazy njit std: " + str(np.std(time_list_njit_lazy[1:])/np.sqrt(9)))
print("sklearn std: "+ str(np.std(time_list_sklearn)/np.sqrt(10)))

naive std: 0.0002250448298965338
njit std: 3.489931043937384e-07
lazy njit std: 5.271633132863681e-07
sklearn std: 9.815659052716559e-05


### Redefine the function for just-in-time compilation

#### njit eager compilation

In [10]:
@njit('float64[:](float64[:],float64,float64)')
def normal_ll_njit(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit('float64(float64[:])')
def nb_sum(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res



r_sig = types.Tuple([float64[:],float64[:],float64[:],int64])
sig = r_sig(float64[:],float64[:],float64[:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_njit(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            mu[k] = nb_sum(q[:, k] * X) / q_k
            sigma[k] = nb_sum(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


#### njit lazy compilation

In [11]:
@njit
def normal_ll_njit_lazy(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit
def nb_sum_lazy(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res

@njit
def GMM_EM_njit_lazy(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit_lazy(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum_lazy(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            mu[k] = nb_sum_lazy(q[:, k] * X) / q_k
            sigma[k] = nb_sum_lazy(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


## Setting 2

### Seed: Changed, Size: 600, N_comp: 3

In [12]:
time_list_naive_seed = []

for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        mu = np.array([15.,6.,-7.])
        tau = np.array([1/3,1/3,1/3])
        sigma = np.array([8.,3.5,1.3])
        q = np.zeros((len(X_tot),3))
        
        t1 = time.time()
        GMM_EM(X_tot, mu, sigma, 10000, tau, q)
        t2 = time.time()
        
        time_list_naive_seed.append(t2-t1)

In [13]:
time_list_njit_seed = []

for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        mu = np.array([15.,6.,-7.])
        tau = np.array([1/3,1/3,1/3])
        sigma = np.array([8.,3.5,1.3])
        q = np.zeros((len(X_tot),3))
        
        t1 = time.time()
        GMM_EM_njit(X_tot, mu, sigma, 10000, tau, q,1e-15)
        t2 = time.time()
        
        time_list_njit_seed.append(t2-t1)

In [14]:
time_list_njit_lazy_seed = []

for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        mu = np.array([15.,6.,-7.])
        tau = np.array([1/3,1/3,1/3])
        sigma = np.array([8.,3.5,1.3])
        q = np.zeros((len(X_tot),3))
        
        t1 = time.time()
        GMM_EM_njit_lazy(X_tot, mu, sigma, 10000, tau, q,1e-15)
        t2 = time.time()
        
        time_list_njit_lazy_seed.append(t2-t1)

In [15]:
time_list_sklearn_seed = []
for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 200) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 200) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 200) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        t1 = time.time()
        gmm = GMM(n_components= 3, random_state= 42, max_iter=10000,
                 means_init=np.array([15.,6.,-7.]).reshape(3,1), precisions_init=np.array([1/8,1/3.5,1/1.3]).reshape(3,1,1))
        gmm.fit(X_tot.reshape(-1,1))
        t2 = time.time()
        
        time_list_sklearn_seed.append(t2-t1)

In [16]:
print("Setting 2")
print("naive time: " +str(np.mean(time_list_naive_seed)))
print("njit time (with compilation): "+ str(time_list_njit_seed[0]))
print("njit time (after compilation): " + str(np.mean(time_list_njit_seed[1:])))
print("njit time (with lazy compilation): "+ str(time_list_njit_lazy_seed[0]))
print("njit time (after lazy compilation): " + str(np.mean(time_list_njit_lazy_seed[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_seed)))

Setting 2
naive time: 0.009991564750671388
njit time (with compilation): 0.00016021728515625
njit time (after compilation): 0.00015096712594080453
njit time (with lazy compilation): 0.8580930233001709
njit time (after lazy compilation): 0.00015044693995003747
sklearn time: 0.003269178867340088


In [17]:
print("naive std: " + str(np.std(time_list_naive_seed)/np.sqrt(100)))
print("njit std: " + str(np.std(time_list_njit_seed[1:])/np.sqrt(99)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_seed[1:])/np.sqrt(99)))
print("sklearn std: "+ str(np.std(time_list_sklearn_seed)/np.sqrt(100)))

naive std: 4.5160526682832954e-05
njit std: 2.0125920826979688e-07
njit lazy std: 3.7009029734962303e-07
sklearn std: 4.134156481904358e-05


### Redefine the function for just-in-time compilation

#### njit eager compilation

In [18]:
@njit('float64[:](float64[:],float64,float64)')
def normal_ll_njit(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit('float64(float64[:])')
def nb_sum(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res



r_sig = types.Tuple([float64[:],float64[:],float64[:],int64])
sig = r_sig(float64[:],float64[:],float64[:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_njit(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            mu[k] = nb_sum(q[:, k] * X) / q_k
            sigma[k] = nb_sum(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


#### njit lazy compilation

In [19]:
@njit
def normal_ll_njit_lazy(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit
def nb_sum_lazy(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res

@njit
def GMM_EM_njit_lazy(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit_lazy(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum_lazy(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            mu[k] = nb_sum_lazy(q[:, k] * X) / q_k
            sigma[k] = nb_sum_lazy(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


## Setting 3

### Seed: Fixed, Size: 30000, N_comp: 3

In [20]:
time_list_naive_large = []

np.random.seed(42)
for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    mu = np.array([15.,6.,-7.])
    tau = np.array([1/3,1/3,1/3])
    sigma = np.array([8.,3.5,1.3])
    q = np.zeros((len(X_tot),3))
    
    t1 = time.time()
    GMM_EM(X_tot, mu, sigma, 10000, tau, q)
    t2 = time.time()
    
    time_list_naive_large.append(t2-t1)

In [21]:
time_list_njit_large = []

np.random.seed(42)
for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    mu = np.array([15.,6.,-7.])
    tau = np.array([1/3,1/3,1/3])
    sigma = np.array([8.,3.5,1.3])
    q = np.zeros((len(X_tot),3))
    
    t1 = time.time()
    test=GMM_EM_njit(X_tot, mu, sigma, 10000, tau, q,1e-15)
    t2 = time.time()
    
    time_list_njit_large.append(t2-t1)

In [22]:
time_list_njit_lazy_large = []

np.random.seed(42)
for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    mu = np.array([15.,6.,-7.])
    tau = np.array([1/3,1/3,1/3])
    sigma = np.array([8.,3.5,1.3])
    q = np.zeros((len(X_tot),3))
    
    t1 = time.time()
    test=GMM_EM_njit_lazy(X_tot, mu, sigma, 10000, tau, q,1e-15)
    t2 = time.time()
    
    time_list_njit_lazy_large.append(t2-t1)

In [23]:
time_list_sklearn_large = []
np.random.seed(42)
for i in range(10):
    
    X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
    X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
    X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
    X_tot = np.hstack((X1,X2,X3)).flatten()
    
    t1 = time.time()
    gmm = GMM(n_components= 3, random_state= 42, 
              means_init=np.array([15.,6.,-7.]).reshape(3,1), precisions_init= (np.array([1/8, 1/3.5, 1/1.3])).reshape(3,1,1)
              ,max_iter=10000)
    gmm.fit(X_tot.reshape(-1,1))
    t2 = time.time()
    
    time_list_sklearn_large.append(t2-t1)

In [24]:
print("Setting 3")
print("naive time: " +str(np.mean(time_list_naive_large)))
print("njit time (with compilation): "+ str(time_list_njit_large[0]))
print("njit time (after compilation): " + str(np.mean(time_list_njit_large[1:])))
print("njit time (with lazy compilation): "+ str(time_list_njit_lazy_large[0]))
print("njit time (after lazy compilation): " + str(np.mean(time_list_njit_lazy_large[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_large)))

Setting 3
naive time: 0.46084668636322024
njit time (with compilation): 0.006787776947021484
njit time (after compilation): 0.0069091055128309466
njit time (with lazy compilation): 0.8402407169342041
njit time (after lazy compilation): 0.006843434439765083
sklearn time: 0.3260080814361572


In [25]:
print("naive std: " + str(np.std(time_list_naive_large)/np.sqrt(10)))
print("njit std: " + str(np.std(time_list_njit_large[1:])/np.sqrt(9)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_large[1:])/np.sqrt(9)))
print("sklearn std: "+ str(np.std(time_list_sklearn_large)/np.sqrt(10)))

naive std: 0.0028337829074993504
njit std: 1.0966131239586986e-05
njit lazy std: 1.1340610379406654e-05
sklearn std: 0.011788988769400835


### Redefine the function for just-in-time compilation

#### njit eager compilation

In [26]:
@njit('float64[:](float64[:],float64,float64)')
def normal_ll_njit(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit('float64(float64[:])')
def nb_sum(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res



r_sig = types.Tuple([float64[:],float64[:],float64[:],int64])
sig = r_sig(float64[:],float64[:],float64[:],int64,float64[:],float64[:,:],float64)


@njit(sig)
def GMM_EM_njit(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum(q[:, k])
            mu[k] = nb_sum(q[:, k] * X) / q_k
            sigma[k] = nb_sum(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


#### njit lazy compilation

In [27]:
@njit
def normal_ll_njit_lazy(X, mu, sigma):
    return np.exp(-(X-mu)**2 / (2 * sigma)) / np.sqrt(2*np.pi*sigma)



@njit
def nb_sum_lazy(x):
    res = 0.0
    for i in range(x.shape[0]):
        res += x[i]
    return res

@njit
def GMM_EM_njit_lazy(X, mu, sigma, max_iter, tau, q, tol = 1e-15):
    K = len(mu)
    n = len(X)
    
    for iteration in range(max_iter):
        for k in range(K):
            ll = normal_ll_njit_lazy(X, mu[k], sigma[k])
            q[:, k ] = tau[k] * ll
        
        for i in range(n):
            q[i, :] /= nb_sum_lazy(q[i, :])
        
        mu_before = mu
        sigma_before = sigma
        tau_before = tau
        
        for k in range(K):
            q_k = nb_sum_lazy(q[:, k])
            mu[k] = nb_sum_lazy(q[:, k] * X) / q_k
            sigma[k] = nb_sum_lazy(q[:, k] * (X - mu[k])**2) / q_k
            tau[k]  = q_k / n
            
        mu_diff = np.max(np.abs(mu - mu_before))
        sigma_diff = np.max(np.abs(sigma-sigma_before))
        tau_diff = np.max(np.abs(tau - tau_before))
        
        diff = np.max(np.array([np.abs(mu_diff), np.abs(sigma_diff), np.abs(tau_diff)]))
        
        if ( (iteration > 1) & (diff < tol)): break
        
    return mu, sigma, tau, iteration


## Setting 4

### Seed: Changed, Size: 30000, N_comp: 3

In [28]:
time_list_naive_seed_large = []

for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        mu = np.array([15.,6.,-7.])
        tau = np.array([1/3,1/3,1/3])
        sigma = np.array([8.,3.5,1.3])
        q = np.zeros((len(X_tot),3))
        
        t1 = time.time()
        GMM_EM(X_tot, mu, sigma, 10000, tau, q)
        t2 = time.time()
        
        time_list_naive_seed_large.append(t2-t1)

In [29]:
time_list_njit_seed_large = []

for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        mu = np.array([15.,6.,-7.])
        tau = np.array([1/3,1/3,1/3])
        sigma = np.array([8.,3.5,1.3])
        q = np.zeros((len(X_tot),3))
        
        t1 = time.time()
        test=GMM_EM_njit(X_tot, mu, sigma, 10000, tau, q,1e-15)
        t2 = time.time()
        
        time_list_njit_seed_large.append(t2-t1)

In [30]:
time_list_njit_lazy_seed_large = []

for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        mu = np.array([15.,6.,-7.])
        tau = np.array([1/3,1/3,1/3])
        sigma = np.array([8.,3.5,1.3])
        q = np.zeros((len(X_tot),3))
        
        t1 = time.time()
        test=GMM_EM_njit_lazy(X_tot, mu, sigma, 10000, tau, q,1e-15)
        t2 = time.time()
        
        time_list_njit_lazy_seed_large.append(t2-t1)

In [31]:
time_list_sklearn_seed_large = []
for j in range(10):
    np.random.seed(42+j)
    for i in range(10):
        
        X1 = np.random.normal(loc = 20., scale = 3.1, size= 10000) 
        X2 = np.random.normal(loc = 3., scale = 2.3, size= 10000) 
        X3 = np.random.normal(loc = -5., scale = 1.4, size= 10000) 
        X_tot = np.hstack((X1,X2,X3)).flatten()
        
        t1 = time.time()
        gmm = GMM(n_components= 3, random_state= 42, 
                  means_init=np.array([15.,6.,-7.]).reshape(3,1), precisions_init= (np.array([1/8, 1/3.5, 1/1.3])).reshape(3,1,1)
                  ,max_iter=10000)
        gmm.fit(X_tot.reshape(-1,1))
        t2 = time.time()
        
        time_list_sklearn_seed_large.append(t2-t1)

In [32]:
print("Setting 4")
print("naive time: " +str(np.mean(time_list_naive_seed_large)))
print("njit time (with compilation): "+ str(time_list_njit_seed_large[0]))
print("njit time (after compilation): " + str(np.mean(time_list_njit_seed_large[1:])))
print("njit time (with lazy compilation): "+ str(time_list_njit_lazy_seed_large[0]))
print("njit time (after lazy compilation): " + str(np.mean(time_list_njit_lazy_seed_large[1:])))
print("sklearn time: " + str(np.mean(time_list_sklearn_seed_large)))

Setting 4
naive time: 0.4546662449836731
njit time (with compilation): 0.006796121597290039
njit time (after compilation): 0.006801723229764688
njit time (with lazy compilation): 0.8385603427886963
njit time (after lazy compilation): 0.006747859897035541
sklearn time: 0.35401684522628785


In [33]:
print("naive std: " + str(np.std(time_list_naive_seed_large)/np.sqrt(100)))
print("njit std: " + str(np.std(time_list_njit_seed_large[1:])/np.sqrt(99)))
print("njit lazy std: " + str(np.std(time_list_njit_lazy_seed_large[1:])/np.sqrt(99)))
print("sklearn std: "+ str(np.std(time_list_sklearn_seed_large)/np.sqrt(100)))

naive std: 0.0007027505919532101
njit std: 1.921364747907913e-06
njit lazy std: 7.126441128812109e-05
sklearn std: 0.004203431475685767
