## Load libraries

In [1]:
# Libraries to work cross-platform
import os

# Libraries to work with dataset
import numpy as np
import pandas as pd
import ast                      # convert string to list after importing csv data
import tmtoolkit
import pickle

# Libraries to cluster data
from gsdmm import MovieGroupProcess

# Libraries to visualize data
# import matplotlib.pyplot as plt
import seaborn as sns
from cluster_visualization_helper import (
    visualize_cluster  # user-defined functions
)

# Libraries for evaluation
from sklearn import metrics

# Libraries for monitoring operation process
from tqdm import tqdm
from datetime import datetime
# from pprint import pprint
# import logging

## Configure and declare global variables

In [2]:
# Select the project root for the current platform. `os.name` is 'nt' on
# Windows and 'posix' on Linux/macOS.
os_name = os.name

if os_name == 'nt':
    BASE_DIR = "E:/THIENDHB_GOOGLEDRIVE/MASTER TILBURG/THESIS/"
elif os_name == 'posix':
    BASE_DIR = "/media/pinkalinux/WORK/THIENDHB_GOOGLEDRIVE/MASTER TILBURG/THESIS/"
else:
    # Fail here with an explicit message instead of leaving the path
    # constants undefined and hitting a NameError in a later cell.
    raise RuntimeError(f"Unsupported operating system: os.name = {os_name!r}")

# The dataset layout below BASE_DIR is the same on every platform, so it is
# derived once rather than repeated in each branch.
INPUT_DIR = BASE_DIR + "DATASET/INPUT/"
OUTPUT_DIR = BASE_DIR + "DATASET/OUTPUT/"

# Random seed used by the stochastic steps of this notebook.
SEED = 6886

%matplotlib inline

## Import data

In [3]:
# Load the job-title embedding matrix saved as a NumPy binary in OUTPUT_DIR.
title_embeddings = np.load(os.path.join(OUTPUT_DIR, "title_embeddings.npy"))

In [4]:
# title_embeddings.shape

In [5]:
# Load the job-title documents table written to OUTPUT_DIR.
title_docs = pd.read_csv(os.path.join(OUTPUT_DIR, "title_docs.csv"))

In [6]:
# After the CSV import the token lists are strings; parse each one back into
# a Python list. Values that are already lists are passed through unchanged so
# that re-running this cell on the same kernel does not raise a ValueError
# (ast.literal_eval only accepts strings / AST nodes).
title_docs["clean_job_title"] = title_docs["clean_job_title"].apply(
    lambda tokens: tokens if isinstance(tokens, list) else ast.literal_eval(tokens)
)

## Clustering data

### Gibbs Sampling Dirichlet Mixture Model (GSDMM)

In [7]:
# Corpus: one entry per row of the "clean_job_title" column.
title_corpus = title_docs["clean_job_title"].tolist()

# Vocabulary: the distinct tokens found across the flattened corpus.
title_vocab = set(
    tmtoolkit.utils.flatten_list(title_corpus)
)

# Display the vocabulary size together with a small sample of its tokens
# (a set has no defined order, so the sample is arbitrary).
(len(title_vocab), list(title_vocab)[:10])

(2617,
 ['premium',
  'replacement',
  'registry',
  'update',
  'course',
  'road',
  'seg',
  'introduce',
  'speaker',
  'landmine'])

In [8]:
def build_mgp_model(
    X,
    corpus,
    vocab_size,
    K=100,
    alpha=0.1,
    beta=0.1,
    n_iters=50,
    seed=6886,
):
    """
    Fit a GSDMM (MovieGroupProcess) model on a corpus and return the fitted
    model together with the cluster label of every document.

    A MovieGroupProcess is a conceptual model introduced by Yin and Wang 2014 to
    describe their Gibbs sampling algorithm for a Dirichlet Mixture Model for the
    clustering of short text documents.
    Reference: http://dbgroup.cs.tsinghua.edu.cn/wangjy/papers/KDD14-GSDMM.pdf
    Imagine a professor is leading a film class. At the start of the class, the students
    are randomly assigned to K tables. Before class begins, the students make lists of
    their favorite films. The teacher calls the roll n_iters times. When
    a student is called, the student must select a new table satisfying either:
        1) The new table has more students than the current table.
    OR
        2) The new table has students with similar lists of favorite movies.

    :param X: unused
        Not read by this function; kept so existing callers that pass it keep working.
    :param corpus: list of list
        Tokenised documents, forwarded to ``MovieGroupProcess.fit`` as ``docs``.
    :param vocab_size: int
        Vocabulary size, forwarded to ``MovieGroupProcess.fit`` as ``vocab_size``.
    :param K: int
        Upper bound on the number of possible clusters. Typically many fewer
    :param alpha: float between 0 and 1
        Alpha controls the probability that a student will join a table that is currently empty
        When alpha is 0, no one will join an empty table.
    :param beta: float between 0 and 1
        Beta controls the student's affinity for other students with similar interests. A low beta means
        that students desire to sit with students of similar interests. A high beta means they are less
        concerned with affinity and are more influenced by the popularity of a table
    :param n_iters: int
        Number of iterations, forwarded to the ``MovieGroupProcess`` constructor.
    :param seed: int
        Value used to seed NumPy's global random state before fitting.
    :return: tuple ``(mgp, mgp_labels)``
        The fitted ``MovieGroupProcess`` and the value returned by its ``fit``
        call (one cluster label per document).
    """
    # Seed with the caller-supplied value. Previously the notebook-level SEED
    # was used here, which silently ignored the `seed` argument.
    # NOTE: this reseeds NumPy's *global* random state as a side effect;
    # presumably gsdmm samples from that global state — confirm against the
    # installed gsdmm version.
    np.random.seed(seed)
    mgp = MovieGroupProcess(K=K, alpha=alpha, beta=beta, n_iters=n_iters)

    # fit() clusters the input documents: `docs` is the list of token lists and
    # `vocab_size` the total vocabulary size; it returns a list with one
    # cluster label per document.
    mgp_labels = mgp.fit(docs=corpus, vocab_size=vocab_size)

    return mgp, mgp_labels

In [9]:
# Hyperparameter grid explored by the GSDMM search: every combination of the
# four ranges below is fitted once.
k_range = [50, 100, 150]
alpha_range = [0.01, 0.25, 0.5, 0.75, 1.0]
beta_range = [0.01, 0.25, 0.5, 0.75, 1.0]
n_iter_range = [50, 100]

# Single-combination grid for a quick trial run (uncomment to use):
# k_range = [5]
# alpha_range = [0.5]
# beta_range = [0.5]
# n_iter_range = [5]

# Size of the grid = product of the number of candidates per hyperparameter.
search_space_size = (
    len(k_range) * len(alpha_range) * len(beta_range) * len(n_iter_range)
)
print("Total hyperparameters in search space =", search_space_size)

Total hyperparameters in search space = 150


In [10]:
# # Set up logger for callback

# # Creating logger
# mylogs = logging.getLogger(__name__)
# mylogs.setLevel(logging.NOTSET)

# # Handler - log in file
# log_file = logging.FileHandler(OUTPUT_DIR + "title_gsdmm_model_callbacks.log")
# fileformat = logging.Formatter("%(asctime)s:%(levelname)s:%(message)s")
# log_file.setLevel(logging.NOTSET)
# log_file.setFormatter(fileformat)

# # Handler - log in console
# # stream = logging.StreamHandler()
# # streamformat = logging.Formatter("%(levelname)s:%(module)s:%(message)s")
# # stream.setLevel(logging.NOTSET)
# # stream.setFormatter(streamformat)

# # Adding all handlers to the logs
# mylogs.addHandler(log_file)
# # mylogs.addHandler(stream)
# print = mylogs.info

In [None]:
# # Set up logger for callback
# logging.basicConfig(
# #     filename=OUTPUT_DIR + "skills_lda_model_callbacks.log",
#     handlers=[
#         logging.FileHandler(
#             OUTPUT_DIR + "title_gsdmm_model_callbacks.log", "w", "utf-8"
#         )
#     ],
#     format="%(asctime)s:%(levelname)s:%(message)s",
#     level=logging.NOTSET,
# )

# Containers for the grid-search results; each gets one entry per fitted
# hyperparameter combination, in loop order.
labels_list = []
model_list = []
duration_list = []  # fit time per combination, in minutes
# NOTE: "Silhouette" is not filled in this cell — presumably it is computed
# in a later evaluation step.
model_result = {"K": [], "alpha": [], "beta": [], "n_iters": [], "Silhouette": []}

start_loop_time = datetime.now()
print(f"Start loop {start_loop_time.strftime('%Y-%m-%d %H:%M:%S.%f')}")

# Loop through the search space. The progress bar is used as a context
# manager so it is closed even if one of the fits raises.
with tqdm(
    desc="Tuning GSDMM model",
    total=len(k_range) * len(alpha_range) * len(beta_range) * len(n_iter_range),
) as tqdm_bar:
    for K in k_range:
        for alpha in alpha_range:
            for beta in beta_range:
                for n_iters in n_iter_range:
                    print(f"K = {K}")
                    print(f"alpha = {alpha}")
                    print(f"beta = {beta}")
                    print(f"n_iters = {n_iters}")

                    starttime = datetime.now()
                    print(f"Start {starttime.strftime('%Y-%m-%d %H:%M:%S.%f')}")

                    mgp_model, mgp_labels = build_mgp_model(
                        X=title_docs,
                        corpus=title_corpus,
                        vocab_size=len(title_vocab),
                        K=K,
                        alpha=alpha,
                        beta=beta,
                        n_iters=n_iters,
                        seed=SEED,
                    )

                    endtime = datetime.now()
                    print(f"End {endtime.strftime('%Y-%m-%d %H:%M:%S.%f')}")
                    print(f"Duration {endtime - starttime}")

                    # total_seconds() covers the whole interval; the
                    # `.seconds` attribute drops the days component and the
                    # sub-second part of the timedelta.
                    duration_list.append(
                        round((endtime - starttime).total_seconds() / 60, 2)
                    )
                    labels_list.append(mgp_labels)
                    model_list.append(mgp_model)

                    model_result["K"].append(K)
                    model_result["alpha"].append(alpha)
                    model_result["beta"].append(beta)
                    model_result["n_iters"].append(n_iters)

                    tqdm_bar.update(1)

end_loop_time = datetime.now()
print(f"End loop {end_loop_time.strftime('%Y-%m-%d %H:%M:%S.%f')}")
print(f"Duration {end_loop_time - start_loop_time}")

Tuning GSDMM model:   0%|                                                                                       | 0/150 [00:00<?, ?it/s]

Start loop 2021-05-04 02:27:01.723086
K = 50
alpha = 0.01
beta = 0.01
n_iters = 50
Start 2021-05-04 02:27:01.762087
In stage 0: transferred 17800 clusters with 50 clusters populated
In stage 1: transferred 13725 clusters with 50 clusters populated
In stage 2: transferred 9785 clusters with 50 clusters populated
In stage 3: transferred 7733 clusters with 50 clusters populated
In stage 4: transferred 6614 clusters with 50 clusters populated
In stage 5: transferred 6051 clusters with 50 clusters populated
In stage 6: transferred 5645 clusters with 50 clusters populated
In stage 7: transferred 5502 clusters with 50 clusters populated
In stage 8: transferred 5408 clusters with 50 clusters populated
In stage 9: transferred 5341 clusters with 50 clusters populated
In stage 10: transferred 5269 clusters with 50 clusters populated
In stage 11: transferred 5126 clusters with 50 clusters populated
In stage 12: transferred 5048 clusters with 50 clusters populated
In stage 13: transferred 4941 clus

Tuning GSDMM model:   1%|▌                                                                          | 1/150 [10:37<26:24:14, 637.95s/it]

In stage 49: transferred 4316 clusters with 50 clusters populated
End 2021-05-04 02:37:39.707631
Duration 0:10:37.945544
K = 50
alpha = 0.01
beta = 0.01
n_iters = 100
Start 2021-05-04 02:37:39.708631
In stage 0: transferred 17800 clusters with 50 clusters populated
In stage 1: transferred 13725 clusters with 50 clusters populated
In stage 2: transferred 9785 clusters with 50 clusters populated
In stage 3: transferred 7733 clusters with 50 clusters populated
In stage 4: transferred 6614 clusters with 50 clusters populated
In stage 5: transferred 6051 clusters with 50 clusters populated
In stage 6: transferred 5645 clusters with 50 clusters populated
In stage 7: transferred 5502 clusters with 50 clusters populated
In stage 8: transferred 5408 clusters with 50 clusters populated
In stage 9: transferred 5341 clusters with 50 clusters populated
In stage 10: transferred 5269 clusters with 50 clusters populated
In stage 11: transferred 5126 clusters with 50 clusters populated
In stage 12: tra

Tuning GSDMM model:   1%|█                                                                          | 2/150 [30:31<39:40:17, 964.98s/it]

In stage 99: transferred 4122 clusters with 50 clusters populated
End 2021-05-04 02:57:33.616145
Duration 0:19:53.907514
K = 50
alpha = 0.01
beta = 0.25
n_iters = 50
Start 2021-05-04 02:57:33.618143
In stage 0: transferred 18271 clusters with 50 clusters populated
In stage 1: transferred 14886 clusters with 50 clusters populated
In stage 2: transferred 10067 clusters with 50 clusters populated
In stage 3: transferred 7407 clusters with 50 clusters populated
In stage 4: transferred 6059 clusters with 49 clusters populated
In stage 5: transferred 5341 clusters with 46 clusters populated
In stage 6: transferred 4925 clusters with 44 clusters populated
In stage 7: transferred 4893 clusters with 42 clusters populated
In stage 8: transferred 4577 clusters with 39 clusters populated
In stage 9: transferred 4597 clusters with 38 clusters populated
In stage 10: transferred 4439 clusters with 38 clusters populated
In stage 11: transferred 4450 clusters with 36 clusters populated
In stage 12: tra

Tuning GSDMM model:   2%|█▌                                                                         | 3/150 [40:24<32:27:42, 794.98s/it]

In stage 49: transferred 3868 clusters with 28 clusters populated
End 2021-05-04 03:07:26.299345
Duration 0:09:52.681202
K = 50
alpha = 0.01
beta = 0.25
n_iters = 100
Start 2021-05-04 03:07:26.300308
In stage 0: transferred 18271 clusters with 50 clusters populated
In stage 1: transferred 14886 clusters with 50 clusters populated
In stage 2: transferred 10067 clusters with 50 clusters populated
In stage 3: transferred 7407 clusters with 50 clusters populated
In stage 4: transferred 6059 clusters with 49 clusters populated
In stage 5: transferred 5341 clusters with 46 clusters populated
In stage 6: transferred 4925 clusters with 44 clusters populated
In stage 7: transferred 4893 clusters with 42 clusters populated
In stage 8: transferred 4577 clusters with 39 clusters populated
In stage 9: transferred 4597 clusters with 38 clusters populated
In stage 10: transferred 4439 clusters with 38 clusters populated
In stage 11: transferred 4450 clusters with 36 clusters populated
In stage 12: tr

Tuning GSDMM model:   3%|██                                                                         | 4/150 [58:58<37:20:51, 920.90s/it]

In stage 98: transferred 3681 clusters with 25 clusters populated
In stage 99: transferred 3677 clusters with 25 clusters populated
End 2021-05-04 03:26:00.239927
Duration 0:18:33.939619
K = 50
alpha = 0.01
beta = 0.5
n_iters = 50
Start 2021-05-04 03:26:00.241929
In stage 0: transferred 18362 clusters with 50 clusters populated
In stage 1: transferred 15056 clusters with 50 clusters populated
In stage 2: transferred 9965 clusters with 50 clusters populated
In stage 3: transferred 7100 clusters with 38 clusters populated
In stage 4: transferred 5756 clusters with 27 clusters populated
In stage 5: transferred 5125 clusters with 24 clusters populated
In stage 6: transferred 4669 clusters with 22 clusters populated
In stage 7: transferred 4484 clusters with 21 clusters populated
In stage 8: transferred 4192 clusters with 18 clusters populated
In stage 9: transferred 4064 clusters with 18 clusters populated
In stage 10: transferred 3954 clusters with 17 clusters populated
In stage 11: trans

Tuning GSDMM model:   3%|██▍                                                                      | 5/150 [1:06:06<29:55:48, 743.09s/it]

In stage 49: transferred 3391 clusters with 14 clusters populated
End 2021-05-04 03:33:08.061177
Duration 0:07:07.819248
K = 50
alpha = 0.01
beta = 0.5
n_iters = 100
Start 2021-05-04 03:33:08.063156
In stage 0: transferred 18362 clusters with 50 clusters populated
In stage 1: transferred 15056 clusters with 50 clusters populated
In stage 2: transferred 9965 clusters with 50 clusters populated
In stage 3: transferred 7100 clusters with 38 clusters populated
In stage 4: transferred 5756 clusters with 27 clusters populated
In stage 5: transferred 5125 clusters with 24 clusters populated
In stage 6: transferred 4669 clusters with 22 clusters populated
In stage 7: transferred 4484 clusters with 21 clusters populated
In stage 8: transferred 4192 clusters with 18 clusters populated
In stage 9: transferred 4064 clusters with 18 clusters populated
In stage 10: transferred 3954 clusters with 17 clusters populated
In stage 11: transferred 3850 clusters with 17 clusters populated
In stage 12: tran

Tuning GSDMM model:   4%|██▉                                                                      | 6/150 [1:19:45<30:45:20, 768.89s/it]

In stage 99: transferred 3437 clusters with 13 clusters populated
End 2021-05-04 03:46:47.030554
Duration 0:13:38.967398
K = 50
alpha = 0.01
beta = 0.75
n_iters = 50
Start 2021-05-04 03:46:47.032552
In stage 0: transferred 18453 clusters with 50 clusters populated
In stage 1: transferred 14939 clusters with 50 clusters populated
In stage 2: transferred 8148 clusters with 47 clusters populated
In stage 3: transferred 5030 clusters with 32 clusters populated
In stage 4: transferred 3985 clusters with 17 clusters populated
In stage 5: transferred 3612 clusters with 15 clusters populated
In stage 6: transferred 3255 clusters with 15 clusters populated
In stage 7: transferred 3191 clusters with 14 clusters populated
In stage 8: transferred 3199 clusters with 13 clusters populated
In stage 9: transferred 3146 clusters with 13 clusters populated
In stage 10: transferred 3096 clusters with 13 clusters populated
In stage 11: transferred 3114 clusters with 13 clusters populated
In stage 12: tran

Tuning GSDMM model:   5%|███▍                                                                     | 7/150 [1:26:32<25:51:06, 650.81s/it]

In stage 49: transferred 2821 clusters with 10 clusters populated
End 2021-05-04 03:53:34.734788
Duration 0:06:47.702236
K = 50
alpha = 0.01
beta = 0.75
n_iters = 100
Start 2021-05-04 03:53:34.735827
In stage 0: transferred 18453 clusters with 50 clusters populated
In stage 1: transferred 14939 clusters with 50 clusters populated
In stage 2: transferred 8148 clusters with 47 clusters populated
In stage 3: transferred 5030 clusters with 32 clusters populated
In stage 4: transferred 3985 clusters with 17 clusters populated
In stage 5: transferred 3612 clusters with 15 clusters populated
In stage 6: transferred 3255 clusters with 15 clusters populated
In stage 7: transferred 3191 clusters with 14 clusters populated
In stage 8: transferred 3199 clusters with 13 clusters populated
In stage 9: transferred 3146 clusters with 13 clusters populated
In stage 10: transferred 3096 clusters with 13 clusters populated
In stage 11: transferred 3114 clusters with 13 clusters populated
In stage 12: tra

Tuning GSDMM model:   5%|███▉                                                                     | 8/150 [1:40:04<27:41:07, 701.89s/it]

In stage 99: transferred 3001 clusters with 9 clusters populated
End 2021-05-04 04:07:05.979163
Duration 0:13:31.243336
K = 50
alpha = 0.01
beta = 1.0
n_iters = 50
Start 2021-05-04 04:07:05.980210
In stage 0: transferred 18451 clusters with 50 clusters populated
In stage 1: transferred 15369 clusters with 50 clusters populated
In stage 2: transferred 8549 clusters with 47 clusters populated
In stage 3: transferred 5339 clusters with 29 clusters populated
In stage 4: transferred 4252 clusters with 17 clusters populated
In stage 5: transferred 3795 clusters with 14 clusters populated
In stage 6: transferred 3563 clusters with 13 clusters populated
In stage 7: transferred 3362 clusters with 9 clusters populated
In stage 8: transferred 3292 clusters with 9 clusters populated
In stage 9: transferred 3188 clusters with 9 clusters populated
In stage 10: transferred 3125 clusters with 9 clusters populated
In stage 11: transferred 3025 clusters with 9 clusters populated
In stage 12: transferred

Tuning GSDMM model:   6%|████▍                                                                    | 9/150 [1:46:43<23:47:25, 607.42s/it]

In stage 49: transferred 2234 clusters with 7 clusters populated
End 2021-05-04 04:13:45.672396
Duration 0:06:39.692186
K = 50
alpha = 0.01
beta = 1.0
n_iters = 100
Start 2021-05-04 04:13:45.673374
In stage 0: transferred 18451 clusters with 50 clusters populated
In stage 1: transferred 15369 clusters with 50 clusters populated
In stage 2: transferred 8549 clusters with 47 clusters populated
In stage 3: transferred 5339 clusters with 29 clusters populated
In stage 4: transferred 4252 clusters with 17 clusters populated
In stage 5: transferred 3795 clusters with 14 clusters populated
In stage 6: transferred 3563 clusters with 13 clusters populated
In stage 7: transferred 3362 clusters with 9 clusters populated
In stage 8: transferred 3292 clusters with 9 clusters populated
In stage 9: transferred 3188 clusters with 9 clusters populated
In stage 10: transferred 3125 clusters with 9 clusters populated
In stage 11: transferred 3025 clusters with 9 clusters populated
In stage 12: transferre

Tuning GSDMM model:   7%|████▊                                                                   | 10/150 [1:59:58<25:52:16, 665.26s/it]

In stage 99: transferred 2332 clusters with 7 clusters populated
End 2021-05-04 04:27:00.458793
Duration 0:13:14.785419
K = 50
alpha = 0.25
beta = 0.01
n_iters = 50
Start 2021-05-04 04:27:00.459758
In stage 0: transferred 17800 clusters with 50 clusters populated
In stage 1: transferred 13784 clusters with 50 clusters populated
In stage 2: transferred 9866 clusters with 50 clusters populated
In stage 3: transferred 7809 clusters with 50 clusters populated
In stage 4: transferred 6711 clusters with 50 clusters populated
In stage 5: transferred 6206 clusters with 50 clusters populated
In stage 6: transferred 5879 clusters with 50 clusters populated
In stage 7: transferred 5756 clusters with 50 clusters populated
In stage 8: transferred 5559 clusters with 50 clusters populated
In stage 9: transferred 5419 clusters with 50 clusters populated
In stage 10: transferred 5384 clusters with 50 clusters populated
In stage 11: transferred 5265 clusters with 50 clusters populated
In stage 12: trans

Tuning GSDMM model:   7%|█████▎                                                                  | 11/150 [2:06:44<22:37:19, 585.90s/it]

In stage 49: transferred 4544 clusters with 50 clusters populated
End 2021-05-04 04:33:46.398996
Duration 0:06:45.939238
K = 50
alpha = 0.25
beta = 0.01
n_iters = 100
Start 2021-05-04 04:33:46.400962
In stage 0: transferred 17800 clusters with 50 clusters populated
In stage 1: transferred 13784 clusters with 50 clusters populated
In stage 2: transferred 9866 clusters with 50 clusters populated
In stage 3: transferred 7809 clusters with 50 clusters populated
In stage 4: transferred 6711 clusters with 50 clusters populated
In stage 5: transferred 6206 clusters with 50 clusters populated
In stage 6: transferred 5879 clusters with 50 clusters populated
In stage 7: transferred 5756 clusters with 50 clusters populated
In stage 8: transferred 5559 clusters with 50 clusters populated
In stage 9: transferred 5419 clusters with 50 clusters populated
In stage 10: transferred 5384 clusters with 50 clusters populated
In stage 11: transferred 5265 clusters with 50 clusters populated
In stage 12: tra

Tuning GSDMM model:   8%|█████▊                                                                  | 12/150 [2:20:16<25:05:43, 654.66s/it]

In stage 99: transferred 4445 clusters with 50 clusters populated
End 2021-05-04 04:47:18.346840
Duration 0:13:31.945878
K = 50
alpha = 0.25
beta = 0.25
n_iters = 50
Start 2021-05-04 04:47:18.346840
In stage 0: transferred 18271 clusters with 50 clusters populated
In stage 1: transferred 14903 clusters with 50 clusters populated
In stage 2: transferred 10078 clusters with 50 clusters populated
In stage 3: transferred 7516 clusters with 50 clusters populated
In stage 4: transferred 6117 clusters with 47 clusters populated
In stage 5: transferred 5489 clusters with 45 clusters populated
In stage 6: transferred 5094 clusters with 43 clusters populated
In stage 7: transferred 4755 clusters with 41 clusters populated
In stage 8: transferred 4557 clusters with 41 clusters populated
In stage 9: transferred 4438 clusters with 37 clusters populated
In stage 10: transferred 4247 clusters with 38 clusters populated
In stage 11: transferred 4219 clusters with 38 clusters populated
In stage 12: tra

Tuning GSDMM model:   9%|██████▏                                                                 | 13/150 [2:27:07<22:06:17, 580.86s/it]

In stage 49: transferred 3831 clusters with 23 clusters populated
End 2021-05-04 04:54:09.368279
Duration 0:06:51.021439
K = 50
alpha = 0.25
beta = 0.25
n_iters = 100
Start 2021-05-04 04:54:09.370283
In stage 0: transferred 18271 clusters with 50 clusters populated
In stage 1: transferred 14903 clusters with 50 clusters populated
In stage 2: transferred 10078 clusters with 50 clusters populated
In stage 3: transferred 7516 clusters with 50 clusters populated
In stage 4: transferred 6117 clusters with 47 clusters populated
In stage 5: transferred 5489 clusters with 45 clusters populated
In stage 6: transferred 5094 clusters with 43 clusters populated
In stage 7: transferred 4755 clusters with 41 clusters populated
In stage 8: transferred 4557 clusters with 41 clusters populated
In stage 9: transferred 4438 clusters with 37 clusters populated
In stage 10: transferred 4247 clusters with 38 clusters populated
In stage 11: transferred 4219 clusters with 38 clusters populated
In stage 12: tr

Tuning GSDMM model:   9%|██████▋                                                                 | 14/150 [2:40:44<24:37:52, 652.00s/it]

In stage 99: transferred 3813 clusters with 21 clusters populated
End 2021-05-04 05:07:45.778536
Duration 0:13:36.408253
K = 50
alpha = 0.25
beta = 0.5
n_iters = 50
Start 2021-05-04 05:07:45.779546
In stage 0: transferred 18377 clusters with 50 clusters populated
In stage 1: transferred 15074 clusters with 50 clusters populated
In stage 2: transferred 9895 clusters with 50 clusters populated
In stage 3: transferred 7036 clusters with 40 clusters populated
In stage 4: transferred 5726 clusters with 32 clusters populated
In stage 5: transferred 5128 clusters with 31 clusters populated
In stage 6: transferred 4829 clusters with 28 clusters populated
In stage 7: transferred 4578 clusters with 26 clusters populated
In stage 8: transferred 4419 clusters with 23 clusters populated
In stage 9: transferred 4291 clusters with 22 clusters populated
In stage 10: transferred 4162 clusters with 21 clusters populated
In stage 11: transferred 4077 clusters with 21 clusters populated
In stage 12: trans

Tuning GSDMM model:  10%|███████▏                                                                | 15/150 [2:47:33<21:42:15, 578.78s/it]

In stage 49: transferred 3624 clusters with 15 clusters populated
End 2021-05-04 05:14:34.876741
Duration 0:06:49.097195
K = 50
alpha = 0.25
beta = 0.5
n_iters = 100
Start 2021-05-04 05:14:34.877743
In stage 0: transferred 18377 clusters with 50 clusters populated
In stage 1: transferred 15074 clusters with 50 clusters populated
In stage 2: transferred 9895 clusters with 50 clusters populated
In stage 3: transferred 7036 clusters with 40 clusters populated
In stage 4: transferred 5726 clusters with 32 clusters populated
In stage 5: transferred 5128 clusters with 31 clusters populated
In stage 6: transferred 4829 clusters with 28 clusters populated
In stage 7: transferred 4578 clusters with 26 clusters populated
In stage 8: transferred 4419 clusters with 23 clusters populated
In stage 9: transferred 4291 clusters with 22 clusters populated
In stage 10: transferred 4162 clusters with 21 clusters populated
In stage 11: transferred 4077 clusters with 21 clusters populated
In stage 12: tran

Tuning GSDMM model:  11%|███████▋                                                                | 16/150 [3:01:07<24:10:42, 649.57s/it]

In stage 99: transferred 3457 clusters with 14 clusters populated
End 2021-05-04 05:28:08.831183
Duration 0:13:33.953440
K = 50
alpha = 0.25
beta = 0.75
n_iters = 50
Start 2021-05-04 05:28:08.833168
In stage 0: transferred 18450 clusters with 50 clusters populated
In stage 1: transferred 15052 clusters with 50 clusters populated
In stage 2: transferred 8376 clusters with 48 clusters populated
In stage 3: transferred 5439 clusters with 31 clusters populated
In stage 4: transferred 4409 clusters with 27 clusters populated
In stage 5: transferred 3873 clusters with 22 clusters populated
In stage 6: transferred 3496 clusters with 17 clusters populated
In stage 7: transferred 3475 clusters with 18 clusters populated
In stage 8: transferred 3401 clusters with 19 clusters populated
In stage 9: transferred 3363 clusters with 17 clusters populated
In stage 10: transferred 3291 clusters with 18 clusters populated
In stage 11: transferred 3209 clusters with 16 clusters populated
In stage 12: tran

Tuning GSDMM model:  11%|████████▏                                                               | 17/150 [3:07:53<21:18:08, 576.60s/it]

In stage 49: transferred 3167 clusters with 14 clusters populated
End 2021-05-04 05:34:55.740792
Duration 0:06:46.907624
K = 50
alpha = 0.25
beta = 0.75
n_iters = 100
Start 2021-05-04 05:34:55.741790
In stage 0: transferred 18450 clusters with 50 clusters populated
In stage 1: transferred 15052 clusters with 50 clusters populated
In stage 2: transferred 8376 clusters with 48 clusters populated
In stage 3: transferred 5439 clusters with 31 clusters populated
In stage 4: transferred 4409 clusters with 27 clusters populated
In stage 5: transferred 3873 clusters with 22 clusters populated
In stage 6: transferred 3496 clusters with 17 clusters populated
In stage 7: transferred 3475 clusters with 18 clusters populated
In stage 8: transferred 3401 clusters with 19 clusters populated
In stage 9: transferred 3363 clusters with 17 clusters populated
In stage 10: transferred 3291 clusters with 18 clusters populated
In stage 11: transferred 3209 clusters with 16 clusters populated
In stage 12: tra

Tuning GSDMM model:  12%|████████▋                                                               | 18/150 [3:21:24<23:43:26, 647.02s/it]

In stage 99: transferred 3242 clusters with 12 clusters populated
End 2021-05-04 05:48:26.692157
Duration 0:13:30.950367
K = 50
alpha = 0.25
beta = 1.0
n_iters = 50
Start 2021-05-04 05:48:26.693158
In stage 0: transferred 18451 clusters with 50 clusters populated
In stage 1: transferred 15383 clusters with 50 clusters populated
In stage 2: transferred 8588 clusters with 49 clusters populated
In stage 3: transferred 5402 clusters with 26 clusters populated
In stage 4: transferred 4293 clusters with 17 clusters populated
In stage 5: transferred 3745 clusters with 13 clusters populated
In stage 6: transferred 3570 clusters with 15 clusters populated
In stage 7: transferred 3343 clusters with 12 clusters populated
In stage 8: transferred 3275 clusters with 12 clusters populated
In stage 9: transferred 3227 clusters with 10 clusters populated
In stage 10: transferred 3180 clusters with 9 clusters populated
In stage 11: transferred 3079 clusters with 11 clusters populated
In stage 12: transf

Tuning GSDMM model:  13%|█████████                                                               | 19/150 [3:27:59<20:47:20, 571.30s/it]

In stage 49: transferred 2243 clusters with 12 clusters populated
End 2021-05-04 05:55:01.598354
Duration 0:06:34.905196
K = 50
alpha = 0.25
beta = 1.0
n_iters = 100
Start 2021-05-04 05:55:01.600377
In stage 0: transferred 18451 clusters with 50 clusters populated
In stage 1: transferred 15383 clusters with 50 clusters populated
In stage 2: transferred 8588 clusters with 49 clusters populated
In stage 3: transferred 5402 clusters with 26 clusters populated
In stage 4: transferred 4293 clusters with 17 clusters populated
In stage 5: transferred 3745 clusters with 13 clusters populated
In stage 6: transferred 3570 clusters with 15 clusters populated
In stage 7: transferred 3343 clusters with 12 clusters populated
In stage 8: transferred 3275 clusters with 12 clusters populated
In stage 9: transferred 3227 clusters with 10 clusters populated
In stage 10: transferred 3180 clusters with 9 clusters populated
In stage 11: transferred 3079 clusters with 11 clusters populated
In stage 12: trans

Tuning GSDMM model:  13%|█████████▌                                                              | 20/150 [3:41:20<23:07:10, 640.23s/it]

In stage 99: transferred 2289 clusters with 13 clusters populated
End 2021-05-04 06:08:22.494755
Duration 0:13:20.894378
K = 50
alpha = 0.5
beta = 0.01
n_iters = 50
Start 2021-05-04 06:08:22.495771
In stage 0: transferred 17799 clusters with 50 clusters populated
In stage 1: transferred 13677 clusters with 50 clusters populated
In stage 2: transferred 9905 clusters with 50 clusters populated
In stage 3: transferred 7859 clusters with 50 clusters populated
In stage 4: transferred 6771 clusters with 50 clusters populated
In stage 5: transferred 6157 clusters with 50 clusters populated
In stage 6: transferred 5805 clusters with 50 clusters populated
In stage 7: transferred 5665 clusters with 50 clusters populated
In stage 8: transferred 5538 clusters with 50 clusters populated
In stage 9: transferred 5405 clusters with 50 clusters populated
In stage 10: transferred 5352 clusters with 50 clusters populated
In stage 11: transferred 5274 clusters with 50 clusters populated
In stage 12: trans

Tuning GSDMM model:  14%|██████████                                                              | 21/150 [3:48:09<20:27:21, 570.86s/it]

In stage 49: transferred 4300 clusters with 50 clusters populated
End 2021-05-04 06:15:11.611959
Duration 0:06:49.116188
K = 50
alpha = 0.5
beta = 0.01
n_iters = 100
Start 2021-05-04 06:15:11.613963
In stage 0: transferred 17799 clusters with 50 clusters populated
In stage 1: transferred 13677 clusters with 50 clusters populated
In stage 2: transferred 9905 clusters with 50 clusters populated
In stage 3: transferred 7859 clusters with 50 clusters populated
In stage 4: transferred 6771 clusters with 50 clusters populated
In stage 5: transferred 6157 clusters with 50 clusters populated
In stage 6: transferred 5805 clusters with 50 clusters populated
In stage 7: transferred 5665 clusters with 50 clusters populated
In stage 8: transferred 5538 clusters with 50 clusters populated
In stage 9: transferred 5405 clusters with 50 clusters populated
In stage 10: transferred 5352 clusters with 50 clusters populated
In stage 11: transferred 5274 clusters with 50 clusters populated
In stage 12: tran

Tuning GSDMM model:  15%|██████████▌                                                             | 22/150 [4:01:40<22:51:26, 642.87s/it]

In stage 99: transferred 4107 clusters with 50 clusters populated
End 2021-05-04 06:28:42.400829
Duration 0:13:30.786866
K = 50
alpha = 0.5
beta = 0.25
n_iters = 50
Start 2021-05-04 06:28:42.401811
In stage 0: transferred 18275 clusters with 50 clusters populated
In stage 1: transferred 15021 clusters with 50 clusters populated
In stage 2: transferred 10220 clusters with 50 clusters populated
In stage 3: transferred 7532 clusters with 50 clusters populated
In stage 4: transferred 6134 clusters with 47 clusters populated
In stage 5: transferred 5490 clusters with 44 clusters populated
In stage 6: transferred 5118 clusters with 42 clusters populated
In stage 7: transferred 4778 clusters with 40 clusters populated
In stage 8: transferred 4500 clusters with 41 clusters populated
In stage 9: transferred 4429 clusters with 40 clusters populated
In stage 10: transferred 4320 clusters with 39 clusters populated
In stage 11: transferred 4248 clusters with 38 clusters populated
In stage 12: tran

Tuning GSDMM model:  15%|███████████                                                             | 23/150 [4:08:31<20:13:30, 573.31s/it]

In stage 49: transferred 3969 clusters with 34 clusters populated
End 2021-05-04 06:35:33.485016
Duration 0:06:51.083205
K = 50
alpha = 0.5
beta = 0.25
n_iters = 100
Start 2021-05-04 06:35:33.486017
In stage 0: transferred 18275 clusters with 50 clusters populated
In stage 1: transferred 15021 clusters with 50 clusters populated
In stage 2: transferred 10220 clusters with 50 clusters populated
In stage 3: transferred 7532 clusters with 50 clusters populated
In stage 4: transferred 6134 clusters with 47 clusters populated
In stage 5: transferred 5490 clusters with 44 clusters populated
In stage 6: transferred 5118 clusters with 42 clusters populated
In stage 7: transferred 4778 clusters with 40 clusters populated
In stage 8: transferred 4500 clusters with 41 clusters populated
In stage 9: transferred 4429 clusters with 40 clusters populated
In stage 10: transferred 4320 clusters with 39 clusters populated
In stage 11: transferred 4248 clusters with 38 clusters populated
In stage 12: tra

Tuning GSDMM model:  16%|███████████▌                                                            | 24/150 [4:22:13<22:40:28, 647.84s/it]

In stage 99: transferred 4023 clusters with 30 clusters populated
End 2021-05-04 06:49:15.189030
Duration 0:13:41.703013
K = 50
alpha = 0.5
beta = 0.5
n_iters = 50
Start 2021-05-04 06:49:15.191027
In stage 0: transferred 18370 clusters with 50 clusters populated
In stage 1: transferred 15142 clusters with 50 clusters populated
In stage 2: transferred 9862 clusters with 49 clusters populated
In stage 3: transferred 6759 clusters with 41 clusters populated
In stage 4: transferred 5338 clusters with 35 clusters populated
In stage 5: transferred 4938 clusters with 29 clusters populated
In stage 6: transferred 4753 clusters with 22 clusters populated
In stage 7: transferred 4535 clusters with 20 clusters populated
In stage 8: transferred 4364 clusters with 22 clusters populated
In stage 9: transferred 4267 clusters with 24 clusters populated
In stage 10: transferred 4205 clusters with 21 clusters populated
In stage 11: transferred 4071 clusters with 23 clusters populated
In stage 12: transf

Tuning GSDMM model:  17%|████████████                                                            | 25/150 [4:29:00<19:59:14, 575.64s/it]

In stage 49: transferred 3533 clusters with 22 clusters populated
End 2021-05-04 06:56:02.380265
Duration 0:06:47.189238
K = 50
alpha = 0.5
beta = 0.5
n_iters = 100
Start 2021-05-04 06:56:02.382252
In stage 0: transferred 18370 clusters with 50 clusters populated
In stage 1: transferred 15142 clusters with 50 clusters populated
In stage 2: transferred 9862 clusters with 49 clusters populated
In stage 3: transferred 6759 clusters with 41 clusters populated
In stage 4: transferred 5338 clusters with 35 clusters populated
In stage 5: transferred 4938 clusters with 29 clusters populated
In stage 6: transferred 4753 clusters with 22 clusters populated
In stage 7: transferred 4535 clusters with 20 clusters populated
In stage 8: transferred 4364 clusters with 22 clusters populated
In stage 9: transferred 4267 clusters with 24 clusters populated
In stage 10: transferred 4205 clusters with 21 clusters populated
In stage 11: transferred 4071 clusters with 23 clusters populated
In stage 12: trans

Tuning GSDMM model:  17%|████████████▍                                                           | 26/150 [4:42:41<22:21:35, 649.16s/it]

In stage 98: transferred 3487 clusters with 15 clusters populated
In stage 99: transferred 3538 clusters with 15 clusters populated
End 2021-05-04 07:09:43.064780
Duration 0:13:40.682528
K = 50
alpha = 0.5
beta = 0.75
n_iters = 50
Start 2021-05-04 07:09:43.065764
In stage 0: transferred 18416 clusters with 50 clusters populated
In stage 1: transferred 15483 clusters with 50 clusters populated
In stage 2: transferred 8670 clusters with 49 clusters populated
In stage 3: transferred 5559 clusters with 34 clusters populated
In stage 4: transferred 4754 clusters with 23 clusters populated
In stage 5: transferred 4317 clusters with 22 clusters populated
In stage 6: transferred 3987 clusters with 19 clusters populated
In stage 7: transferred 3780 clusters with 17 clusters populated
In stage 8: transferred 3717 clusters with 15 clusters populated
In stage 9: transferred 3480 clusters with 17 clusters populated
In stage 10: transferred 3365 clusters with 18 clusters populated
In stage 11: trans

Tuning GSDMM model:  18%|████████████▉                                                           | 27/150 [4:49:31<19:43:48, 577.47s/it]

In stage 49: transferred 3154 clusters with 15 clusters populated
End 2021-05-04 07:16:33.274413
Duration 0:06:50.208649
K = 50
alpha = 0.5
beta = 0.75
n_iters = 100
Start 2021-05-04 07:16:33.276415
In stage 0: transferred 18416 clusters with 50 clusters populated
In stage 1: transferred 15483 clusters with 50 clusters populated
In stage 2: transferred 8670 clusters with 49 clusters populated
In stage 3: transferred 5559 clusters with 34 clusters populated
In stage 4: transferred 4754 clusters with 23 clusters populated
In stage 5: transferred 4317 clusters with 22 clusters populated
In stage 6: transferred 3987 clusters with 19 clusters populated
In stage 7: transferred 3780 clusters with 17 clusters populated
In stage 8: transferred 3717 clusters with 15 clusters populated
In stage 9: transferred 3480 clusters with 17 clusters populated
In stage 10: transferred 3365 clusters with 18 clusters populated
In stage 11: transferred 3375 clusters with 17 clusters populated
In stage 12: tran

Tuning GSDMM model:  19%|█████████████▍                                                          | 28/150 [5:03:07<21:59:27, 648.92s/it]


In stage 99: transferred 3201 clusters with 16 clusters populated
End 2021-05-04 07:30:08.890830
Duration 0:13:35.614415
K = 50
alpha = 0.5
beta = 1.0
n_iters = 50
Start 2021-05-04 07:30:08.892822
In stage 0: transferred 18480 clusters with 50 clusters populated
In stage 1: transferred 15429 clusters with 50 clusters populated
In stage 2: transferred 8910 clusters with 48 clusters populated
In stage 3: transferred 5747 clusters with 27 clusters populated
In stage 4: transferred 4411 clusters with 18 clusters populated
In stage 5: transferred 3750 clusters with 16 clusters populated
In stage 6: transferred 3438 clusters with 14 clusters populated
In stage 7: transferred 3304 clusters with 11 clusters populated
In stage 8: transferred 3261 clusters with 14 clusters populated
In stage 9: transferred 3241 clusters with 13 clusters populated
In stage 10: transferred 3258 clusters with 16 clusters populated
In stage 11: transferred 3198 clusters with 13 clusters populated
In stage 12: trans

Tuning GSDMM model:  19%|█████████████▉                                                          | 29/150 [5:09:45<19:17:09, 573.80s/it]

In stage 49: transferred 2672 clusters with 14 clusters populated
End 2021-05-04 07:36:47.423020
Duration 0:06:38.530198
K = 50
alpha = 0.5
beta = 1.0
n_iters = 100
Start 2021-05-04 07:36:47.425024
In stage 0: transferred 18480 clusters with 50 clusters populated
In stage 1: transferred 15429 clusters with 50 clusters populated
In stage 2: transferred 8910 clusters with 48 clusters populated
In stage 3: transferred 5747 clusters with 27 clusters populated
In stage 4: transferred 4411 clusters with 18 clusters populated
In stage 5: transferred 3750 clusters with 16 clusters populated
In stage 6: transferred 3438 clusters with 14 clusters populated
In stage 7: transferred 3304 clusters with 11 clusters populated
In stage 8: transferred 3261 clusters with 14 clusters populated
In stage 9: transferred 3241 clusters with 13 clusters populated
In stage 10: transferred 3258 clusters with 16 clusters populated
In stage 11: transferred 3198 clusters with 13 clusters populated
In stage 12: trans

Tuning GSDMM model:  20%|██████████████▍                                                         | 30/150 [5:22:51<21:14:46, 637.39s/it]

In stage 99: transferred 2450 clusters with 9 clusters populated
End 2021-05-04 07:49:53.179448
Duration 0:13:05.754424
K = 50
alpha = 0.75
beta = 0.01
n_iters = 50
Start 2021-05-04 07:49:53.181419
In stage 0: transferred 17799 clusters with 50 clusters populated
In stage 1: transferred 13677 clusters with 50 clusters populated
In stage 2: transferred 9844 clusters with 50 clusters populated
In stage 3: transferred 7951 clusters with 50 clusters populated
In stage 4: transferred 6807 clusters with 50 clusters populated
In stage 5: transferred 6326 clusters with 50 clusters populated
In stage 6: transferred 5978 clusters with 50 clusters populated
In stage 7: transferred 5726 clusters with 50 clusters populated
In stage 8: transferred 5494 clusters with 50 clusters populated
In stage 9: transferred 5388 clusters with 50 clusters populated
In stage 10: transferred 5390 clusters with 50 clusters populated
In stage 11: transferred 5321 clusters with 50 clusters populated
In stage 12: trans

Tuning GSDMM model:  21%|██████████████▉                                                         | 31/150 [5:29:34<18:44:42, 567.08s/it]

In stage 49: transferred 4506 clusters with 50 clusters populated
End 2021-05-04 07:56:36.222649
Duration 0:06:43.041230
K = 50
alpha = 0.75
beta = 0.01
n_iters = 100
Start 2021-05-04 07:56:36.224615
In stage 0: transferred 17799 clusters with 50 clusters populated
In stage 1: transferred 13677 clusters with 50 clusters populated
In stage 2: transferred 9844 clusters with 50 clusters populated
In stage 3: transferred 7951 clusters with 50 clusters populated
In stage 4: transferred 6807 clusters with 50 clusters populated
In stage 5: transferred 6326 clusters with 50 clusters populated
In stage 6: transferred 5978 clusters with 50 clusters populated
In stage 7: transferred 5726 clusters with 50 clusters populated
In stage 8: transferred 5494 clusters with 50 clusters populated
In stage 9: transferred 5388 clusters with 50 clusters populated
In stage 10: transferred 5390 clusters with 50 clusters populated
In stage 11: transferred 5321 clusters with 50 clusters populated
In stage 12: tra

Tuning GSDMM model:  21%|███████████████▎                                                        | 32/150 [5:42:56<20:53:55, 637.59s/it]

In stage 99: transferred 4232 clusters with 50 clusters populated
End 2021-05-04 08:09:58.326015
Duration 0:13:22.101400
K = 50
alpha = 0.75
beta = 0.25
n_iters = 50
Start 2021-05-04 08:09:58.327019
In stage 0: transferred 18275 clusters with 50 clusters populated
In stage 1: transferred 15022 clusters with 50 clusters populated
In stage 2: transferred 10234 clusters with 50 clusters populated
In stage 3: transferred 7481 clusters with 49 clusters populated
In stage 4: transferred 6100 clusters with 49 clusters populated
In stage 5: transferred 5445 clusters with 48 clusters populated
In stage 6: transferred 5032 clusters with 43 clusters populated
In stage 7: transferred 4742 clusters with 43 clusters populated
In stage 8: transferred 4625 clusters with 40 clusters populated
In stage 9: transferred 4460 clusters with 39 clusters populated
In stage 10: transferred 4314 clusters with 41 clusters populated
In stage 11: transferred 4341 clusters with 40 clusters populated
In stage 12: tra

Tuning GSDMM model:  22%|███████████████▊                                                        | 33/150 [5:49:48<18:31:25, 569.96s/it]

In stage 48: transferred 3790 clusters with 36 clusters populated
In stage 49: transferred 3889 clusters with 32 clusters populated
End 2021-05-04 08:16:50.483281
Duration 0:06:52.156262
K = 50
alpha = 0.75
beta = 0.25
n_iters = 100
Start 2021-05-04 08:16:50.484226
In stage 0: transferred 18275 clusters with 50 clusters populated
In stage 1: transferred 15022 clusters with 50 clusters populated
In stage 2: transferred 10234 clusters with 50 clusters populated
In stage 3: transferred 7481 clusters with 49 clusters populated
In stage 4: transferred 6100 clusters with 49 clusters populated
In stage 5: transferred 5445 clusters with 48 clusters populated
In stage 6: transferred 5032 clusters with 43 clusters populated
In stage 7: transferred 4742 clusters with 43 clusters populated
In stage 8: transferred 4625 clusters with 40 clusters populated
In stage 9: transferred 4460 clusters with 39 clusters populated
In stage 10: transferred 4314 clusters with 41 clusters populated
In stage 11: tr

Tuning GSDMM model:  23%|████████████████▎                                                       | 34/150 [6:03:28<20:46:38, 644.81s/it]

In stage 98: transferred 3879 clusters with 34 clusters populated
In stage 99: transferred 3868 clusters with 31 clusters populated
End 2021-05-04 08:30:29.950658
Duration 0:13:39.466432
K = 50
alpha = 0.75
beta = 0.5
n_iters = 50
Start 2021-05-04 08:30:29.951638
In stage 0: transferred 18376 clusters with 50 clusters populated
In stage 1: transferred 15156 clusters with 50 clusters populated
In stage 2: transferred 10051 clusters with 50 clusters populated
In stage 3: transferred 7245 clusters with 40 clusters populated
In stage 4: transferred 5961 clusters with 32 clusters populated
In stage 5: transferred 5307 clusters with 31 clusters populated
In stage 6: transferred 4953 clusters with 28 clusters populated
In stage 7: transferred 4794 clusters with 25 clusters populated
In stage 8: transferred 4658 clusters with 23 clusters populated
In stage 9: transferred 4602 clusters with 27 clusters populated
In stage 10: transferred 4355 clusters with 22 clusters populated
In stage 11: tran

Tuning GSDMM model:  23%|████████████████▊                                                       | 35/150 [6:10:18<18:20:50, 574.35s/it]

In stage 49: transferred 3590 clusters with 22 clusters populated
End 2021-05-04 08:37:19.892735
Duration 0:06:49.941097
K = 50
alpha = 0.75
beta = 0.5
n_iters = 100
Start 2021-05-04 08:37:19.894704
In stage 0: transferred 18376 clusters with 50 clusters populated
In stage 1: transferred 15156 clusters with 50 clusters populated
In stage 2: transferred 10051 clusters with 50 clusters populated
In stage 3: transferred 7245 clusters with 40 clusters populated
In stage 4: transferred 5961 clusters with 32 clusters populated
In stage 5: transferred 5307 clusters with 31 clusters populated
In stage 6: transferred 4953 clusters with 28 clusters populated
In stage 7: transferred 4794 clusters with 25 clusters populated
In stage 8: transferred 4658 clusters with 23 clusters populated
In stage 9: transferred 4602 clusters with 27 clusters populated
In stage 10: transferred 4355 clusters with 22 clusters populated
In stage 11: transferred 4240 clusters with 26 clusters populated
In stage 12: tra

Tuning GSDMM model:  24%|█████████████████▎                                                      | 36/150 [6:24:00<20:32:24, 648.64s/it]

In stage 99: transferred 3341 clusters with 25 clusters populated
End 2021-05-04 08:51:01.871554
Duration 0:13:41.976850
K = 50
alpha = 0.75
beta = 0.75
n_iters = 50
Start 2021-05-04 08:51:01.873499
In stage 0: transferred 18434 clusters with 50 clusters populated
In stage 1: transferred 15486 clusters with 50 clusters populated
In stage 2: transferred 8892 clusters with 48 clusters populated
In stage 3: transferred 5981 clusters with 39 clusters populated
In stage 4: transferred 5014 clusters with 20 clusters populated
In stage 5: transferred 4570 clusters with 17 clusters populated
In stage 6: transferred 4382 clusters with 17 clusters populated
In stage 7: transferred 4189 clusters with 16 clusters populated
In stage 8: transferred 4066 clusters with 19 clusters populated
In stage 9: transferred 3935 clusters with 17 clusters populated
In stage 10: transferred 3855 clusters with 16 clusters populated
In stage 11: transferred 3666 clusters with 16 clusters populated
In stage 12: tran

Tuning GSDMM model:  25%|█████████████████▊                                                      | 37/150 [6:30:52<18:08:22, 577.90s/it]

In stage 49: transferred 3339 clusters with 19 clusters populated
End 2021-05-04 08:57:54.715746
Duration 0:06:52.842247
K = 50
alpha = 0.75
beta = 0.75
n_iters = 100
Start 2021-05-04 08:57:54.716707
In stage 0: transferred 18434 clusters with 50 clusters populated
In stage 1: transferred 15486 clusters with 50 clusters populated
In stage 2: transferred 8892 clusters with 48 clusters populated
In stage 3: transferred 5981 clusters with 39 clusters populated
In stage 4: transferred 5014 clusters with 20 clusters populated
In stage 5: transferred 4570 clusters with 17 clusters populated
In stage 6: transferred 4382 clusters with 17 clusters populated
In stage 7: transferred 4189 clusters with 16 clusters populated
In stage 8: transferred 4066 clusters with 19 clusters populated
In stage 9: transferred 3935 clusters with 17 clusters populated
In stage 10: transferred 3855 clusters with 16 clusters populated
In stage 11: transferred 3666 clusters with 16 clusters populated
In stage 12: tra

Tuning GSDMM model:  25%|██████████████████▏                                                     | 38/150 [6:44:36<20:16:16, 651.58s/it]

In stage 98: transferred 3375 clusters with 15 clusters populated
In stage 99: transferred 3225 clusters with 22 clusters populated
End 2021-05-04 09:11:38.198151
Duration 0:13:43.481444
K = 50
alpha = 0.75
beta = 1.0
n_iters = 50
Start 2021-05-04 09:11:38.199118
In stage 0: transferred 18466 clusters with 50 clusters populated
In stage 1: transferred 15418 clusters with 50 clusters populated
In stage 2: transferred 8531 clusters with 47 clusters populated
In stage 3: transferred 5085 clusters with 25 clusters populated
In stage 4: transferred 3971 clusters with 16 clusters populated
In stage 5: transferred 3479 clusters with 16 clusters populated
In stage 6: transferred 3341 clusters with 17 clusters populated
In stage 7: transferred 3272 clusters with 20 clusters populated
In stage 8: transferred 3240 clusters with 17 clusters populated
In stage 9: transferred 3194 clusters with 17 clusters populated
In stage 10: transferred 3163 clusters with 14 clusters populated
In stage 11: trans

Tuning GSDMM model:  26%|██████████████████▋                                                     | 39/150 [6:51:16<17:45:55, 576.18s/it]

In stage 49: transferred 2475 clusters with 13 clusters populated
End 2021-05-04 09:18:18.449318
Duration 0:06:40.250200
K = 50
alpha = 0.75
beta = 1.0
n_iters = 100
Start 2021-05-04 09:18:18.450319
In stage 0: transferred 18466 clusters with 50 clusters populated
In stage 1: transferred 15418 clusters with 50 clusters populated
In stage 2: transferred 8531 clusters with 47 clusters populated
In stage 3: transferred 5085 clusters with 25 clusters populated
In stage 4: transferred 3971 clusters with 16 clusters populated
In stage 5: transferred 3479 clusters with 16 clusters populated
In stage 6: transferred 3341 clusters with 17 clusters populated
In stage 7: transferred 3272 clusters with 20 clusters populated
In stage 8: transferred 3240 clusters with 17 clusters populated
In stage 9: transferred 3194 clusters with 17 clusters populated
In stage 10: transferred 3163 clusters with 14 clusters populated
In stage 11: transferred 3101 clusters with 10 clusters populated
In stage 12: tran

Tuning GSDMM model:  27%|███████████████████▏                                                    | 40/150 [7:04:31<19:36:39, 641.81s/it]

In stage 99: transferred 2133 clusters with 19 clusters populated
End 2021-05-04 09:31:33.403360
Duration 0:13:14.953041
K = 50
alpha = 1.0
beta = 0.01
n_iters = 50
Start 2021-05-04 09:31:33.404360
In stage 0: transferred 17799 clusters with 50 clusters populated
In stage 1: transferred 13752 clusters with 50 clusters populated
In stage 2: transferred 10071 clusters with 50 clusters populated
In stage 3: transferred 7982 clusters with 50 clusters populated
In stage 4: transferred 6920 clusters with 50 clusters populated
In stage 5: transferred 6170 clusters with 50 clusters populated
In stage 6: transferred 5703 clusters with 50 clusters populated
In stage 7: transferred 5579 clusters with 50 clusters populated
In stage 8: transferred 5339 clusters with 50 clusters populated
In stage 9: transferred 5257 clusters with 50 clusters populated
In stage 10: transferred 5083 clusters with 50 clusters populated
In stage 11: transferred 5046 clusters with 50 clusters populated
In stage 12: tran

Tuning GSDMM model:  27%|███████████████████▋                                                    | 41/150 [7:11:17<17:17:27, 571.08s/it]

In stage 49: transferred 4277 clusters with 50 clusters populated
End 2021-05-04 09:38:19.445551
Duration 0:06:46.041191
K = 50
alpha = 1.0
beta = 0.01
n_iters = 100
Start 2021-05-04 09:38:19.447537
In stage 0: transferred 17799 clusters with 50 clusters populated
In stage 1: transferred 13752 clusters with 50 clusters populated
In stage 2: transferred 10071 clusters with 50 clusters populated
In stage 3: transferred 7982 clusters with 50 clusters populated
In stage 4: transferred 6920 clusters with 50 clusters populated
In stage 5: transferred 6170 clusters with 50 clusters populated
In stage 6: transferred 5703 clusters with 50 clusters populated
In stage 7: transferred 5579 clusters with 50 clusters populated
In stage 8: transferred 5339 clusters with 50 clusters populated
In stage 9: transferred 5257 clusters with 50 clusters populated
In stage 10: transferred 5083 clusters with 50 clusters populated
In stage 11: transferred 5046 clusters with 50 clusters populated
In stage 12: tra

Tuning GSDMM model:  28%|████████████████████▏                                                   | 42/150 [7:24:59<19:23:06, 646.18s/it]

In stage 99: transferred 4230 clusters with 50 clusters populated
End 2021-05-04 09:52:00.843926
Duration 0:13:41.396389
K = 50
alpha = 1.0
beta = 0.25
n_iters = 50
Start 2021-05-04 09:52:00.845924
In stage 0: transferred 18278 clusters with 50 clusters populated
In stage 1: transferred 14842 clusters with 50 clusters populated
In stage 2: transferred 9958 clusters with 50 clusters populated
In stage 3: transferred 7111 clusters with 50 clusters populated
In stage 4: transferred 5763 clusters with 47 clusters populated
In stage 5: transferred 4946 clusters with 48 clusters populated
In stage 6: transferred 4620 clusters with 47 clusters populated
In stage 7: transferred 4453 clusters with 46 clusters populated
In stage 8: transferred 4391 clusters with 43 clusters populated
In stage 9: transferred 4236 clusters with 42 clusters populated
In stage 10: transferred 4234 clusters with 38 clusters populated
In stage 11: transferred 4258 clusters with 38 clusters populated
In stage 12: trans

Tuning GSDMM model:  29%|████████████████████▋                                                   | 43/150 [7:31:49<17:06:28, 575.59s/it]

In stage 49: transferred 3761 clusters with 30 clusters populated
End 2021-05-04 09:58:51.736251
Duration 0:06:50.890327
K = 50
alpha = 1.0
beta = 0.25
n_iters = 100
Start 2021-05-04 09:58:51.736251
In stage 0: transferred 18278 clusters with 50 clusters populated
In stage 1: transferred 14842 clusters with 50 clusters populated
In stage 2: transferred 9958 clusters with 50 clusters populated
In stage 3: transferred 7111 clusters with 50 clusters populated
In stage 4: transferred 5763 clusters with 47 clusters populated
In stage 5: transferred 4946 clusters with 48 clusters populated
In stage 6: transferred 4620 clusters with 47 clusters populated
In stage 7: transferred 4453 clusters with 46 clusters populated
In stage 8: transferred 4391 clusters with 43 clusters populated
In stage 9: transferred 4236 clusters with 42 clusters populated
In stage 10: transferred 4234 clusters with 38 clusters populated
In stage 11: transferred 4258 clusters with 38 clusters populated
In stage 12: tran

In [None]:
# np.vstack(
#     (
#         np.unique(gsdmm_cluster_labels, return_counts=True)[0],
#         np.unique(gsdmm_cluster_labels, return_counts=True)[1],
#     )
# ).T

## Tuning hyperparameters

In [None]:
# # Plot the log-likelihood
# plt.figure(figsize=(12, 8))
# plt.plot(n_range, score_list, "-o", color="brown")
# plt.xlabel("Number of Components")
# plt.ylabel("Log-likelihood")
# plt.xticks(n_range)
# plt.title("Average log-likelihood of BGM models")
# plt.grid()

In [None]:
# i = 0
# silho_table = {"K": [], "alpha": [], "beta": [], "n_iters": [], "Silhouette": []}

# for K in k_range:
#     #     print('K =', K)
#     for alpha in alpha_range:
#         #         print('alpha =', alpha)
#         for beta in beta_range:
#             #             print('beta =', beta)
#             for n_iters in n_iter_range:
#                 #                 print('n_iters =', n_iters)
#                 #                 print("i =", i)
#                 silho_score = metrics.silhouette_score(title_embeddings, labels_list[i])
#                 silho_table["K"].append(K)
#                 silho_table["alpha"].append(alpha)
#                 silho_table["beta"].append(beta)
#                 silho_table["n_iters"].append(n_iters)
#                 silho_table["Silhouette"].append(silho_score)
#                 i += 1

# # pd.DataFrame(silho_table)

In [None]:
# Calculate the Silhouette score of every tuned model on the title embeddings.
# Iterating over a sized range (instead of a bare `enumerate` generator) lets
# tqdm know the total, so the bar can show a percentage and an ETA.
silhouette_scores = []
for idx in tqdm(range(len(model_list)), desc="Scoring GSDMM models"):
    silhouette_scores.append(
        metrics.silhouette_score(title_embeddings, labels_list[idx])
    )

# Assign the whole column at once: re-running this cell then replaces the
# scores instead of appending a second batch to model_result["Silhouette"].
model_result["Silhouette"] = silhouette_scores

In [None]:
def get_average_score(models, docs):
    """
    Compute, for every model, the mean of ``model.score(doc)`` over all docs.

    Parameters
    ----------
    models : iterable
        Objects exposing a ``score(doc)`` method.
    docs : iterable
        Documents to score; each one is passed unchanged to ``model.score``.
        A one-shot iterator is accepted: it is materialised once up front.

    Returns
    -------
    list
        One average per model, in the order of ``models``. The average is
        ``nan`` when ``docs`` is empty.
    """
    # Materialise once so the same documents are scored by every model, even
    # when the caller hands in a generator.
    docs = list(docs)

    avg_score_list = []
    for model in models:
        score_list = [model.score(doc) for doc in docs]
        # Average once per model, after all documents have been scored.
        # NOTE(review): if `score` returns a normalised probability vector per
        # document (as GSDMM's MovieGroupProcess appears to), this mean is the
        # same for every document -- confirm this is the intended metric.
        avg_score = np.mean(score_list) if score_list else np.nan
        avg_score_list.append(avg_score)
    return avg_score_list

In [None]:
# Create average score.
# Score the tokenised titles in `title_corpus` (built from the
# "clean_job_title" column): iterating over the `title_docs` DataFrame itself
# yields its column labels, not its rows, so the models would be scored on the
# wrong inputs.
avg_scores = get_average_score(model_list, title_corpus)
model_result["Average Score"] = avg_scores

In [None]:
# Render the collected tuning results as a table
pd.DataFrame(data=model_result)

In [None]:
# Persist the tuning results as a CSV file, without the row index
tuning_results_path = OUTPUT_DIR + "job-title_gsdmm-tuning-results.csv"
tuning_results_df = pd.DataFrame(model_result)
tuning_results_df.to_csv(tuning_results_path, index=False)

In [None]:
# Combine model sets
# Bundle the tuning grid and the tuning outputs into a single tuple so they can
# be stored together. Elements are identified only by position, so this order
# must stay in sync with any code that unpacks the tuple later.
# NOTE(review): the n_iters grid is not part of the bundle although K, alpha
# and beta ranges are -- confirm whether that is intentional.
tuple_objects = (
    k_range,
    alpha_range,
    beta_range,
    model_list,
    duration_list,
    labels_list,
    model_result
)
# Quick check: number of bundled objects and length of the first one (k_range)
len(tuple_objects), len(tuple_objects[0])

In [None]:
# Save tuple
# Use a context manager so the file handle is flushed and closed as soon as
# the dump finishes; a bare open() leaves that to garbage collection.
with open(OUTPUT_DIR + "title_gsdmm_tuning_results.pkl", "wb") as pkl_file:
    pickle.dump(tuple_objects, pkl_file)

In [None]:
# Select the best model based on the Silhouette score (highest wins)
best_index = np.argmax(model_result["Silhouette"])
best_model = model_list[best_index]
best_labels = labels_list[best_index]

# Number of distinct cluster labels the best model assigned
best_n = np.unique(best_labels).size

# Every recorded column of model_result for the best model, as (name, value) pairs
best_params = [(name, model_result[name][best_index]) for name in model_result]

best_index, best_n, best_params

In [None]:
# # indexes = len(silho_list) % (len(alpha_range)*len(beta_range)*len(n_iter_range))
# score_range = np.arange(len(silho_list))
# print(score_range)

# indexes = score_range % (len(alpha_range)*len(beta_range)*len(n_iter_range))
# print(indexes)

# print(score_range[indexes == 0])
# print(silho_list)
# print(np.asarray(silho_list)[np.where(np.asarray(indexes) == 0)])

In [None]:
# silho_df = pd.DataFrame(silho_table)
# # Plot the Silhouette score
# for b in beta_range:
#     plt.figure(figsize=(16, 5))
#     for ids, a in enumerate(alpha_range):
#         plot_df = silho_df.loc[silho_df["beta"] == b]
#         plot_df = plot_df.loc[plot_df['alpha'] == a]
#         plt.subplot(1, len(alpha_range), ids+1)
#         for n_iters in n_iter_range:
#             plt.plot(
#                 plot_df["K"].loc[plot_df['n_iters'] == n_iters],
#                 plot_df["Silhouette"].loc[plot_df['n_iters'] == n_iters],
#                 label=f"n_iters = {n_iters}",
#             )
#         plt.title(f"Alpha = {a} \n Beta = {b}")
#         plt.legend(loc="best")
#         plt.xlabel("Number of maximum usable clusters")
#         plt.xticks(k_range)
#         plt.ylabel("Silhouette score")
#         plt.grid()
#     plt.show()

In [None]:
# Load the PCA and t-SNE data points stored in OUTPUT_DIR
# (presumably precomputed projections of the titles -- confirm against the
# notebook that writes these files)
pca_datapoint, tsne_datapoint = (
    np.load(OUTPUT_DIR + file_name)
    for file_name in ("pca_datapoints.npy", "tsne_datapoints.npy")
)

In [None]:
# Plot the clusters of the best model; the title reports the number of
# distinct labels it assigned (best_n).
plot_title = f"GSDMM visualization of job titles (number of components = {best_n})"
palette = sns.color_palette("hls", as_cmap=True)
# Scale the raw cluster labels by the number of distinct labels to get one
# colour value per title.
# NOTE(review): labels are cluster ids out of K, while best_n counts only the
# populated clusters, so values may exceed 1 when ids are not contiguous --
# confirm visualize_cluster normalises the colour values.
colors = np.asarray(best_labels) / best_n

# Visualize clusters with PCA and t-SNE
# NOTE(review): the positional None is presumably the MDS data points, unused
# because mds=False -- confirm against cluster_visualization_helper.
visualize_cluster(
    plot_title,
    (12, 12),
    colors,
    palette,
    pca_datapoint,
    tsne_datapoint,
    None,
    pca=True,
    tsne=True,
    mds=False
)

## Save cluster results to file

In [None]:
# np.save always writes NumPy's binary .npy format and appends ".npy" to any
# other extension, so the previous call produced "...clusters.csv.npy" rather
# than a CSV. Write real text instead: one integer cluster label per line, in
# the order of best_labels.
np.savetxt(
    OUTPUT_DIR + "job-title_gsdmm-clusters.csv",
    np.asarray(best_labels),
    fmt="%d",
    delimiter=",",
)