In [None]:
import os

# Move two levels up from the current working directory (presumably the repository
# root, so that the `src.` and `tests.` imports of the next cell resolve -- confirm).
# The target is resolved to an absolute path only once: a relative "../.." would climb
# two more levels every time this cell is re-run in the same kernel.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("../..")
path = PROJECT_ROOT
os.chdir(path)

print("File location using os.getcwd():", os.getcwd())

## Imports

In [None]:
# Every constrained clustering model is imported through the same `src.` path, so that
# all the algorithms compared below come from the same copy of the package.
from src.cognitivefactory.interactive_clustering.clustering.kmeans import KMeansConstrainedClustering
from src.cognitivefactory.interactive_clustering.clustering.dbscan import DBScanConstrainedClustering
from src.cognitivefactory.interactive_clustering.clustering.mpckmeans import MPCKMeansConstrainedClustering
from src.cognitivefactory.interactive_clustering.clustering.affinity_propagation import (
    AffinityPropagationConstrainedClustering,
)

# Helpers shared by the comparative tests (loading, measuring, saving and plotting).
from tests.comparative_tests.utils import (
    load_dataset,
    estimate_mean_min_distance_between_same_cluster_points,
    get_constraints_couples,
    run_performances_measure,
    run_all_performances_measures,
    save_results,
    load_results,
    plot_results,
    plot_all_results,
)

# Run measures

In [None]:
# The dataset and the `measures_results` folder used below are addressed relative to
# this directory.
path = "./tests/comparative_tests"

# Only move when the working directory is not already the target one: the path is
# relative, so re-running this cell would otherwise raise FileNotFoundError.
if not os.path.normpath(os.getcwd()).endswith(os.sep + os.path.normpath(path)):
    os.chdir(path)

print("File location using os.getcwd():", os.getcwd())

## Individual tests with each algorithm

##### For C-DBScan

In [None]:
size = 500
dataset_csv = "./French_trainset_for_chatbots_dealing_with_usual_requests_on_bank_cards.csv"
results_json = f"./measures_results/c_dbscan_individual_size_{size}.json"  # destination, can be changed

# Dataset: vectors and their real clusters, for the requested size
dict_of_vectors, dict_of_real_clusters = load_dataset(dataset_path=dataset_csv, desired_size=size)

# All the possible constraints, derived from the real clusters
dict_of_constraints_couples = get_constraints_couples(dict_of_real_clusters)

# C-DBScan needs an `eps` hyperparameter: estimate a suitable one from the data
suitable_eps = estimate_mean_min_distance_between_same_cluster_points(
    dict_of_vectors,
    dict_of_real_clusters,
)
clustering_model = DBScanConstrainedClustering(eps=suitable_eps, min_samples=8)

# Measure the performances of the model
dict_of_clustering_performances = run_performances_measure(
    clustering_model,
    dict_of_vectors,
    dict_of_real_clusters,
    dict_of_constraints_couples,
    specific_nb_of_clusters=True,
)

# Persist the measures as a .json file
save_results(dict_of_clustering_performances, dst_path=results_json)

##### For MPCKMeans

In [None]:
size = 60
dataset_csv = "./French_trainset_for_chatbots_dealing_with_usual_requests_on_bank_cards.csv"
results_json = f"./measures_results/mpckmeans_individual_size_{size}.json"  # destination, can be changed

# Dataset: vectors and their real clusters, for the requested size
dict_of_vectors, dict_of_real_clusters = load_dataset(dataset_path=dataset_csv, desired_size=size)

# All the possible constraints, derived from the real clusters
dict_of_constraints_couples = get_constraints_couples(dict_of_real_clusters)

# MPCKMeans model, built with its default settings
clustering_model = MPCKMeansConstrainedClustering()

# Measure the performances of the model
dict_of_clustering_performances = run_performances_measure(
    clustering_model,
    dict_of_vectors,
    dict_of_real_clusters,
    dict_of_constraints_couples,
)

# Persist the measures as a .json file
save_results(dict_of_clustering_performances, dst_path=results_json)

##### For Affinity propagation

In [None]:
size = 500
dataset_csv = "./French_trainset_for_chatbots_dealing_with_usual_requests_on_bank_cards.csv"
results_json = f"./measures_results/affinity_propagation_individual_size_{size}.json"  # destination, can be changed

# Dataset: vectors and their real clusters, for the requested size
dict_of_vectors, dict_of_real_clusters = load_dataset(dataset_path=dataset_csv, desired_size=size)

# All the possible constraints, derived from the real clusters
dict_of_constraints_couples = get_constraints_couples(dict_of_real_clusters)

# Affinity propagation model, built with its default settings
clustering_model = AffinityPropagationConstrainedClustering()

# Measure the performances of the model
dict_of_clustering_performances = run_performances_measure(
    clustering_model,
    dict_of_vectors,
    dict_of_real_clusters,
    dict_of_constraints_couples,
)

# Persist the measures as a .json file
save_results(dict_of_clustering_performances, dst_path=results_json)

##### For constrained K-means

In [None]:
size = 500
dataset_csv = "./French_trainset_for_chatbots_dealing_with_usual_requests_on_bank_cards.csv"
results_json = f"./measures_results/kmeans_individual_size_{size}.json"  # destination, can be changed

# Dataset: vectors and their real clusters, for the requested size
dict_of_vectors, dict_of_real_clusters = load_dataset(dataset_path=dataset_csv, desired_size=size)

# All the possible constraints, derived from the real clusters
dict_of_constraints_couples = get_constraints_couples(dict_of_real_clusters)

# Constrained KMeans model, built with its default settings
clustering_model = KMeansConstrainedClustering()

# Measure the performances of the model
dict_of_clustering_performances = run_performances_measure(
    clustering_model,
    dict_of_vectors,
    dict_of_real_clusters,
    dict_of_constraints_couples,
)

# Persist the measures as a .json file
save_results(dict_of_clustering_performances, dst_path=results_json)

## Tests with several algorithms at the same time

### For all the algorithms at the same time over a 60-entry reduced dataset

In [None]:
size = 60
dataset_csv = "./French_trainset_for_chatbots_dealing_with_usual_requests_on_bank_cards.csv"
results_json = f"./measures_results/kmeans_c_dbscan_mpckmeans_affinity_size_{size}.json"  # destination, can be changed

# Reduced dataset and the constraints derived from its real clusters
dict_of_vectors, dict_of_real_clusters = load_dataset(dataset_path=dataset_csv, desired_size=size)
dict_of_constraints_couples = get_constraints_couples(dict_of_real_clusters)

# Measure the algorithms in one run, with the default algorithm selection
dict_of_clustering_performances = run_all_performances_measures(
    dict_of_vectors,
    dict_of_real_clusters,
    dict_of_constraints_couples,
)

# Persist the measures as a .json file
save_results(dict_of_clustering_performances, dst_path=results_json)

### For K-means, C-DBScan and Affinity Propagation at the same time over the full dataset

In [None]:
size = 500
dataset_csv = "./French_trainset_for_chatbots_dealing_with_usual_requests_on_bank_cards.csv"
results_json = f"./measures_results/kmeans_c_dbscan_affinity_size_{size}.json"  # destination, can be changed

# Dataset and the constraints derived from its real clusters
dict_of_vectors, dict_of_real_clusters = load_dataset(dataset_path=dataset_csv, desired_size=size)
dict_of_constraints_couples = get_constraints_couples(dict_of_real_clusters)

# Measure the algorithms in one run, with MPCKMeans switched off
dict_of_clustering_performances = run_all_performances_measures(
    dict_of_vectors,
    dict_of_real_clusters,
    dict_of_constraints_couples,
    mpckmeans=False,
)

# Persist the measures as a .json file
save_results(dict_of_clustering_performances, dst_path=results_json)

# Plot graphs of the results

## For all the algorithms at the same time over a 60-entry reduced dataset

In [None]:
# Reload the measures saved for the four algorithms on the 60-entry dataset
dict_of_clustering_performances = load_results(
    "./measures_results/kmeans_c_dbscan_mpckmeans_affinity_size_60.json"
)

# Combined plot of the loaded results
plot_all_results(
    dict_of_clustering_performances,
    title="Results with a 60-entries reduced dataset",
    plot_nb_clusters=True,
)

# Then one plot per algorithm: (key in the results, name passed as `algo_name`)
for result_key, algo_label in (
    ("kmeans", "COP K-means"),
    ("c_dbscan", "C-DBScan"),
    ("mpckmeans", "MPCK-means"),
    ("affinity_propagation", "Affinity Propagation"),
):
    plot_results(
        dict_of_clustering_performances[result_key],
        algo_name=algo_label,
        constraints_increment=7,
    )

## For K-means, C-DBScan and Affinity Propagation at the same time over the full dataset

In [None]:
# Reload the measures saved for the three algorithms run on 500 entries
dict_of_clustering_performances = load_results(
    "./measures_results/kmeans_c_dbscan_affinity_size_500.json"
)

# Combined plot; MPCKMeans is switched off, matching the measure run that produced this file
plot_all_results(
    dict_of_clustering_performances,
    title="Results with full dataset",
    constraints_increment=499,
    plot_nb_clusters=True,
    mpckmeans=False,
)

# Then one plot per algorithm: (key in the results, name passed as `algo_name`)
for result_key, algo_label in (
    ("kmeans", "COP K-means"),
    ("c_dbscan", "C-DBScan"),
    ("affinity_propagation", "Affinity Propagation"),
):
    plot_results(dict_of_clustering_performances[result_key], algo_name=algo_label)

## For individual tests

In [None]:
# Constrained K-means: reload the individual measures, then plot them
kmeans_results_json = "./measures_results/kmeans_individual_size_500.json"
kmeans_dict_of_clustering_performances = load_results(src_path=kmeans_results_json)

plot_results(
    kmeans_dict_of_clustering_performances,
    algo_name="kmeans",
)

In [None]:
# C-DBScan: reload the individual measures, then plot them
# (this plot also enables `plot_nb_clusters` and `print_time`)
c_dbscan_results_json = "./measures_results/c_dbscan_individual_size_500.json"
c_dbscan_dict_of_clustering_performances = load_results(src_path=c_dbscan_results_json)

plot_results(
    c_dbscan_dict_of_clustering_performances,
    algo_name="C-DBScan",
    plot_nb_clusters=True,
    print_time=True,
)

In [None]:
# MPCK-means: reload the individual measures (60-entry run), then plot them
mpckmeans_results_json = "./measures_results/mpckmeans_individual_size_60.json"
mpckmeans_dict_of_clustering_performances = load_results(src_path=mpckmeans_results_json)

plot_results(
    mpckmeans_dict_of_clustering_performances,
    algo_name="MPCKmeans",
)

In [None]:
# Affinity Propagation: reload the individual measures, then plot them
affinity_propagation_results_json = "./measures_results/affinity_propagation_individual_size_500.json"
affinity_propagation_dict_of_clustering_performances = load_results(
    src_path=affinity_propagation_results_json
)

plot_results(
    affinity_propagation_dict_of_clustering_performances,
    algo_name="Affinity Propagation",
)