In [None]:
from cobra import COBRA_KMeans, XCOBRA_KMeans
# from xcobras_kmeans import XCOBRAS_kmeans
from cobras_ts.querier.labelquerier import LabelQuerier
from utils.utils import read_arff_dataset, one_run_ground_truth_accuracy
import matplotlib.pyplot as  plt
from RandomQuerier import RandomQuerier
from XQuerier import XQuerier
from DQuerier import DQuerier
from model_explainer import ClusteringExplainer
from tqdm import tqdm
from sklearn import metrics
import json

# -- others
import warnings
# Silence every warning for the whole session so progress output stays readable.
warnings.filterwarnings("ignore")

# --- Experiment configuration -------------------------------------------
# Query budget handed to each clusterer as `max_questions`.
budget = 100
# Initial number of clusters handed to each clusterer as `init_k`.
K = 35
# How many times the full set of strategies is repeated on each dataset.
n_runs = 20

# Results keyed by strategy label; serialised to JSON at the end of each run.
output = dict()

# Benchmark driver: for every dataset in `names_35`, repeat `n_runs` times the
# comparison of all querying strategies, and write the per-strategy results of
# each run to its own JSON file under ./results/XCOBRA_/.

# Classifiers wrapped by ClusteringExplainer, as
# (model identifier, readable name used inside the strategy label).
explainer_models = [
    ("rbf_svm", "SVM"),
    ("decision_tree", "Decision tree"),
    ("knn-dist", "KNN distance"),
    ("knn-uniform", "KNN uniform"),
]

for n in names_35:
    for run in range(n_runs):
        print(f"Dataset: {n}...")
        # Load data: every column except "class" is a feature, "class" is the label.
        data = read_arff_dataset(PATH + n)
        X, y = data.drop(["class"], axis=1), data["class"].values
        feature_names = X.columns
        X = X.values
        saving_path = f"./results/XCOBRA_/{n}_results_run_{run}.json"

        # Start from an empty container so a run's file can never contain
        # entries left over from a previous run or dataset.
        output = {}

        # Each strategy is (querier, explainer or None, label).
        # The three baselines do not use an explainer; None is an explicit
        # placeholder (the previous `_` only resolved inside IPython).
        strategies = [
            (LabelQuerier(y), None, "Ground Truth"),  # Ground truth querier
            (RandomQuerier(), None, "Random"),  # Random P(Must Link) = P(Cannot Link) = 0.5
            (DQuerier(strat="cosine_similarity", threshold=0.95), None, "Raw data"),
        ]
        # Explanation-based strategies: every XAI method x every explainer model,
        # all compared through cosine similarity ("CS") with a 0.95 threshold.
        for xai_method in ("shap", "lime"):
            for model_id, model_label in explainer_models:
                strategies.append((
                    XQuerier(y, xai_method=xai_method, strat="cosine_similarity", threshold=0.95),
                    ClusteringExplainer(model=model_id, xai_model=xai_method, verbose=False),
                    f"{xai_method} {model_label} CS",
                ))

        for querier, explain_it_model, querier_strat in tqdm(strategies):
            # Arguments shared by every clusterer variant.
            common_args = dict(
                data=X,
                y=y,
                store_intermediate_results=True,
                querier=querier,
                max_questions=budget,
                init_k=K,
                verbose=False,
            )

            if querier_strat in ("Ground Truth", "Random"):
                # Baselines run on the plain COBRA clusterer.
                cobra = COBRA_KMeans(**common_args)
            elif querier_strat == "Raw data":
                # Raw-feature querier: XCOBRA with explanations switched off.
                cobra = XCOBRA_KMeans(use_explanation=False, **common_args)
            else:
                # Explanation-based querier: XCOBRA driven by the given explainer.
                cobra = XCOBRA_KMeans(explain_it=explain_it_model, **common_args)
            cobra.fit()

            output[querier_strat] = {
                'Init K': K,
                'Budget': budget,
                "ml": cobra.ml,
                "cl": cobra.cl,
                "aris": cobra.aris,
                # NOTE(review): stored under "num_clusters" but read from
                # `cobra.y_tmp` -- confirm this attribute holds cluster counts.
                "num_clusters": cobra.y_tmp,
                "total queries": len(cobra.ml) + len(cobra.cl),
            }

        # One result file per (dataset, run).
        with open(saving_path, "w") as write_file:
            json.dump(output, write_file, indent=4)