# ==== INTERACTIVE CLUSTERING : EFFICIENCE STUDY ====
> ### Stage 4 : Export experiments for other studies.

------------------------------
## READ-ME BEFORE RUNNING

### Quick Description

This notebook is **aimed at exporting the experiments needed to run other studies**.
- Environments are represented by subdirectories in the `/experiments` folder. A full path to an experiment environment is `/experiments/[DATASET]/[PREPROCESSING]/[VECTORIZATION]/[SAMPLING]/[CLUSTERING]/[EXPERIMENT]`.
- Experiments have to be run and evaluated in order to analyze convergence speed.

Before running this notebook, **run the notebook `2_Run_until_convergence_and_evaluate_efficience.ipynb` to run and evaluate each experiment you have set up**.

------------------------------
## 1. IMPORT PYTHON DEPENDENCIES

In [None]:
import json
import os
import listing_envs
from typing import Any, Dict, List

------------------------------
## 2. EXPORT

Find all experiment environments.

In [None]:
# Collect the path of every experiment environment through the `listing_envs` helper module.
LIST_OF_EXPERIMENT_ENVIRONMENTS: List[str] = listing_envs.get_list_of_experiment_env_paths()

# Report how many environments were found.
print(
    "There are",
    "`{}`".format(len(LIST_OF_EXPERIMENT_ENVIRONMENTS)),
    "created experiment environments in `../experiments`",
)

# Last expression of the cell: the notebook displays the list itself.
LIST_OF_EXPERIMENT_ENVIRONMENTS

Make sure the `.temp/exports` folder exists (it is created if missing).

In [None]:
# Create the export folder if needed.
# `os.makedirs` also creates the parent `../.temp` folder when it is missing
# (a plain `os.mkdir` would raise `FileNotFoundError` in that case), and
# `exist_ok=True` makes the call a no-op when the folder is already there.
os.makedirs("../.temp/exports", exist_ok=True)

In [None]:
# JSON files exported in full, as (export key, path relative to the experiment folder).
# Texts and true intents are read five levels up (dataset folder), preprocessed
# texts four levels up (preprocessing folder), constraints and clustering results
# from the experiment folder itself.
RAW_JSON_EXPORTS = (
    ("dict_of_texts", "../../../../../dict_of_texts.json"),
    ("dict_of_true_intents", "../../../../../dict_of_true_intents.json"),
    ("dict_of_preprocessed_texts", "../../../../dict_of_preprocessed_texts.json"),
    ("dict_of_constraints_annotations", "dict_of_constraints_annotations.json"),
    ("dict_of_clustering_results", "dict_of_clustering_results.json"),
)

# Configuration files exported partially, as
# (export key, path relative to the experiment folder, fields kept in the export).
# One `config.json` is read per level, from the dataset folder down to the experiment folder.
CONFIG_EXPORTS = (
    (
        "dataset_config",
        "../../../../../config.json",
        ("file_name", "sheet_name", "language"),
    ),
    (
        "preprocessing_config",
        "../../../../config.json",
        ("apply_preprocessing", "apply_lemmatization", "apply_parsing_filter", "spacy_language_model"),
    ),
    (
        "vectorization_config",
        "../../../config.json",
        ("vectorizer_type", "spacy_language_model"),
    ),
    (
        "sampling_config",
        "../../config.json",
        ("algorithm", "nb_to_select"),
    ),
    (
        "clustering_config",
        "../config.json",
        ("algorithm", "init**kargs", "nb_clusters"),
    ),
    (
        "experiment_config",
        "config.json",
        ("random_seed", "manager_type"),
    ),
)

# Build one JSON export per experiment environment.
for env in LIST_OF_EXPERIMENT_ENVIRONMENTS:

    # Name the export after the path components of the environment: the first
    # two components and the last one are dropped, the others are joined with "_-_".
    path_parts: List[str] = env.split("/")[2:-1]
    export_filename: str = "../.temp/exports/" + "_-_".join(path_parts) + ".json"

    # Start the export with the path of the environment.
    export_data: Dict[str, Any] = {"_ENV_PATH": env}

    # Copy the data files as they are.
    for export_key, relative_path in RAW_JSON_EXPORTS:
        with open(env + relative_path, "r") as json_file_r:
            export_data[export_key] = json.load(json_file_r)

    # Copy only the selected fields of each configuration file.
    # A field missing from a configuration file raises `KeyError`.
    for export_key, relative_path, kept_fields in CONFIG_EXPORTS:
        with open(env + relative_path, "r") as config_file_r:
            full_config = json.load(config_file_r)
        export_data[export_key] = {field: full_config[field] for field in kept_fields}

    # Store export.
    with open(export_filename, "w") as export_file_w:
        json.dump(export_data, export_file_w)