In [1]:
from datetime import datetime
from functools import partial

from _correlation import cluster_correlation_search
import clustering
from common_generate_predictions import load_data, grid_search_without_nclusters, get_gold_data
import networkx as nx
import numpy as np

In [2]:
def get_clusters(adj_matrix, hyperparameters, seed=0):
    """Cluster an adjacency matrix with correlation clustering.

    Args:
        adj_matrix: Adjacency matrix forwarded to
            ``clustering._adjacency_matrix_to_nxgraph``.
        hyperparameters: Mapping of keyword arguments for
            ``clustering.correlation_clustering``. An optional
            ``"use_disconnected_edges"`` entry (default ``False``) is routed
            to the graph builder instead of the clustering call. The mapping
            itself is left unmodified.
        seed: Not used inside this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        The result of ``clustering._convert_graph_cluster_list_set_to_list``
        applied to the built graph and the clusters found on it.
    """
    # Work on a shallow copy: popping from the caller's dict would strip
    # "use_disconnected_edges" from a hyperparameter combination that may be
    # reused for later calls.
    cc_kwargs = dict(hyperparameters)
    use_disconnected_edges = cc_kwargs.pop("use_disconnected_edges", False)

    G = clustering._adjacency_matrix_to_nxgraph(
        adj_matrix,
        use_disconnected_edges=use_disconnected_edges,
    )

    clusters = clustering.correlation_clustering(G, **cc_kwargs)
    return clustering._convert_graph_cluster_list_set_to_list(G, clusters)

In [3]:
# Short identifier of the clustering method; also used as a path segment below.
method = "cc"

# Directory holding the input data for this run.
path_to_data = "../mixtral-8xtb-v0.1/v1/"

# Selects which experiment path template applies (True -> cross-validation).
is_cross_validation = False

# Output path templates keyed by the cross-validation flag. The literal
# "{kfold}" placeholder is intentionally left unformatted here
# (presumably filled in later with str.format -- confirm against the caller).
path_to_experiments = {
    True: f"../cv-experiments/{method}/{{kfold}}_fold",
    False: f"../no-cv-experiments/{method}/{{kfold}}",
}

In [4]:
def generate_hyperparameter_combinations_for_cc(
    max_senses=(10,),
    max_attempts=(2000,),
    max_iters=(50000,),
):
    """Build the hyperparameter grid for the correlation-clustering method.

    Every combination of the supplied candidate values is emitted as one
    dict with the keys ``"max_attempts"``, ``"max_iters"`` and
    ``"max_senses"``. Called without arguments, the function returns the
    single combination that was previously hard-coded.

    Args:
        max_senses: Iterable of candidate values for ``"max_senses"``.
        max_attempts: Iterable of candidate values for ``"max_attempts"``.
        max_iters: Iterable of candidate values for ``"max_iters"``.

    Returns:
        A list of dicts, ordered with ``max_senses`` varying slowest and
        ``max_iters`` varying fastest.
    """
    # Materialise the inner iterables once so one-shot iterators (generators)
    # are not exhausted after the first pass of the outer loop.
    attempt_values = list(max_attempts)
    iteration_values = list(max_iters)

    return [
        {
            "max_attempts": attempt,
            "max_iters": iteration,
            "max_senses": number_sense,
        }
        for number_sense in max_senses
        for attempt in attempt_values
        for iteration in iteration_values
    ]

In [5]:
# Wall-clock timestamp taken right before the search starts.
start_time = datetime.now()

# Loader with the data directory already bound as its first argument.
data_loader = partial(load_data, path_to_data)
# Hyperparameter combinations to evaluate for the "cc" method.
hyperparameter_grid = generate_hyperparameter_combinations_for_cc()

grid_search_without_nclusters(
    data_loader,
    get_clusters,
    hyperparameter_grid,
    fill_diagonal=True,
    normalize=True,
    method=method,
)

print(f"Elapsed time: {datetime.now() - start_time}")

2024-06-19 16:25:44,347 - INFO - loading data ...
2024-06-19 16:25:44,563 - INFO - data loaded ...
2024-06-19 16:25:44,563 - INFO - training cc method ...
2024-06-19 16:25:44,563 - INFO - 1/4 - {'fill_diagonal': True, 'normalize': False, 'model_hyperparameters': {'max_attempts': 2000, 'max_iters': 50000, 'max_senses': 10}}
2024-06-19 16:25:44,564 - INFO - get predictions without nclusters ...
2024-06-19 16:25:44,569 - INFO - building adjacency matrix ...
2024-06-19 16:25:44,590 - INFO - adjacency matrix built ...
2024-06-19 16:25:44,591 - INFO - calculating predictions ...
2024-06-19 16:25:44,602 - INFO - predictions calculated ...
2024-06-19 16:25:44,605 - INFO - building adjacency matrix ...
2024-06-19 16:25:44,626 - INFO - adjacency matrix built ...
2024-06-19 16:25:44,626 - INFO - calculating predictions ...
2024-06-19 16:25:44,635 - INFO - predictions calculated ...
2024-06-19 16:25:44,637 - INFO - building adjacency matrix ...
2024-06-19 16:25:44,659 - INFO - adjacency matrix bui

Elapsed time: 0:00:07.951579
