In [1]:
from datetime import datetime
from functools import partial
from itertools import product

from _correlation import cluster_correlation_search
import clustering
from common_generate_predictions import load_data, grid_search_without_nclusters, get_gold_data
import networkx as nx
import numpy as np

In [2]:
wic_data = True
method = "cc"
llm = "wic"
dataset = "dwug_es"
path_to_data = f"../input/wic-scores/dwug_es_cleaned"
path_to_gold_data = "../test_data_es.csv"
score_paths = ["wic1", "wic2", "wic3", "wic4", "wic5", "wic6", "wic7"]

In [3]:
def get_clusters(adj_matrix, hyperparameters, seed=0):
    use_disconnected_edges = hyperparameters.pop("use_disconnected_edges", False)
    G = clustering._adjacency_matrix_to_nxgraph(
        adj_matrix,
        use_disconnected_edges=use_disconnected_edges,
    )
    hyperparameters.pop("use_disconnected_edges", None)

    clusters = clustering.correlation_clustering(G, **hyperparameters)
    return clustering._convert_graph_cluster_list_set_to_list(G, clusters)

In [4]:
def generate_hyperparameter_combinations_for_cc():
    combinations = []

    for number_sense in [10]:
        for attempt in [2000]:
            for iteration in [50000]:
                combinations.append(
                    {
                        "max_attempts": attempt,
                        "max_iters": iteration,
                        "max_senses": number_sense,
                    }
                )

    return combinations

In [11]:
quantile = range(0, 7)
fill_diagonal = [True, False]
normalize = [True, False]
hc = product(
    quantile,
    fill_diagonal,
    normalize,
    score_paths,
    generate_hyperparameter_combinations_for_cc(),
)

hyperparameter_combinations = [
    {
        "quantile": q,
        "fill_diagonal": fd,
        "normalize": nm,
        "prompt": sp,
        "model_hyperparameters": mhc,
    }
    for q, fd, nm, sp, mhc in hc
]

In [12]:
len(hyperparameter_combinations)

84

In [13]:
hyperparameter_combinations

[{'quantile': 0,
  'fill_diagonal': True,
  'normalize': True,
  'prompt': 'zs',
  'model_hyperparameters': {'max_attempts': 2000,
   'max_iters': 50000,
   'max_senses': 10}},
 {'quantile': 0,
  'fill_diagonal': True,
  'normalize': True,
  'prompt': 'fs',
  'model_hyperparameters': {'max_attempts': 2000,
   'max_iters': 50000,
   'max_senses': 10}},
 {'quantile': 0,
  'fill_diagonal': True,
  'normalize': True,
  'prompt': 'ct',
  'model_hyperparameters': {'max_attempts': 2000,
   'max_iters': 50000,
   'max_senses': 10}},
 {'quantile': 0,
  'fill_diagonal': True,
  'normalize': False,
  'prompt': 'zs',
  'model_hyperparameters': {'max_attempts': 2000,
   'max_iters': 50000,
   'max_senses': 10}},
 {'quantile': 0,
  'fill_diagonal': True,
  'normalize': False,
  'prompt': 'fs',
  'model_hyperparameters': {'max_attempts': 2000,
   'max_iters': 50000,
   'max_senses': 10}},
 {'quantile': 0,
  'fill_diagonal': True,
  'normalize': False,
  'prompt': 'ct',
  'model_hyperparameters': {'ma

In [None]:
metadata = {
    "fill_diagonal": True,
    "normalize": True,
    "method": method,
    "path_to_gold_data": path_to_gold_data,
    "path_to_data": path_to_data,
    "path_to_sense_data": "../dwug_es_sense.csv",
    "llm": llm,
    "score_paths": score_paths,
    "dataset": dataset,
    "wic_data": wic_data,
    "path_to_save_results": f"../cv-experiments-lscd-ari-dwug-cleaned/{method}/{llm}/{dataset}",
}

In [None]:
start_time = datetime.now()


grid_search_without_nclusters(
    partial(load_data, path_to_data),
    get_clusters,
    hyperparameter_combinations,
    metadata=metadata,
)

print(f"Elapsed time: {datetime.now() - start_time}")