In [1]:
from datetime import datetime
import functools as f

from numpy.typing import NDArray
import numpy as np

from common_generate_predictions import load_data, grid_search_without_nclusters
import clustering

In [2]:
# --- Experiment configuration -------------------------------------------
# Dataset and the locations derived from it.
dataset = "dwug_es"
path_to_data = f"../input/llama3.1-8B/{dataset}"
path_to_gold_data = "../test_data_es.csv"

# Clustering method and the label of the judgment source.
# NOTE(review): `llm` is "random" while `path_to_data` points at a
# llama3.1-8B directory; the run log below prints "generating random
# numbers", so this is presumably a random baseline — confirm.
method = "wsbm"
llm = "random"

# Prompt variants to evaluate; the run log shows the prompt name being
# appended to `path_to_data` when loading (".../dwug_es/zs").
prompts = ["zs"]

# Forwarded unchanged through `metadata`; exact meaning is defined by the
# grid-search helper — TODO confirm.
wic_data = False

In [3]:
def get_clusters(adj_matrix: NDArray[np.float64 | np.int32], hyperparameters: dict):
    graph = clustering._adjacency_matrix_to_nxgraph(
        adj_matrix, use_disconnected_edges=False
    )
    clusters = clustering.wsbm_clustering(graph, **hyperparameters)

    return clustering._convert_graph_cluster_list_set_to_list(graph, clusters)

In [4]:
# Edge-weight distributions to sweep over; each one becomes its own
# single-key hyperparameter dict passed on to the grid search.
distributions = [
    "discrete-geometric",
    "discrete-poisson",
    "discrete-binomial",
    "real-normal",
    "real-exponential",
]

# One combination per distribution, in the order listed above.
model_hyperparameter_combinations = [
    {"distribution": name} for name in distributions
]

In [5]:
# Run description handed to the grid search via its `metadata` keyword.
# The first two entries are fixed flags; the rest mirror the configuration
# variables defined earlier in the notebook.
# NOTE(review): the run log below shows a combination with
# 'normalize': False although `normalize` is True here — presumably the
# grid search iterates over its own values for that setting; confirm.
metadata = dict(
    fill_diagonal=True,
    normalize=True,
    method=method,
    path_to_gold_data=path_to_gold_data,
    path_to_data=path_to_data,
    llm=llm,
    prompts=prompts,
    dataset=dataset,
    wic_data=wic_data,
)

In [6]:
# Wall-clock timestamp taken right before the sweep starts.
start_time = datetime.now()

# Pre-bind the data directory as the first argument of `load_data`, so the
# grid search receives a loader with that argument already filled in.
data_loader = f.partial(load_data, path_to_data)

# Sweep every hyperparameter combination using `get_clusters` as the
# clustering callback.
grid_search_without_nclusters(
    data_loader,
    get_clusters,
    model_hyperparameter_combinations,
    metadata=metadata,
)

elapsed = datetime.now() - start_time
print(f"Elapsed time: {elapsed}")

2024-08-29 21:45:37,242 - INFO - loading data from ../input/llama3.1-8B/dwug_es/zs ...
2024-08-29 21:45:37,461 - INFO - generating random numbers ...
2024-08-29 21:45:37,482 - INFO - data loaded ...
2024-08-29 21:45:37,483 - INFO - prompt: zs
2024-08-29 21:45:37,489 - INFO - training wsbm method ...
2024-08-29 21:45:37,489 - INFO - prompt: zs
2024-08-29 21:45:37,491 - INFO -   1/100 - {'quantile': 0, 'fill_diagonal': True, 'normalize': False, 'model_hyperparameters': {'distribution': 'discrete-geometric'}}
2024-08-29 21:45:37,491 - INFO - get predictions without nclusters ...
2024-08-29 21:45:37,493 - INFO - processing word: metal
2024-08-29 21:45:37,495 - INFO - building adjacency matrix ...
2024-08-29 21:45:37,518 - INFO - adjacency matrix built ...
2024-08-29 21:45:37,518 - INFO - calculating predictions ...
2024-08-29 21:45:37,698 - INFO - predictions calculated ...
2024-08-29 21:45:37,698 - INFO - processing word: banco
2024-08-29 21:45:37,701 - INFO - building adjacency matrix ..

KeyboardInterrupt: 