In [None]:
import ast
from IPython.display import display, HTML
import itertools
import math
import matplotlib.pyplot as plt
import numexpr as ne
import numpy as np
import pandas as pd
from scipy import spatial
import random as rdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, ConfusionMatrixDisplay
from tabulate import tabulate
import time
from tqdm.notebook import tqdm

In [21]:
cd ../

/home/guillaume/recimpute


In [17]:
%load_ext autoreload
%autoreload 2
from recimpute import init_training_set, FEATURES_EXTRACTORS
from Datasets.Dataset import Dataset
from Clustering.ShapeBasedClustering import ShapeBasedClustering
from Labeling.ImputationTechniques.ImputeBenchLabeler import ImputeBenchLabeler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Compute profile vectors

In [4]:
# Notebook mode. 'eval' builds test_data from the 'test' split and scores the
# recommendations; 'use' computes recommendations for one new sequence.
MODE = 'eval' # 'use' or 'eval'

### Profile vectors for already labeled time series / clusters

In [5]:
# Key used to select the error values inside each cluster's benchmark results
# when get_recommendations computes the scores (also the name of its third output column).
ERROR = 'average rank'

In [None]:
# Project objects shared by the rest of the notebook.
CLUSTERER = ShapeBasedClustering()
DATASETS = Dataset.instantiate_from_dir(CLUSTERER)
LABELER = ImputeBenchLabeler.get_instance()
LABELER_PROPERTIES = LABELER.get_default_properties()
# One instance per registered features extractor.
FEATURES_EXTRACTOR_init = [fe.get_instance() for fe in FEATURES_EXTRACTORS.values()]
# NOTE(review): the two None arguments are positional; their meaning is defined by
# init_training_set, which is not visible here -- confirm.
training_set = init_training_set(LABELER, LABELER_PROPERTIES, None, None, FEATURES_EXTRACTOR_init)

In [8]:
# In 'use' mode every entry is loaded ('all'); otherwise only the 'train' part is,
# the 'test' part being loaded separately further down.
# NOTE(review): _load is a private method of the training set; the shape of what it
# returns is only assumed from how it is unpacked here.
all_data_info, labels_set = training_set._load(data_to_load= 'all' if MODE == 'use' else 'train')

In [9]:
# One (still empty) row per distinct cluster ID, in ascending order; the two columns
# are filled in by the loading loop below.
all_cids = all_data_info['Cluster ID'].unique()
existing_vectors = pd.DataFrame(
    index=pd.Index(sorted(all_cids), name='Cluster ID'),
    columns=['Features Vector', 'Benchmark Results'],
)

In [10]:
def get_ds_name_from_cid(cid, datasets):
    """Return the name of the first data set whose ``cids`` contains ``cid``.

    Keyword arguments:
    cid -- cluster ID to look for
    datasets -- iterable of objects exposing ``name`` and ``cids`` attributes

    Return:
    The ``name`` of the first matching data set, or None if no data set contains ``cid``.
    """
    return next((dataset.name for dataset in datasets if cid in dataset.cids), None)

In [11]:
def get_cluster_bench_res(cid, labeler, properties, datasets):
    """Load the benchmark results of a cluster and rank the algorithms on them.

    The labels file of the data set containing ``cid`` is read from disk, the first
    value of the cluster's row is converted to a DataFrame by the labeler, and the
    labeler's ranking is computed with the strategy configured in ImputeBenchLabeler.CONF.

    Keyword arguments:
    cid -- ID of the cluster
    labeler -- labeler used to locate, convert and rank the benchmark results
    properties -- dict with at least a 'reduction_threshold' entry
    datasets -- data sets searched for the one that contains ``cid``

    Return:
    Whatever ``labeler._get_ranking_from_bench_res`` returns with ``return_scores=True``.
    """
    # benchmark results of every cluster of the data set that owns this cluster
    dataset_name = get_ds_name_from_cid(cid, datasets)
    bench_res_per_cluster = pd.read_csv(labeler._get_labels_filename(dataset_name), index_col='Cluster ID')

    # algorithms are only excluded when a reduction threshold has been specified
    if properties['reduction_threshold'] > 0.0:
        excluded_algos = labeler._get_algos_to_exclude(bench_res_per_cluster, properties)
    else:
        excluded_algos = []

    # the first value of the cluster's row is what the labeler converts to a DataFrame
    cluster_row = bench_res_per_cluster.loc[cid]
    cluster_bench_res = labeler._convert_bench_res_to_df(cluster_row.values[0])

    # rank the algorithms for this cluster with the configured strategy
    conf = ImputeBenchLabeler.CONF
    strategy = conf['BENCH_RES_AGG_AND_RANK_STRATEGY']
    return labeler._get_ranking_from_bench_res(
        cluster_bench_res,
        ranking_strat=strategy,
        ranking_strat_params=conf['BENCH_RES_AGG_AND_RANK_STRATEGY_PARAMS'][strategy],
        error_to_minimize=conf['BENCHMARK_ERROR_TO_MINIMIZE'],
        algos_to_exclude=excluded_algos,
        return_scores=True
    )

In [22]:
# load all existing profile vectors and benchmark results
for cid in tqdm(all_cids):
    # load cluster's features vector
    existing_vectors.at[cid, 'Features Vector'] = all_data_info.loc[all_data_info['Cluster ID'] == cid, 
                                                                    ~all_data_info.columns.isin(('Data Set Name', 
                                                                                                 'Label', 
                                                                                                 'Cluster ID'))].mean()
    # load cluster's bench res
    ranked_algos = get_cluster_bench_res(cid, LABELER, LABELER_PROPERTIES, DATASETS)
    existing_vectors.at[cid, 'Benchmark Results'] = ranked_algos
existing_vectors.head(3)

  0%|          | 0/6112 [00:00<?, ?it/s]

Unnamed: 0_level_0,Features Vector,Benchmark Results
Cluster ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Values__variance_larger_than_standard_deviatio...,weighted average rmse average rms...
1,Values__variance_larger_than_standard_deviatio...,weighted average rmse average rms...
2,Values__variance_larger_than_standard_deviatio...,weighted average rmse average rms...


### Profile vectors for unlabeled time series / clusters

In [23]:
# 'eval': build test_data, a list of (profile vector, ranked algorithms, data set name)
#         tuples, one per entry of the 'test' split.
# 'use':  build new_profile_vector for a single raw sequence.
if MODE == 'eval':
    test_data = []
    # names (and order) of the features stored in the existing vectors
    features_name = existing_vectors.iloc[0]['Features Vector'].index
    test_all_data_info, _ = training_set._load(data_to_load='test')
    # for each test entry: load its profile vector and the benchmark results of its cluster
    for _, row in tqdm(test_all_data_info.iterrows(), total=test_all_data_info.shape[0]):
        # load profile vec: a one-row DataFrame restricted to, and ordered like, features_name
        profile_vector = pd.DataFrame(row).T
        profile_vector = profile_vector[profile_vector.columns.intersection(features_name)]
        profile_vector = profile_vector[features_name]
        # load the benchmark results (ranked algorithms) of the entry's cluster
        cid = row['Cluster ID']
        ranked_algos = get_cluster_bench_res(cid, LABELER, LABELER_PROPERTIES, DATASETS)
        
        test_data.append((profile_vector, ranked_algos, row['Data Set Name']))
elif MODE == 'use':
    # NOTE(review): len(new_sequence) below fails while new_sequence is still None.
    new_sequence = None # put raw sequence to label here
    # NOTE(review): FEATURES_EXTRACTER is not defined in the visible notebook (only
    # FEATURES_EXTRACTOR_init, a list of extractor instances, is) -- as written this
    # line raises NameError. Confirm which extractor(s) should be used here.
    new_profile_vector = FEATURES_EXTRACTER.extract_from_timeseries(pd.DataFrame(new_sequence), 1, len(new_sequence))
    # NOTE(review): .loc[0, ...] needs a cluster with ID 0 in the index, whereas the
    # 'eval' branch takes the first row positionally with .iloc[0].
    features_name = existing_vectors.loc[0, 'Features Vector'].index
    new_profile_vector = new_profile_vector[new_profile_vector.columns.intersection(features_name)]
    new_profile_vector = new_profile_vector[features_name]

0it [00:00, ?it/s]

## Similarity search

In [24]:
def custom_cosine_distance(a, b):
    """Cosine distance (1 - cosine similarity) between two 1-D numeric vectors.

    Keyword arguments:
    a -- first vector (sequence or array of numbers)
    b -- second vector, same length as ``a``

    Return:
    1 - cos(a, b). When the similarity is undefined (a vector has a zero norm, or a
    NaN is involved) 1.0 is returned.

    Raises:
    ValueError -- if the two vectors do not have the same length (they used to be
                  silently truncated to the shortest one for the dot product only).
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    norms_product = np.linalg.norm(a) * np.linalg.norm(b)
    # undefined similarity: answer directly instead of going through a 0/0 division
    if norms_product == 0 or np.isnan(norms_product):
        return 1.0
    sim = np.dot(a, b) / norms_product
    return 1.0 if np.isnan(sim) else 1.0 - sim

In [25]:
def get_recommendations(existing_vectors, new_profile_vector, all_cids, dist_metric, 
                        get_recommendations_strat='all', norm_error=True):
    """Rank the imputation techniques for a new profile vector.

    Every (cluster, technique) pair is scored with ``distance * error`` where
    ``distance`` is the cosine distance between the cluster's features vector and the
    new profile vector, and ``error`` is the cluster's ``ERROR`` value for the
    technique. Only the lowest score of each technique is kept and the techniques are
    returned by ascending score (lower is better).

    Keyword arguments:
    existing_vectors -- DataFrame indexed by cluster ID with a 'Features Vector' column
                        (pd.Series) and a 'Benchmark Results' column (object that
                        yields, under the key ERROR, a Series indexed by technique
                        name). It is not modified.
    new_profile_vector -- DataFrame whose first row is the vector to compare; it is
                          assumed to list the same features, in the same order, as the
                          'Features Vector' entries.
    all_cids -- unused; kept so that existing calls keep working.
    dist_metric -- unused; the distance is always custom_cosine_distance.
    get_recommendations_strat -- 'all' to return every technique, 'top3' for the three
                                 best ones (default 'all').
    norm_error -- if True, errors are divided by the largest ERROR value found over
                  all the clusters (default True).

    Return:
    DataFrame indexed by 'Technique' with the columns 'Score', 'Distance' and ERROR,
    sorted by ascending 'Score'. A technique that is missing from every cluster's
    benchmark results gets infinite values.

    Raises:
    ValueError -- if get_recommendations_strat is neither 'all' nor 'top3'.
    KeyError -- if a cluster's benchmark results have no ERROR entry.
    """
    # fail before doing any work if the strategy is unknown
    if get_recommendations_strat not in ('all', 'top3'):
        raise ValueError("Unknown get_recommendations_strat: %r (expected 'all' or 'top3')"
                         % (get_recommendations_strat,))

    all_techniques = ['cdrec_k2','cdrec_k3','dynammo','grouse','rosl','softimp','svdimp','svt','stmvl','spirit','tenmf','tkcm','trmf']

    # largest error over all clusters, used to normalize the errors
    g_max_error = None
    if norm_error:
        g_max_error = existing_vectors['Benchmark Results'].map(lambda bench_res: bench_res[ERROR].max()).max()

    # compute the score of each (cluster, technique) pair
    new_vector = new_profile_vector.iloc[0].to_numpy()
    index_tuples, records = [], []
    for cid, features, bench_res in zip(existing_vectors.index,
                                        existing_vectors['Features Vector'],
                                        existing_vectors['Benchmark Results']):
        dist = custom_cosine_distance(features.to_numpy(), new_vector)
        # local copy only: the caller's DataFrame is left untouched
        errors = bench_res[ERROR]
        if norm_error:
            errors = errors / g_max_error
        for technique in all_techniques:
            index_tuples.append((cid, technique))
            try:
                error = errors.loc[technique]
            except KeyError:
                # technique absent from this cluster's results: it can never win here
                records.append((np.inf, np.inf, np.inf))
            else:
                records.append((dist * error, dist, error))

    # build the scores table in one go rather than cell by cell
    all_scores = pd.DataFrame(
        records,
        index=pd.MultiIndex.from_tuples(index_tuples, names=['Cluster ID', 'Technique']),
        columns=['Score', 'Distance', ERROR],
    )

    # minimize the scores; the stable sort makes the order of tied scores deterministic
    sorted_techniques = all_scores.sort_values('Score', ascending=True, kind='mergesort')
    sorted_techniques.index = sorted_techniques.index.droplevel(0)
    # first row of each technique = its best score; head() keeps the sorted order
    sorted_techniques = sorted_techniques.groupby('Technique').head(1)

    if get_recommendations_strat == 'top3':
        return sorted_techniques.iloc[:3]
    return sorted_techniques

## Use

In [26]:
# 'use' mode: rank the techniques for the new profile vector.
if MODE == 'use':
    # NOTE(review): DIST_FUNC is passed as dist_metric, which get_recommendations never
    # reads -- the distance is always custom_cosine_distance.
    DIST_FUNC = 'cosine'
    recommendations = get_recommendations(existing_vectors, new_profile_vector, all_cids, DIST_FUNC)

## Evaluate

In [28]:
# Number of top-ranked techniques of a test entry that count as correct recommendations
NB_CORRECT_REC = 1
# Upper bound on the number of test entries sampled for the evaluation
MAX_NB_TEST = 5000
# test_data only exists in 'eval' mode; fall back to 0 so that this cell also runs in 'use' mode
NB_TEST = min(MAX_NB_TEST, len(test_data)) if MODE == 'eval' else 0

K = 3 # for recall and precision @ K

# Passed to get_recommendations as dist_metric
DIST_FUNC = 'cosine'

In [None]:
# Evaluate the recommendations on NB_TEST randomly sampled test entries: collect the
# top-1 true/predicted labels plus precision@K, recall@K and reciprocal rank, both
# globally and per data set category.
if MODE == 'eval':
    prec_key, rec_key = 'prec@%i' % K, 'rec@%i' % K
    categories = set(Dataset.CONF['CATEGORIES'].values())

    metrics = {prec_key: [], rec_key: [], 'mrr': []}
    y_true, y_pred = [], []
    metrics_per_category = {category: {prec_key: [], rec_key: [], 'mrr': []} for category in categories}
    y_true_per_category = {category: [] for category in categories}
    y_pred_per_category = {category: [] for category in categories}

    for profile_vec, ranked_algos, ds_name in tqdm(rdm.sample(test_data, NB_TEST), total=NB_TEST):
        ranked_techniques = get_recommendations(existing_vectors, profile_vec, all_cids, DIST_FUNC).index.tolist()

        # keep only the better-ranked of the two cdrec variants and call it 'cdrec'
        if ranked_techniques.index('cdrec_k2') > ranked_techniques.index('cdrec_k3'):
            worse_cdrec = 'cdrec_k2'
        else:
            worse_cdrec = 'cdrec_k3'
        ranked_techniques.remove(worse_cdrec)
        ranked_techniques = ['cdrec' if 'cdrec' in name else name for name in ranked_techniques]

        # 1-based rank at which each correct label appears in the recommendations
        correct_labels = ranked_algos.iloc[:NB_CORRECT_REC].index.tolist()
        correct_ranks = [ranked_techniques.index(label) + 1 for label in correct_labels]

        top_label = ranked_algos.index.tolist()[0]
        y_true.append(top_label)
        y_pred.append(ranked_techniques[0])

        nb_hits = sum(rank <= K for rank in correct_ranks)
        prec_at_K = nb_hits / K
        rec_at_K = nb_hits / len(correct_labels)
        mrr = 1 / correct_ranks[0]
        metrics[prec_key].append(prec_at_K)
        metrics[rec_key].append(rec_at_K)
        metrics['mrr'].append(mrr)

        # same values, filed under the category of the entry's data set
        category = Dataset.CONF['CATEGORIES'][ds_name]
        y_true_per_category[category].append(top_label)
        y_pred_per_category[category].append(ranked_techniques[0])
        category_metrics = metrics_per_category[category]
        category_metrics[prec_key].append(prec_at_K)
        category_metrics[rec_key].append(rec_at_K)
        category_metrics['mrr'].append(mrr)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Print the results

In [None]:
# Global report: classification metrics on the top-1 labels, averaged ranking
# metrics, and the confusion matrix.
if MODE == 'eval':
    print('\n\n============================================================\n', '\033[1m Average metrics: \033[0m')

    # average over what was actually collected rather than over the NB_TEST setting,
    # which may have been changed since the evaluation cell was run
    nb_scored = len(metrics['mrr'])
    if not y_true or nb_scored == 0:
        # nothing to average: avoid dividing by zero
        print('No test entry was evaluated.')
    else:
        acc = accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)
        # float() accepts both NumPy scalars and plain Python floats
        prec = float(precision_score(y_true=y_true, y_pred=y_pred, average='weighted', zero_division=0))
        recall = float(recall_score(y_true=y_true, y_pred=y_pred, average='weighted', zero_division=0))
        f1 = f1_score(y_true=y_true, y_pred=y_pred, average='weighted', zero_division=0)

        avg_prec_at_k = sum(metrics['prec@%i' % K]) / len(metrics['prec@%i' % K])
        avg_rec_at_k = sum(metrics['rec@%i' % K]) / len(metrics['rec@%i' % K])
        mrr = sum(metrics['mrr']) / nb_scored
        print('Average precision@%i: %.2f | Average recall@%i: %.2f | Mean reciprocal rank: %.2f' % (K, avg_prec_at_k, K, avg_rec_at_k, mrr))
        print('Accuracy: %.2f | Precision: %.2f | Recall: %.2f | F1-Score: %.2f' % (acc, prec, recall, f1))

        ConfusionMatrixDisplay.from_predictions(y_true=y_true, y_pred=y_pred, xticks_rotation=45)
        plt.show()

In [None]:
# Same report as the global one, computed separately for each data set category.
if MODE == 'eval':
    print('\n\n============================================================\n', '\033[1m Average metrics per category: \033[0m')

    for category, category_metrics in metrics_per_category.items():
        category_y_true = y_true_per_category[category]
        category_y_pred = y_pred_per_category[category]
        nb_scored = len(category_metrics['mrr'])

        print('~~ Category: %s ~~' % category)
        if not category_y_true or nb_scored == 0:
            # no sampled test entry belongs to this category: averaging would divide by zero
            print('No test entry was evaluated for this category.')
            continue

        acc = accuracy_score(category_y_true, category_y_pred, normalize=True, sample_weight=None)
        # float() accepts both NumPy scalars and plain Python floats
        prec = float(precision_score(y_true=category_y_true, y_pred=category_y_pred, average='weighted', zero_division=0))
        recall = float(recall_score(y_true=category_y_true, y_pred=category_y_pred, average='weighted', zero_division=0))
        f1 = f1_score(y_true=category_y_true, y_pred=category_y_pred, average='weighted', zero_division=0)

        avg_prec_at_k = sum(category_metrics['prec@%i' % K]) / len(category_metrics['prec@%i' % K])
        avg_rec_at_k = sum(category_metrics['rec@%i' % K]) / len(category_metrics['rec@%i' % K])
        mrr = sum(category_metrics['mrr']) / nb_scored
        print('Average precision@%i: %.2f | Average recall@%i: %.2f | Mean reciprocal rank: %.2f' % (K, avg_prec_at_k, K, avg_rec_at_k, mrr))
        print('Accuracy: %.2f | Precision: %.2f | Recall: %.2f | F1-Score: %.2f' % (acc, prec, recall, f1))