# Experiment: _Combined text-/graph features vs. text-only features_

In [None]:
from notebook_prelude import *

In [None]:
# Placeholder written into every missing cell of the results table.
NA_VAL = '-'
EXPERIMENT_NAME = 'experiment_combined'
experiment_data = experiment_helper.get_experiment_config_for(EXPERIMENT_NAME)
param_grid = experiment_data['params_per_type']

# The results are collected from three experiment runs: the base experiment
# plus its '_with_splitted' and '_same_label' variants.
df = results_helper.get_results(filter_out_experiment=EXPERIMENT_NAME, filter_out_non_complete_datasets=False)
df_ = results_helper.get_results(filter_out_experiment=EXPERIMENT_NAME + '_with_splitted', filter_out_non_complete_datasets=False)
df__ = results_helper.get_results(filter_out_experiment=EXPERIMENT_NAME + '_same_label', filter_out_non_complete_datasets=False)

# DataFrame.append is deprecated (removed in pandas 2.0); pd.concat stacks the
# frames the same way (original row labels kept, columns unioned).
df = pd.concat([df, df_, df__])

df = df.fillna(NA_VAL)
# Last expression of the cell: the notebook displays the flattened parameter grid.
pipeline_helper.remove_complex_types(pipeline_helper.flatten_nested_params(param_grid))

In [None]:
# Vectorizer names as they appear in the `text__vectorizer` results column.
VECTORIZER_TFIDF, VECTORIZER_COUNT = 'TfidfVectorizer', 'CountVectorizer'

## Result table

In [None]:
# Echo the column labels of the results table (the notebook displays the
# value of a cell's last expression).
df.columns

In [None]:
def only_types(df, types):
    """Return a boolean row mask selecting rows whose 'type' is one of `types`.

    Args:
        df: DataFrame with a 'type' column.
        types: iterable of values to compare the 'type' column against.

    Returns:
        Boolean array with one entry per row of `df`; all False when `types`
        is empty.
    """
    type_column = df['type']
    # Seed with an all-False row so an empty `types` still yields a full mask.
    per_type_matches = [np.zeros(len(df), dtype=bool)]
    per_type_matches.extend((type_column == wanted).values for wanted in types)
    return np.logical_or.reduce(per_type_matches, axis=0)

# Best mean macro-F1 per (dataset, graph preprocessing, combined, type) for the
# concept-map, co-occurrence and text rows, leaving out the TF-IDF vectorizer
# runs. The two unstack() calls move the two innermost index levels into
# columns.
(
    df[
        only_types(df, [TYPE_CONCEPT_MAP, TYPE_COOCCURRENCE, 'text'])
        & (df.text__vectorizer != VECTORIZER_TFIDF)
    ]
    .groupby(['dataset', 'features__fast_wl_pipeline__feature_extraction__graph_preprocessing', 'combined', 'type'])
    .mean_test_f1_macro
    .max()
    .to_frame()
    .unstack()
    .unstack()
)

## Significance test for _ng20_

In [None]:
# Prediction files whose name contains this experiment's name.
[
    predictions_file
    for predictions_file in results_helper.get_predictions_files()
    if EXPERIMENT_NAME in predictions_file
]

In [None]:
from utils import significance_test_utils

# Number of randomization rounds. The "trails" spelling is kept because the
# helper functions below take a `num_trails` keyword.
NUM_TRAILS = 5000
# Metric used both for the observed scores and inside the randomization test.
metric = significance_test_utils.f1


# Pairs of prediction files to compare: (combined model, text-only model).
combinations = [
    # TfidfVectorizer
    (
        # Combined
        'result___experiment_combined_with_splitted_words__ng20__graph_combined__dataset_graph_concept_map_ng20-single-v2.npy',
        #'result___experiment__graph_combined__ng20__graph_combined__dataset_graph_concept_map_ng20-single-v2.npy',

        # Text-only
        'result___experiment_graph_combined__ng20__text.npy'
    )
]

# Flat list of every file that has to be loaded.
filenames = [filename for pair in combinations for filename in pair]

# Column name -> list of values, one entry per compared pair.
data = collections.defaultdict(list)

# Key the loaded predictions by bare file name (directory part stripped).
predictions = {k.split('/')[-1]: v['results']['results'] for k, v in results_helper.get_predictions(filenames=filenames)}
for filename_a, filename_b in combinations:
    missing = [name for name in (filename_a, filename_b) if name not in predictions]
    assert not missing, 'Prediction files not found: {}'.format(missing)
    model_a, model_b = predictions[filename_a], predictions[filename_b]

    for key in ('Y_real', 'Y_pred', 'X_test'):
        assert len(model_a[key]) == len(model_b[key]), 'Length mismatch for {}'.format(key)
    # y_true is taken from the first model only, so both runs must have been
    # evaluated against the same labels in the same order.
    assert np.array_equal(model_a['Y_real'], model_b['Y_real']), 'Models were evaluated on different test labels'

    y_true = model_a['Y_real']
    y_pred_a, y_pred_b = model_a['Y_pred'], model_b['Y_pred']

    # Observed scores and their difference (a minus b).
    metric_a = metric(y_true, y_pred_a)
    metric_b = metric(y_true, y_pred_b)
    diff_global = metric_a - metric_b

    # Use the same `metric` as for the observed scores so both stay in sync.
    metrics = significance_test_utils.randomization_test(y_true, y_pred_a, y_pred_b, metric=metric, num_trails=NUM_TRAILS)
    # presumably one row per round with the scores of a and b in columns 0 and 1 - TODO confirm
    diffs = metrics[:, 0] - metrics[:, 1]
    confidence = significance_test_utils.get_confidence(diff_global, diffs, num_trails=NUM_TRAILS)

    data['filename_a'].append(filename_a)
    data['filename_b'].append(filename_b)
    data['confidence'].append(confidence)
    data['diffs'].append(diffs)
    data['diff_global'].append(diff_global)
    data['metric_a'].append(metric_a)
    data['metric_b'].append(metric_b)
    data['num_trails'].append(NUM_TRAILS)

In [None]:
# One row per compared file pair, indexed by the two file names.
df_ = pd.DataFrame(data)
df_ = df_.set_index(['filename_a', 'filename_b'])
# Display every column except 'diffs'.
df_.loc[:, df_.columns != 'diffs']

In [None]:
# Draw one figure per compared file pair from that row's randomization-test results.
for _, row in df_.iterrows():
    fig, ax = plt.subplots(figsize=(10, 3.4))
    significance_test_utils.plot_randomization_test_distribution_(row.diffs, row.diff_global, num_trails=row.num_trails, p=row.confidence, metric_name='f1 macro', ax=ax)

In [None]:
# Dataset name of every cached concept-map graph dataset.
[
    filename_utils.get_dataset_from_filename(graph_dataset_file)
    for graph_dataset_file in dataset_helper.get_all_cached_graph_datasets(graph_type=TYPE_CONCEPT_MAP)
]