In [60]:
# !pip install --upgrade pip
# !pip install xgboost 

In [30]:
import os 
import random
import pandas as pd
import numpy as np
from copy import deepcopy
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [2]:
def set_random_seed(seed):
    np.random.seed(seed)
    random.seed(seed)

In [17]:
seed = 1
set_random_seed(seed)

base_dir = './dataset/'

# load the positive and negative QA pairs
all_pos_df = pd.read_csv(os.path.join(base_dir, 'positive_general_labels.csv'))
all_neg_df = pd.read_csv(os.path.join(base_dir, 'negative_labels.csv'))

# shuffle each set individually
# p = np.random.permutation(all_pos_df.index)
# all_pos_df = all_pos_df.reindex(p)
# all_neg_df = all_neg_df.reindex(p)

print('All positive samples:', all_pos_df.shape)
print('All negative samples:', all_neg_df.shape)

# check a sample data point for positive and negative labels
print(all_pos_df.iloc[10])
print(all_neg_df.iloc[10])


# put the first "test_size" QA pos and neg labels in the test dataset
test_size = 500
test_df = all_pos_df[:test_size]
test_df = test_df.append(all_neg_df[:test_size])

# put the second "test_size" QA pos and neg labels in the validation dataset
valid_df = all_pos_df[test_size:2*test_size]
valid_df = valid_df.append(all_neg_df[test_size:2*test_size])

# put all the other pairs in the training dataset
train_df = all_pos_df[2*test_size:]
train_df = train_df.append(all_neg_df[2*test_size:])

print('Training dataset:', train_df.shape)
print('Validation dataset:', valid_df.shape)
print('Test dataset:', test_df.shape)

All positive samples: (4410, 5)
All negative samples: (4137, 5)
question_id                             56bc751eac7ad10019000013
question          Name synonym of Acrokeratosis paraneoplastica.
answer         Acrokeratosis paraneoplastic (Bazex syndrome) ...
label                                                       True
source                                     BioASQ_TR_IdealAnswer
Name: 10, dtype: object
question_id                             56bc751eac7ad10019000013
question          Name synonym of Acrokeratosis paraneoplastica.
answer         he conditioned infections due to opportunistic...
label                                                      False
source                                                   CORD-19
Name: 10, dtype: object
Training dataset: (6547, 5)
Validation dataset: (1000, 5)
Test dataset: (1000, 5)


In [18]:
# verify that the questions in different sets do NOT overlap
test_qs = set(test_df['question'].values)
valid_qs = set(valid_df['question'].values)
train_qs = set(train_df['question'].values)
print('test', len(test_qs), ' - train', len(train_qs), ' - intersection:', test_qs.intersection(train_qs))
print('test', len(test_qs), ' - valid', len(valid_qs), ' - intersection:', test_qs.intersection(valid_qs))
print('valid', len(valid_qs), ' - train', len(train_qs), ' - intersection:', valid_qs.intersection(train_qs))

test 500  - train 3410  - intersection: set()
test 500  - valid 500  - intersection: set()
valid 500  - train 3410  - intersection: set()


In [19]:
def clustering_active_learner(data, b_size, probs):
        
    X = np.zeros((data.shape[0], len(sentences_embeds[0])))
    for i in range(data.shape[0]):
        s_id = sentence_ids[data.iloc[i]['question'] + ' ||| ' + data.iloc[i]['answer']]
        X[i, :] = sentences_embeds[s_id]
        
    kernel = KMeans(n_clusters=b_size, verbose=0, n_jobs=-1, random_state=1) 
    kernel = kernel.fit(X)     
    labels = kernel.predict(X)

    cluster_min = np.ones((b_size, 2))
    sampled = pd.DataFrame(columns=data.columns)
    for i in range(data.shape[0]):
        if np.max(probs[i, :]) < cluster_min[labels[i], 0]:
            cluster_min[labels[i], 0] = np.max(probs[i, :])
            cluster_min[labels[i], 1] = i

    for i in cluster_min[:, 1]:
        sampled = sampled.append(data.iloc[int(i)])

    remaining = data.drop(sampled.index, axis=0)

    return sampled, remaining
    

In [20]:
def uncertainty_active_learner(data, b_size, probs):
    
    # step 1: find the higher probability
    # step 2: sort these probabilities
    # step 3: choose the ones that have the lowest probability
    sorted_arg = np.argsort(np.amax(probs, axis=1))
    
    sampled = data.drop(list(sorted_arg[b_size:]))
    remaining = data.drop(list(sorted_arg[:b_size]))

    return sampled, remaining

In [21]:
def random_active_learner(data, b_size=10, seed=0):
    sampled = data.sample(n=b_size, random_state=seed)
    remaining = data.drop(sampled.index)

    return sampled, remaining
    

In [22]:
sentence_ids = pickle.load(open('pos_general_neg_bert_sentence_ids.pkl', 'rb'))
sentences_embeds = pickle.load(open('pos_general_neg_bert_sentence_embeddings.pkl', 'rb'))

def to_features(data):
    X = np.zeros((data.shape[0], 768))
    Y = np.zeros((data.shape[0], 1), dtype=bool)
    
    for i in range(data.shape[0]):
        s_id = sentence_ids[data.iloc[i]['question'] + ' ||| ' + data.iloc[i]['answer']]
        X[i, :] = sentences_embeds[s_id]
        Y[i, :] = data.iloc[i]['label']
        
    return X, Y

In [23]:
def eval_results(model, X_train, Y_train, X_valid, Y_valid, X_test, Y_test, eval_metrics):
    Y_train_pred = model.predict(X_train)
    train_predictions = [round(value) for value in Y_train_pred]
    train_accuracy = accuracy_score(Y_train, train_predictions)
    train_f1 = f1_score(Y_train, train_predictions)
    print('Train accuracy:', train_accuracy, ' - Train f1:', train_f1)

    Y_valid_pred = model.predict(X_valid)
    valid_predictions = [round(value) for value in Y_valid_pred]
    valid_accuracy = accuracy_score(Y_valid, valid_predictions)
    valid_f1 = f1_score(Y_valid, valid_predictions)
    print('Valid accuracy:', valid_accuracy, ' - Valid f1:', valid_f1)

    Y_test_pred = model.predict(X_test)
    test_predictions = [round(value) for value in Y_test_pred]
    test_accuracy = accuracy_score(Y_test, test_predictions)
    test_f1 = f1_score(Y_test, test_predictions)
    print('Test accuracy:', test_accuracy, ' - Test f1:', test_f1)

    eval_metrics.append([train_accuracy, train_f1, valid_accuracy, valid_f1, test_accuracy, test_f1])
    return test_accuracy, test_f1

In [24]:
xgb_params = {
    "n_jobs": -1, 
    "max_depth": 25, 
    "min_child_weight": 0.1,
    "gamma": 1,
    "eta": 0.001,
    "num_round": 10000,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "verbosity": 0, 
    "tree_method": 'hist', 
    "objective": 'binary:logistic'    
}

xgb_model = XGBClassifier()
xgb_model.set_params(**xgb_params)
X_train, Y_train = to_features(train_df)
X_valid, Y_valid = to_features(valid_df)
X_test, Y_test = to_features(test_df)

print('Started fitting...')
xgb_model.fit(X_train, Y_train,
              eval_set=[(X_train, Y_train), (X_valid, Y_valid)],
              eval_metric='error',
              verbose=False)
print('Finished fitting.')
eval_metrics = []
print('Started predicting...')
acc, f1 = eval_results(xgb_model, X_train, Y_train, X_valid, Y_valid, X_test, Y_test, eval_metrics)
print('Finished predicting.')
print(acc, f1)

Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 0.9883916297540858  - Train f1: 0.9887773183697578
Valid accuracy: 0.885  - Valid f1: 0.8841893252769387
Test accuracy: 0.904  - Test f1: 0.904
Finished predicting.
0.904 0.904


In [25]:
pickle.dump(eval_metrics, open('pos_general_neg_bert_eval_metrics-AllTraining.pkl', 'wb'))

In [26]:
seed = 0
set_random_seed(seed)

batch_size = 20

pool_df = deepcopy(train_df)
pool_df.reset_index(drop=True, inplace=True)
annotated_df = pd.DataFrame(columns=pool_df.columns)

seed_df, pool_no_seed_df = random_active_learner(data=pool_df, 
                                               b_size=batch_size, 
                                               seed=seed)

pool_no_seed_df.reset_index(drop=True, inplace=True)
seed_df.reset_index(drop=True, inplace=True)    

#X_train, Y_train = to_features(pool_df)
X_train, Y_train = to_features(seed_df)
X_valid, Y_valid = to_features(valid_df)
X_test, Y_test = to_features(test_df)
    
print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)

print(np.unique(Y_train))
print(np.unique(Y_valid))
print(np.unique(Y_test))

(20, 768)
(1000, 768)
(1000, 768)
[False  True]
[False  True]
[False  True]


In [27]:
xgb_params = {
    "n_jobs": -1, 
    "max_depth": 25, 
    "min_child_weight": 0.1,
    "gamma": 1,
    "eta": 0.001,
    "num_round": 10000,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "verbosity": 0, 
    "tree_method": 'hist', 
    "objective": 'binary:logistic'    
}

xgb_model = XGBClassifier()
xgb_model.set_params(**xgb_params)

print('Started fitting...')
xgb_model.fit(X_train, Y_train,
              eval_set=[(X_train, Y_train), (X_valid, Y_valid)],
              eval_metric='error',
              verbose=False)
print('Finished fitting.')
eval_metrics = []
pickle.dump(xgb_model, open('bert_xgboost_start_model.pkl', "wb"))
print('Started predicting...')
acc, f1 = eval_results(xgb_model, X_train, Y_train, X_valid, Y_valid, X_test, Y_test, eval_metrics)
print('Finished predicting.')
print(acc, f1)

Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.651  - Valid f1: 0.5199449793672627
Test accuracy: 0.63  - Test f1: 0.48467966573816157
Finished predicting.
0.63 0.48467966573816157


In [28]:
batch_size = 5

strategy_results = {}
for al_strategy in ['uncertainty', 'random', 'clustering']:
    print("Using {} Strategy ====>".format(al_strategy))
    al_iterations = 50
    
    xgb_model = pickle.load(open('bert_xgboost_start_model.pkl', "rb"))
    
    eval_metrics = []
    print('Started predicting...')
    acc, f1 = eval_results(xgb_model, X_train, Y_train, X_valid, Y_valid, X_test, Y_test, eval_metrics)
    print('Finished predicting.')

    pool_df = deepcopy(pool_no_seed_df)
    pool_df.reset_index(drop=True, inplace=True)
    annotated_df = deepcopy(seed_df)
    print('Pool size:', pool_df.shape, ' - Annotated size:', annotated_df.shape)
    
    f1_list = [f1]
    acc_list = [acc]
    
    for i in range(al_iterations):
        print('********** AL Iteration', i, '**************')

        print('Evaluate pool dataset...')
        X_pool, Y_pool = to_features(pool_df)
        predicted_prob = xgb_model.predict_proba(X_pool)
        print('Evaluation Done.')

        if al_strategy == 'random':
            to_annotate_i, pool_df = random_active_learner(data=pool_df, 
                                                   b_size=batch_size, 
                                                   seed=seed)
        elif al_strategy == 'uncertainty':
            to_annotate_i, pool_df = uncertainty_active_learner(data=pool_df, 
                                                        b_size=batch_size, 
                                                        probs=predicted_prob)
        elif al_strategy == 'uncertainty_diversity':
            to_annotate_i, pool_df = uncertainty_diversity_active_learner(data=pool_df, 
                                                                          b_size=batch_size, 
                                                                          probs=predicted_prob)
        elif al_strategy == 'clustering':
            to_annotate_i, pool_df = clustering_active_learner(data=pool_df, 
                                                              b_size=batch_size, 
                                                              probs=predicted_prob)
        else:
            print('ERROR: Wrong AL Option.')
            break

        pool_df.reset_index(drop=True, inplace=True)
        to_annotate_i.reset_index(drop=True, inplace=True)

        annotated_df = pd.concat([annotated_df, to_annotate_i], axis=0, join='outer')
        annotated_df.reset_index(drop=True, inplace=True)

        print('Pool:', pool_df.shape, ' - to_annotate_i:', to_annotate_i.shape, ' - annotated:', annotated_df.shape)

        print('Creating features...')
        X_train, Y_train = to_features(annotated_df)
        print('Features created.')

        print('Started fitting...')
        xgb_model = XGBClassifier()
        xgb_model.set_params(**xgb_params)

        xgb_model.fit(X_train, Y_train,
                      eval_set=[(X_train, Y_train), (X_valid, Y_valid)],
                      eval_metric='logloss',
                      verbose=False)
        print('Finished fitting.')

        print('Started predicting...')
        acc, f1 = eval_results(xgb_model, X_train, Y_train, X_valid, Y_valid, X_test, Y_test, eval_metrics)
        acc_list.append(acc)
        f1_list.append(f1)
        print('Finished predicting.')
        
    strategy_results[al_strategy] = [acc_list, f1_list]
    if al_strategy == 'clustering':
        pickle.dump(eval_metrics, open('pos_general_neg_bert_eval_metrics-ALC1-bs5.pkl', 'wb'))
    elif al_strategy == 'uncertainty':
        pickle.dump(eval_metrics, open('pos_general_neg_bert_eval_metrics-ALU1-bs5.pkl', 'wb'))
    else:
        pickle.dump(eval_metrics, open('pos_general_neg_bert_eval_metrics-RND1-bs5.pkl', 'wb'))

Using uncertainty Strategy ====>
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.651  - Valid f1: 0.5199449793672627
Test accuracy: 0.63  - Test f1: 0.48467966573816157
Finished predicting.
Pool size: (6527, 5)  - Annotated size: (20, 5)
********** AL Iteration 0 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6522, 5)  - to_annotate_i: (5, 5)  - annotated: (25, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.704  - Valid f1: 0.6433734939759036
Test accuracy: 0.711  - Test f1: 0.6409937888198758
Finished predicting.
********** AL Iteration 1 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6517, 5)  - to_annotate_i: (5, 5)  - annotated: (30, 5)
Creating features...
Features created.
Started fitting...
Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.756  - Valid f1: 0.7239819004524887
Test accuracy: 0.753  - Test f1: 0.709753231492362
Finished predicting.
********** AL Iteration 2 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6512, 5)  - to_annotate_i: (5, 5)  - annotated: (35, 5)
Creating features...
Features created.
Started fitting...
Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.78  - Valid f1: 0.7577092511013216
Test accuracy: 0.806  - Test f1: 0.7853982300884955
Fin

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.709  - Valid f1: 0.6420664206642067
Test accuracy: 0.726  - Test f1: 0.6625615763546798
Finished predicting.
********** AL Iteration 1 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6517, 5)  - to_annotate_i: (5, 5)  - annotated: (30, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.691  - Valid f1: 0.6468571428571428
Test accuracy: 0.719  - Test f1: 0.6751445086705203
Finished predicting.
********** AL Iteration 2 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6512, 5)  - to_annotate_i: (5, 5)  - annotated: (35, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.734  - Valid f1: 0.7714776632302406
Test accuracy: 0.746  - Test f1: 0.7817869415807559
Finished predicting.
********** AL Iteration 3 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6507, 5)  - to_annotate_i: (5, 5)  - annotated: (40, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.776  - Valid f1: 0.7795275590551182
Test accuracy: 0.805  - Test f1: 0.8078817733990147
Finished predicting.
********** AL Iteration 4 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6502, 5)  - to_annotate_i: (5, 5)  - annotated: (45, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.789  - Valid f1: 0.8040854224698235
Test accuracy: 0.82  - Test f1: 0.8330241187384045
Finished predicting.
********** AL Iteration 5 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6497, 5)  - to_annotate_i: (5, 5)  - annotated: (50, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.778  - Valid f1: 0.7571115973741795
Test accuracy: 0.798  - Test f1: 0.7780219780219779
Finished predicting.
********** AL Iteration 6 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6492, 5)  - to_annotate_i: (5, 5)  - annotated: (55, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.807  - Valid f1: 0.7862679955703211
Test accuracy: 0.816  - Test f1: 0.7941834451901565
Finished predicting.
********** AL Iteration 7 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6487, 5)  - to_annotate_i: (5, 5)  - annotated: (60, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.817  - Valid f1: 0.8030139935414423
Test accuracy: 0.829  - Test f1: 0.8190476190476191
Finished predicting.
********** AL Iteration 8 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6482, 5)  - to_annotate_i: (5, 5)  - annotated: (65, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.814  - Valid f1: 0.8113590263691685
Test accuracy: 0.82  - Test f1: 0.8189134808853117
Finished predicting.
********** AL Iteration 9 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6477, 5)  - to_annotate_i: (5, 5)  - annotated: (70, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.809  - Valid f1: 0.8107036669970268
Test accuracy: 0.838  - Test f1: 0.8373493975903614
Finished predicting.
********** AL Iteration 10 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6472, 5)  - to_annotate_i: (5, 5)  - annotated: (75, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.835  - Valid f1: 0.819277108433735
Test accuracy: 0.824  - Test f1: 0.8078602620087336
Finished predicting.
********** AL Iteration 11 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6467, 5)  - to_annotate_i: (5, 5)  - annotated: (80, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.818  - Valid f1: 0.8100208768267224
Test accuracy: 0.828  - Test f1: 0.8204592901878914
Finished predicting.
********** AL Iteration 12 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6462, 5)  - to_annotate_i: (5, 5)  - annotated: (85, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.816  - Valid f1: 0.8079331941544885
Test accuracy: 0.817  - Test f1: 0.8115345005149331
Finished predicting.
********** AL Iteration 13 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6457, 5)  - to_annotate_i: (5, 5)  - annotated: (90, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.854  - Valid f1: 0.8513238289205703
Test accuracy: 0.848  - Test f1: 0.8455284552845528
Finished predicting.
********** AL Iteration 14 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6452, 5)  - to_annotate_i: (5, 5)  - annotated: (95, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.85  - Valid f1: 0.8443983402489627
Test accuracy: 0.844  - Test f1: 0.8361344537815125
Finished predicting.
********** AL Iteration 15 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6447, 5)  - to_annotate_i: (5, 5)  - annotated: (100, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.864  - Valid f1: 0.8629032258064515
Test accuracy: 0.861  - Test f1: 0.8605817452357071
Finished predicting.
********** AL Iteration 16 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6442, 5)  - to_annotate_i: (5, 5)  - annotated: (105, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.857  - Valid f1: 0.850261780104712
Test accuracy: 0.858  - Test f1: 0.8539094650205762
Finished predicting.
********** AL Iteration 17 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6437, 5)  - to_annotate_i: (5, 5)  - annotated: (110, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.845  - Valid f1: 0.8363252375923971
Test accuracy: 0.848  - Test f1: 0.8413361169102296
Finished predicting.
********** AL Iteration 18 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6432, 5)  - to_annotate_i: (5, 5)  - annotated: (115, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.838  - Valid f1: 0.8366935483870969
Test accuracy: 0.854  - Test f1: 0.8522267206477733
Finished predicting.
********** AL Iteration 19 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6427, 5)  - to_annotate_i: (5, 5)  - annotated: (120, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.835  - Valid f1: 0.8374384236453202
Test accuracy: 0.839  - Test f1: 0.8435374149659863
Finished predicting.
********** AL Iteration 20 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6422, 5)  - to_annotate_i: (5, 5)  - annotated: (125, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.842  - Valid f1: 0.8474903474903475
Test accuracy: 0.849  - Test f1: 0.8538238141335914
Finished predicting.
********** AL Iteration 21 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6417, 5)  - to_annotate_i: (5, 5)  - annotated: (130, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.834  - Valid f1: 0.8372549019607843
Test accuracy: 0.843  - Test f1: 0.8471275559883156
Finished predicting.
********** AL Iteration 22 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6412, 5)  - to_annotate_i: (5, 5)  - annotated: (135, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.836  - Valid f1: 0.8388998035363459
Test accuracy: 0.826  - Test f1: 0.8290766208251473
Finished predicting.
********** AL Iteration 23 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6407, 5)  - to_annotate_i: (5, 5)  - annotated: (140, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.875  - Valid f1: 0.8741188318227593
Test accuracy: 0.868  - Test f1: 0.8695652173913043
Finished predicting.
********** AL Iteration 24 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6402, 5)  - to_annotate_i: (5, 5)  - annotated: (145, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.866  - Valid f1: 0.8649193548387096
Test accuracy: 0.862  - Test f1: 0.8636363636363635
Finished predicting.
********** AL Iteration 25 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6397, 5)  - to_annotate_i: (5, 5)  - annotated: (150, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.851  - Valid f1: 0.8529121421520237
Test accuracy: 0.872  - Test f1: 0.8740157480314961
Finished predicting.
********** AL Iteration 26 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6392, 5)  - to_annotate_i: (5, 5)  - annotated: (155, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.827  - Valid f1: 0.8399629972247918
Test accuracy: 0.826  - Test f1: 0.8409506398537477
Finished predicting.
********** AL Iteration 27 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6387, 5)  - to_annotate_i: (5, 5)  - annotated: (160, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.842  - Valid f1: 0.850943396226415
Test accuracy: 0.847  - Test f1: 0.8582020389249304
Finished predicting.
********** AL Iteration 28 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6382, 5)  - to_annotate_i: (5, 5)  - annotated: (165, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.867  - Valid f1: 0.8687068114511354
Test accuracy: 0.882  - Test f1: 0.8836291913214991
Finished predicting.
********** AL Iteration 29 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6377, 5)  - to_annotate_i: (5, 5)  - annotated: (170, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.871  - Valid f1: 0.873405299313052
Test accuracy: 0.87  - Test f1: 0.874274661508704
Finished predicting.
********** AL Iteration 30 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6372, 5)  - to_annotate_i: (5, 5)  - annotated: (175, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.876  - Valid f1: 0.8744939271255061
Test accuracy: 0.878  - Test f1: 0.8796844181459568
Finished predicting.
********** AL Iteration 31 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6367, 5)  - to_annotate_i: (5, 5)  - annotated: (180, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.871  - Valid f1: 0.8721506442021804
Test accuracy: 0.886  - Test f1: 0.8875739644970414
Finished predicting.
********** AL Iteration 32 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6362, 5)  - to_annotate_i: (5, 5)  - annotated: (185, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.879  - Valid f1: 0.8781470292044311
Test accuracy: 0.876  - Test f1: 0.8757515030060121
Finished predicting.
********** AL Iteration 33 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6357, 5)  - to_annotate_i: (5, 5)  - annotated: (190, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.883  - Valid f1: 0.8821752265861027
Test accuracy: 0.885  - Test f1: 0.8864758144126357
Finished predicting.
********** AL Iteration 34 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6352, 5)  - to_annotate_i: (5, 5)  - annotated: (195, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.874  - Valid f1: 0.87374749498998
Test accuracy: 0.878  - Test f1: 0.8782435129740519
Finished predicting.
********** AL Iteration 35 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6347, 5)  - to_annotate_i: (5, 5)  - annotated: (200, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.885  - Valid f1: 0.8841893252769387
Test accuracy: 0.889  - Test f1: 0.8893320039880359
Finished predicting.
********** AL Iteration 36 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6342, 5)  - to_annotate_i: (5, 5)  - annotated: (205, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.868  - Valid f1: 0.8693069306930692
Test accuracy: 0.883  - Test f1: 0.8851815505397449
Finished predicting.
********** AL Iteration 37 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6337, 5)  - to_annotate_i: (5, 5)  - annotated: (210, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 0.9952380952380953  - Train f1: 0.9954751131221719
Valid accuracy: 0.855  - Valid f1: 0.862298195631529
Test accuracy: 0.841  - Test f1: 0.8490028490028491
Finished predicting.
********** AL Iteration 38 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6332, 5)  - to_annotate_i: (5, 5)  - annotated: (215, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.857  - Valid f1: 0.8539325842696629
Test accuracy: 0.873  - Test f1: 0.8715874620829119
Finished predicting.
********** AL Iteration 39 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6327, 5)  - to_annotate_i: (5, 5)  - annotated: (220, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.875  - Valid f1: 0.8738647830474269
Test accuracy: 0.881  - Test f1: 0.881592039800995
Finished predicting.
********** AL Iteration 40 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6322, 5)  - to_annotate_i: (5, 5)  - annotated: (225, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.891  - Valid f1: 0.8877445932028836
Test accuracy: 0.889  - Test f1: 0.888888888888889
Finished predicting.
********** AL Iteration 41 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6317, 5)  - to_annotate_i: (5, 5)  - annotated: (230, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.886  - Valid f1: 0.8846153846153847
Test accuracy: 0.881  - Test f1: 0.8794326241134751
Finished predicting.
********** AL Iteration 42 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6312, 5)  - to_annotate_i: (5, 5)  - annotated: (235, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.885  - Valid f1: 0.8839556004036326
Test accuracy: 0.889  - Test f1: 0.8882175226586102
Finished predicting.
********** AL Iteration 43 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6307, 5)  - to_annotate_i: (5, 5)  - annotated: (240, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.882  - Valid f1: 0.8788501026694044
Test accuracy: 0.872  - Test f1: 0.8707070707070708
Finished predicting.
********** AL Iteration 44 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6302, 5)  - to_annotate_i: (5, 5)  - annotated: (245, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 0.9959183673469387  - Train f1: 0.99581589958159
Valid accuracy: 0.878  - Valid f1: 0.8702127659574468
Test accuracy: 0.883  - Test f1: 0.8767123287671234
Finished predicting.
********** AL Iteration 45 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6297, 5)  - to_annotate_i: (5, 5)  - annotated: (250, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.881  - Valid f1: 0.8756530825496344
Test accuracy: 0.891  - Test f1: 0.8875128998968008
Finished predicting.
********** AL Iteration 46 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6292, 5)  - to_annotate_i: (5, 5)  - annotated: (255, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.889  - Valid f1: 0.8844953173777316
Test accuracy: 0.896  - Test f1: 0.8930041152263376
Finished predicting.
********** AL Iteration 47 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6287, 5)  - to_annotate_i: (5, 5)  - annotated: (260, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.885  - Valid f1: 0.8834853090172239
Test accuracy: 0.892  - Test f1: 0.889795918367347
Finished predicting.
********** AL Iteration 48 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6282, 5)  - to_annotate_i: (5, 5)  - annotated: (265, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.873  - Valid f1: 0.868937048503612
Test accuracy: 0.893  - Test f1: 0.8920282542885973
Finished predicting.
********** AL Iteration 49 **************
Evaluate pool dataset...
Evaluation Done.
Pool: (6277, 5)  - to_annotate_i: (5, 5)  - annotated: (270, 5)
Creating features...
Features created.
Started fitting...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Finished fitting.
Started predicting...
Train accuracy: 1.0  - Train f1: 1.0
Valid accuracy: 0.888  - Valid f1: 0.8864097363083163
Test accuracy: 0.897  - Test f1: 0.896896896896897
Finished predicting.


In [31]:
ACH_performance = np.asarray(pickle.load(open('pos_general_neg_bert_eval_metrics-AllTraining.pkl', 'rb')))
ALU_performance = np.asarray(pickle.load(open('pos_general_neg_bert_eval_metrics-ALU1-bs5.pkl', 'rb')))
ALC_performance = np.asarray(pickle.load(open('pos_general_neg_bert_eval_metrics-ALC1-bs5.pkl', 'rb')))
RND_performance = np.asarray(pickle.load(open('pos_general_neg_bert_eval_metrics-RND1-bs5.pkl', 'rb')))

RND_marker, ALU_marker, ALC_marker = 'x', '^', 's'
RND_color, ALU_color, ALC_color = 'black', 'red', 'blue'
RND_lstyle, ALU_lstyle, ALC_lstyle = 'dotted', 'solid', 'solid'
RND_fstyle, ALU_fstyle, ALC_fstyle = 'none', 'none', 'none'
RND_msize, ALU_msize, ALC_msize = 5, 5, 5
RND_lwidth, ALU_lwidth, ALC_lwidth = 0.5, 0.5, 0.5

ACH_color = 'black'
ACH_lwidth = 0.5

p_step = float(5)/6274


#### TEST
plt.figure(figsize=(10,6))
#plt.suptitle('Performance of the Test Dataset')
plt.subplot(211)
plt.hlines(y=ACH_performance[0, 4], xmin=0, xmax=len(RND_performance)*(5/6274), label='100% Data',
          color=ACH_color, linewidth=ACH_lwidth) 
plt.plot(np.arange(0, len(RND_performance)*p_step, p_step), ALC_performance[0:, 4], label='AL-Clustering', 
         marker=ALC_marker, color=ALC_color, linestyle=ALC_lstyle, fillstyle=ALC_fstyle, 
         markersize=ALC_msize, linewidth=ALC_lwidth)
plt.plot(np.arange(0, len(RND_performance)*p_step, p_step), ALU_performance[0:, 4], label='AL-Uncertainty', 
         marker=ALU_marker, color=ALU_color, linestyle=ALU_lstyle, fillstyle=ALU_fstyle, 
         markersize=ALU_msize, linewidth=ALC_lwidth)
plt.plot(np.arange(0, len(RND_performance)*p_step, p_step), RND_performance[0:, 4], label='Random', 
         marker=RND_marker, color=RND_color, linestyle=RND_lstyle, fillstyle=RND_fstyle, 
         markersize=RND_msize, linewidth=ALC_lwidth)
plt.legend(loc=4)
#plt.xlabel('Number of active iterations')
plt.xlabel('Percentage of data used')
plt.ylabel('Accuracy')
plt.xlim([0, len(RND_performance)*p_step])

plt.subplot(212)
plt.hlines(y=ACH_performance[0, 5], xmin=0, xmax=len(RND_performance)*(5/6274), label='100% Data',
          color=ACH_color, linewidth=ACH_lwidth) 
plt.plot(np.arange(0, len(RND_performance)*p_step, p_step), ALC_performance[0:, 5], label='AL-Clustering', 
         marker=ALC_marker, color=ALC_color, linestyle=ALC_lstyle, fillstyle=ALC_fstyle, 
         markersize=ALC_msize, linewidth=ALC_lwidth)
plt.plot(np.arange(0, len(RND_performance)*p_step, p_step), ALU_performance[0:, 5], label='AL-Uncertainty', 
         marker=ALU_marker, color=ALU_color, linestyle=ALU_lstyle, fillstyle=ALU_fstyle, 
         markersize=ALU_msize, linewidth=ALC_lwidth)
plt.plot(np.arange(0, len(RND_performance)*p_step, p_step), RND_performance[0:, 5], label='Random', 
         marker=RND_marker, color=RND_color, linestyle=RND_lstyle, fillstyle=RND_fstyle, 
         markersize=RND_msize, linewidth=ALC_lwidth)
plt.legend(loc=4)
#plt.xlabel('Number of active iterations')
plt.xlabel('Percentage of data used')
plt.ylabel('F1 Score')
plt.xlim([0, len(RND_performance)*p_step])

plt.tight_layout(pad=3.0)
%config InlineBackend.figure_format = 'svg'

# plt.savefig('Results-pos_general_neg_bert_xgb-test.svg', format='svg')
plt.show()


TypeError: scalar() argument 1 must be numpy.dtype, not numpy.dtype

'1.15.4'