# Visualizing the results of masked inference


## Remember: in the classification task we got the following results using BERT:

- Boolean Connective 100%
    
- Quantifiers 90.5%

- Counting 87.5%



### all imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
from inference.text_generation.util import num2word

### reading csv

In [2]:
# Load the masked-inference result tables; both paths are relative to the
# notebook's working directory.
# TODO: quantifier results are not loaded yet (placeholder was `df_q = pd.read_csv()`).
bc_path, count_path = "results/BC_basic.csv", "results/count_basic.csv"
df_bc = pd.read_csv(bc_path)
df_count = pd.read_csv(count_path)

#### General functions

In [4]:
# Derive a "hard" accuracy column from every top-10 score column.
# Flooring keeps 1 only where the score reaches 1.0 (assuming scores lie in
# [0, 1], as the report header printed by print_score states).
features = ["bert_base_uncased_pre_trained", "bert_base_uncased_fine_tuned"]
for feature in features:
    hard_column = feature + "_hard"
    for results_frame in (df_bc, df_count):
        results_frame[hard_column] = np.floor(results_frame[feature].values)
    


def print_score(df_, label_list=None,score=False, acc=True):
    """Print mean score and/or accuracy for the pre-trained and fine-tuned BERT columns.

    The report has two parts: results over the whole frame, then results for
    each label in ``label_list`` (rows where ``df_.label == label``).

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must expose a ``label`` column. When ``score`` is true it must also
        expose ``bert_base_uncased_pre_trained`` and
        ``bert_base_uncased_fine_tuned``; when ``acc`` is true it must expose
        the matching ``*_hard`` columns. Columns are only read for the
        sections that are actually printed.
    label_list : iterable, optional
        Labels to report on, in the order given. When omitted, every distinct
        label of ``df_`` is reported in order of first appearance, so the
        output is reproducible across kernel sessions (iterating a ``set`` of
        strings is not).
    score : bool, default False
        Print the mean of the raw score columns.
    acc : bool, default True
        Print the mean of the ``*_hard`` columns as a percentage.

    Returns
    -------
    None
        Everything is written to stdout.

    Raises
    ------
    AttributeError
        If a column needed by an enabled section is missing from ``df_``.

    Notes
    -----
    A label with no matching rows yields ``nan`` (numpy also emits a
    "Mean of empty slice" RuntimeWarning); it is not skipped.
    The section title always says "connective", whatever the labels are.
    """
    models = ("pre_trained", "fine_tuned")

    def column_mean(frame, model, hard=False):
        # Attribute access (not frame[...]) keeps AttributeError as the
        # failure mode for a missing column, as in the original code.
        column = "bert_base_uncased_" + model + ("_hard" if hard else "")
        return np.mean(getattr(frame, column).values)

    print("==== General results===\n")

    if score:
        print("top 10 score - max = 1.0, min = 0.0\n")
        for model in models:
            print("{}: score = {:.1f}".format(model, column_mean(df_, model)))

    if acc:
        print("\nAccuracy")
        for model in models:
            print("{}, acc = {:.1f} %".format(model, column_mean(df_, model, hard=True) * 100))

    print("\n==== Results for each connective ===\n")

    if label_list is None:
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        label_list = list(dict.fromkeys(df_.label))

    for label in label_list:
        df_label = df_[df_.label == label]

        if score:
            print("top 10 score - max = 1.0, min = 0.0\n")
            for model in models:
                print("(label={}) {}: score = {:.1f}".format(label, model, column_mean(df_label, model)))

        if acc:
            print("\nAccuracy\n")
            for model in models:
                print("(label={}) {}: acc = {:.1f} %".format(label, model, column_mean(df_label, model, hard=True) * 100))
        print()

# Boolean connective results

In [5]:
# Report accuracy over all Boolean-connective rows, then per label; no label
# list is passed, so the labels are taken from df_bc itself.
print_score(df_bc)

==== General results===


Accuracy
pre_trained, acc = 0.0 %
fine_tuned, acc = 55.3 %

==== Results for each connective ===


Accuracy

(label=or) pre_trained: acc = 0.0 %
(label=or) fine_tuned: acc = 80.4 %


Accuracy

(label=and) pre_trained: acc = 0.0 %
(label=and) fine_tuned: acc = 30.2 %



# Quantifiers results

# Count results

In [7]:
# Keep only the num2word values that actually occur as labels in df_count,
# preserving num2word's own ordering so the per-label report follows it.
# The label set is built once rather than once per candidate value.
count_labels = set(df_count.label)
numerals = [n for n in num2word.values() if n in count_labels]
print_score(df_count, numerals)

==== General results===


Accuracy
pre_trained, acc = 4.7 %
fine_tuned, acc = 13.2 %

==== Results for each connective ===


Accuracy

(label=one) pre_trained: acc = 0.0 %
(label=one) fine_tuned: acc = 0.0 %


Accuracy

(label=two) pre_trained: acc = 5.2 %
(label=two) fine_tuned: acc = 10.3 %


Accuracy

(label=three) pre_trained: acc = 95.7 %
(label=three) fine_tuned: acc = 76.1 %


Accuracy

(label=four) pre_trained: acc = 0.0 %
(label=four) fine_tuned: acc = 22.8 %


Accuracy

(label=five) pre_trained: acc = 0.0 %
(label=five) fine_tuned: acc = 0.0 %


Accuracy

(label=six) pre_trained: acc = 0.0 %
(label=six) fine_tuned: acc = 0.0 %


Accuracy

(label=seven) pre_trained: acc = 0.0 %
(label=seven) fine_tuned: acc = 0.0 %


Accuracy

(label=eight) pre_trained: acc = 0.0 %
(label=eight) fine_tuned: acc = 0.0 %


Accuracy

(label=nine) pre_trained: acc = 0.0 %
(label=nine) fine_tuned: acc = 0.0 %


Accuracy

(label=ten) pre_trained: acc = 0.0 %
(label=ten) fine_tuned: acc = 14.3 %


Ac