In [1]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs
import numpy as np
import difflib

In [9]:
def avg_p(labels, preds):
    """Exact-match accuracy between reference and predicted values.

    Args:
        labels: Reference values. Any array-like accepted by ``np.asarray``
            (list, tuple, ndarray, pandas Series, ...).
        preds: Predicted values, broadcast-compatible with ``labels``.

    Returns:
        tuple: ``(score, mismatch_mask)``. ``score`` is the mean of the
        element-wise ``labels == preds`` comparison (NaN when there is
        nothing to compare). ``mismatch_mask`` is the boolean array that is
        True wherever the two inputs differ.

    Raises:
        ValueError: If ``labels`` and ``preds`` have incompatible shapes.
    """
    # Convert every array-like, not only lists: comparing two tuples with
    # `==` yields a single Python bool, which has no `.mean()`.
    labels = np.asarray(labels)
    preds = np.asarray(preds)

    # Fail early with a readable message instead of relying on the
    # version-dependent behaviour of `==` on mismatched shapes.
    try:
        np.broadcast(labels, preds)
    except ValueError as exc:
        raise ValueError(
            f"labels and preds have incompatible shapes: {labels.shape} vs {preds.shape}"
        ) from exc

    check = np.asarray(labels == preds)
    invert = np.invert(check)
    # The mean of an empty array is NaN plus a RuntimeWarning; return the NaN
    # explicitly so that empty inputs stay quiet.
    score = check.mean() if check.size else float("nan")
    return score, invert

class Experiment:
    """Wrapper around a simpletransformers BART ``Seq2SeqModel`` with helpers
    that compare the model's predictions against reference texts.
    """

    def __init__(self, model_name="20211207153056", model_path="/home/fabian/Documents/2021-2022/word-problem-solver/Experiments/models/", use_cuda=True) -> None:
        """Load the model stored at ``model_path + model_name``.

        Args:
            model_name: Name of the model directory (appended to ``model_path``).
            model_path: Directory prefix; concatenated as-is, so it must end
                with a path separator.
            use_cuda: Forwarded to ``Seq2SeqModel``.
        """
        # NOTE(review): the default model_path is a machine-specific absolute
        # path; pass model_path explicitly when running elsewhere.
        self.model = self.init_model(model_name, model_path, use_cuda)
        # Stored for callers; none of the methods in this class read it.
        self.prefix = "nltocl:"

    def init_model(self, model_name, model_path, use_cuda=True):
        """Build the ``Seq2SeqModel`` used for prediction.

        Args:
            model_name: Name of the model directory.
            model_path: Directory prefix that ``model_name`` is appended to.
            use_cuda: Forwarded to ``Seq2SeqModel``.

        Returns:
            The constructed ``Seq2SeqModel``.
        """
        # Model arguments
        model_args = Seq2SeqArgs()

        # Generation-related settings.
        model_args.num_return_sequences = 1
        model_args.max_length = 256
        model_args.top_k = 50
        model_args.top_p = 0.95

        model_args.wandb_project = "NL to CL with BART"
        model = Seq2SeqModel(
            encoder_decoder_type="bart",
            encoder_decoder_name=model_path+model_name,
            args=model_args,
            use_cuda=use_cuda
        )
        return model

    @staticmethod
    def _flush_runs(output, del_word, add_word, show_output):
        """Append pending deletion/insertion runs to ``output`` and reset both buffers."""
        # Deletions are reported before insertions, matching the report order.
        for run in (del_word, add_word):
            if run:
                text = "".join(run)
                output.append(text)
                if show_output:
                    print(text)
        del_word.clear()
        add_word.clear()

    def difference(self, labels, preds, show_output=True):
        """Build a character-level diff report for each (pred, label) pair.

        Each report starts with a ``pred : ... => label : ...`` header followed
        by one line per contiguous run of removed (``-- i : text``) or added
        (``++ i : text``) characters, where ``i`` is the position of the run's
        first entry in the ``difflib.ndiff`` output stream (not an index into
        either string).

        Args:
            labels: Iterable of reference strings.
            preds: Iterable of predicted strings, paired positionally with
                ``labels``.
            show_output: When True, also print every report line.

        Returns:
            list[str]: One newline-joined report per pair.
        """
        outputs = []
        for a, b in zip(preds, labels):
            output = ['pred  : {}\n=>\nlabel : {}'.format(a, b)]
            if show_output:
                print(output[0])
            del_word = []
            add_word = []
            for i, s in enumerate(difflib.ndiff(a, b)):
                tag = s[0]
                if tag == ' ':
                    # An unchanged character ends any run in progress.
                    self._flush_runs(output, del_word, add_word, show_output)
                elif tag == '-':
                    if not del_word:
                        del_word.append(f'-- {i} : ')
                    # ndiff lines are "<tag> <element>"; the payload starts at 2.
                    del_word.append(s[2:])
                elif tag == '+':
                    if not add_word:
                        add_word.append(f'++ {i} : ')
                    add_word.append(s[2:])
            # Flush runs that reach the end of the strings; without this a
            # difference in the final characters would be missing from the report.
            self._flush_runs(output, del_word, add_word, show_output)
            outputs.append("\n".join(output))
            if show_output:
                print()
        return outputs

    def calculate_difference(self, labels, preds, f=avg_p, lower=False, show_output=False):
        """Score predictions with ``f`` and diff the pairs that ``f`` flags.

        Args:
            labels: Array-like of reference strings.
            preds: Array-like of predicted strings.
            f: Metric called as ``f(labels, preds)``; must return
                ``(score, mask)`` where ``mask`` selects the pairs to diff.
            lower: When True, lower-case both inputs before scoring/diffing.
            show_output: Forwarded to :meth:`difference`.

        Returns:
            tuple: ``(outputs, mask)`` — the diff reports for the selected
            pairs and the mask returned by ``f``.
        """
        # Accept any array-like so that boolean-mask indexing below works.
        labels = np.asarray(labels)
        preds = np.asarray(preds)
        if lower:
            labels = np.char.lower(labels)
            preds = np.char.lower(preds)

        score, mask = f(labels, preds)
        outputs = self.difference(labels[mask], preds[mask], show_output=show_output)
        print(f'- {f.__name__}: {score}')
        return outputs, mask

    def custom_eval(self, df, f=avg_p, lower=False):
        """Predict on ``df['input_text']`` and compare with ``df['target_text']``.

        Args:
            df: DataFrame with ``input_text`` and ``target_text`` columns.
            f: Metric forwarded to :meth:`calculate_difference`.
            lower: Forwarded to :meth:`calculate_difference`.

        Returns:
            tuple: ``(outputs, labels, preds, mask)``. ``labels`` and ``preds``
            are returned in their original casing even when ``lower`` is True;
            only the score, mask and reports use the lower-cased text.
        """
        labels = np.array(df['target_text'].tolist())
        input_text = df['input_text'].tolist()
        preds = np.array(self.model.predict(input_text))
        # Forward `f`; previously the argument was accepted but ignored.
        outputs, mask = self.calculate_difference(labels, preds, f=f, lower=lower, show_output=False)
        return outputs, labels, preds, mask

    def eval_train_test(self, train_df, test_df, lower=False, f=avg_p):
        """Run :meth:`custom_eval` on a train and a test frame.

        Args:
            train_df: Training DataFrame (``input_text`` / ``target_text``).
            test_df: Test DataFrame with the same columns.
            lower: Forwarded to :meth:`custom_eval`.
            f: Metric forwarded to :meth:`custom_eval`.

        Returns:
            dict: ``{"train": {...}, "test": {...}}`` where each inner dict has
            the keys ``outputs``, ``labels``, ``preds`` and ``mask``.
        """
        print('Eval train:')
        outputs_train, labels_train, preds_train, mask_train = self.custom_eval(train_df, f=f, lower=lower)
        print('\nEval test:')
        outputs_test, labels_test, preds_test, mask_test = self.custom_eval(test_df, f=f, lower=lower)
        return {
            "train": {"outputs": outputs_train, "labels": labels_train, "preds": preds_train, "mask": mask_train},
            "test": {"outputs": outputs_test, "labels": labels_test, "preds": preds_test, "mask": mask_test},
        }

    def predict(self, problems):
        """Run the model on one problem or a list of problems.

        Args:
            problems: A single string or a list of strings; a single string is
                wrapped in a list first.

        Returns:
            Whatever ``self.model.predict`` returns for the list of problems.
        """
        if isinstance(problems, str):
            problems = [problems]
        return self.model.predict(problems)

# Instantiate the experiment wrapper for the model directory "20220109165331".
E = Experiment(
    model_name="20220109165331",
)

In [8]:
# Spot check: a single present-tense problem with two quantities in one sentence.
E.predict(
    [
        "A man has 4 apples and 2 bananas. How many apples does the man have?",
    ]
)

Generating outputs: 100%|██████████| 1/1 [00:00<00:00,  4.90it/s]


['A man has 4 apples. He has 2 bananas. How many apples does the man have?']

In [3]:
# Spot check: a single problem phrased in the past tense.
E.predict(
    [
        "A man used to have 1 apples. A woman gave 3 apples to the man. How many apples did the man have?",
    ]
)

Generating outputs: 100%|██████████| 1/1 [00:00<00:00,  3.66it/s]


['A man has 1 apples. A woman gives 3 apples to the man. How many apples does the man have?']

In [11]:
# Load the input/target pairs, tag every row with the task prefix, and split
# them into train and test frames with a fixed seed.
data_dir = 'data/ACE - Word Problems/'
df = pd.read_csv(data_dir + 'gen_4_pairs.csv')
df = df.assign(prefix="nltocl")[["prefix", "input_text", "target_text"]]
q_train, q_test = train_test_split(df, random_state=10)
print(q_train.shape, q_test.shape)
q_train.head()

(513, 3) (171, 3)


Unnamed: 0,prefix,input_text,target_text
647,nltocl,There are 5 cars. If there is a car then it ne...,There are 5 cars. If there is a car then it ne...
231,nltocl,A rabbit has 2 holes with 8 carrots in each ho...,A rabbit has 2 holes with 8 carrots in each ho...
100,nltocl,A man had 20 euros. He lost 10 euros. How many...,A man has 20 euros. He loses 10 euros. How man...
278,nltocl,A school has 50 boys in the classroom. 90 girl...,A school has 50 male students. The school has ...
254,nltocl,A boy has 8 bananas. A girl gives 1 banana to ...,A boy has 8 bananas. A girl gives 1 banana to ...


In [13]:
# Load the present-simple and past-simple problem sets (index, problem, answer).
present_simple = pd.read_csv(f'{data_dir}ACE - Word Problems - Present Simple 2.csv')
past_simple = pd.read_csv(f'{data_dir}ACE - Word Problems - Past Simple.csv')
past_simple.head()

Unnamed: 0,index,problem,answer
0,1.1.1,A man had 3 apples. A woman gave 4 apples to t...,7
1,1.1.2,A woman had 1 ball. A man gave 9 balls to the ...,10
2,1.1.3,A boy had 8 bananas. A girl gave 1 banana to t...,9
3,1.1.4,A girl had 3 melons. A boy gave 6 melons to th...,9
4,1.2.1,A restaurant had 175 normal chairs and 20 juni...,195


In [14]:
# Feed the past-simple problems to the model and compare its output with the
# present-simple problems, first case-sensitively and then lower-cased.
# Assumes both CSVs list the same problems in the same order — TODO confirm.
labels = present_simple['problem'].tolist()
past_simple_pred = E.predict(past_simple['problem'].tolist())

past_simple_results = E.calculate_difference(labels, past_simple_pred)
print('lower:')
past_simple_results_lower = E.calculate_difference(labels, past_simple_pred, lower=True)

Generating outputs: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]

- avg_p: 0.7
lower:
- avg_p: 0.7875





In [15]:
# Case-insensitive evaluation on both splits of the generated pairs.
results = E.eval_train_test(q_train, q_test, lower=True)

Eval train:


Generating outputs: 100%|██████████| 65/65 [00:40<00:00,  1.61it/s]


- avg_p: 0.7582846003898636

Eval test:


Generating outputs: 100%|██████████| 22/22 [00:13<00:00,  1.59it/s]

- avg_p: 0.7251461988304093





In [23]:
# Show one diff report from the mismatching training examples (position 10).
print(
    results['train']['outputs'][10]
)

pred  : there is a boy. there is a girl. a man has 4 pieces. if he has 2 pieces then the girl gets 1 piece from the man and the boy gets 1 pieces from the father. how many pieces does the boy have?
=>
label : there is a boy. there is a girl. a man has 4 pieces. if he has 2 pieces then the girl gets 1 piece from the man and the boy gets 1 piece from the man. how many pieces does the boy have?
-- 136 : s
-- 147 : f
++ 148 : m
-- 151 : ther
++ 150 : n


## Eval difference

In [25]:
# Feed the present-simple problems to the model and compare the output with the
# same problems, i.e. check how often the model leaves them unchanged.
# NOTE(review): the results are stored under the past_simple_* names although
# the inputs are the present-simple problems; this overwrites the results of
# the earlier past-simple evaluation. Confirm whether that is intended.
labels = present_simple['problem'].tolist()
input_df = pd.read_csv(f'{data_dir}ACE - Word Problems - Present Simple 2.csv')
preds = E.predict(input_df['problem'].tolist())

past_simple_results = E.calculate_difference(labels, preds)
print('lower:')
past_simple_results_lower = E.calculate_difference(labels, preds, lower=True)

Generating outputs: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]

- avg_p: 0.7125
lower:
- avg_p: 0.775



