In [7]:
from utils import *
import paddle
import paddle.nn.functional as F
import pandas as pd


# Notebook-wide text vectorizer. `TextVectorizer` is not imported by name in
# this cell, so it presumably comes from `from utils import *` -- TODO confirm.
V = TextVectorizer()
# Judging by the method name, this loads the vocabulary from a JSON file; it is
# called without arguments, so the file location is whatever the method
# defaults to. Later cells read `V.vocab_to_idx` and pass `V` to `get_trans_fn`.
V.load_vocab_from_json()

In [8]:
def get_model(model):
    """Wrap a network in ``paddle.Model`` and configure it for training.

    The wrapper is prepared with an Adam optimizer (learning rate 5e-4) over
    the wrapped model's parameters, a cross-entropy loss and an accuracy
    metric.

    Args:
        model: the network to wrap; it is handed directly to ``paddle.Model``.

    Returns:
        The prepared ``paddle.Model`` instance.
    """
    wrapped = paddle.Model(model)
    wrapped.prepare(
        paddle.optimizer.Adam(learning_rate=5e-4, parameters=wrapped.parameters()),
        paddle.nn.CrossEntropyLoss(),
        paddle.metric.Accuracy(),
    )
    return wrapped


def predict(model, data_loader):
    """Run inference over ``data_loader`` and collect predictions and labels.

    Args:
        model: object exposing ``predict(data_loader)``; the first element of
            its result is iterated batch by batch.
        data_loader: iterable of batches whose last element holds the labels
            and supports ``.tolist()``.

    Returns:
        ``(predictions, labels)`` as two flat Python lists. The loader is
        traversed once by ``model.predict`` and once more here for the labels,
        so the two lists only line up if both passes yield batches in the
        same order.
    """
    batch_logits = model.predict(data_loader)

    # Second pass over the loader: gather the gold labels batch by batch.
    labels = [label for batch in data_loader for label in batch[-1].tolist()]

    predictions = []
    for raw in batch_logits[0]:
        probs = F.softmax(paddle.to_tensor(raw), axis=1)
        # Most probable class index for every row of the batch.
        predictions += paddle.argmax(probs, axis=1).numpy().tolist()

    return predictions, labels


def get_accu_pre_recall_f1(preds, labels):
    """Compute accuracy, precision, recall and F1 for binary predictions.

    Class ``1`` is treated as the positive class. A (pred, label) pair is a
    true positive for (1, 1), a false positive for (1, 0) and a true negative
    for (0, 0); every other pair is counted as a false negative.

    Args:
        preds: sized sequence of predicted class ids.
        labels: sequence of gold class ids, paired with ``preds`` by position.

    Returns:
        ``[accuracy, precision, recall, f1]``, each rounded to 3 decimals.
        A metric whose denominator is zero is reported as the string
        ``'Nan'``, and so is every metric that depends on it:

        * empty ``preds``            -> ``['Nan', 'Nan', 'Nan', 'Nan']``
        * no positive predictions    -> ``[A, 'Nan', 'Nan', 'Nan']``
        * no positives to be found   -> ``[A, P, 'Nan', 'Nan']``

        When precision and recall are both 0, F1 is reported as ``0.0``.
    """
    undefined = 'Nan'

    # Nothing to score: avoid dividing by len(preds) == 0.
    if len(preds) == 0:
        return [undefined, undefined, undefined, undefined]

    tp = fp = tn = fn = 0
    for pred, label in zip(preds, labels):
        if pred == 1 and label == 1:
            tp += 1
        elif pred == 1 and label == 0:
            fp += 1
        elif pred == 0 and label == 0:
            tn += 1
        else:
            fn += 1

    A = round((tp + tn) / len(preds), 3)

    # Precision is undefined when nothing was predicted positive.
    if tp + fp == 0:
        return [A, undefined, undefined, undefined]
    P = round(tp / (tp + fp), 3)

    # Recall is undefined when there are no positives to be found.
    if tp + fn == 0:
        return [A, P, undefined, undefined]
    R = round(tp / (tp + fn), 3)

    # F1 is derived from the rounded P and R; guard the 0/0 case.
    F1 = round(2 * P * R / (P + R), 3) if P + R > 0 else 0.0

    return [A, P, R, F1]


In [9]:
from paddle_models.BoW import BoW
from paddle_models.CNN import CNN
from paddle_models.LSTM import LSTM
from paddle_models.GRU import GRU


def do_train_and_evaluate(train_path,
                          network,
                          epochs=3,
                          dev_path='dev.tsv',
                          test_path='test.tsv',
                          batch_size=64,
                          save=False,
                          save_dir='ckpt',
                          device="gpu",
                          log_freq=500):
    """Train one classifier on ``train_path`` and score it on the test set.

    Args:
        train_path: path of the training file, passed to ``load_dataset``.
        network: one of ``"bow"``, ``"cnn"``, ``"lstm"``, ``"gru"``
            (case-insensitive).
        epochs: number of training epochs. Coerced with ``int()``, so an
            integer-like string such as ``"3"`` is still accepted.
        dev_path: path of the validation file.
        test_path: path of the test file.
        batch_size: forwarded to ``model.fit``.
        save: when true, ``save_dir`` is forwarded to ``model.fit``.
        save_dir: checkpoint directory used only when ``save`` is true.
        device: device string passed to ``paddle.set_device``.
        log_freq: forwarded to ``model.fit``.

    Returns:
        ``[accuracy, precision, recall, f1]`` as produced by
        ``get_accu_pre_recall_f1`` on the test-set predictions.

    Raises:
        ValueError: if ``network`` is not a supported name, or ``epochs``
            cannot be converted to an int.
    """
    # The previous default was the string "3", which was handed to
    # ``model.fit`` unchanged; normalise to an int up front.
    epochs = int(epochs)

    paddle.set_device(device)

    # network name -> (model class, value of include_seq_len for that model)
    supported = {
        "bow": (BoW, False),
        "cnn": (CNN, False),
        "lstm": (LSTM, True),
        "gru": (GRU, True),
    }
    spec = supported.get(network.lower())
    if spec is None:
        raise ValueError(f"Only supports: bow, cnn, lstm, gru, not {network}")
    model_cls, include_seq_len = spec
    model = model_cls(len(V.vocab_to_idx), 2)

    train_set, dev_set, test_set = load_dataset([train_path, dev_path, test_path])
    trans_fn = get_trans_fn(V, include_seq_len)
    batchify_fn = get_batchify_fn(include_seq_len)
    train_loader = create_dataloader(train_set, trans_fn, batchify_fn)
    dev_loader = create_dataloader(dev_set, trans_fn, batchify_fn)
    # The test loader is explicitly unshuffled; ``predict`` walks it twice and
    # pairs predictions with labels by position.
    test_loader = create_dataloader(test_set, trans_fn, batchify_fn, shuffle=False)

    model = get_model(model)

    # NOTE(review): ``batch_size`` only reaches ``model.fit``; it is not passed
    # to ``create_dataloader``. If the loaders are ``paddle.io.DataLoader``
    # instances, Paddle documents that ``fit`` ignores it -- confirm.
    fit_kwargs = dict(epochs=epochs, batch_size=batch_size,
                      verbose=2, log_freq=log_freq)
    if save:
        fit_kwargs["save_dir"] = save_dir
    model.fit(train_loader, dev_loader, **fit_kwargs)

    preds, labels = predict(model, test_loader)
    accu, prec, recall, f1 = get_accu_pre_recall_f1(preds, labels)

    return [accu, prec, recall, f1]

In [10]:
from IPython.display import clear_output

# Baseline experiments: every network on every non-augmented training split.
columns = ['TrainType', 'EditType', 'ClfModel', 'TrainSize', 'Accuracy', 'Precision', 'Recall', 'F1']
train_sizes = ['0.5k', '1k', '3k', '6k', 'full']
net_names = ['bow', 'cnn', 'lstm', 'gru']

out = []

for size in train_sizes:
    for net in net_names:
        print(f"Size: {size}; Model: {net}")
        train_file = f'./all_data/train_{size}.txt'
        res = do_train_and_evaluate(train_file, net, 3, device='gpu')
        out.append(['base', 'None', net, size, *res])
        # Drop the previous run's training log from the cell output.
        clear_output(wait=True)

        # Re-export after every run, so the file on disk always reflects the
        # runs completed so far.
        df = pd.DataFrame(out, columns=columns)
        df.to_excel('base_model_stats.xlsx', index=False)

Size: full; Model: gru
The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/3
step 150/150 - loss: 0.3252 - acc: 0.7055 - 44ms/step
Eval begin...
step 19/19 - loss: 0.3810 - acc: 0.8525 - 37ms/step
Eval samples: 1200
Epoch 2/3
step 150/150 - loss: 0.2743 - acc: 0.9218 - 45ms/step
Eval begin...
step 19/19 - loss: 0.3564 - acc: 0.8842 - 36ms/step
Eval samples: 1200
Epoch 3/3
step 150/150 - loss: 0.0610 - acc: 0.9718 - 44ms/step
Eval begin...
step 19/19 - loss: 0.5016 - acc: 0.8925 - 36ms/step
Eval samples: 1200
Predict begin...
Predict samples: 1200


In [12]:
from IPython.display import clear_output

# Augmentation experiments: every network on every augmented training split,
# once per augmentation type.
columns = ['TrainType', 'EditType', 'ClfModel', 'TrainSize', 'Accuracy', 'Precision', 'Recall', 'F1']
aug_types = ['reda', 'reda_ngram']
train_sizes = ['0.5k', '1k', '3k', '6k', 'full']
net_names = ['bow', 'cnn', 'lstm', 'gru']

out = []

for aug_type in aug_types:
    for size in train_sizes:
        for net in net_names:
            print(f"aug_type:{aug_type}. Size: {size}; Model: {net}")
            train_file = f'./all_data/train_{size}_aug_{aug_type}.txt'
            res = do_train_and_evaluate(train_file, net, 3, device="gpu")
            out.append([aug_type, 'All', net, size, *res])
            # Drop the previous run's training log from the cell output.
            clear_output(wait=True)

            # Re-export after every run, so the file on disk always reflects
            # the runs completed so far.
            df = pd.DataFrame(out, columns=columns)
            df.to_excel('aug_model_stats.xlsx', index=False)

aug_type:reda_ngram. Size: full; Model: gru
The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/3
step 500/899 - loss: 0.0324 - acc: 0.8716 - 45ms/step
step 899/899 - loss: 0.0112 - acc: 0.9170 - 44ms/step
Eval begin...
step 19/19 - loss: 0.3225 - acc: 0.8867 - 37ms/step
Eval samples: 1200
Epoch 2/3
step 500/899 - loss: 0.0119 - acc: 0.9881 - 46ms/step
step 899/899 - loss: 0.1134 - acc: 0.9889 - 45ms/step
Eval begin...
step 19/19 - loss: 0.2845 - acc: 0.8800 - 36ms/step
Eval samples: 1200
Epoch 3/3
step 500/899 - loss: 0.0390 - acc: 0.9922 - 45ms/step
step 899/899 - loss: 0.0629 - acc: 0.9918 - 45ms/step
Eval begin...
step 19/19 - loss: 0.7802 - acc: 0.8658 - 37ms/step
Eval samples: 1200
Predict begin...
Predict samples: 1200
