*Copyright (c) Microsoft Corporation. All rights reserved.*

*Licensed under the MIT License.*

# Text Classification of MultiNLI Sentences using Multiple Transformer Models  


#### TODO
1. Make sure that all the packages can be installed on Colab


In [1]:
import json
import os
import sys
from tempfile import TemporaryDirectory

import numpy as np
import pandas as pd
import scrapbook as sb
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from utils_nlp.common.timer import Timer
from utils_nlp.common.pytorch_utils import dataloader_from_dataset
from utils_nlp.dataset.multinli import load_pandas_df
from utils_nlp.models.transformers.sequence_classification import (
    Processor, SequenceClassifier)

  import pandas.util.testing as tm


In [2]:
import os
import tempfile
import shutil
import urllib
import zipfile
import pandas as pd



# your imports may be different
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
from spacy.lang.en import English

In [3]:
!python -m spacy download en_core_web_md
import en_core_web_md
nlp = en_core_web_md.load()
en = English()

OMP: Info #154: KMP_AFFINITY: Initial OS proc set respected: 0-5
OMP: Info #214: KMP_AFFINITY: decoding x2APIC ids.
OMP: Info #156: KMP_AFFINITY: 6 available OS procs
OMP: Info #157: KMP_AFFINITY: Uniform topology
OMP: Info #285: KMP_AFFINITY: topology layer "LL cache" is equivalent to "socket".
OMP: Info #285: KMP_AFFINITY: topology layer "L3 cache" is equivalent to "socket".
OMP: Info #285: KMP_AFFINITY: topology layer "L2 cache" is equivalent to "core".
OMP: Info #285: KMP_AFFINITY: topology layer "L1 cache" is equivalent to "core".
OMP: Info #285: KMP_AFFINITY: topology layer "thread" is equivalent to "core".
OMP: Info #191: KMP_AFFINITY: 1 socket x 6 cores/socket x 1 thread/core (6 total cores)
OMP: Info #216: KMP_AFFINITY: OS proc to physical thread map:
OMP: Info #171: KMP_AFFINITY: OS proc 0 maps to socket 0 core 0 
OMP: Info #171: KMP_AFFINITY: OS proc 1 maps to socket 0 core 1 
OMP: Info #171: KMP_AFFINITY: OS proc 2 maps to socket 0 core 2 
OMP: Info #171: KMP_AFFINITY: OS p

In [270]:
# Temporary folder for data we need during execution of this notebook (we'll clean up
# at the end, we promise)
# temp_dir = os.path.join(tempfile.gettempdir(), 'mind')

data_folder = "mind_dataset"
temp_dir = data_folder

os.makedirs(temp_dir, exist_ok=True)


# The dataset is split into training and validation set, each with a large and small version.
# The format of the four files are the same.
# For demonstration purpose, we will use small version validation set only.
base_url = 'https://mind201910small.blob.core.windows.net/release'
training_small_url = f'{base_url}/MINDsmall_train.zip'
validation_small_url = f'{base_url}/MINDsmall_dev.zip'
training_large_url = f'{base_url}/MINDlarge_train.zip'
validation_large_url = f'{base_url}/MINDlarge_dev.zip'

## Functions

In [271]:
def download_url(url,
                 destination_filename=None,
                 progress_updater=None,
                 force_download=False,
                 verbose=True):
    """
    Download a URL to a temporary file
    """
    if not verbose:
        progress_updater = None
    # This is not intended to guarantee uniqueness, we just know it happens to guarantee
    # uniqueness for this application.
    if destination_filename is None:
        url_as_filename = url.replace('://', '_').replace('/', '_')
        destination_filename = \
            os.path.join(temp_dir,url_as_filename)
    if (not force_download) and (os.path.isfile(destination_filename)):
        if verbose:
            print('Bypassing download of already-downloaded file {}'.format(
                os.path.basename(url)))
        return destination_filename
    if verbose:
        print('Downloading file {} to {}'.format(os.path.basename(url),
                                                 destination_filename),
              end='')
    urllib.request.urlretrieve(url, destination_filename, progress_updater)
    assert (os.path.isfile(destination_filename))
    nBytes = os.path.getsize(destination_filename)
    if verbose:
        print('...done, {} bytes.'.format(nBytes))
    return destination_filename

## Download and extract the files

In [272]:
# For demonstration purpose, we will use small version validation set only.
# This file is about 30MB.
zip_path = download_url(training_large_url, verbose=True)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(temp_dir)

os.listdir(temp_dir)

Downloading file MINDlarge_train.zip to mind_dataset/https_mind201910small.blob.core.windows.net_release_MINDlarge_train.zip...done, 530196631 bytes.


['behaviors.tsv',
 'https_mind201910small.blob.core.windows.net_release_MINDlarge_train.zip',
 'relation_embedding.vec',
 '__placeholder__',
 'https_mind201910small.blob.core.windows.net_release_MINDsmall_train.zip',
 'entity_embedding.vec',
 '.ipynb_checkpoints',
 'news.tsv',
 'https_mind201910small.blob.core.windows.net_release_MINDsmall_dev.zip']

## Read the files with pandas  
 The news.tsv file contains the detailed information of news articles involved in the behaviors.tsv file.
 It has 7 columns, which are divided by the tab symbol:
 - News ID
 - Category
 - Subcategory
 - Title
 - Abstract
 - URL
 - Title Entities (entities contained in the title of this news)
 - Abstract Entities (entities contained in the abstract of this news)

In [121]:
news_path = os.path.join(temp_dir, 'news.tsv')
news = pd.read_table(news_path,
              header=None,
              names=[
                  'id', 'category', 'subcategory', 'title', 'abstract', 'url',
                  'title_entities', 'abstract_entities'
              ])

In [122]:
news.head()

Unnamed: 0,id,category,subcategory,title,abstract,url,title_entities,abstract_entities
0,N55528,lifestyle,lifestyleroyals,"The Brands Queen Elizabeth, Prince Charles, an...","Shop the notebooks, jackets, and more that the...",https://assets.msn.com/labs/mind/AAGH0ET.html,"[{""Label"": ""Prince Philip, Duke of Edinburgh"",...",[]
1,N19639,health,weightloss,50 Worst Habits For Belly Fat,These seemingly harmless habits are holding yo...,https://assets.msn.com/labs/mind/AAB19MK.html,"[{""Label"": ""Adipose tissue"", ""Type"": ""C"", ""Wik...","[{""Label"": ""Adipose tissue"", ""Type"": ""C"", ""Wik..."
2,N61837,news,newsworld,The Cost of Trump's Aid Freeze in the Trenches...,Lt. Ivan Molchanets peeked over a parapet of s...,https://assets.msn.com/labs/mind/AAJgNsz.html,[],"[{""Label"": ""Ukraine"", ""Type"": ""G"", ""WikidataId..."
3,N53526,health,voices,I Was An NBA Wife. Here's How It Affected My M...,"I felt like I was a fraud, and being an NBA wi...",https://assets.msn.com/labs/mind/AACk2N6.html,[],"[{""Label"": ""National Basketball Association"", ..."
4,N38324,health,medical,"How to Get Rid of Skin Tags, According to a De...","They seem harmless, but there's a very good re...",https://assets.msn.com/labs/mind/AAAKEkt.html,"[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI...","[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI..."


In [123]:
news.shape

(51282, 8)

## Introduction
In this notebook, we fine-tune and evaluate a number of pretrained models on a subset of the [MultiNLI](https://www.nyu.edu/projects/bowman/multinli/) dataset.

We use a [sequence classifier](../../utils_nlp/models/transformers/sequence_classification.py) that wraps [Hugging Face's PyTorch implementation](https://github.com/huggingface/transformers) of different transformers, like [BERT](https://github.com/google-research/bert), [XLNet](https://github.com/zihangdai/xlnet), and [RoBERTa](https://github.com/pytorch/fairseq).

In [222]:
# notebook parameters
DATA_FOLDER = TemporaryDirectory().name
CACHE_DIR = TemporaryDirectory().name
NUM_EPOCHS = 1
BATCH_SIZE = 32
NUM_GPUS = torch.cuda.device_count()
MAX_LEN = 100
TRAIN_DATA_FRACTION = 0.5
TEST_DATA_FRACTION = 0.5
TRAIN_SIZE = 0.75
LABEL_COL = "genre"
TEXT_COL = "sentence1"
MODEL_NAMES = ["distilbert-base-uncased", "roberta-base", "xlnet-base-cased"]

In [223]:
NUM_GPUS

1

## Read Dataset
We start by loading a subset of the data. The following function also downloads and extracts the files, if they don't exist in the data folder.

The MultiNLI dataset is mainly used for natural language inference (NLI) tasks, where the inputs are sentence pairs and the labels are entailment indicators. The sentence pairs are also classified into *genres* that allow for more coverage and better evaluation of NLI models.

For our classification task, we use the first sentence only as the text input, and the corresponding genre as the label. We select the examples corresponding to one of the entailment labels (*neutral* in this case) to avoid duplicate rows, as the sentences are not unique, whereas the sentence pairs are.

In [260]:
df = news[["title", "abstract", "category"]]

Exploring the data, we observed that there are rows with NaN. This is interesting, because you would expect that from a numerical data and not text. However, we need to spend some time data wrangling and cleaning.  

1. First, we remove the rows with Nan
1. To avoid scrapping the entire news article with their links, we decided to use a combination of the news title and abstract, from the MIND dataset, as the full text to train our classifiers
1. There is a category of article called `news`. We decided to choose the top unique 6 categories removing the `news` category.

In [261]:
df.isnull()

Unnamed: 0,title,abstract,category
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
...,...,...,...
51277,False,False,False
51278,False,True,False
51279,False,False,False
51280,False,False,False


In [262]:
# df = df.replace(np.nan, '', regex=True)
df.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [263]:
df.shape

(48616, 3)

In [264]:
df["text"] = df["title"].astype(str) + df["abstract"].astype(str)
df.drop(columns=['title', 'abstract'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [265]:
df.head()
display(df.shape)

(48616, 2)

In [232]:
# df[[LABEL_COL, TEXT_COL]].head()

In [241]:
categories_dict = df.category.value_counts(dropna=True).to_dict()
categories_dict = dict(filter(lambda cat: cat[1] > 200, categories_dict.items()))

In [162]:
categories = (df.category.value_counts())

In [242]:
# chosen = ['news', 'sports', 'finance', 'foodanddrink', 'lifestyle', 'travel']
chosen = categories_dict.keys()[:7] - ['news']

In [243]:
df = df[df.category.isin(chosen)]

In [244]:
display(df.head(10))
display(df.shape)

Unnamed: 0,category,text
0,lifestyle,"The Brands Queen Elizabeth, Prince Charles, an..."
1,health,50 Worst Habits For Belly FatThese seemingly h...
2,news,The Cost of Trump's Aid Freeze in the Trenches...
3,health,I Was An NBA Wife. Here's How It Affected My M...
4,health,"How to Get Rid of Skin Tags, According to a De..."
5,sports,Should NFL be able to fine players for critici...
6,weather,It's been Orlando's hottest October ever so fa...
7,news,Chile: Three die in supermarket fire amid prot...
8,entertainment,Best PS5 games: top PlayStation 5 titles to lo...
9,news,"How to report weather-related closings, delays..."


(48597, 2)

We split the data for training and testing, sample a fraction for faster execution, and encode the class labels:

In [245]:
# split
df_train, df_test = train_test_split(df, train_size = TRAIN_SIZE, random_state=0)



In [246]:
# sample
df_train = df_train.sample(frac=TRAIN_DATA_FRACTION).reset_index(drop=True)
df_test = df_test.sample(frac=TEST_DATA_FRACTION).reset_index(drop=True)

The examples in the dataset are grouped into 5 genres:

In [247]:
LABEL_COL = 'category'
TEXT_COL = 'text'

In [248]:
df_train[LABEL_COL].value_counts()

news             5694
sports           4896
finance          1126
foodanddrink      993
lifestyle         871
travel            857
video             743
weather           722
health            709
autos             599
tv                297
music             282
movies            226
entertainment     209
Name: category, dtype: int64

In [249]:
df_test[LABEL_COL].value_counts()

news             1909
sports           1694
finance           375
foodanddrink      285
lifestyle         283
travel            280
video             268
weather           231
health            229
autos             192
tv                108
music              84
movies             71
entertainment      66
Name: category, dtype: int64

In [250]:
# encode labels
label_encoder = LabelEncoder()
df_train[LABEL_COL] = label_encoder.fit_transform(df_train[LABEL_COL])
df_test[LABEL_COL] = label_encoder.transform(df_test[LABEL_COL])

num_labels = len(np.unique(df_train[LABEL_COL]))

In [251]:
print("Number of unique labels: {}".format(num_labels))
print("Number of training examples: {}".format(df_train.shape[0]))
print("Number of testing examples: {}".format(df_test.shape[0]))

Number of unique labels: 14
Number of training examples: 18224
Number of testing examples: 6075


In [252]:
display( len(np.unique(df_train[LABEL_COL])))
display( len(np.unique(df_test[LABEL_COL])))

14

14

## Select Pretrained Models

Several pretrained models have been made available by [Hugging Face](https://github.com/huggingface/transformers). For text classification, the following pretrained models are supported.

In [258]:
pd.DataFrame({"model_name": SequenceClassifier.list_supported_models()})

Unnamed: 0,model_name
0,albert-base-v1
1,albert-base-v2
2,albert-large-v1
3,albert-large-v2
4,albert-xlarge-v1
...,...
74,xlm-roberta-large-finetuned-conll02-spanish
75,xlm-roberta-large-finetuned-conll03-english
76,xlm-roberta-large-finetuned-conll03-german
77,xlnet-base-cased


## Fine-tune

Our wrappers make it easy to fine-tune different models in a unified way, hiding the preprocessing details that are needed before training. In this example, we're going to select the following models and use the same piece of code to fine-tune them on our genre classification task. Note that some models were pretrained on multilingual datasets and can be used with non-English datasets.

In [254]:
print(MODEL_NAMES)

['distilbert-base-uncased', 'roberta-base', 'xlnet-base-cased']


In [255]:
MODEL_NAMES[0] in SequenceClassifier.list_supported_models()

True

For each pretrained model, we preprocess the data, fine-tune the classifier, score the test set, and store the evaluation results.

In [259]:
results = {}
for name in tqdm(MODEL_NAMES[:1], disable=True):

    # preprocess
    processor = Processor(
        model_name=str(name),
        to_lower=name.endswith("uncased"),
        cache_dir=CACHE_DIR
    )
    train_dataset = processor.dataset_from_dataframe(
        df_train, TEXT_COL, LABEL_COL, max_len=MAX_LEN
    )
    train_dataloader = dataloader_from_dataset(
        train_dataset, batch_size=BATCH_SIZE, num_gpus=NUM_GPUS, shuffle=True
    )
    test_dataset = processor.dataset_from_dataframe(
        df_test, TEXT_COL, LABEL_COL, max_len=MAX_LEN
    )
    test_dataloader = dataloader_from_dataset(
        test_dataset, batch_size=BATCH_SIZE, num_gpus=NUM_GPUS, shuffle=False
    )

    # fine-tune
    classifier = SequenceClassifier(
        model_name=name, num_labels=num_labels, cache_dir=CACHE_DIR
    )
    with Timer() as t:
        classifier.fit(
            train_dataloader, num_epochs=NUM_EPOCHS, num_gpus=NUM_GPUS, verbose=True,
        )
    train_time = t.interval / 3600

    # predict
    preds = classifier.predict(test_dataloader, num_gpus=NUM_GPUS, verbose=True)

    # eval
    accuracy = accuracy_score(df_test[LABEL_COL], preds)
    class_report = classification_report(
        df_test[LABEL_COL], preds, target_names=label_encoder.classes_, output_dict=True
    )

    # save results
    results[name] = {
        "accuracy": accuracy,
        "f1-score": class_report["macro avg"]["f1-score"],
        "time(hrs)": train_time,
    }

Model =====>  distilbert-base-uncased


Iteration:   2%|▏         | 10/570 [00:05<04:48,  1.94it/s]

timestamp: 06/08/2020 01:29:40, average loss: 2.299556, time duration: 5.246074,
                            number of examples in current reporting: 320, step 10
                            out of total 570


Iteration:   4%|▎         | 20/570 [00:10<04:43,  1.94it/s]

timestamp: 06/08/2020 01:29:46, average loss: 1.828251, time duration: 5.156210,
                            number of examples in current reporting: 320, step 20
                            out of total 570


Iteration:   5%|▌         | 30/570 [00:15<04:41,  1.92it/s]

timestamp: 06/08/2020 01:29:51, average loss: 1.480904, time duration: 5.243054,
                            number of examples in current reporting: 320, step 30
                            out of total 570


Iteration:   7%|▋         | 40/570 [00:20<04:29,  1.96it/s]

timestamp: 06/08/2020 01:29:56, average loss: 1.387157, time duration: 5.108166,
                            number of examples in current reporting: 320, step 40
                            out of total 570


Iteration:   9%|▉         | 50/570 [00:25<04:25,  1.96it/s]

timestamp: 06/08/2020 01:30:01, average loss: 1.223475, time duration: 5.128988,
                            number of examples in current reporting: 320, step 50
                            out of total 570


Iteration:  11%|█         | 60/570 [00:31<04:24,  1.93it/s]

timestamp: 06/08/2020 01:30:06, average loss: 1.268825, time duration: 5.167764,
                            number of examples in current reporting: 320, step 60
                            out of total 570


Iteration:  12%|█▏        | 70/570 [00:36<04:18,  1.93it/s]

timestamp: 06/08/2020 01:30:11, average loss: 1.038846, time duration: 5.184612,
                            number of examples in current reporting: 320, step 70
                            out of total 570


Iteration:  14%|█▍        | 80/570 [00:41<04:12,  1.94it/s]

timestamp: 06/08/2020 01:30:17, average loss: 1.060197, time duration: 5.155249,
                            number of examples in current reporting: 320, step 80
                            out of total 570


Iteration:  16%|█▌        | 90/570 [00:46<04:08,  1.93it/s]

timestamp: 06/08/2020 01:30:22, average loss: 1.137420, time duration: 5.139284,
                            number of examples in current reporting: 320, step 90
                            out of total 570


Iteration:  18%|█▊        | 100/570 [00:51<04:06,  1.91it/s]

timestamp: 06/08/2020 01:30:27, average loss: 1.044322, time duration: 5.224073,
                            number of examples in current reporting: 320, step 100
                            out of total 570


Iteration:  19%|█▉        | 110/570 [00:56<03:57,  1.94it/s]

timestamp: 06/08/2020 01:30:32, average loss: 0.959090, time duration: 5.181787,
                            number of examples in current reporting: 320, step 110
                            out of total 570


Iteration:  21%|██        | 120/570 [01:02<03:55,  1.91it/s]

timestamp: 06/08/2020 01:30:37, average loss: 0.899837, time duration: 5.226679,
                            number of examples in current reporting: 320, step 120
                            out of total 570


Iteration:  23%|██▎       | 130/570 [01:07<03:47,  1.94it/s]

timestamp: 06/08/2020 01:30:42, average loss: 0.826398, time duration: 5.166980,
                            number of examples in current reporting: 320, step 130
                            out of total 570


Iteration:  25%|██▍       | 140/570 [01:12<03:44,  1.92it/s]

timestamp: 06/08/2020 01:30:48, average loss: 0.829793, time duration: 5.244097,
                            number of examples in current reporting: 320, step 140
                            out of total 570


Iteration:  26%|██▋       | 150/570 [01:17<03:40,  1.91it/s]

timestamp: 06/08/2020 01:30:53, average loss: 1.024284, time duration: 5.247450,
                            number of examples in current reporting: 320, step 150
                            out of total 570


Iteration:  28%|██▊       | 160/570 [01:23<03:34,  1.91it/s]

timestamp: 06/08/2020 01:30:58, average loss: 0.919913, time duration: 5.208244,
                            number of examples in current reporting: 320, step 160
                            out of total 570


Iteration:  30%|██▉       | 170/570 [01:28<03:28,  1.92it/s]

timestamp: 06/08/2020 01:31:03, average loss: 0.893251, time duration: 5.226618,
                            number of examples in current reporting: 320, step 170
                            out of total 570


Iteration:  32%|███▏      | 180/570 [01:33<03:24,  1.90it/s]

timestamp: 06/08/2020 01:31:09, average loss: 0.877417, time duration: 5.232664,
                            number of examples in current reporting: 320, step 180
                            out of total 570


Iteration:  33%|███▎      | 190/570 [01:38<03:19,  1.90it/s]

timestamp: 06/08/2020 01:31:14, average loss: 0.814778, time duration: 5.248767,
                            number of examples in current reporting: 320, step 190
                            out of total 570


Iteration:  35%|███▌      | 200/570 [01:43<03:13,  1.91it/s]

timestamp: 06/08/2020 01:31:19, average loss: 0.795289, time duration: 5.242972,
                            number of examples in current reporting: 320, step 200
                            out of total 570


Iteration:  37%|███▋      | 210/570 [01:49<03:09,  1.90it/s]

timestamp: 06/08/2020 01:31:24, average loss: 0.934758, time duration: 5.280176,
                            number of examples in current reporting: 320, step 210
                            out of total 570


Iteration:  39%|███▊      | 220/570 [01:54<03:06,  1.88it/s]

timestamp: 06/08/2020 01:31:30, average loss: 0.777315, time duration: 5.319599,
                            number of examples in current reporting: 320, step 220
                            out of total 570


Iteration:  40%|████      | 230/570 [01:59<03:01,  1.87it/s]

timestamp: 06/08/2020 01:31:35, average loss: 0.773851, time duration: 5.325432,
                            number of examples in current reporting: 320, step 230
                            out of total 570


Iteration:  42%|████▏     | 240/570 [02:05<02:55,  1.88it/s]

timestamp: 06/08/2020 01:31:40, average loss: 1.006330, time duration: 5.324053,
                            number of examples in current reporting: 320, step 240
                            out of total 570


Iteration:  44%|████▍     | 250/570 [02:10<02:50,  1.88it/s]

timestamp: 06/08/2020 01:31:46, average loss: 0.783110, time duration: 5.321403,
                            number of examples in current reporting: 320, step 250
                            out of total 570


Iteration:  46%|████▌     | 260/570 [02:15<02:44,  1.89it/s]

timestamp: 06/08/2020 01:31:51, average loss: 0.741106, time duration: 5.276181,
                            number of examples in current reporting: 320, step 260
                            out of total 570


Iteration:  47%|████▋     | 270/570 [02:21<02:39,  1.88it/s]

timestamp: 06/08/2020 01:31:56, average loss: 0.881073, time duration: 5.304994,
                            number of examples in current reporting: 320, step 270
                            out of total 570


Iteration:  49%|████▉     | 280/570 [02:26<02:34,  1.87it/s]

timestamp: 06/08/2020 01:32:02, average loss: 0.818452, time duration: 5.329850,
                            number of examples in current reporting: 320, step 280
                            out of total 570


Iteration:  51%|█████     | 290/570 [02:31<02:29,  1.88it/s]

timestamp: 06/08/2020 01:32:07, average loss: 0.788973, time duration: 5.303473,
                            number of examples in current reporting: 320, step 290
                            out of total 570


Iteration:  53%|█████▎    | 300/570 [02:37<02:23,  1.88it/s]

timestamp: 06/08/2020 01:32:12, average loss: 0.892200, time duration: 5.303161,
                            number of examples in current reporting: 320, step 300
                            out of total 570


Iteration:  54%|█████▍    | 310/570 [02:42<02:17,  1.89it/s]

timestamp: 06/08/2020 01:32:18, average loss: 0.813662, time duration: 5.287463,
                            number of examples in current reporting: 320, step 310
                            out of total 570


Iteration:  56%|█████▌    | 320/570 [02:47<02:12,  1.89it/s]

timestamp: 06/08/2020 01:32:23, average loss: 0.819593, time duration: 5.286753,
                            number of examples in current reporting: 320, step 320
                            out of total 570


Iteration:  58%|█████▊    | 330/570 [02:52<02:06,  1.89it/s]

timestamp: 06/08/2020 01:32:28, average loss: 0.827823, time duration: 5.299879,
                            number of examples in current reporting: 320, step 330
                            out of total 570


Iteration:  59%|█████▉    | 336/570 [02:56<02:04,  1.88it/s]

KeyboardInterrupt: 

## Evaluate

Finally, we report the accuracy and F1-score metrics for each model, as well as the fine-tuning time in hours.

In [257]:
df_results = pd.DataFrame(results)
df_results

Unnamed: 0,distilbert-base-uncased,roberta-base,xlnet-base-cased
accuracy,0.77284,0.783374,0.781399
f1-score,0.63245,0.647413,0.648331
time(hrs),0.083547,0.164331,0.23344


In [34]:
# # for testing
# sb.glue("accuracy", df_results.iloc[0, :].mean())
# sb.glue("f1", df_results.iloc[1, :].mean())

In [190]:
# from transformers import AutoConfig, AutoTokenizer

In [192]:
# config = AutoConfig.from_pretrained(MODEL_NAMES[2])
# config

In [191]:
# tokenizer = AutoTokenizer.from_pretrained(
#             MODEL_NAMES[0],
#             do_lower_case=True,
#             cache_dir=CACHE_DIR,
#             output_loading_info=False,
#         )

In [194]:
# processor = Processor(
#     model_name=MODEL_NAMES[0],
#     to_lower=True,
#     cache_dir=CACHE_DIR,
# )

In [44]:
# from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

In [45]:

# # Define model 
# model_name = MODEL_NAMES[0] # distillbert  uncased
# tokenizer_initial_checkpoint = "bert-base-uncased"
# model = AutoModelForTokenClassification.from_pretrained(model_name)
# tokenizer = AutoTokenizer.from_pretrained(tokenizer_initial_checkpoint)

# # Text Classification pipeline, specifying the checkpoint identifier
# pipeline('text-classification', model=model_name, tokenizer=tokenizer_initial_checkpoint)
