In [29]:
# !pip install simpletransformers

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import collections
import logging
import os
import unicodedata
from io import open

def load_vocab(vocab_file):
    """Read a one-token-per-line vocabulary file into an ordered mapping.

    Args:
      vocab_file: Path to a UTF-8 text file holding one token per line.

    Returns:
      An ``OrderedDict`` mapping each whitespace-stripped line to its 0-based
      line number. If a token appears on several lines it keeps the position
      of its first occurrence but is mapped to the index of its last one.
    """
    vocab = collections.OrderedDict()
    with open(vocab_file, "r", encoding="utf-8") as reader:
        # Blank lines are kept (as the empty token) so indices stay equal to
        # line numbers.
        for line_number, line in enumerate(reader):
            vocab[line.strip()] = line_number
    return vocab


def whitespace_tokenize(text):
    """Strip `text` and split it on runs of whitespace.

    Returns an empty list when nothing but whitespace (or nothing at all) is
    left after stripping.
    """
    stripped = text.strip()
    return stripped.split() if stripped else []


class BertTokenizer(object):
    """Runs end-to-end tokenization: punctuation splitting + wordpiece"""

    # The methods below used to call a module-level `logger` that is not
    # defined anywhere in this notebook, so every logging path ended in a
    # NameError. The logger is resolved here, while the class body executes,
    # so it does not depend on what the global name `logging` refers to later
    # in the session.
    _logger = logging.getLogger(__name__)

    def __init__(self, vocab_file, do_lower_case=True, max_len=None, do_basic_tokenize=True,
                 never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
        """Constructs a BertTokenizer.

        Args:
          vocab_file: Path to a one-wordpiece-per-line vocabulary file
          do_lower_case: Whether to lower case the input
                         Only has an effect when do_basic_tokenize=True
          max_len: An artificial maximum length; `convert_tokens_to_ids` logs
                         a warning when a sequence is longer than this.
                         Defaults to int(1e12), i.e. effectively unlimited.
          do_basic_tokenize: Whether to do basic tokenization before wordpiece.
          never_split: Tokens which will never be lower-cased or split during
                         basic tokenization.
                         Only has an effect when do_basic_tokenize=True

        Raises:
          ValueError: if `vocab_file` is not an existing file.
        """
        if not os.path.isfile(vocab_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
                "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
        self.vocab = load_vocab(vocab_file)
        # Reverse lookup (id -> token) in the same order as the vocabulary.
        self.ids_to_tokens = collections.OrderedDict(
            (index, token) for token, index in self.vocab.items())
        self.do_basic_tokenize = do_basic_tokenize
        if do_basic_tokenize:
            self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case,
                                                  never_split=never_split)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
        self.max_len = max_len if max_len is not None else int(1e12)

    def tokenize(self, text):
        """Splits `text` into wordpiece tokens.

        With basic tokenization enabled, every basic token is broken into
        wordpieces separately; otherwise the raw text is handed to the
        wordpiece tokenizer directly.
        """
        if not self.do_basic_tokenize:
            return self.wordpiece_tokenizer.tokenize(text)
        return [sub_token
                for token in self.basic_tokenizer.tokenize(text)
                for sub_token in self.wordpiece_tokenizer.tokenize(token)]

    def convert_tokens_to_ids(self, tokens):
        """Converts a sequence of tokens into ids using the vocab.

        Raises:
          KeyError: if a token is not in the vocabulary.
        """
        ids = [self.vocab[token] for token in tokens]
        if len(ids) > self.max_len:
            # Only a warning: the ids are still returned in full.
            self._logger.warning(
                "Token indices sequence length is longer than the specified maximum "
                " sequence length for this BERT model ({} > {}). Running this"
                " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
            )
        return ids

    def convert_ids_to_tokens(self, ids):
        """Converts a sequence of ids in wordpiece tokens using the vocab.

        Raises:
          KeyError: if an id is not in the vocabulary.
        """
        return [self.ids_to_tokens[i] for i in ids]

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None, *inputs, **kwargs):
        """
        Instantiate a tokenizer from a pre-trained model's vocabulary file.
        Download and cache the vocabulary file if needed.

        NOTE(review): this method reads the module-level names
        PRETRAINED_VOCAB_ARCHIVE_MAP, PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP,
        VOCAB_NAME and cached_path. None of them is defined in the visible part
        of this notebook, so they must be provided before calling it.

        Args:
          pretrained_model_name_or_path: A key of PRETRAINED_VOCAB_ARCHIVE_MAP,
            or a path/url to a vocabulary file, or a directory containing one.
          cache_dir: Forwarded to `cached_path`.
          *inputs, **kwargs: Forwarded to the constructor. `do_lower_case` is
            overridden to match a known (un)cased model name, and `max_len` is
            capped at the model's positional-embedding size when that is known.

        Returns:
          The tokenizer, or None when the vocabulary file cannot be resolved.
        """
        if pretrained_model_name_or_path in PRETRAINED_VOCAB_ARCHIVE_MAP:
            vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name_or_path]
            if '-cased' in pretrained_model_name_or_path and kwargs.get('do_lower_case', True):
                cls._logger.warning("The pre-trained model you are loading is a cased model but you have not set "
                                    "`do_lower_case` to False. We are setting `do_lower_case=False` for you but "
                                    "you may want to check this behavior.")
                kwargs['do_lower_case'] = False
            elif '-cased' not in pretrained_model_name_or_path and not kwargs.get('do_lower_case', True):
                cls._logger.warning("The pre-trained model you are loading is an uncased model but you have set "
                                    "`do_lower_case` to False. We are setting `do_lower_case=True` for you "
                                    "but you may want to check this behavior.")
                kwargs['do_lower_case'] = True
        else:
            vocab_file = pretrained_model_name_or_path
        if os.path.isdir(vocab_file):
            vocab_file = os.path.join(vocab_file, VOCAB_NAME)
        # redirect to the cache, if necessary
        try:
            resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir)
        except EnvironmentError:
            cls._logger.error(
                "Model name '{}' was not found in model name list ({}). "
                "We assumed '{}' was a path or url but couldn't find any file "
                "associated to this path or url.".format(
                    pretrained_model_name_or_path,
                    ', '.join(PRETRAINED_VOCAB_ARCHIVE_MAP.keys()),
                    vocab_file))
            return None
        if resolved_vocab_file == vocab_file:
            cls._logger.info("loading vocabulary file {}".format(vocab_file))
        else:
            cls._logger.info("loading vocabulary file {} from cache at {}".format(
                vocab_file, resolved_vocab_file))
        if pretrained_model_name_or_path in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
            # if we're using a pretrained model, ensure the tokenizer wont index sequences longer
            # than the number of positional embeddings
            max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name_or_path]
            kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
        # Instantiate tokenizer.
        tokenizer = cls(resolved_vocab_file, *inputs, **kwargs)
        return tokenizer
    
    
class BasicTokenizer(object):
    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

    def __init__(self,
                 do_lower_case=True,
                 never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
        """Constructs a BasicTokenizer.

        Args:
          do_lower_case: Whether to lower case the input (accents are stripped
            from lower-cased tokens as well).
          never_split: Tokens that are neither lower-cased nor split on
            punctuation.
        """
        self.do_lower_case = do_lower_case
        self.never_split = never_split

    def tokenize(self, text):
        """Tokenizes a piece of text.

        Pipeline: drop invalid/control characters, pad CJK characters with
        spaces, split on whitespace, optionally lower-case and strip accents,
        then split every token on punctuation.
        """
        # The CJK padding was added on November 1st, 2018 for the multilingual
        # and Chinese models. It is applied to every model; that is harmless
        # for models that were not trained on Chinese data.
        prepared = self._tokenize_chinese_chars(self._clean_text(text))
        pieces = []
        for word in whitespace_tokenize(prepared):
            if self.do_lower_case and word not in self.never_split:
                word = self._run_strip_accents(word.lower())
            pieces.extend(self._run_split_on_punc(word))
        # Re-split so that no empty or whitespace-bearing piece survives.
        return whitespace_tokenize(" ".join(pieces))

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        # After NFD decomposition the accents are separate combining marks
        # (category "Mn"), which are simply left out.
        decomposed = unicodedata.normalize("NFD", text)
        return "".join(ch for ch in decomposed if unicodedata.category(ch) != "Mn")

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text.

        Every punctuation character becomes a token of its own; the runs of
        characters between them are kept together.
        """
        if text in self.never_split:
            return [text]
        groups = []
        in_word = False  # True while the last group is a word still being extended
        for ch in text:
            if _is_punctuation(ch):
                groups.append(ch)
                in_word = False
            elif in_word:
                groups[-1] += ch
            else:
                groups.append(ch)
                in_word = True
        return groups

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        return "".join(
            " " + ch + " " if self._is_chinese_char(ord(ch)) else ch
            for ch in text
        )

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # "Chinese character" here means anything in the CJK Unicode blocks:
        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Those blocks do NOT contain all Japanese and Korean characters:
        # Hangul, Hiragana and Katakana live in other blocks. They are used to
        # write space-separated words, so they get no special treatment.
        cjk_ranges = (
            (0x4E00, 0x9FFF),
            (0x3400, 0x4DBF),
            (0x20000, 0x2A6DF),
            (0x2A700, 0x2B73F),
            (0x2B740, 0x2B81F),
            (0x2B820, 0x2CEAF),
            (0xF900, 0xFAFF),
            (0x2F800, 0x2FA1F),
        )
        return any(low <= cp <= high for low, high in cjk_ranges)

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        kept = []
        for ch in text:
            # NUL, the Unicode replacement character and control characters
            # are dropped entirely.
            if ord(ch) in (0, 0xfffd) or _is_control(ch):
                continue
            # Every kind of whitespace is normalised to a plain space.
            kept.append(" " if _is_whitespace(ch) else ch)
        return "".join(kept)


class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
        """Stores the vocabulary and the limits used by `tokenize`.

        Args:
          vocab: Container supporting `in` that holds the known word pieces.
          unk_token: Token emitted for a word that cannot be segmented.
          max_input_chars_per_word: Words longer than this are replaced by
            `unk_token` without attempting a segmentation.
        """
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """Tokenizes a piece of text into its word pieces.

        Each whitespace-separated word is segmented greedily: at every
        position the longest vocabulary entry is taken, and pieces that do not
        start the word carry a "##" prefix.

        For example:
          input = "unaffable"
          output = ["un", "##aff", "##able"]

        Args:
          text: A single token or whitespace separated tokens. This should have
            already been passed through `BasicTokenizer`.

        Returns:
          A list of wordpiece tokens. A word that is too long, or for which
          some position has no matching piece, yields a single `unk_token`.
        """
        result = []
        for word in whitespace_tokenize(text):
            word_length = len(word)
            if word_length > self.max_input_chars_per_word:
                result.append(self.unk_token)
                continue

            pieces = []
            start = 0
            while start < word_length:
                # Try the longest candidate first and shrink from the right.
                for end in range(word_length, start, -1):
                    candidate = word[start:end]
                    if start > 0:
                        candidate = "##" + candidate
                    if candidate in self.vocab:
                        pieces.append(candidate)
                        start = end
                        break
                else:
                    # Nothing matched at this position: give up on the word.
                    pieces = None
                    break

            if pieces is None:
                result.append(self.unk_token)
            else:
                result.extend(pieces)
        return result


def _is_whitespace(char):
    """Checks whether `chars` is a whitespace character."""
    # \t, \n, and \r are technically contorl characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == " " or char == "\t" or char == "\n" or char == "\r":
        return True
    cat = unicodedata.category(char)
    if cat == "Zs":
        return True
    return False


def _is_control(char):
    """Checks whether `chars` is a control character."""
    # These are technically control characters but we count them as whitespace
    # characters.
    if char == "\t" or char == "\n" or char == "\r":
        return False
    cat = unicodedata.category(char)
    if cat.startswith("C"):
        return True
    return False


def _is_punctuation(char):
    """Checks whether `chars` is a punctuation character."""
    cp = ord(char)
    # We treat all non-letter/number ASCII as punctuation.
    # Characters such as "^", "$", and "`" are not in the Unicode
    # Punctuation class but we treat them as punctuation anyways, for
    # consistency.
    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
        return True
    cat = unicodedata.category(char)
    if cat.startswith("P"):
        return True
    return False

In [3]:
import os
# Number the GPUs by PCI bus id and restrict the CUDA devices visible to this
# process to the single id "5".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "5",  # for single gpu
})

In [4]:
import pandas as pd
import numpy as np
import re
from nltk import sent_tokenize,word_tokenize

In [5]:
import html,nltk
from nltk.corpus import wordnet 
from collections import Counter 
from string import digits
from nltk.stem.snowball import DanishStemmer
from nltk.corpus import stopwords

def text_cleaning(text, escape_list=None, stop=None):
    """
    Text cleaning function: lower-cases `text`, unescapes HTML entities,
    replaces every run of characters other than ASCII letters with a space,
    tokenizes with NLTK and removes stop words.

    Args:
      text: The raw string to clean.
      escape_list: Currently unused. It belonged to a stemming step that is
        disabled; the parameter is kept so existing calls keep working.
      stop: Optional iterable of additional stop words, removed on top of the
        NLTK Dutch stop-word list.

    Returns:
      The remaining tokens joined by single spaces.
    """
    # A set makes the per-token membership test O(1); the defaults are None
    # rather than shared mutable lists.
    stop_words = set(stopwords.words('dutch'))
    if stop:
        stop_words.update(stop)
    text = html.unescape(text.lower())
    # Digits, punctuation and accented letters all fall outside [A-Za-z].
    text = re.sub('[^A-Za-z]+', ' ', text)
    tokens = nltk.word_tokenize(text)
    return ' '.join(token for token in tokens if token not in stop_words)



# Preparing Data for Extracting Feature Importance Later

In [30]:
# Load the court cases, build the combined text column, drop rows where it is
# missing, then shuffle reproducibly and renumber the rows from 0.
df = (
    pd.read_csv("../data/court_cases.csv", lineterminator='\n', index_col=0)
    .assign(**{'Full Text': lambda frame: frame['process'] + ' ' + frame['considerations']})
    .dropna(subset=['Full Text'])
    .sample(frac=1, random_state=0)
    .reset_index(drop=True)
)
# df['Full Text'] = df['Full Text'].apply(text_cleaning)

In [31]:
# Keep only the first 50 (already shuffled) rows. `.copy()` makes this an
# independent frame, so the column assignment in the next cell does not write
# into a slice of the full frame (SettingWithCopyWarning).
# NOTE(review): the training output further down reports thousands of
# examples, so this subsample was presumably not active for that run — confirm
# whether it should stay.
df = df.head(50).copy()

In [32]:
# Collapse every run of characters other than ASCII letters and '.' into a
# single space. This also removes digits and accented letters.
df['Full Text'] = df['Full Text'].map(lambda text: re.sub('[^A-Za-z.]+', ' ', text))

In [33]:
# Snapshot of the frame before 'Full Text' is replaced by its word-piece
# tokenization; a later cell reads the untokenized test texts from it.
df_old = df.copy()

In [34]:
def get_aligned(real,bert_tokenized):
    '''
    Align the output: cut the untokenized text `real` down to the part that
    corresponds to the (tail-truncated) word-piece string `bert_tokenized`.

    The "##" continuation pieces are removed from `bert_tokenized` so that each
    word counts once, the result is split into sentences, and the same number
    of trailing sentences is taken from `real`. The first of those sentences
    is trimmed to its last k words, k being the number of space-separated
    items in the first tokenized sentence.

    Args:
      real: The original, untokenized text.
      bert_tokenized: Space-joined word pieces for the tail of that text.

    Returns:
      The aligned tail of `real`, word-tokenized and joined by single spaces,
      or '' when either input contains no sentence.
    '''
    # `(?: |$)` also removes a continuation piece at the very end of the
    # string, which has no trailing space and would otherwise count as a word.
    bert_sentences = sent_tokenize(re.sub(r'##\w+(?: |$)', '', bert_tokenized))
    if not bert_sentences:
        # Without this guard `[-0:]` below would select every sentence and
        # `bert_sentences[0]` would raise IndexError.
        return ''
    real_sentences = sent_tokenize(real)[-len(bert_sentences):]
    if not real_sentences:
        return ''
    first_sentence_words = len(bert_sentences[0].split(' '))
    real_sentences[0] = ' '.join(word_tokenize(real_sentences[0])[-first_sentence_words:])
    return ' '.join(word_tokenize(' '.join(real_sentences)))

In [35]:
# Word-piece tokenizer built from the vocabulary file at outputs/vocab.txt.
tokenizer = BertTokenizer('outputs/vocab.txt')
# Replace each text by its space-joined word pieces and keep the last 495
# *characters* of that string. The cut is made on characters, so the first
# remaining token can be a fragment.
df['Full Text'] = df['Full Text'].map(
    lambda text: ' '.join(tokenizer.tokenize(text))[-495:]
)

In [36]:
# df = pd.read_csv("../data/court_cases_berttokenizer.csv")
# df.to_csv("../data/court_cases_berttokenizer.csv",index=False)

In [37]:
# Two-column frame (tokenized text, outcome) with the column names `sent` and
# `label`.
df_bert_data = df[['Full Text', 'outcome']].rename(
    columns={'Full Text': 'sent', 'outcome': 'label'}
)

In [38]:
from sklearn.model_selection import train_test_split
# 70 % train; the remaining 30 % is halved into dev and test (15 % each).
# Both splits use random_state=0, so they are reproducible.
train, valid = train_test_split(df_bert_data , test_size=0.3, random_state=0)
dev, test = train_test_split(valid , test_size=0.5, random_state=0)
# Directory for the TSV files; the exports below are currently disabled.
SAVE_ROOT_PATH = '../feature-importance/data/court_cases/'
# train.to_csv(SAVE_ROOT_PATH+'train.tsv',sep='\t',index=False)
# dev.to_csv(SAVE_ROOT_PATH+'dev.tsv',sep='\t',index=False)
# test.to_csv(SAVE_ROOT_PATH+'test.tsv',sep='\t',index=False)

In [39]:
# Untokenized text of the test rows. Selecting by label and column name avoids
# relying on row labels matching row positions and on 'Full Text' being the
# last column, as the previous `iloc[test.index, -1]` did.
test_real = df_old.loc[test.index, 'Full Text']

In [41]:
test_real

11     bij besluit van februari het primaire besluit...
38     . op december heeft verzoekster een aanvraag ...
22     bij besluit van juli het bestreden besluit he...
4      bij besluit van oktober heeft de minister voo...
18     datum januari . zitting hebben mr. m.a.a. mon...
2      . eiser is geboren op ... in beiroet libanon ...
10     bij besluit van februari heeft de staatssecre...
34     bij besluit van januari heeft verweerder afwi...
Name: Full Text, dtype: object

In [46]:
# Manual step-through of the body of `get_aligned` for one test row, leaving
# the intermediate values (`real`, `bert_tokenized`, `bert_tokenized2`) in the
# namespace so they can be inspected.
no =11
real,bert_tokenized2  = test_real[no], test.sent[no]

# Remove the "##" continuation pieces, then split into sentences.
bert_tokenized = sent_tokenize(re.sub(r'##\w+ ', '', bert_tokenized2))
length_bert_tokenized = len(bert_tokenized)
# Keep as many trailing sentences of the real text as the tokenized text has.
real = sent_tokenize(real)[-length_bert_tokenized:]
length_first_sentence = len(bert_tokenized[0].split(' ') )
# Trim the first kept sentence to the same number of words.
real[0] =' '.join(word_tokenize(real[0])[-length_first_sentence:])
# return ' '.join( word_tokenize(' '.join(real)))

In [47]:
bert_tokenized2

'] niet aan de voorwaarde voldoe ##t dat zij het basis ##examen inburgering met goed gevolg heeft afgelegd ontkennen ##d dient te worden beantwoord . . aangezien aan de afwijzing van de [UNK] aanvraag van [UNK] geen andere afwijzing ##s ##grond ##en te ##n grondslag zijn gelegd zie ##t de rechtbank aanleiding onder her ##roep ##ing van het primaire besluit zelf in de zaak te voorzien in die zin dat verweerde ##r zal worden opge ##dragen tot af ##gift ##e van de gevraagd ##e [UNK] aan [UNK] .'

In [53]:
' '.join( word_tokenize(' '.join(real)))

'zich niet uitlaat omtrent de vraag welke integratievoorwaarden in het kader van artikel tweede lid van de gezinsherenigingsrichtlijn mogen worden gesteld . .uit het voorgaande volgt dat de vraag of verweerder de mvv aanvraag van eiseres en daarmee dus de toelating heeft mogen afwijzen omdat eiseres niet aan de voorwaarde voldoet dat zij het basisexamen inburgering met goed gevolg heeft afgelegd ontkennend dient te worden beantwoord . .aangezien aan de afwijzing van de mvv aanvraag van eiseres geen andere afwijzingsgronden ten grondslag zijn gelegd ziet de rechtbank aanleiding onder herroeping van het primaire besluit zelf in de zaak te voorzien in die zin dat verweerder zal worden opgedragen tot afgifte van de gevraagde mvv aan eiseres .'

In [52]:
real

['zich niet uitlaat omtrent de vraag welke integratievoorwaarden in het kader van artikel tweede lid van de gezinsherenigingsrichtlijn mogen worden gesteld .',
 '.uit het voorgaande volgt dat de vraag of verweerder de mvv aanvraag van eiseres en daarmee dus de toelating heeft mogen afwijzen omdat eiseres niet aan de voorwaarde voldoet dat zij het basisexamen inburgering met goed gevolg heeft afgelegd ontkennend dient te worden beantwoord.',
 '.aangezien aan de afwijzing van de mvv aanvraag van eiseres geen andere afwijzingsgronden ten grondslag zijn gelegd ziet de rechtbank aanleiding onder herroeping van het primaire besluit zelf in de zaak te voorzien in die zin dat verweerder zal worden opgedragen tot afgifte van de gevraagde mvv aan eiseres.']

In [51]:
bert_tokenized

['] niet aan de voorwaarde voldoe dat zij het basis inburgering met goed gevolg heeft afgelegd ontkennen dient te worden beantwoord .',
 '.',
 'aangezien aan de afwijzing van de [UNK] aanvraag van [UNK] geen andere afwijzing te grondslag zijn gelegd zie de rechtbank aanleiding onder her van het primaire besluit zelf in de zaak te voorzien in die zin dat verweerde zal worden opge tot af van de gevraagd [UNK] aan [UNK] .']

In [50]:
test_real[no][-495:]

's de toelating heeft mogen afwijzen omdat eiseres niet aan de voorwaarde voldoet dat zij het basisexamen inburgering met goed gevolg heeft afgelegd ontkennend dient te worden beantwoord. .aangezien aan de afwijzing van de mvv aanvraag van eiseres geen andere afwijzingsgronden ten grondslag zijn gelegd ziet de rechtbank aanleiding onder herroeping van het primaire besluit zelf in de zaak te voorzien in die zin dat verweerder zal worden opgedragen tot afgifte van de gevraagde mvv aan eiseres.'

In [182]:
# `test_aligned` is not assigned anywhere in the visible notebook, so on a
# fresh kernel this cell raised NameError.
# NOTE(review): the construction below is a reconstruction — the test split
# with `sent` replaced by the aligned untokenized text — confirm that this is
# what the original `test_aligned` contained.
test_aligned = test.copy()
test_aligned['sent'] = [
    get_aligned(test_real[row_label], test.sent[row_label])
    for row_label in test.index
]
test_aligned.to_csv(SAVE_ROOT_PATH+'test_aligned.tsv',sep='\t',index=False)

# Training the BERT Model

In [103]:
# Features (tokenized text) and target, split 80/20 with a fixed seed.
X = df['Full Text']
y = df['outcome']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [104]:
from simpletransformers.classification import ClassificationModel
# Imported under an alias so the standard-library `logging` module imported at
# the top of the notebook is not rebound to transformers' logging helper.
from transformers import logging as hf_logging
hf_logging.set_verbosity_error()

train_args = {
    'reprocess_input_data' : True,
    'fp16':False,
    'num_train_epochs':4,
    # NOTE(review): CUDA_VISIBLE_DEVICES is set to "5" in an earlier cell, so
    # inside this process the only visible GPU presumably has index 0. Confirm
    # that `cuda_device` is honoured when passed through `args`, and that 5 is
    # the intended index.
    'cuda_device':5,
    'overwrite_output_dir':True
}

# Binary classifier on top of the Dutch cased BERT checkpoint.
model = ClassificationModel(
                            "bert","wietsedv/bert-base-dutch-cased",
                            num_labels=2,args=train_args)

In [105]:
# Independent two-column copy (text, outcome) used for the vocabulary-coverage
# check and for the train/test split below.
df2 = df[['Full Text','outcome']].copy()

In [106]:
# Getting the entire text 
full_text = ''
for i in df2['Full Text']:
    full_text = full_text+i + ' '
    
from nltk.tokenize import word_tokenize,sent_tokenize
# Tokenized words from document
tokenized_words =  [j for i in sent_tokenize(full_text) for j in word_tokenize(i)]
tokenized_words_set = set(tokenized_words)

In [107]:
# Share of distinct corpus words that appear verbatim as an entry of the
# model tokenizer's vocabulary.
known_words = tokenized_words_set.intersection(model.tokenizer.vocab.keys())
print(f'Only {round(len(known_words) / len(tokenized_words_set)*100,2)} % coverage of the words.')

Only 74.21 % coverage of the words.


In [108]:
# 80/20 split of the (text, outcome) frame with a fixed seed; these two frames
# are passed to `train_model` and `eval_model`.
train_df, test_df = train_test_split(df2,test_size=.2,random_state=0)

In [109]:
# The captured output of this cell warned "Dataframe headers not specified.
# Falling back to using column 0 as text and column 1 as labels." Naming the
# columns `text` and `labels` passes the same data explicitly instead of
# depending on column order.
model.train_model(train_df.rename(columns={'Full Text': 'text', 'outcome': 'labels'}))

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."
I1216 14:59:22.718198 139873987962624 classification_model.py:1082]  Converting to features started. Cache is not used.


HBox(children=(FloatProgress(value=0.0, max=6540.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=4.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 4', max=818.0, style=ProgressStyle(des…






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 4', max=818.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 4', max=818.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 4', max=818.0, style=ProgressStyle(des…





I1216 15:07:07.151360 139873987962624 classification_model.py:414]  Training of bert model complete. Saved to outputs/.


(3272, 0.279949324493901)

In [110]:
from sklearn.metrics import accuracy_score

In [111]:
# Evaluate on the held-out frame, with explicitly named `text`/`labels`
# columns so the library does not have to fall back to column positions.
# `accuracy=accuracy_score` adds an `accuracy` entry to `result`.
result, model_outputs, wrong_predictions = model.eval_model(
    test_df.rename(columns={'Full Text': 'text', 'outcome': 'labels'}),
    accuracy=accuracy_score,
)

I1216 15:07:07.187206 139873987962624 classification_model.py:1082]  Converting to features started. Cache is not used.


HBox(children=(FloatProgress(value=0.0, max=1635.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=205.0, style=ProgressStyle(descr…

I1216 15:07:21.090505 139873987962624 classification_model.py:842] {'mcc': 0.6982203548199091, 'tp': 802, 'tn': 590, 'fp': 107, 'fn': 136, 'accuracy': 0.8513761467889909, 'eval_loss': 0.8409855537380592}





In [113]:
result ,model_outputs

({'mcc': 0.6982203548199091,
  'tp': 802,
  'tn': 590,
  'fp': 107,
  'fn': 136,
  'accuracy': 0.8513761467889909,
  'eval_loss': 0.8409855537380592},
 array([[-3.34855866,  3.11301565],
        [-4.10109663,  3.9348588 ],
        [ 2.26436663, -1.83824766],
        ...,
        [-4.30278254,  3.66309142],
        [ 4.08798313, -4.01954126],
        [-2.89374089,  2.79026508]]))

# Commands to run on the command line from inside the `feature-importance` folder

python bert_att_weight_retrieval.py --data_dir data/court_cases --bert_model data/court_cases/court_cases_model/ --task_name sst-2 --output_dir /data/court_cases/output --do_eval --max_seq_length 500 --eval_batch_size 1

python bert_lime.py --data_dir data/court_cases --bert_model data/court_cases/court_cases_model/ --task_name sst-2 --output_dir /data/court_cases/output --do_eval --max_seq_length 500 --eval_batch_size 1

python bert_shap.py --data_dir data/court_cases --bert_model data/court_cases/court_cases_model/ --task_name sst-2 --output_dir /data/court_cases/output --do_eval --max_seq_length 500 --eval_batch_size 1