# Importing functions

In [1]:
# Mount Google Drive so files under /content/gdrive are reachable (Colab-only import).
from google.colab import drive
drive.mount('/content/gdrive')
import logging
import torch
# Install the libraries imported further down in the notebook.
# NOTE(review): versions are unpinned; consider pinning them (and using `%pip`) for reproducibility.
!pip install transformers
!pip install datasets
# INFO level so that messages emitted with logging.info are displayed.
logging.basicConfig(level=logging.INFO)
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Bare expression: displays the selected device as the cell output.
device

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


'cuda'

In [2]:
import pandas as pd
import os
from typing import List, Tuple, Union

# Default directory searched by load_test() for 'test_data.txt'.
DIR = 'twitter-datasets'


def _read_data(path: str) -> List[str]:
  with open(path, 'r') as f:
    return [x for x in f]


def _read_data_with_ids(path: str) -> Tuple[List[str], List[str]]:
  index = []
  rows = []
  with open(path, 'r') as f:
    for line in f:
      id, x = line.split(',', maxsplit=1)
      index.append(id)
      rows.append(x)
  return index, rows


def load_train(full=False, eval_frac=None, cols=None, neg_label=-1, random_state=None,
               pos_path='/content/gdrive/MyDrive/train_pos.txt',
               neg_path='/content/gdrive/MyDrive/train_neg.txt') -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.DataFrame]]:
    """
    Load the positive and negative training examples into one labelled DataFrame.

    Args:
        full: Currently unused by this function; kept so existing callers
            that pass it keep working.
        eval_frac: When None, the whole frame is returned. Otherwise the
            fraction of rows that is sampled into a validation frame.
        cols: Two names ``[text_column, label_column]``; defaults to
            ``['x', 'y']``.
        neg_label: Label given to rows read from ``neg_path`` (rows read from
            ``pos_path`` always get label 1).
        random_state: Seed forwarded to ``DataFrame.sample`` so the
            train/validation split can be reproduced. None (the default)
            keeps the previous unseeded behaviour.
        pos_path: File with one positive example per line.
        neg_path: File with one negative example per line.

    Returns:
        The full DataFrame when ``eval_frac`` is None, else a
        ``(train, val)`` pair of disjoint DataFrames.
    """
    if cols is None:
        cols = ['x', 'y']
    text_col, label_col = cols[0], cols[1]

    pos = pd.DataFrame({text_col: _read_data(pos_path)})
    pos[label_col] = 1

    neg = pd.DataFrame({text_col: _read_data(neg_path)})
    neg[label_col] = neg_label

    # reset_index() without drop=True also materialises the row number as an
    # 'index' column; this is kept so the returned columns stay the same as
    # before for existing callers.
    df = pd.concat([pos, neg], ignore_index=True).reset_index()
    if eval_frac is None:
        return df

    # Validation rows are sampled first; the training frame is what remains.
    val = df.sample(frac=eval_frac, random_state=random_state)
    train = df.drop(val.index)
    return train, val


def load_test(dir=DIR, cols='x') -> pd.DataFrame:
    """
    Load ``test_data.txt`` from ``dir`` into a single-column DataFrame.

    The file is parsed with ``_read_data_with_ids``: the ids become the frame's
    index and the texts are stored in the column named by ``cols``.
    """
    test_path = os.path.join(dir, 'test_data.txt')
    ids, texts = _read_data_with_ids(test_path)
    return pd.DataFrame({cols: texts}, index=ids)

INFO:numexpr.utils:NumExpr defaulting to 4 threads.


# evaluate.py

In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss, roc_auc_score
from typing import Callable, Dict
import logging


def _log_metrics(metrics):
  logging.info(
    '---\n' +
    '\n'.join([f'* {x}: {y}' for x,y in metrics.items()]) +
    '\n---'
  )


def evaluate_prob(y: np.array, y_pred: np.array, verbose=True) -> Dict[str, float]:
  """
  Compute the probability-style metrics for a set of predictions.

  Returns a dict with the keys ``'bce'`` (from ``log_loss``) and ``'auc'``
  (from ``roc_auc_score``), in that order. When ``verbose`` is true the
  dict is also logged.
  """
  scores = {
    'bce': log_loss(y, y_pred),
    'auc': roc_auc_score(y, y_pred),
  }
  if verbose:
    _log_metrics(scores)
  return scores


def evaluate(y: np.array, y_pred: np.array) -> Dict[str, float]:
  """
  Compute and log classification metrics for predictions ``y_pred`` against ``y``.

  Returns a dict with these keys, in this order:

  * accuracy: proportion of correctly classified examples
  * precision: proportion of predicted positives that are actually positive
  * recall: proportion of actual positives that were predicted positive
  * f1: harmonic mean of precision and recall
  * bce, auc: the metrics produced by ``evaluate_prob``

  NOTE: the same ``y_pred`` is forwarded to ``evaluate_prob``; when it holds
  hard class labels rather than probabilities, ``bce`` and ``auc`` are
  computed on those labels.
  """
  scorers = (
    ('accuracy', accuracy_score),
    ('precision', precision_score),
    ('recall', recall_score),
    ('f1', f1_score),
  )
  result = {name: scorer(y, y_pred) for name, scorer in scorers}
  # Logging is deferred so that all metrics appear in a single message.
  result.update(evaluate_prob(y, y_pred, verbose=False))

  _log_metrics(result)
  return result


def evaluate_model(model: Callable[[pd.DataFrame], np.array], df: pd.DataFrame) -> Dict[str, float]:
  """
  Run ``model`` on ``df`` and score its output against the ``y`` column.

  ``model`` is called with the whole dataframe; its return value is passed
  to ``evaluate`` together with ``df['y']`` converted to a numpy array.
  """
  targets = df['y'].to_numpy()
  predictions = model(df)
  return evaluate(targets, predictions)



# preprocessing

In [4]:
from typing import Dict, Optional
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import WhitespaceTokenizer
from nltk.corpus import stopwords
from textblob import Word
from textblob import TextBlob
from tqdm import tqdm


# Fetch the NLTK resources used below; the stopword corpus must be downloaded
# before stopwords.words() is called on the next lines.
nltk.download('stopwords')
nltk.download('omw-1.4')
# English stopwords as a set, used by remove_stopwords().
stop_words = set(stopwords.words('english'))

nltk.download('wordnet')
# Module-level helpers shared by the tokenize() and lemmatize() steps below.
tokenizer = WhitespaceTokenizer()
lemmatizer = WordNetLemmatizer()

# Registers `progress_apply` on pandas objects (used by spelling_correction()).
tqdm.pandas()

def to_lower(df: pd.DataFrame, x_col='x'):
  """
  Lower-case every sentence in column ``x_col`` of ``df``.

  The column is expected to hold strings; it is overwritten in place and
  nothing is returned.
  """
  def _lower(sentence):
    return sentence.lower()

  df[x_col] = df[x_col].apply(_lower)

def tokenize(df: pd.DataFrame, x_col='x'):
  """
  Split every sentence in column ``x_col`` into a list of tokens.

  Uses the module-level ``WhitespaceTokenizer`` instance; the column is
  overwritten in place and nothing is returned.

  NOTE: a second function named ``tokenize`` (for Hugging Face datasets) is
  defined later in this notebook; once that cell has run, the global name
  ``tokenize`` no longer refers to this function.
  """
  split_sentence = tokenizer.tokenize
  df[x_col] = df[x_col].apply(split_sentence)

def remove_tags(df: pd.DataFrame, x_col='x'):
  """
  Delete the ``<user>`` and ``<url>`` placeholders from every sentence.

  Operates on a column of strings (not tokens), strips leading/trailing
  whitespace afterwards and overwrites the column in place.
  Deprecated in favour of remove_tag_tokens(df: pd.DataFrame).
  """
  def _strip_placeholders(sentence):
    without_user = sentence.replace('<user>', '')
    without_url = without_user.replace('<url>', '')
    return without_url.strip()

  df[x_col] = df[x_col].apply(_strip_placeholders)

def replace_hashtags(df: pd.DataFrame, x_col='x'):
  """
  Remove every ``#`` character from the sentences in column ``x_col``.

  Only the symbol is dropped; the word that followed it stays. The column
  (strings) is overwritten in place.
  """
  def _drop_hash_signs(sentence):
    return sentence.replace("#", "")

  df[x_col] = df[x_col].apply(_drop_hash_signs)

def remove_tag_tokens(df: pd.DataFrame, x_col='x'):
  """
  Drop the ``<user>`` and ``<url>`` placeholder tokens from every token list.

  To be applied to a dataframe whose column ``x_col`` contains lists of
  tokens; the column is overwritten in place and nothing is returned.

  The placeholders match the ones inserted by ``replace_user_handles`` and
  ``replace_urls`` (and removed by the deprecated ``remove_tags``). The
  previous filter compared against ``'user'`` without angle brackets, so it
  left ``<user>`` tokens in place and removed the ordinary word "user".
  """
  placeholders = ('<user>', '<url>')
  df[x_col] = df[x_col].apply(lambda tokens: [w for w in tokens if w not in placeholders])

def remove_stopwords(df: pd.DataFrame, x_col='x'):
  """
  Filter out every token that is contained in the module-level ``stop_words`` set.

  Expects column ``x_col`` to hold lists of tokens; the column is overwritten
  in place.
  """
  def _without_stopwords(tokens):
    return [token for token in tokens if token not in stop_words]

  df[x_col] = df[x_col].apply(_without_stopwords)

def lemmatize(df: pd.DataFrame, x_col='x'):
  """
  Replace each token by the result of the module-level ``lemmatizer``.

  Expects column ``x_col`` to hold lists of tokens; the column is overwritten
  in place.
  """
  def _lemmatize_tokens(tokens):
    return list(map(lemmatizer.lemmatize, tokens))

  df[x_col] = df[x_col].apply(_lemmatize_tokens)

def remove_single_symbols(df: pd.DataFrame, x_col='x'):
  """
  Keep only tokens that are longer than one character.

  Expects column ``x_col`` to hold lists of tokens; the column is overwritten
  in place.
  """
  def _longer_than_one(tokens):
    return list(filter(lambda token: len(token) > 1, tokens))

  df[x_col] = df[x_col].apply(_longer_than_one)

def spelling_correction(df: pd.DataFrame, x_col='x'):
  """
  Replace each token by ``Word(token).correct()``.

  Expects column ``x_col`` to hold lists of tokens. Uses ``progress_apply``
  (registered via ``tqdm.pandas()``) so a progress bar is shown; the column
  is overwritten in place.
  """
  def _correct_tokens(tokens):
    return [Word(token).correct() for token in tokens]

  df[x_col] = df[x_col].progress_apply(_correct_tokens)


def replace_user_handles(df: pd.DataFrame, x_col='x'):
  """
  Replace every ``@handle`` token by the placeholder ``<user>``.

  A token counts as a handle when it starts with ``@`` and has at least one
  more character, so a lone ``@`` is left untouched. Expects column ``x_col``
  to hold lists of tokens; the column is overwritten in place.
  """
  def _mask_handle(token):
    if token.startswith("@") and len(token) > 1:
      return "<user>"
    return token

  df[x_col] = df[x_col].apply(lambda tokens: [_mask_handle(token) for token in tokens])

def replace_urls(df: pd.DataFrame, x_col='x'):
  """
  Replace every URL-like token by the placeholder ``<url>``.

  A token is treated as a URL when it starts with ``http://``, ``https://``
  or ``www.``. Expects column ``x_col`` to hold lists of tokens; the column
  is overwritten in place.
  """
  url_prefixes = ("http://", "https://", "www.")

  def _mask_url(token):
    return "<url>" if token.startswith(url_prefixes) else token

  df[x_col] = df[x_col].apply(lambda tokens: [_mask_url(token) for token in tokens])

def untokenize(df: pd.DataFrame, x_col='x'):
  """
  Join each token list back into one string, separated by single spaces.

  Expects column ``x_col`` to hold lists of string tokens; the column is
  overwritten in place.
  """
  join_with_spaces = " ".join
  df[x_col] = df[x_col].apply(join_with_spaces)

def preprocess(df: pd.DataFrame, flags: Optional[Dict[str, bool]] = None, x_col='x'):
  """
  Apply the enabled preprocessing steps to column ``x_col`` of ``df`` in place.

  Args:
    df: Dataframe whose ``x_col`` column holds sentences (strings).
    flags: Mapping from step name to bool; a step runs only when its flag is
      truthy. ``None`` (the default) or an empty mapping means no step runs.
    x_col: Name of the column to transform.

  The steps always run in this fixed order, regardless of the order of the
  keys in ``flags``: remove_tags, replace_hashtags, to_lower, tokenize,
  replace_user_handles, replace_urls, remove_tag_tokens, remove_stopwords,
  lemmatize, remove_single_symbols, spelling_correction. The steps up to and
  including ``tokenize`` work on strings; the later ones expect token lists,
  so they require the ``tokenize`` flag to be enabled as well.

  Returns:
    None; ``df`` is modified in place.
  """
  if flags is None:
    return

  if flags.get('remove_tags', False):
    remove_tags(df, x_col=x_col)
  if flags.get('replace_hashtags', False):
    replace_hashtags(df, x_col=x_col)
  if flags.get('to_lower', False):
    print("changing to lower case...")
    to_lower(df, x_col=x_col)
  if flags.get('tokenize', False):
    # Tokenize inline with the module-level whitespace tokenizer instead of
    # calling `tokenize(df, x_col=...)`: a later cell redefines the global
    # name `tokenize` with a different signature (ds, tokenizer, path, force),
    # so the call by name would raise TypeError once that cell has run.
    df[x_col] = df[x_col].apply(lambda sentence: tokenizer.tokenize(sentence))
  if flags.get('replace_user_handles', False):
    replace_user_handles(df, x_col=x_col)
  if flags.get('replace_urls', False):
    replace_urls(df, x_col=x_col)
  if flags.get('remove_tag_tokens', False):
    remove_tag_tokens(df, x_col=x_col)
  if flags.get('remove_stopwords', False):
    remove_stopwords(df, x_col=x_col)
  if flags.get('lemmatize', False):
    lemmatize(df, x_col=x_col)
  if flags.get('remove_single_symbols', False):
    remove_single_symbols(df, x_col=x_col)
  if flags.get('spelling_correction', False):
    print("correcting spelling...")
    spelling_correction(df, x_col=x_col)
    print("finished correcting spelling")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# init.py

In [5]:
from pathlib import Path
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset, ClassLabel, load_metric



def load(full=False, preprocessing=None, cluster=None):
  """
  Build the training and validation datasets used for fine-tuning.

  The data comes from ``load_train`` with a 20% validation split, the label
  column named ``label`` and 0 as the negative label. The text column is
  renamed from ``x`` to ``text``, both frames are run through ``preprocess``
  with ``preprocessing`` as flags, converted to ``Dataset`` objects, and the
  ``label`` feature is cast to a ``ClassLabel`` with the names '0' and '1'.

  NOTE: ``cluster`` is currently unused; the cluster-specific loading path is
  disabled.

  Returns:
    ``(dataset_train, dataset_val)``.
  """
  splits = load_train(full=full, eval_frac=0.2, cols=["x", "label"], neg_label=0)

  for frame in splits:
    frame.rename(columns = {'x':'text'}, inplace = True)
  for frame in splits:
    preprocess(frame, flags=preprocessing, x_col='text')

  dataset_train, dataset_val = (Dataset.from_pandas(frame) for frame in splits)

  # Both datasets are cast to the training set's features with `label` replaced.
  label_features = dataset_train.features.copy()
  label_features['label'] = ClassLabel(names=['0', '1'])

  return dataset_train.cast(label_features), dataset_val.cast(label_features)


def tokenize(ds, tokenizer, path, force=True):
  """
  Tokenize the ``text`` column of ``ds``, using ``path`` as an on-disk cache.

  When ``force`` is false and ``path`` already exists, the dataset stored
  there is loaded and returned without tokenizing. Otherwise ``ds`` is mapped
  in batches through ``tokenizer`` (called with ``padding=True`` and
  ``truncation=True``), the result is saved to ``path`` and returned.
  """
  if not force and Path(path).exists():
    return Dataset.load_from_disk(path)

  def _encode_batch(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

  encoded = ds.map(_encode_batch, batched=True)
  encoded.save_to_disk(path)
  return encoded


def get_BERT(model_name, device):
  """
  Load a two-label sequence-classification model and move it to ``device``.

  Side effect: the loaded model is also written to a local path equal to
  ``model_name`` via ``save_pretrained``.
  NOTE(review): presumably a later ``from_pretrained(model_name)`` then
  resolves to that local directory instead of the hub; confirm this is
  intended.
  """
  classifier = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
  classifier = classifier.to(device)
  classifier.save_pretrained(model_name)
  return classifier


def train(model_name, tokenizer_name, device, full=False, preprocessing=None, batch_size=32, epochs=1, force_tokenize=True, cluster=None):
  """
  Fine-tune a sequence classifier and score it on the validation split.

  Steps: build the datasets with ``load``, tokenize both splits (cached under
  ``bert/cache/``), load the model with ``get_BERT``, train it with a
  ``Trainer`` that evaluates and saves once per epoch and reloads the best
  checkpoint at the end, then predict on the validation split and pass the
  arg-max labels to ``evaluate``.

  Args:
    model_name: Checkpoint passed to ``get_BERT``.
    tokenizer_name: Checkpoint passed to ``AutoTokenizer.from_pretrained``.
    device: Device the model is moved to.
    full: Forwarded to ``load``; also adds a ``__full`` suffix to the cache paths.
    preprocessing: Flags forwarded to ``load``.
    batch_size: Per-device training batch size.
    epochs: Number of training epochs.
    force_tokenize: When true, re-tokenize even if a cached dataset exists.
    cluster: Forwarded to ``load``.

  Returns:
    ``(model, metrics)`` where ``metrics`` is the dict returned by ``evaluate``.
  """
  dataset_train, dataset_val = load(full=full, preprocessing=preprocessing, cluster=cluster)

  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

  def _cache_path(split):
    # One cache directory per split, tokenizer and "full" setting.
    return f'bert/cache/{split}_tokenized__{tokenizer_name}{"__full" if full else ""}'

  train_tokenized = tokenize(dataset_train, tokenizer, path=_cache_path('train'), force=force_tokenize)
  val_tokenized = tokenize(dataset_val, tokenizer, path=_cache_path('val'), force=force_tokenize)

  model = get_BERT(model_name, device)

  trainer_config = TrainingArguments(
    output_dir='bert_data/test_trainer',
    num_train_epochs=epochs,
    save_strategy='epoch',
    evaluation_strategy='epoch',
    per_device_train_batch_size=batch_size,
    load_best_model_at_end=True)

  accuracy_metric = load_metric('accuracy')

  def compute_metrics(eval_pred):
    # eval_pred unpacks into (model outputs, reference labels).
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=1)
    return accuracy_metric.compute(predictions=predicted, references=references)

  trainer = Trainer(
    model=model,
    args=trainer_config,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics)

  trainer.train()

  # Final scoring with the project's own metric set on the validation split.
  val_output = trainer.predict(val_tokenized)
  predicted_labels = np.argmax(val_output.predictions, axis=1)
  true_labels = val_tokenized.to_pandas()['label']
  return model, evaluate(true_labels, predicted_labels)


def objective(args, model_name, tokenizer_name, device, full=False):
  """
  Train once with the keyword arguments in ``args`` and return the negated accuracy.

  ``args`` is printed and then unpacked into ``train`` as extra keyword
  arguments. The sign is flipped, so a lower return value means a higher
  validation accuracy.
  """
  print(args)
  metrics = train(model_name, tokenizer_name, device, full=full, **args)[1]
  return -metrics['accuracy']


# Start of Code

In [6]:
# Forwarded to train(full=...) in the training cell below.
FULL=False

# Checkpoint that is fine-tuned (the commented name is an alternative checkpoint).
MODEL = 'distilbert-base-uncased' # 'cardiffnlp/twitter-roberta-base-sentiment-latest'
# Checkpoint whose tokenizer is used.
# NOTE(review): this differs from MODEL — presumably the vocabularies are compatible; confirm.
TOKENIZER = 'bert-base-uncased'

# Number of training epochs and per-device training batch size passed to train().
EPOCHS = 1
#BATCH_SIZE = 1028
BATCH_SIZE = 32

In [7]:
# Fine-tune the model. Despite its name, `accuracy` receives the whole metrics
# dict returned by train() (accuracy, precision, recall, f1, bce, auc).
model, accuracy = train(MODEL, TOKENIZER, device, full=FULL, batch_size=BATCH_SIZE, epochs=EPOCHS, cluster=0)

Casting the dataset:   0%|          | 0/16 [00:00<?, ?ba/s]

Casting the dataset:   0%|          | 0/4 [00:00<?, ?ba/s]



  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/40 [00:00<?, ?ba/s]

The following columns in the training set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text, __index_level_0__, index, token_type_ids. If text, __index_level_0__, index, token_type_ids are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 160000
  Num Epochs = 1
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 5000


Epoch,Training Loss,Validation Loss,Accuracy
1,0.288,0.28918,0.8742


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text, __index_level_0__, index, token_type_ids. If text, __index_level_0__, index, token_type_ids are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 40000
  Batch size = 8
Saving model checkpoint to bert_data/test_trainer/checkpoint-5000
Configuration saved in bert_data/test_trainer/checkpoint-5000/config.json
Model weights saved in bert_data/test_trainer/checkpoint-5000/pytorch_model.bin
tokenizer config file saved in bert_data/test_trainer/checkpoint-5000/tokenizer_config.json
Special tokens file saved in bert_data/test_trainer/checkpoint-5000/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from bert_data/test_trainer/checkpoint-5000 (score: 0.289179563522

INFO:root:---
* accuracy: 0.8742
* precision: 0.8661728395061729
* recall: 0.8830933440741113
* f1: 0.8745512564818507
* bce: 4.34503224320566
* auc: 0.874260943563523
---


In [8]:
# Persist the model returned by train() to Google Drive.
model.save_pretrained('/content/gdrive/MyDrive/modelonTrainingData')

Configuration saved in /content/gdrive/MyDrive/modelonTrainingData/config.json
Model weights saved in /content/gdrive/MyDrive/modelonTrainingData/pytorch_model.bin


# Evaluation 

In [9]:
# Settings read by evaluate_cluster() below.
# FULL and EVAL_FRAC are used in the name of the tokenization cache path.
FULL=False
EVAL_FRAC = 0.2

# Checkpoint loaded for evaluation and the checkpoint whose tokenizer is used.
MODEL = 'distilbert-base-uncased'
TOKENIZER = 'bert-base-uncased'
# Preprocessing flags passed to preprocess(); None disables all steps.
PREPROCESSING = None

In [10]:
def compute_metrics(eval_pred):
  """
  Metric callback for ``Trainer``: score arg-max predictions with ``evaluate``.

  ``eval_pred`` unpacks into ``(model outputs, labels)``; the predicted class
  is the index of the largest output along axis 1.
  """
  scores, labels = eval_pred
  return evaluate(labels, np.argmax(scores, axis=1))


def evaluate_cluster(cluster=None, model_name=None, random_state=None):
  """
  Evaluate a classifier on a random sample of one cluster of the clustered data.

  Args:
    cluster: Value of the ``cluster`` column selecting the rows to evaluate.
    model_name: Checkpoint name or directory passed to ``get_BERT``. Defaults
      to the global ``MODEL``.
      NOTE(review): ``MODEL`` is the pretrained base checkpoint, not the
      fine-tuned model saved by the training section, so the default appears
      to evaluate a classifier that was never fine-tuned. Pass the directory
      written by ``model.save_pretrained`` to score trained weights.
    random_state: Seed forwarded to ``DataFrame.sample`` so the evaluated
      sample can be reproduced; None keeps the previous unseeded behaviour.

  Returns:
    The metrics dict returned by ``Trainer.evaluate`` (the metrics are also
    logged by ``evaluate`` through ``compute_metrics``).

  Raises:
    ValueError: If no usable rows exist for ``cluster``.
  """
  if model_name is None:
    model_name = MODEL

  df = pd.read_csv("/content/gdrive/MyDrive/ClusteredFasttextSmallTrainUnpreprocessed")
  df_cluster = df.loc[df['cluster'] == cluster, ['text', 'label']].dropna()
  if df_cluster.empty:
    # Fail early with a clear message instead of an obscure error downstream.
    raise ValueError(f'no rows with text and label found for cluster {cluster!r}')

  # Use EVAL_FRAC (instead of a literal 0.2) so the sample size agrees with
  # the fraction recorded in the tokenization cache path below.
  df_eval = df_cluster.sample(frac=EVAL_FRAC, random_state=random_state)

  preprocess(df_eval, flags=PREPROCESSING, x_col='text')
  dataset_eval = Dataset.from_pandas(df_eval)

  new_features = dataset_eval.features.copy()
  new_features['label'] = ClassLabel(names=['0', '1'])

  dataset_eval = dataset_eval.cast(new_features)

  tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
  eval_tokenized = tokenize(dataset_eval, tokenizer, path=f'bert/cache/eval_tokenized__{TOKENIZER}__{EVAL_FRAC}{"__full" if FULL else ""}')

  model = get_BERT(model_name, device)

  trainer = Trainer(model, eval_dataset=eval_tokenized, tokenizer=tokenizer, compute_metrics=compute_metrics)
  return trainer.evaluate()


# Result of Cluster 0


In [11]:
# Evaluate on a sample of the rows assigned to cluster 0; the metrics are logged by evaluate().
evaluate_cluster(cluster = 0)

Casting the dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.20.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

  0%|          | 0/7 [00:00<?, ?ba/s]

loading configuration file distilbert-base-uncased/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.20.1",
  "vocab_size": 30522
}

loading weights file distilbert-base-uncased/pytorch_model.bin
All model checkpoint weights were used when initializing DistilBertForSequenceClassification.

All the weights of DistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the che

INFO:root:---
* accuracy: 0.9430783242258652
* precision: 0.0
* recall: 0.0
* f1: 0.0
* bce: 1.966005881189059
* auc: 0.49943729903536976
---


# Result of Cluster 1

In [12]:
# Evaluate on a sample of the rows assigned to cluster 1; the metrics are logged by evaluate().
evaluate_cluster(cluster = 1)

Casting the dataset:   0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.20.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

  0%|          | 0/17 [00:00<?, ?ba/s]

loading configuration file distilbert-base-uncased/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.20.1",
  "vocab_size": 30522
}

loading weights file distilbert-base-uncased/pytorch_model.bin
All model checkpoint weights were used when initializing DistilBertForSequenceClassification.

All the weights of DistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the che

INFO:root:---
* accuracy: 0.39924844452658165
* precision: 0.8
* recall: 0.000819840131174421
* f1: 0.0016380016380016379
* bce: 20.749223741906228
* auc: 0.5002554799111472
---


# Result of Cluster 2

In [13]:
# Evaluate on a sample of the rows assigned to cluster 2; the metrics are logged by evaluate().
evaluate_cluster(cluster = 2)

Casting the dataset:   0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.20.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

  0%|          | 0/18 [00:00<?, ?ba/s]

loading configuration file distilbert-base-uncased/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.20.1",
  "vocab_size": 30522
}

loading weights file distilbert-base-uncased/pytorch_model.bin
All model checkpoint weights were used when initializing DistilBertForSequenceClassification.

All the weights of DistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the che

INFO:root:---
* accuracy: 0.4266837417777519
* precision: 0.4666666666666667
* recall: 0.000710804224207961
* f1: 0.0014194464158978
* bce: 19.80164241866551
* auc: 0.49980977395769116
---
