In [None]:
# Run this cell to mount your Google Drive.
# The data and experiment files used by the cells below are read from and
# written to paths under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import codecs

texts = {} ; labels = {}


def _load_utterances(dir_path, label_when_b_suffix):
  """Read every ``*.txt`` file in ``dir_path`` and label it from its file name.

  Args:
    dir_path: directory containing one utterance per ``.txt`` file.
    label_when_b_suffix: label (0 or 1) given to files whose name ends with
      ``b.txt``; every other file gets the opposite label.

  Returns:
    ``(utterances, utterance_labels)`` - two parallel lists.
  """
  label_otherwise = 1 - label_when_b_suffix
  utterances, utterance_labels = [], []
  # os.listdir() returns entries in arbitrary order; sorting keeps the sample
  # order (and therefore the seeded KFold splits computed later) reproducible.
  for filename in sorted(os.listdir(dir_path)):
    if not filename.endswith('.txt'):
      continue
    with codecs.open(os.path.join(dir_path, filename), 'r', 'utf8') as fr:
      utterances.append( fr.read() )
    utterance_labels.append( label_when_b_suffix if filename.endswith('b.txt') else label_otherwise )
  return utterances, utterance_labels


####### TRANSCRIBED UTTERANCES
# In the TRANSCRIPTIONS, files named id+b are LIES and files without the
# letter b are TRUTH, i.e. label 0 = lie, label 1 = truth.
texts['t'], labels['t'] = _load_utterances(
    '/content/drive/My Drive/psych/klamanie/dane/transkrypcje', label_when_b_suffix=0)

print(f"loaded TRANSCRIBED: { len(texts['t'])}")


####### TYPED UTTERANCES
# In the WRITTEN utterances the convention is reversed: files named id+b are
# TRUE and files without the letter b are lies (still 0 = lie, 1 = truth).
texts['p'], labels['p'] = _load_utterances(
    '/content/drive/My Drive/psych/klamanie/dane/pisemne', label_when_b_suffix=1)

print(f"loaded TYPED: { len(texts['p'])}")


In [None]:
import pickle

# Persist the loaded utterances and labels to Drive. The `with` blocks make
# sure each file is flushed and closed as soon as it has been written.
with open('/content/drive/My Drive/psych/klamanie/dane/all_texts.pkl', 'wb') as fw:
  pickle.dump(texts, fw)
with open('/content/drive/My Drive/psych/klamanie/dane/all_labels.pkl', 'wb') as fw:
  pickle.dump(labels, fw)

In [None]:
import pickle

# Reload the dataset written by the previous cell.
# NOTE: pickle.load executes arbitrary code from the file - only load pickles
# that this notebook produced itself.
with open('/content/drive/My Drive/psych/klamanie/dane/all_texts.pkl', 'rb') as fr:
  all_texts = pickle.load(fr)
with open('/content/drive/My Drive/psych/klamanie/dane/all_labels.pkl', 'rb') as fr:
  all_labels = pickle.load(fr)

In [None]:
# Pin transformers to the version recorded in this notebook's own logs
# ("transformers_version": "4.12.5"). The training cells import TFTrainer,
# which is deprecated.
# NOTE(review): TFTrainer is reportedly no longer shipped in later
# transformers releases, so an unpinned `--upgrade` may break the import -
# confirm the pin against the runtime you use.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install transformers==4.12.5
%pip install --upgrade mlxtend
%pip install 'ray[tune]'



In [None]:
!nvidia-smi

Tue Nov 30 10:01:48 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import tensorflow as tf

import os
# Print the TensorFlow version so the environment is recorded next to the results.
print(tf.__version__)

2.7.0


In [None]:
from transformers import (
    TF2_WEIGHTS_NAME,
    BertConfig,
    BertTokenizer,
    AutoTokenizer,
    TFBertForSequenceClassification,
    AutoTokenizer,
    create_optimizer)

def get_BERT_model(model_name, lower_case=False):
  """Load a two-label TensorFlow BERT sequence classifier and its tokenizer.

  Args:
    model_name: model identifier handed to the ``from_pretrained`` loaders.
    lower_case: forwarded to the tokenizer as ``do_lower_case``.

  Returns:
    A ``(model, tokenizer)`` tuple.
  """
  bert_config = BertConfig.from_pretrained(model_name, num_labels=2)

  # from_pt=True asks the loader to build the TF model from PyTorch weights.
  classifier = TFBertForSequenceClassification.from_pretrained(
      model_name,
      from_pt=True,
      config=bert_config)

  bert_tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=lower_case)

  # Swap the activation of the model's last layer for a sigmoid.
  # NOTE(review): if the trainer's loss treats the model output as raw logits,
  # this squashes them into (0, 1) before the loss sees them - confirm intended.
  classifier.layers[-1].activation = tf.keras.activations.sigmoid
  return classifier, bert_tokenizer

In-domain evaluation (separate 10-fold cross-validation for each utterance type)

In [None]:
import numpy as np
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import classification_report
from transformers.optimization_tf import AdamWeightDecay
from transformers import TFTrainer, TFTrainingArguments
import transformers
import gc

transformers.logging.set_verbosity_error()

# In-domain experiment: for each utterance type ('p' = typed, 't' = transcribed)
# run 10-fold cross-validation and, within every fold, fine-tune each candidate
# BERT model on the training part and predict the held-out fold.
# all_results[text_type] collects one record per (fold, model).
all_results = {}

for text_type in ['p', 't']:

  kf = KFold(n_splits=10, shuffle=True, random_state=42)

  texts = np.array(all_texts[text_type])
  labels = np.array(all_labels[text_type])

  all_results[text_type] = []

  for fold_nr, (train_index, test_index) in enumerate(kf.split(texts)):

    print(f'Fold_nr= {fold_nr} on type {text_type}')

    X_train_val, X_test = texts[train_index], texts[test_index]
    y_train_val, y_test = labels[train_index], labels[test_index]

    # Split train_val into train and validation. The split is seeded and does
    # not depend on the model, so it is computed once per fold.
    train_texts, val_texts, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.05, random_state=42)
    test_texts = X_test

    #############################################

    #BERT_MODEL="allegro/herbert-base-cased", "allegro/herbert-large-cased", "dkleczek/bert-base-polish-uncased-v1","bert-base-uncased"
    for BERT_MODEL in [
                       {'name':"dkleczek/bert-base-polish-uncased-v1", 'lowcase':True, 'batch':4},
                       {'name':"allegro/herbert-base-cased", 'lowcase':False, 'batch':8},
                       {'name':"bert-base-uncased", 'lowcase':True, 'batch':8}
                       ]:

      print(f"   training {BERT_MODEL['name']}")

      training_args = TFTrainingArguments(
          output_dir='./results-'+BERT_MODEL['name'].replace('/','-')+'-'+str(fold_nr),          # output directory
          overwrite_output_dir=True,
          num_train_epochs=10,              # total number of training epochs
          per_device_train_batch_size=BERT_MODEL['batch'],  # batch size per device during training
          per_device_eval_batch_size=BERT_MODEL['batch'],   # batch size for evaluation
          warmup_steps=500,                # number of warmup steps for learning rate scheduler
          weight_decay=0.01,               # strength of weight decay
          logging_dir='./logs',            # directory for storing logs
          logging_steps=10,
          evaluation_strategy="steps",     # evaluate every `eval_steps` steps; alternatives: "steps", "epoch"
          save_strategy="steps",           # save on a step schedule; alternatives: "steps", "epoch"
          # save_steps=500                 # Number of updates steps before two checkpoint saves if save_strategy="steps"
          # NOTE(review): eval_steps=1 together with the "steps" strategy requests an
          # evaluation after every step - confirm this is intended.
          eval_steps = 1,
          save_total_limit = 5,
          load_best_model_at_end=True,     # Whether or not to load the best model found during training at the end of training.
      )

      with training_args.strategy.scope():
          model, tokenizer = get_BERT_model(BERT_MODEL['name'], lower_case=BERT_MODEL['lowcase'])

          train_encodings = tokenizer(train_texts.tolist() , padding=True, truncation=True)
          val_encodings = tokenizer(val_texts.tolist() , padding=True, truncation=True)
          test_encodings = tokenizer(test_texts.tolist() ,padding=True, truncation=True)

          train_dataset = tf.data.Dataset.from_tensor_slices(( dict(train_encodings), y_train ))
          val_dataset = tf.data.Dataset.from_tensor_slices(( dict(val_encodings), y_val ))
          test_dataset = tf.data.Dataset.from_tensor_slices(( dict(test_encodings), y_test ))

          trainer = TFTrainer(
              model=model,                         # the instantiated 🤗 Transformers model to be trained
              args=training_args,                  # training arguments, defined above
              train_dataset=train_dataset,         # training dataset
              eval_dataset=val_dataset             # evaluation dataset
          )

          trainer.train()
          predictions = trainer.predict(test_dataset)

      y_pred = np.argmax(predictions.predictions, axis=1)
      # classification_report takes (y_true, y_pred). With the arguments swapped,
      # precision and recall trade places and "support" counts predictions
      # instead of true labels.
      report = classification_report(y_test, y_pred, output_dict=True)
      print(report)
      model_results = { 'report':report, 'y_pred':y_pred, 'y_true':y_test , 'fold_nr':fold_nr, 'model':BERT_MODEL['name'], 'text_type':text_type }
      all_results[text_type].append(model_results)

      # The trainer was given the model (model=model above), so both names
      # must be dropped before the memory can be reclaimed for the next model.
      del trainer, model
      tf.keras.backend.clear_session()
      gc.collect()

# NOTE(review): this writes in_domain_results_v2.pkl, while the analysis cell
# reads in_domain_results.pkl - confirm which file is the intended one.
with open('/content/drive/My Drive/psych/klamanie/experiments/in_domain_results_v2.pkl','wb') as fw:
  pickle.dump(all_results, fw)


In [None]:
from sklearn.metrics import classification_report

# http://rasbt.github.io/mlxtend/user_guide/evaluate/cochrans_q/
from mlxtend.evaluate import cochrans_q

import numpy as np

# NOTE(review): the training cell saves in_domain_results_v2.pkl, but this cell
# reads in_domain_results.pkl - confirm which file should be analysed.
with open('/content/drive/My Drive/psych/klamanie/experiments/in_domain_results.pkl','rb') as fr:
  all_results = pickle.load(fr)

# Each entry of all_results[text_type] is a dict of the form
#   { 'report':..., 'y_pred':..., 'y_true':..., 'fold_nr':..., 'model':..., 'text_type':... }
# The per-fold 'report' is not used here: the reports below are recomputed from
# the pooled y_true / y_pred of all folds.

for text_type in ['p', 't']:
  print(f"text type: {text_type}")
  y_pred = {}
  y_true = {}

  # Pool the per-fold predictions and gold labels of every model.
  for result_dict in all_results[text_type]:
    y_pred.setdefault(result_dict['model'], []).extend( result_dict['y_pred'].astype(int) )
    y_true.setdefault(result_dict['model'], []).extend( result_dict['y_true'].astype(int) )

  # `model_name` (not `model`) so the loop does not overwrite a model object
  # that other cells keep in the global name `model`.
  for model_name in y_pred.keys():
    print(model_name)
    print(classification_report(y_true[model_name], y_pred[model_name]))

  # Cochran's Q test over the three models' predictions; the gold labels are
  # taken from the 'bert-base-uncased' records.
  q, p_value = cochrans_q(np.array(y_true['bert-base-uncased']),
                          np.array(y_pred['dkleczek/bert-base-polish-uncased-v1']),
                          np.array(y_pred['allegro/herbert-base-cased']),
                          np.array(y_pred['bert-base-uncased']) )

  print('Q: %.3f' % q)
  print('p-value: %.3f' % p_value)


text type: p
dkleczek/bert-base-polish-uncased-v1
              precision    recall  f1-score   support

           0       0.61      0.66      0.63       378
           1       0.63      0.58      0.61       379

    accuracy                           0.62       757
   macro avg       0.62      0.62      0.62       757
weighted avg       0.62      0.62      0.62       757

allegro/herbert-base-cased
              precision    recall  f1-score   support

           0       0.75      0.53      0.63       378
           1       0.64      0.83      0.72       379

    accuracy                           0.68       757
   macro avg       0.70      0.68      0.67       757
weighted avg       0.70      0.68      0.67       757

bert-base-uncased
              precision    recall  f1-score   support

           0       0.54      0.28      0.37       378
           1       0.51      0.76      0.61       379

    accuracy                           0.52       757
   macro avg       0.53      0.52



---



All utterances mixed (typed and transcribed combined):



In [None]:
# Number of typed ('p') utterances, then number of typed labels; the two
# printed values are expected to be equal.
for per_type in (all_texts, all_labels):
  print( len(per_type['p']) )

2217
1487


In [None]:
import numpy as np
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import classification_report
from transformers import TFTrainer, TFTrainingArguments
import transformers
import gc

transformers.logging.set_verbosity_error()

kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Reload the dataset from disk so this cell starts from unmodified data.
with open('/content/drive/My Drive/psych/klamanie/dane/all_texts.pkl', 'rb') as fr:
  all_texts = pickle.load(fr)
with open('/content/drive/My Drive/psych/klamanie/dane/all_labels.pkl', 'rb') as fr:
  all_labels = pickle.load(fr)

# Concatenate typed ('p') and transcribed ('t') utterances into NEW lists.
# Calling .extend() on all_texts['p'] / all_labels['p'] would modify those
# lists in place and leave the 'p' entries polluted with 't' samples for every
# later cell that reads them.
texts = np.array( all_texts['p'] + all_texts['t'] )
print(len(texts))

labels = np.array( all_labels['p'] + all_labels['t'] )
print(len(labels))


# One record per (fold, model).
all_results = []

for fold_nr, (train_index, test_index) in enumerate(kf.split(texts)):

  print(f'Fold_nr= {fold_nr}')

  X_train_val, X_test = texts[train_index], texts[test_index]
  y_train_val, y_test = labels[train_index], labels[test_index]

  # Split train_val into train and validation. The split is seeded and does
  # not depend on the model, so it is computed once per fold.
  train_texts, val_texts, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1, random_state=42)
  test_texts = X_test

  #############################################

  #BERT_MODEL="allegro/herbert-base-cased", "allegro/herbert-large-cased", "dkleczek/bert-base-polish-uncased-v1","bert-base-uncased"
  for BERT_MODEL in [
                      #{'name':"dkleczek/bert-base-polish-uncased-v1", 'lowcase':True, 'batch':1},
                      {'name':"allegro/herbert-base-cased", 'lowcase':False, 'batch':4}
                      #{'name':"bert-base-uncased", 'lowcase':True, 'batch':2}
                      ]:

    print(f"   training {BERT_MODEL['name']}")

    training_args = TFTrainingArguments(
        output_dir='./results-'+BERT_MODEL['name'].replace('/','-')+'-'+str(fold_nr),          # output directory
        overwrite_output_dir=True,
        num_train_epochs=10,              # total number of training epochs
        per_device_train_batch_size=BERT_MODEL['batch'],  # batch size per device during training
        per_device_eval_batch_size=BERT_MODEL['batch'],   # batch size for evaluation
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        logging_dir='./logs',            # directory for storing logs
        logging_steps=10,
        evaluation_strategy="epoch",     # Evaluation is done at the end of each epoch, alternatives: "steps", "epoch"
        save_strategy="epoch",           # Save is done at the end of each epoch, alternatives: "steps", "epoch"
        # save_steps=500                 # Number of updates steps before two checkpoint saves if save_strategy="steps"
        eval_steps = 1,
        save_total_limit = 5,
        load_best_model_at_end=True,     # Whether or not to load the best model found during training at the end of training.
    )

    with training_args.strategy.scope():
        model, tokenizer = get_BERT_model(BERT_MODEL['name'], lower_case=BERT_MODEL['lowcase'])

        train_encodings = tokenizer(train_texts.tolist() , padding=True, truncation=True)
        val_encodings = tokenizer(val_texts.tolist() , padding=True, truncation=True)
        test_encodings = tokenizer(test_texts.tolist() ,padding=True, truncation=True)

        train_dataset = tf.data.Dataset.from_tensor_slices(( dict(train_encodings), y_train ))
        val_dataset = tf.data.Dataset.from_tensor_slices(( dict(val_encodings), y_val ))
        test_dataset = tf.data.Dataset.from_tensor_slices(( dict(test_encodings), y_test ))

        trainer = TFTrainer(
            model=model,                         # the instantiated 🤗 Transformers model to be trained
            args=training_args,                  # training arguments, defined above
            train_dataset=train_dataset,         # training dataset
            eval_dataset=val_dataset            # evaluation dataset
        )

        trainer.train()
        predictions = trainer.predict(test_dataset)

    y_pred = np.argmax(predictions.predictions, axis=1)
    # classification_report takes (y_true, y_pred). With the arguments swapped,
    # precision and recall trade places and "support" counts predictions
    # instead of true labels.
    report = classification_report(y_test, y_pred, output_dict=True)
    print(report)
    model_results = { 'report':report, 'y_pred':y_pred, 'y_true':y_test , 'fold_nr':fold_nr, 'model':BERT_MODEL['name']  }
    all_results.append(model_results)

    # The trainer was given the model (model=model above), so both names must
    # be dropped before the memory can be reclaimed for the next model.
    del trainer, model
    tf.keras.backend.clear_session()
    gc.collect()

with open('/content/drive/My Drive/psych/klamanie/experiments/mixed_results.pkl','wb') as fw:
  pickle.dump(all_results, fw)


1487
1487
Fold_nr= 0
   training dkleczek/bert-base-polish-uncased-v1


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 1.0, 'recall': 0.4899328859060403, 'f1-score': 0.6576576576576576, 'support': 149}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'accuracy': 0.4899328859060403, 'macro avg': {'precision': 0.5, 'recall': 0.24496644295302014, 'f1-score': 0.3288288288288288, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4899328859060403, 'f1-score': 0.6576576576576576, 'support': 149}}
   training allegro/herbert-base-cased


Downloading:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/624M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/886k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/543k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/129 [00:00<?, ?B/s]

  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 1.0, 'recall': 0.4899328859060403, 'f1-score': 0.6576576576576576, 'support': 149}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'accuracy': 0.4899328859060403, 'macro avg': {'precision': 0.5, 'recall': 0.24496644295302014, 'f1-score': 0.3288288288288288, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4899328859060403, 'f1-score': 0.6576576576576576, 'support': 149}}
   training bert-base-uncased


Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 1.0, 'recall': 0.4899328859060403, 'f1-score': 0.6576576576576576, 'support': 149}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'accuracy': 0.4899328859060403, 'macro avg': {'precision': 0.5, 'recall': 0.24496644295302014, 'f1-score': 0.3288288288288288, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4899328859060403, 'f1-score': 0.6576576576576576, 'support': 149}}
Fold_nr= 1
   training dkleczek/bert-base-polish-uncased-v1


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.4429530201342282, 'f1-score': 0.6139534883720931, 'support': 149}, 'accuracy': 0.4429530201342282, 'macro avg': {'precision': 0.5, 'recall': 0.2214765100671141, 'f1-score': 0.30697674418604654, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4429530201342282, 'f1-score': 0.6139534883720931, 'support': 149}}
   training allegro/herbert-base-cased


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.4429530201342282, 'f1-score': 0.6139534883720931, 'support': 149}, 'accuracy': 0.4429530201342282, 'macro avg': {'precision': 0.5, 'recall': 0.2214765100671141, 'f1-score': 0.30697674418604654, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4429530201342282, 'f1-score': 0.6139534883720931, 'support': 149}}
   training bert-base-uncased


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.4429530201342282, 'f1-score': 0.6139534883720931, 'support': 149}, 'accuracy': 0.4429530201342282, 'macro avg': {'precision': 0.5, 'recall': 0.2214765100671141, 'f1-score': 0.30697674418604654, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4429530201342282, 'f1-score': 0.6139534883720931, 'support': 149}}
Fold_nr= 2
   training dkleczek/bert-base-polish-uncased-v1


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 1.0, 'recall': 0.48322147651006714, 'f1-score': 0.6515837104072398, 'support': 149}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'accuracy': 0.48322147651006714, 'macro avg': {'precision': 0.5, 'recall': 0.24161073825503357, 'f1-score': 0.3257918552036199, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.48322147651006714, 'f1-score': 0.6515837104072398, 'support': 149}}
   training allegro/herbert-base-cased


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.5167785234899329, 'f1-score': 0.6814159292035399, 'support': 149}, 'accuracy': 0.5167785234899329, 'macro avg': {'precision': 0.5, 'recall': 0.25838926174496646, 'f1-score': 0.34070796460176994, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.5167785234899329, 'f1-score': 0.6814159292035399, 'support': 149}}
   training bert-base-uncased


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.5167785234899329, 'f1-score': 0.6814159292035399, 'support': 149}, 'accuracy': 0.5167785234899329, 'macro avg': {'precision': 0.5, 'recall': 0.25838926174496646, 'f1-score': 0.34070796460176994, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.5167785234899329, 'f1-score': 0.6814159292035399, 'support': 149}}
Fold_nr= 3
   training dkleczek/bert-base-polish-uncased-v1


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 1.0, 'recall': 0.5838926174496645, 'f1-score': 0.7372881355932204, 'support': 149}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'accuracy': 0.5838926174496645, 'macro avg': {'precision': 0.5, 'recall': 0.29194630872483224, 'f1-score': 0.3686440677966102, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.5838926174496645, 'f1-score': 0.7372881355932204, 'support': 149}}
   training allegro/herbert-base-cased


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.4161073825503356, 'f1-score': 0.5876777251184834, 'support': 149}, 'accuracy': 0.4161073825503356, 'macro avg': {'precision': 0.5, 'recall': 0.2080536912751678, 'f1-score': 0.2938388625592417, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4161073825503356, 'f1-score': 0.5876777251184834, 'support': 149}}
   training bert-base-uncased


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.4161073825503356, 'f1-score': 0.5876777251184834, 'support': 149}, 'accuracy': 0.4161073825503356, 'macro avg': {'precision': 0.5, 'recall': 0.2080536912751678, 'f1-score': 0.2938388625592417, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.4161073825503356, 'f1-score': 0.5876777251184834, 'support': 149}}
Fold_nr= 4
   training dkleczek/bert-base-polish-uncased-v1


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, '1': {'precision': 1.0, 'recall': 0.5033557046979866, 'f1-score': 0.6696428571428572, 'support': 149}, 'accuracy': 0.5033557046979866, 'macro avg': {'precision': 0.5, 'recall': 0.2516778523489933, 'f1-score': 0.3348214285714286, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.5033557046979866, 'f1-score': 0.6696428571428572, 'support': 149}}
   training allegro/herbert-base-cased


  return dispatch_target(*args, **kwargs)


{'0': {'precision': 0.6216216216216216, 'recall': 0.71875, 'f1-score': 0.6666666666666667, 'support': 64}, '1': {'precision': 0.76, 'recall': 0.6705882352941176, 'f1-score': 0.7124999999999999, 'support': 85}, 'accuracy': 0.6912751677852349, 'macro avg': {'precision': 0.6908108108108109, 'recall': 0.6946691176470587, 'f1-score': 0.6895833333333333, 'support': 149}, 'weighted avg': {'precision': 0.7005623072737166, 'recall': 0.6912751677852349, 'f1-score': 0.6928131991051454, 'support': 149}}
   training bert-base-uncased


  return dispatch_target(*args, **kwargs)


{'0': {'precision': 0.25675675675675674, 'recall': 0.7307692307692307, 'f1-score': 0.38, 'support': 26}, '1': {'precision': 0.9066666666666666, 'recall': 0.5528455284552846, 'f1-score': 0.6868686868686869, 'support': 123}, 'accuracy': 0.5838926174496645, 'macro avg': {'precision': 0.5817117117117117, 'recall': 0.6418073796122576, 'f1-score': 0.5334343434343434, 'support': 149}, 'weighted avg': {'precision': 0.7932595682931253, 'recall': 0.5838926174496645, 'f1-score': 0.6333211307707952, 'support': 149}}
Fold_nr= 5
   training dkleczek/bert-base-polish-uncased-v1


  return dispatch_target(*args, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'0': {'precision': 1.0, 'recall': 0.46308724832214765, 'f1-score': 0.6330275229357799, 'support': 149}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0}, 'accuracy': 0.46308724832214765, 'macro avg': {'precision': 0.5, 'recall': 0.23154362416107382, 'f1-score': 0.31651376146788995, 'support': 149}, 'weighted avg': {'precision': 1.0, 'recall': 0.46308724832214765, 'f1-score': 0.6330275229357799, 'support': 149}}
   training allegro/herbert-base-cased


  return dispatch_target(*args, **kwargs)


{'0': {'precision': 0.5362318840579711, 'recall': 0.578125, 'f1-score': 0.556390977443609, 'support': 64}, '1': {'precision': 0.6625, 'recall': 0.6235294117647059, 'f1-score': 0.6424242424242423, 'support': 85}, 'accuracy': 0.6040268456375839, 'macro avg': {'precision': 0.5993659420289855, 'recall': 0.6008272058823529, 'f1-score': 0.5994076099339256, 'support': 149}, 'weighted avg': {'precision': 0.6082640307363097, 'recall': 0.6040268456375839, 'f1-score': 0.6054703567949771, 'support': 149}}
   training bert-base-uncased


  return dispatch_target(*args, **kwargs)


ResourceExhaustedError: ignored



---

INTERPRETABILITY

In [None]:
!pip install transformers_interpret



In [None]:
import torch

class KlamanieDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    ``encodings`` is a mapping from field name to a per-sample sequence and
    ``labels`` is a sequence with one entry per sample.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The dataset size is defined by the number of labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Build one sample: every encoding field at position `idx` as a tensor,
        # plus the label under the key 'labels'.
        sample = {}
        for field_name, column in self.encodings.items():
            sample[field_name] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:
from transformers import BertForSequenceClassification, AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from transformers_interpret import SequenceClassificationExplainer
from sklearn.model_selection import train_test_split

# Fine-tune a PyTorch HerBERT classifier on one utterance type so that it can
# be inspected with the explainer afterwards.
model_name = "allegro/herbert-base-cased"
text_type = 'p'

config = BertConfig.from_pretrained(model_name, num_labels=2)
model = BertForSequenceClassification.from_pretrained(model_name, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=False)

# Hold out 10% of the selected utterances for validation (seeded split).
train_texts, val_texts, y_train, y_val = train_test_split(
    all_texts[text_type],
    all_labels[text_type],
    test_size=0.1,
    random_state=42,
)

# Tokenise both parts and wrap them as torch datasets.
train_encodings = tokenizer(train_texts, padding=True, truncation=True)
val_encodings = tokenizer(val_texts, padding=True, truncation=True)

train_dataset = KlamanieDataset(train_encodings, y_train)
val_dataset = KlamanieDataset(val_encodings, y_val)

# Trainer settings, gathered in one place.
trainer_settings = dict(
    output_dir='./results-temp',       # where checkpoints are written
    overwrite_output_dir=True,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,                  # learning-rate warmup steps
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",       # evaluate once per epoch
    save_strategy="epoch",             # checkpoint once per epoch
    eval_steps=1,
    save_total_limit=5,
    load_best_model_at_end=True,       # restore the best checkpoint when training ends
)
training_args = TrainingArguments(**trainer_settings)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()

loading configuration file https://huggingface.co/allegro/herbert-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/d24c58747dbe6b61ed3e1eb5d488dfec9332ed13dd3f8983588f30d96f6f1bde.193ae07fbea6bb9ac46f854cd03094e486dfa4483e0596fd6a159dcfaef521a5
Model config BertConfig {
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 514,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "tokenizer_class": "HerbertTokenizerFast",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 50000
}

loading weights file https://huggingface.co/allegro/herbert-base-cased/resolve/main/pytorch

Epoch,Training Loss,Validation Loss
1,0.706,0.701333
2,0.6067,0.656613
3,0.6753,0.79208
4,0.4892,0.998677
5,0.3595,1.623061
6,0.2387,1.592891
7,0.0002,1.735892
8,0.0002,2.054321
9,0.0001,2.086537
10,0.0002,2.077995


***** Running Evaluation *****
  Num examples = 76
  Batch size = 4
Saving model checkpoint to ./results-temp/checkpoint-171
Configuration saved in ./results-temp/checkpoint-171/config.json
Model weights saved in ./results-temp/checkpoint-171/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 76
  Batch size = 4
Saving model checkpoint to ./results-temp/checkpoint-342
Configuration saved in ./results-temp/checkpoint-342/config.json
Model weights saved in ./results-temp/checkpoint-342/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 76
  Batch size = 4
Saving model checkpoint to ./results-temp/checkpoint-513
Configuration saved in ./results-temp/checkpoint-513/config.json
Model weights saved in ./results-temp/checkpoint-513/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 76
  Batch size = 4
Saving model checkpoint to ./results-temp/checkpoint-684
Configuration saved in ./results-temp/checkpoint-684/config.json
Model weights saved in ./re

In [None]:
# NOTE(review): `word_attributions` is not assigned anywhere above this cell;
# the only visible assignment is in the explainer cell further down, so this
# cell fails on a fresh top-to-bottom run.
print(word_attributions)

[('<s>', 0.0), ('Jestem</w>', -0.3234043501618738), ('Robert</w>', 0.6695397126524557), ('Lewandowski</w>', -0.6686749580159581), ('</s>', 0.0)]


In [None]:
# Peek at the first ten validation labels.
y_val[:10]

[0, 1, 1, 0, 1, 0, 1, 1, 0, 1]

In [None]:
import gc

# Release the explainer left over from a previous run of this cell, if any.
# Rebinding the name works whether or not it exists yet; a bare
# `del cls_explainer` raises NameError the first time the cell is executed.
cls_explainer = None
gc.collect()
# Empty the CUDA cache only after the old reference has been dropped and
# collected, otherwise the memory it held cannot be released.
torch.cuda.empty_cache()

# Index (within val_texts) of the utterance to explain.
val_text_id = 10

cls_explainer = SequenceClassificationExplainer(model, tokenizer)
word_attributions = cls_explainer(val_texts[val_text_id])
print(word_attributions)
# Write the attribution visualisation to an HTML file on Drive.
cls_explainer.visualize("/content/drive/My Drive/psych/klamanie/experiments/val_texts_"+str(val_text_id)+".html")

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.57),LABEL_0,3.74,#s Bo g to wymy sl . Nie mozna wierzy c w cos czego nikt nigdy nie zoba czy l w rzeczywi stos ci i na mac alnie . To wymy sl ludzi ze by zy lo sie la t wiej i bezpieczniej . is nieje tylko to co widzimy . wszystko stwo rzy l cz l owie k i ewolu cja . Nor mal na fizyka podwa za istnienie takich duch owych isto t ponie wa z nie da sie tego z bada c wiec dowodu nie mamy . a re lig ie sa po to ze by manipul owac poste powa niem ludzi przez m owie nie ze istnieje bo g i ze strachu przed s mier cia i kara po s mier ci . Dla mnie temat boga jest fal szy wa manipu lacja ludzi i nigdy go sama nie do s wiad czy lam wzro kowo . Dla mnie te wszystkie znaki i symbole sa wymy s lem umy s lu cz l owie ka . tak samo jak nie istnieje bo g nie istnieje dia bel . istnieje tylko to co widzimy . to proste #/s
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.57),LABEL_0,3.74,#s Bo g to wymy sl . Nie mozna wierzy c w cos czego nikt nigdy nie zoba czy l w rzeczywi stos ci i na mac alnie . To wymy sl ludzi ze by zy lo sie la t wiej i bezpieczniej . is nieje tylko to co widzimy . wszystko stwo rzy l cz l owie k i ewolu cja . Nor mal na fizyka podwa za istnienie takich duch owych isto t ponie wa z nie da sie tego z bada c wiec dowodu nie mamy . a re lig ie sa po to ze by manipul owac poste powa niem ludzi przez m owie nie ze istnieje bo g i ze strachu przed s mier cia i kara po s mier ci . Dla mnie temat boga jest fal szy wa manipu lacja ludzi i nigdy go sama nie do s wiad czy lam wzro kowo . Dla mnie te wszystkie znaki i symbole sa wymy s lem umy s lu cz l owie ka . tak samo jak nie istnieje bo g nie istnieje dia bel . istnieje tylko to co widzimy . to proste #/s
,,,,
