# Classification of Pro-vax vs Anti-vax vs Neutral Tweets using BERT

In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from sklearn.metrics import multilabel_confusion_matrix

# Locations of the input CSV files and of the written results, relative to
# the notebook's working directory.
datadir = "../data/"
resultdir = "../results/"

# Number of cross-validation folds.
numFolds = 10

In [2]:
from torch import cuda
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [3]:
def classList(classval):
    """Return the one-hot target list for a class label.

    ``1`` maps to ``[1, 0, 0]``, ``-1`` maps to ``[0, 1, 0]`` and every
    other value maps to ``[0, 0, 1]``. A new list is built on each call.
    """
    if classval == 1:
        return [1, 0, 0]
    if classval == -1:
        return [0, 1, 0]
    return [0, 0, 1]

# Load the augmentation tweets; the text lives in column 'X' and is exposed
# under the name 'tweet' before the unused columns are dropped.
augdata = pd.read_csv(
    datadir + "pro_anti_augmented.csv",
    encoding='utf-8',
    header='infer',
)
print(augdata.shape)
augdata['tweet'] = augdata['X']
augdata = augdata.drop(columns=['screen_name', 'TweetID', 'X'])

# Report absolute and relative class frequencies.
print('Loading augmented data')
print('Pro-vs-anti class distribution:')
distrib = augdata['class'].value_counts()
print(distrib)
probs = distrib / sum(distrib)
print(probs)

# Attach one-hot targets and a row identifier copied from the index.
augdata['list'] = augdata['class'].apply(classList)
augdata['rowid'] = augdata.index
augdata

(3038, 4)
Loading augmented data
Pro-vs-anti class distribution:
 0    1572
 1     982
-1     484
Name: class, dtype: int64
 0    0.517446
 1    0.323239
-1    0.159315
Name: class, dtype: float64


Unnamed: 0,class,tweet,list,rowid
0,1,I’m no doctor — I just won’t be putting a “flu...,"[1, 0, 0]",0
1,1,@redditships My dad has cancer and is immunoco...,"[1, 0, 0]",1
2,1,I see the Flu Shot peddlers are out in full fo...,"[1, 0, 0]",2
3,1,I’ve spent 3 hours of my life trying to get a ...,"[1, 0, 0]",3
4,1,Just got the flu shot. Do you want to hear the...,"[1, 0, 0]",4
...,...,...,...,...
3033,0,@SianGriffiths6 @TheClash709 @thesundaytimes @...,"[0, 0, 1]",3033
3034,0,How much would you pay to stay alive? How much...,"[0, 0, 1]",3034
3035,0,Imagine if every prescription drug ever produc...,"[0, 0, 1]",3035
3036,0,"@JoeKoffee Cops (esp corrupt ones!), insurance...","[0, 0, 1]",3036


In [4]:
# Load the labelled tweets and discard the two columns not used here.
df = pd.read_csv(datadir + "vax.csv").drop(columns=['vax_class', 'vaccine'])

# Pack every column from position 3 onwards into a per-row list
# (presumably the one-hot class columns -- verify against vax.csv).
target_columns = df.columns[3:]
df['list'] = df[target_columns].values.tolist()

# Working frame: text, integer class, target list, plus a row identifier.
new_df = df[['tweet', 'class', 'list']].copy()
new_df['rowid'] = new_df.index
new_df

Unnamed: 0,tweet,class,list,rowid
0,@DickDugan @maddow They make much more selling...,-1,"[0, 1, 0]",0
1,@theheraldsun How does an unvaccinated person ...,-1,"[0, 1, 0]",1
2,@cameronjowens @leighsales We have much higher...,1,"[1, 0, 0]",2
3,Officials I trust fear there is not enough int...,1,"[1, 0, 0]",3
4,"My daughter 30 years old, care worker. Double ...",-1,"[0, 1, 0]",4
...,...,...,...,...
4837,M?ori tribe tells anti-Covid vaccine protester...,0,"[0, 0, 1]",4837
4838,Anti-vaccine protesters display Nazi symbols o...,0,"[0, 0, 1]",4838
4839,@Joc_face @TravisR96776163 @ElijahSchaffer If ...,1,"[1, 0, 0]",4839
4840,"@ponderousthings @1NewsNZ @jordyn_rudd ""The SA...",0,"[0, 0, 1]",4840


In [5]:
# Hyper-parameters and the tokenizer used by the training cells below.
MAX_LEN = 150           # max_length handed to the tokenizer for each tweet
TRAIN_BATCH_SIZE = 8    # batch size of the training DataLoader
VALID_BATCH_SIZE = 4    # batch size of the evaluation DataLoader
EPOCHS = 15             # passes over the training data per model
LEARNING_RATE = 1e-5    # learning rate given to the optimizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [6]:
class CustomDataset(Dataset):
    """Dataset that tokenizes one tweet per item for the BERT model.

    ``dataframe`` must provide the columns ``tweet`` (text), ``list``
    (numeric target list) and ``rowid``. ``tokenizer`` must offer an
    ``encode_plus`` method returning ``input_ids``, ``attention_mask`` and
    ``token_type_ids``. Every encoded item is padded and truncated to
    exactly ``max_len`` tokens.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.comment_text = dataframe['tweet']
        self.targets = dataframe['list']
        self.max_len = max_len

    def __len__(self):
        return len(self.comment_text)

    def __getitem__(self, index):
        """Return the encoded tweet, its targets and its row id at position ``index``."""
        # DataLoader samplers hand out positions 0..len-1, so rows are read
        # positionally; this also works when the frame's index labels are
        # not a fresh 0..n-1 range.
        comment_text = str(self.comment_text.iloc[index])
        # Collapse every run of whitespace (including newlines) to one space.
        comment_text = " ".join(comment_text.split())

        inputs = self.tokenizer.encode_plus(
            comment_text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            # Explicit padding and truncation keep every item at max_len so
            # items can be stacked into a batch; this replaces the deprecated
            # pad_to_max_length flag and the implicit truncation fallback.
            padding='max_length',
            truncation=True,
            return_token_type_ids=True,
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'rowid': self.data['rowid'].iloc[index],
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float),
        }

In [7]:
# Creating the customized model by adding a dropout and a dense layer on top of BERT (bert-base-uncased) to get the final output of the model.

class BERTClass(torch.nn.Module):
    """Pretrained ``bert-base-uncased`` encoder with a dropout + linear head.

    The head maps a 768-wide vector to 3 outputs, which are returned as-is
    (no activation is applied in ``forward``).
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept short and stable: they become the keys of
        # the module's state and of any pickled model.
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 3)

    def forward(self, ids, mask, token_type_ids):
        """Return the 3 raw head outputs for a batch of encoded tweets."""
        # With return_dict=False the encoder returns a tuple; only its second
        # element (presumably BERT's pooled output -- confirm against the
        # transformers version in use) feeds the head.
        _first_output, second_output = self.l1(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
            return_dict=False,
        )
        dropped = self.l2(second_output)
        return self.l3(dropped)


In [8]:
def loss_fn(outputs, targets):
    """Return the binary cross-entropy-with-logits loss of ``outputs`` against ``targets``.

    ``outputs`` are raw (pre-sigmoid) scores; ``targets`` are float tensors
    of the same shape.
    """
    criterion = torch.nn.BCEWithLogitsLoss()
    loss = criterion(outputs, targets)
    return loss

In [9]:
def train(model, optimizer, epoch, data_loader):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: module called as ``model(ids, mask, token_type_ids)``.
        optimizer: optimizer holding the model's parameters.
        epoch: epoch number, used only in the progress message.
        data_loader: iterable of batches with the keys ``ids``, ``mask``,
            ``token_type_ids`` and ``targets``.

    Tensors are moved to the module-level ``device`` and the loss comes from
    the module-level ``loss_fn``. Nothing is returned.
    """
    model.train()
    for step, batch in enumerate(data_loader):
        ids = batch['ids'].to(device, dtype=torch.long)
        mask = batch['mask'].to(device, dtype=torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
        targets = batch['targets'].to(device, dtype=torch.float)

        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, targets)

        # Progress message on step 0 and then every 5000th step.
        if step % 5000 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        # Clear the previous step's gradients once, right before the
        # backward pass that repopulates them.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [10]:
def data_augment(data, augdata, aug_prop=(0, 0, 0), aug_type=1):
    """Extend a training frame with extra and/or synthetic tweets.

    Args:
        data: DataFrame with at least the columns ``tweet`` and ``class``.
        augdata: DataFrame of extra rows; appended to ``data`` only when
            ``aug_type > 0``.
        aug_prop: number of synthetic rows to create per class, in the
            order of the classes ``-1``, ``0``, ``1``.
        aug_type: any value greater than 0 appends ``augdata`` first.

    Returns:
        The extended DataFrame. ``data`` itself is returned unchanged when
        nothing is appended. Synthetic rows carry only ``tweet`` and
        ``class``; every other column is left missing for them.

    Raises:
        ValueError: if synthetic rows are requested for a class that has no
            rows to draw from.

    Each synthetic tweet is the space-joined text of two rows of the same
    class drawn with ``np.random.randint`` (with replacement, so a row may
    be paired with itself). Synthetic rows are never used as sources.
    """
    classes = [-1, 0, 1]
    if aug_type > 0:
        data = pd.concat([data, augdata], ignore_index=True)

    # Snapshot text and labels as arrays: np.where yields positions, so the
    # source rows are read positionally rather than by index label.
    tweets = data['tweet'].to_numpy()
    labels = data['class'].to_numpy()

    new_tweets = []
    new_classes = []
    for i, label in enumerate(classes):
        count = aug_prop[i]
        if count <= 0:
            continue
        positions = np.where(labels == label)[0]
        if positions.size == 0:
            raise ValueError(
                f'cannot synthesize {count} tweets for class {label}: no rows of that class'
            )
        for _ in range(count):
            first = np.random.randint(positions.size)
            second = np.random.randint(positions.size)
            new_tweets.append(tweets[positions[first]] + ' ' + tweets[positions[second]])
            new_classes.append(label)

    # Append all synthetic rows in one concat instead of one per row.
    if new_tweets:
        synthetic = pd.DataFrame({'tweet': new_tweets, 'class': new_classes})
        data = pd.concat([data, synthetic], ignore_index=True)

    return data

In [11]:
def validation(model, data_loader):
    """Evaluate ``model`` on every batch of ``data_loader`` without gradients.

    Returns three parallel lists, in this order: the ``rowid`` values of the
    evaluated items, the sigmoid of the model outputs, and the targets.
    Tensors are moved to the module-level ``device`` before the forward pass.
    """
    def as_list(tensor):
        # Bring a tensor back to the host as plain Python lists.
        return tensor.cpu().detach().numpy().tolist()

    model.eval()
    row_ids, probabilities, truths = [], [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['ids'].to(device, dtype=torch.long)
            attention_mask = batch['mask'].to(device, dtype=torch.long)
            segment_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            targets = batch['targets'].to(device, dtype=torch.float)

            logits = model(input_ids, attention_mask, segment_ids)

            row_ids += as_list(batch['rowid'])
            truths += as_list(targets)
            probabilities += as_list(torch.sigmoid(logits))
    return row_ids, probabilities, truths

In [12]:
from sklearn.model_selection import KFold
import pickle

# Cross-validation settings, restated here next to the loop that uses them.
numFolds = 10
EPOCHS = 15

# Shuffled K-fold splitter with a fixed seed for the splits.
kf = KFold(n_splits=numFolds, shuffle=True, random_state=1234)

# DataLoader keyword arguments: training batches are reshuffled on each pass,
# evaluation batches keep the dataset order.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
test_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)

# Index arrays of every fold, collected so they can be pickled afterwards.
train_split, test_split = [], []

# Cross-validation driver: for every fold, build the train/test frames,
# train a fresh BERTClass model for EPOCHS epochs, evaluate it on the held-out
# fold, accumulate the per-row predictions in `result`, print fold metrics and
# pickle the trained model.
i = 0
for train_index, test_index in kf.split(new_df):
    i = i + 1
    print('\nFold', i)
    # Remember the positional indices of this fold for later pickling.
    train_split.append(train_index)
    test_split.append(test_index)
    # reset_index() gives each subset a fresh 0..n-1 index, which CustomDataset
    # relies on for its label-based row lookups; the previous index is kept as
    # an extra 'index' column.
    train_dataset = new_df.iloc[train_index].copy().reset_index()
    test_dataset =  new_df.iloc[test_index].copy().reset_index()
    # aug_type=0: augdata is NOT appended. aug_prop follows data_augment's class
    # order (-1, 0, 1): 500 synthetic tweets for class -1, 200 for class 0 and
    # none for class 1. Only the training frame is augmented.
    train_dataset = data_augment(train_dataset, augdata, aug_prop=[500,200,0], aug_type=0)
    # Recompute the one-hot targets so the synthetic rows (created with only
    # 'tweet' and 'class') get one as well.
    train_dataset['list'] = train_dataset['class'].apply(classList)
    
    print('Training class distribution:')
    distrib = train_dataset['class'].value_counts()
    print(distrib)
    probs = distrib/sum(distrib)
    print(probs)

    print('Test class distribution:')
    distrib = test_dataset['class'].value_counts()
    print(distrib)
    probs = distrib/sum(distrib)
    print(probs)

    # The integer label is no longer needed; the 'list' column carries the targets.
    train_dataset = train_dataset.drop(columns=['class'])
    test_dataset = test_dataset.drop(columns=['class'])

    print("FULL Dataset: {}".format(new_df.shape))
    print("TRAIN Dataset: {}".format(train_dataset.shape))
    print("TEST Dataset: {}".format(test_dataset.shape))

    training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
    testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)    
    training_loader = DataLoader(training_set, **train_params)
    testing_loader = DataLoader(testing_set, **test_params)
   
    # A new model and optimizer per fold, so no weights carry over between folds.
    model = BERTClass()
    model.to(device)
    optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train(model, optimizer, epoch, training_loader)
        
    # Evaluate once, after the last epoch, on the held-out fold.
    dlist, outputs, targets = validation(model, testing_loader)

    # Per-row record: rowid, position of the 1 in the one-hot target ('truth')
    # and position of the largest sigmoid output ('predictions').
    pred = pd.DataFrame(dlist, columns=['rowid'])
    temp = pd.DataFrame(targets)
    pred['truth'] = temp.idxmax(axis=1)
    temp = pd.DataFrame(outputs)
    pred['predictions'] = temp.idxmax(axis=1)
    # Accumulate the predictions of all folds in `result`.
    if i == 1:
        result = pred
    else:
        result = pd.concat([result, pred], ignore_index=True)
    
    # NOTE(review): the fold metrics below threshold every sigmoid output at 0.5
    # independently, so a row can end up with zero or several predicted classes;
    # the stored 'predictions' use idxmax instead, so these numbers are not
    # directly comparable with the final accuracy -- confirm this is intended.
    outputs = np.array(outputs) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    cm = multilabel_confusion_matrix(targets, outputs)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    print(f"Confusion matrix:")
    print(cm)   

    # NOTE(review): this pickles the whole model object rather than its weights,
    # so unpickling needs the BERTClass definition to be available.
    with open(resultdir + 'model' + str(i) + '_cv','wb') as model_file:
        pickle.dump(model, model_file)

# Turn the accumulated fold predictions into the final results table and
# write it to disk.
#
# 'truth' and 'predictions' hold positions in classList's one-hot layout:
# position 0 is class 1, position 1 is class -1 and position 2 is class 0.
# One explicit mapping replaces the order-dependent sequence of in-place
# writes on positional columns.
position_to_class = {0: 1, 1: -1, 2: 0}

result = result.sort_values(by=['rowid']).set_index('rowid')
# The Series are aligned on their index (rowid for the predictions, the row
# index for new_df), which pairs every prediction with its tweet text.
result = pd.DataFrame({
    'class': result['truth'].map(position_to_class),
    'predicted': result['predictions'].map(position_to_class),
    'tweet': new_df['tweet'],
})
result.to_csv(resultdir + 'bert_results.csv', index=False)

# Write the per-fold train and test index arrays to disk with pickle.
for split_name, split_indices in (
    ('train_split_cv', train_split),
    ('test_split_cv', test_split),
):
    with open(resultdir + split_name, 'wb') as split_file:
        pickle.dump(split_indices, split_file)


Fold 1
Training class distribution:
-1    1764
 1    1694
 0    1599
Name: class, dtype: int64
-1    0.348823
 1    0.334981
 0    0.316195
Name: class, dtype: float64
Test class distribution:
 1    181
 0    166
-1    138
Name: class, dtype: int64
 1    0.373196
 0    0.342268
-1    0.284536
Name: class, dtype: float64
FULL Dataset: (4842, 4)
TRAIN Dataset: (5057, 4)
TEST Dataset: (485, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Epoch: 0, Loss:  0.7574185729026794
Epoch: 1, Loss:  0.5307643413543701
Epoch: 2, Loss:  0.37962228059768677
Epoch: 3, Loss:  0.09457385540008545
Epoch: 4, Loss:  0.023066159337759018
Epoch: 5, Loss:  0.02703840099275112
Epoch: 6, Loss:  0.0077627371065318584
Epoch: 7, Loss:  0.05159822106361389
Epoch: 8, Loss:  0.017645718529820442
Epoch: 9, Loss:  0.0029216192197054625
Epoch: 10, Loss:  0.0055831847712397575
Epoch: 11, Loss:  0.020425375550985336
Epoch: 12, Loss:  0.0016577604692429304
Epoch: 13, Loss:  0.002340245060622692
Epoch: 14, Loss:  0.0013249560724943876
Accuracy Score = 0.7917525773195876
F1 Score (Micro) = 0.7983367983367983
F1 Score (Macro) = 0.791097014179665
Confusion matrix:
[[[257  47]
  [ 24 157]]

 [[335  12]
  [ 51  87]]

 [[285  34]
  [ 26 140]]]

Fold 2
Training class distribution:
-1    1754
 1    1693
 0    1610
Name: class, dtype: int64
-1    0.346846
 1    0.334783
 0    0.318371
Name: class, dtype: float64
Test class distribution:
 1    182
 0    155
-1    1

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6897427439689636
Epoch: 1, Loss:  0.451312392950058
Epoch: 2, Loss:  0.2195434868335724
Epoch: 3, Loss:  0.2710755467414856
Epoch: 4, Loss:  0.05866151303052902
Epoch: 5, Loss:  0.01553037017583847
Epoch: 6, Loss:  0.011355718597769737
Epoch: 7, Loss:  0.005597327835857868
Epoch: 8, Loss:  0.014491323381662369
Epoch: 9, Loss:  0.003972050733864307
Epoch: 10, Loss:  0.283845454454422
Epoch: 11, Loss:  0.005987305194139481
Epoch: 12, Loss:  0.0027289590798318386
Epoch: 13, Loss:  0.001980640459805727
Epoch: 14, Loss:  0.0016065735835582018
Accuracy Score = 0.7896907216494845
F1 Score (Micro) = 0.7966804979253113
F1 Score (Macro) = 0.7928362147112148
Confusion matrix:
[[[247  56]
  [ 24 158]]

 [[323  14]
  [ 54  94]]

 [[305  25]
  [ 23 132]]]

Fold 3
Training class distribution:
-1    1765
 1    1664
 0    1629
Name: class, dtype: int64
-1    0.348952
 1    0.328984
 0    0.322064
Name: class, dtype: float64
Test class distribution:
 1    211
-1    137
 0    136
Name:

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7205302119255066
Epoch: 1, Loss:  0.5164095759391785
Epoch: 2, Loss:  0.24578754603862762
Epoch: 3, Loss:  0.046165697276592255
Epoch: 4, Loss:  0.03772345185279846
Epoch: 5, Loss:  0.01348819024860859
Epoch: 6, Loss:  0.007164060138165951
Epoch: 7, Loss:  0.3766392171382904
Epoch: 8, Loss:  0.06327370554208755
Epoch: 9, Loss:  0.003065291792154312
Epoch: 10, Loss:  0.05517382547259331
Epoch: 11, Loss:  0.003823998384177685
Epoch: 12, Loss:  0.002751957392320037
Epoch: 13, Loss:  0.003306345781311393
Epoch: 14, Loss:  0.0023069935850799084
Accuracy Score = 0.8388429752066116
F1 Score (Micro) = 0.8518134715025907
F1 Score (Macro) = 0.8478281842767824
Confusion matrix:
[[[242  31]
  [ 25 186]]

 [[324  23]
  [ 33 104]]

 [[332  16]
  [ 15 121]]]

Fold 4
Training class distribution:
-1    1762
 1    1703
 0    1593
Name: class, dtype: int64
-1    0.348359
 1    0.336694
 0    0.314947
Name: class, dtype: float64
Test class distribution:
 1    172
 0    172
-1    140
Nam

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7283346056938171
Epoch: 1, Loss:  0.6413623690605164
Epoch: 2, Loss:  0.19911178946495056
Epoch: 3, Loss:  0.1009327620267868
Epoch: 4, Loss:  0.11905509233474731
Epoch: 5, Loss:  0.014507199637591839
Epoch: 6, Loss:  0.01083921454846859
Epoch: 7, Loss:  0.00963563285768032
Epoch: 8, Loss:  0.005300753749907017
Epoch: 9, Loss:  0.0028306134045124054
Epoch: 10, Loss:  0.004030347801744938
Epoch: 11, Loss:  0.001765971421264112
Epoch: 12, Loss:  0.0016210083849728107
Epoch: 13, Loss:  0.002230075653642416
Epoch: 14, Loss:  0.0013251041527837515
Accuracy Score = 0.8223140495867769
F1 Score (Micro) = 0.8319502074688797
F1 Score (Macro) = 0.8315366050274048
Confusion matrix:
[[[279  33]
  [ 29 143]]

 [[323  21]
  [ 27 113]]

 [[287  25]
  [ 27 145]]]

Fold 5
Training class distribution:
-1    1760
 1    1691
 0    1607
Name: class, dtype: int64
-1    0.347964
 1    0.334322
 0    0.317715
Name: class, dtype: float64
Test class distribution:
 1    184
 0    158
-1    142


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7713998556137085
Epoch: 1, Loss:  0.3557954728603363
Epoch: 2, Loss:  0.17199425399303436
Epoch: 3, Loss:  0.1783718764781952
Epoch: 4, Loss:  0.01905936934053898
Epoch: 5, Loss:  0.009451339952647686
Epoch: 6, Loss:  0.01100762840360403
Epoch: 7, Loss:  0.008906076662242413
Epoch: 8, Loss:  0.0033414780627936125
Epoch: 9, Loss:  0.004345997236669064
Epoch: 10, Loss:  0.018151642754673958
Epoch: 11, Loss:  0.004111660644412041
Epoch: 12, Loss:  0.0020864687394350767
Epoch: 13, Loss:  0.09593334794044495
Epoch: 14, Loss:  0.0017705465434119105
Accuracy Score = 0.7954545454545454
F1 Score (Micro) = 0.8020833333333333
F1 Score (Macro) = 0.8006679245617297
Confusion matrix:
[[[279  21]
  [ 56 128]]

 [[308  34]
  [ 30 112]]

 [[290  36]
  [ 13 145]]]

Fold 6
Training class distribution:
-1    1772
 1    1679
 0    1607
Name: class, dtype: int64
-1    0.350336
 1    0.331949
 0    0.317715
Name: class, dtype: float64
Test class distribution:
 1    196
 0    158
-1    130


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6273123025894165
Epoch: 1, Loss:  0.5169473886489868
Epoch: 2, Loss:  0.42934808135032654
Epoch: 3, Loss:  0.2000660002231598
Epoch: 4, Loss:  0.031231652945280075
Epoch: 5, Loss:  0.019908733665943146
Epoch: 6, Loss:  0.014909572899341583
Epoch: 7, Loss:  0.006176270544528961
Epoch: 8, Loss:  0.0036442596465349197
Epoch: 9, Loss:  0.005515416618436575
Epoch: 10, Loss:  0.0050139399245381355
Epoch: 11, Loss:  0.002757232403382659
Epoch: 12, Loss:  0.0031923342030495405
Epoch: 13, Loss:  0.001811258029192686
Epoch: 14, Loss:  0.0013450270052999258
Accuracy Score = 0.7975206611570248
F1 Score (Micro) = 0.8016614745586709
F1 Score (Macro) = 0.7899651788540677
Confusion matrix:
[[[249  39]
  [ 26 170]]

 [[333  21]
  [ 47  83]]

 [[293  33]
  [ 25 133]]]

Fold 7
Training class distribution:
-1    1756
 1    1686
 0    1616
Name: class, dtype: int64
-1    0.347173
 1    0.333333
 0    0.319494
Name: class, dtype: float64
Test class distribution:
 1    189
 0    149
-1    

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6387871503829956
Epoch: 1, Loss:  0.46367138624191284
Epoch: 2, Loss:  0.1128455251455307
Epoch: 3, Loss:  0.08668786287307739
Epoch: 4, Loss:  0.017573688179254532
Epoch: 5, Loss:  0.3624812960624695
Epoch: 6, Loss:  0.01180395856499672
Epoch: 7, Loss:  0.005551550537347794
Epoch: 8, Loss:  0.0044354163110256195
Epoch: 9, Loss:  0.0026176718529313803
Epoch: 10, Loss:  0.0026172189973294735
Epoch: 11, Loss:  0.0053107901476323605
Epoch: 12, Loss:  0.0023703603073954582
Epoch: 13, Loss:  0.004318095277994871
Epoch: 14, Loss:  0.00170547841116786
Accuracy Score = 0.8119834710743802
F1 Score (Micro) = 0.8220603537981269
F1 Score (Macro) = 0.8204144848750429
Confusion matrix:
[[[257  38]
  [ 27 162]]

 [[324  14]
  [ 38 108]]

 [[305  30]
  [ 24 125]]]

Fold 8
Training class distribution:
-1    1750
 1    1696
 0    1612
Name: class, dtype: int64
-1    0.345987
 1    0.335310
 0    0.318703
Name: class, dtype: float64
Test class distribution:
 1    179
 0    153
-1    15

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6708971261978149
Epoch: 1, Loss:  0.4802745580673218
Epoch: 2, Loss:  0.09759895503520966
Epoch: 3, Loss:  0.07327219843864441
Epoch: 4, Loss:  0.03144267946481705
Epoch: 5, Loss:  0.012761122547090054
Epoch: 6, Loss:  0.01070667989552021
Epoch: 7, Loss:  0.008353101089596748
Epoch: 8, Loss:  0.003592776833102107
Epoch: 9, Loss:  0.003362707793712616
Epoch: 10, Loss:  0.002441544784232974
Epoch: 11, Loss:  0.0017692440887913108
Epoch: 12, Loss:  0.001761598396115005
Epoch: 13, Loss:  0.0013148672878742218
Epoch: 14, Loss:  0.0012089149095118046
Accuracy Score = 0.8202479338842975
F1 Score (Micro) = 0.8257261410788383
F1 Score (Macro) = 0.8250533428165007
Confusion matrix:
[[[262  43]
  [ 21 158]]

 [[313  19]
  [ 35 117]]

 [[311  20]
  [ 30 123]]]

Fold 9
Training class distribution:
-1    1771
 1    1683
 0    1604
Name: class, dtype: int64
-1    0.350138
 1    0.332740
 0    0.317121
Name: class, dtype: float64
Test class distribution:
 1    192
 0    161
-1    13

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6631806492805481
Epoch: 1, Loss:  0.49677976965904236
Epoch: 2, Loss:  0.3931460380554199
Epoch: 3, Loss:  0.164155974984169
Epoch: 4, Loss:  0.1792258620262146
Epoch: 5, Loss:  0.008553668856620789
Epoch: 6, Loss:  0.007165650837123394
Epoch: 7, Loss:  0.010205637663602829
Epoch: 8, Loss:  0.008232878521084785
Epoch: 9, Loss:  0.00435230415314436
Epoch: 10, Loss:  0.0033588637597858906
Epoch: 11, Loss:  0.002432773821055889
Epoch: 12, Loss:  0.0016807178035378456
Epoch: 13, Loss:  0.0015685041435062885
Epoch: 14, Loss:  0.002277905587106943
Accuracy Score = 0.8491735537190083
F1 Score (Micro) = 0.8609958506224067
F1 Score (Macro) = 0.8592878195531909
Confusion matrix:
[[[277  15]
  [ 32 160]]

 [[324  29]
  [ 16 115]]

 [[302  21]
  [ 21 140]]]

Fold 10
Training class distribution:
-1    1764
 1    1686
 0    1608
Name: class, dtype: int64
-1    0.348754
 1    0.333333
 0    0.317912
Name: class, dtype: float64
Test class distribution:
 1    189
 0    157
-1    138


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.8256053924560547
Epoch: 1, Loss:  0.40501832962036133
Epoch: 2, Loss:  0.1368398815393448
Epoch: 3, Loss:  0.11221623420715332
Epoch: 4, Loss:  0.2938615381717682
Epoch: 5, Loss:  0.04750397428870201
Epoch: 6, Loss:  0.19520670175552368
Epoch: 7, Loss:  0.21889083087444305
Epoch: 8, Loss:  0.005484272725880146
Epoch: 9, Loss:  0.007018472068011761
Epoch: 10, Loss:  0.00233721686527133
Epoch: 11, Loss:  0.0019977649208158255
Epoch: 12, Loss:  0.0014762053033336997
Epoch: 13, Loss:  0.06719474494457245
Epoch: 14, Loss:  0.0017934240167960525
Accuracy Score = 0.8533057851239669
F1 Score (Micro) = 0.8621761658031089
F1 Score (Macro) = 0.8574866310160427
Confusion matrix:
[[[271  24]
  [ 17 172]]

 [[322  24]
  [ 28 110]]

 [[310  17]
  [ 23 134]]]


In [13]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

# Overall accuracy and confusion matrix over the predictions of all folds.
print('\nAccuracy:' + str(accuracy_score(result['class'], result['predicted'])))
cm = confusion_matrix(result['class'], result['predicted'])

print(cm)

# Per-class precision, recall and F-measure read off the confusion matrix.
# Rows/columns 0, 1, 2 are reported as Anti, Neutral and Pro, i.e. the labels
# -1, 0, 1 in ascending order.
for k, header in enumerate(('Class Anti:', 'Class Neutral:', 'Class Pro:')):
    print(header)
    true_positives = cm[k][k]
    prec = true_positives/cm[:,k].sum()     # column sum: everything predicted as k
    recall = true_positives/cm[k,:].sum()   # row sum: everything truly of class k
    f1 = 2*prec*recall/(prec + recall)
    print('   Precision =', prec)
    print('   Recall =', recall)
    print('   F-measure =', f1)


Accuracy:0.8263114415530772
[[1049  135  218]
 [  81 1349  135]
 [ 141  131 1603]]
Class Anti:
   Precision = 0.8253343823760818
   Recall = 0.7482168330955777
   F-measure = 0.784885895997007
Class Neutral:
   Precision = 0.8352941176470589
   Recall = 0.8619808306709266
   F-measure = 0.8484276729559749
Class Pro:
   Precision = 0.8195296523517382
   Recall = 0.8549333333333333
   F-measure = 0.8368572174367007
