# Results for Feb 3 data

In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from sklearn.metrics import multilabel_confusion_matrix

# Input directory, output directory for everything this notebook writes,
# and the number of cross-validation folds.
datadir, resultdir = '../data/', '../results_vaccine/'
numFolds = 10

In [2]:
from torch import cuda

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [3]:
# Load the labelled tweets and build the two-element target vector used for training.
df = pd.read_csv(datadir + "vaccine.csv")
df = df.drop(columns=['class','vaccine','pro','anti','neutral'])
# One indicator column per output unit: `related` mirrors vax_class,
# `unrelated` is its complement.
df['related'] = df['vax_class']
df['unrelated'] = 1 - df['vax_class']
df = df.rename(columns={'vax_class':'class'})
# Pick the target columns by name rather than by position so the vector is
# always [related, unrelated], whatever the column order of the CSV.
df['list'] = df[['related', 'unrelated']].values.tolist()
new_df = df[['tweet', 'class', 'list']].copy()
# Keep the original row position so per-fold predictions can be matched back
# to their tweets after shuffling.
new_df['rowid'] = new_df.index
new_df

Unnamed: 0,tweet,class,list,rowid
0,@DickDugan @maddow They make much more selling...,1,"[1, 0]",0
1,@theheraldsun How does an unvaccinated person ...,1,"[1, 0]",1
2,@cameronjowens @leighsales We have much higher...,1,"[1, 0]",2
3,Officials I trust fear there is not enough int...,1,"[1, 0]",3
4,"My daughter 30 years old, care worker. Double ...",1,"[1, 0]",4
...,...,...,...,...
10831,M?ori tribe tells anti-Covid vaccine protester...,1,"[1, 0]",10831
10832,Anti-vaccine protesters display Nazi symbols o...,1,"[1, 0]",10832
10833,@Joc_face @TravisR96776163 @ElijahSchaffer If ...,1,"[1, 0]",10833
10834,"@ponderousthings @1NewsNZ @jordyn_rudd ""The SA...",1,"[1, 0]",10834


In [4]:
# Hyper-parameters and the tokenizer shared by the dataset, training and
# evaluation cells further down.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_LEN = 150                                # passed to the tokenizer as max_length
TRAIN_BATCH_SIZE, VALID_BATCH_SIZE = 8, 4
EPOCHS = 10
LEARNING_RATE = 1e-5

In [5]:
class CustomDataset(Dataset):
    """Map-style dataset that tokenizes one tweet per item.

    The dataframe must provide three columns: ``tweet`` (the text), ``list``
    (the target vector for that row) and ``rowid`` (an identifier returned
    unchanged with every item so predictions can be matched back to rows).
    """

    def __init__(self, dataframe, tokenizer, max_len):
        """Store the frame, the tokenizer and the fixed sequence length.

        Args:
            dataframe: frame with ``tweet``, ``list`` and ``rowid`` columns.
            tokenizer: object exposing ``encode_plus``.
            max_len: length every encoded sequence is padded/truncated to.
        """
        self.tokenizer = tokenizer
        self.data = dataframe
        self.comment_text = dataframe['tweet']
        self.targets = dataframe['list']
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.comment_text)

    def __getitem__(self, index):
        """Return the encoded tweet, its targets and its row id.

        ``index`` is a position in ``range(len(self))``; rows are fetched
        positionally so the frame does not need a freshly reset index.
        """
        comment_text = str(self.comment_text.iloc[index])
        # Collapse every run of whitespace (including newlines) to one space.
        comment_text = " ".join(comment_text.split())

        # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`;
        # `truncation=True` makes the cut at `max_length` explicit instead of
        # relying on the tokenizer's warned-about fallback.
        inputs = self.tokenizer.encode_plus(
            comment_text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'rowid': self.data['rowid'].iloc[index],
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [6]:
# Creating the customized model by adding a dropout and a dense layer on top of BERT (bert-base-uncased) to get the final output for the model.

class BERTClass(torch.nn.Module):
    """Pretrained ``bert-base-uncased`` encoder followed by dropout and a 768 -> 2 linear layer.

    ``forward`` returns the linear layer's output with no activation applied.
    """

    def __init__(self):
        super().__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 2)

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder on a batch and project its second output to two values."""
        # With return_dict=False the encoder returns a 2-tuple; only the second
        # element is used (the pooled output, per the transformers documentation).
        _sequence_output, pooled = self.l1(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
            return_dict=False,
        )
        dropped = self.l2(pooled)
        return self.l3(dropped)


In [7]:
def loss_fn(outputs, targets):
    """Mean binary cross-entropy between raw model outputs (logits) and 0/1 targets."""
    return torch.nn.functional.binary_cross_entropy_with_logits(outputs, targets)

In [8]:
def train(model, optimizer, epoch, data_loader, log_every=5000):
    """Run one training pass over ``data_loader``.

    Args:
        model: module called as ``model(ids, mask, token_type_ids)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.
        data_loader: iterable of batches with ``ids``, ``mask``,
            ``token_type_ids`` and ``targets`` entries.
        log_every: print the current batch loss whenever the batch index is
            a multiple of this value (index 0 is always printed).
    """
    model.train()
    for step, batch in enumerate(data_loader):
        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        token_type_ids = batch['token_type_ids'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.float)

        # Clear the gradients once per step (the original did this twice).
        optimizer.zero_grad()
        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, targets)
        if step % log_every == 0:
            # This is the loss of a single batch, not an epoch average.
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        loss.backward()
        optimizer.step()

In [9]:
def validation(model, data_loader):
    """Score every batch of ``data_loader`` without tracking gradients.

    Returns three parallel lists: the row ids, the sigmoid of the model
    outputs, and the targets, in the order the loader yields them.
    """
    model.eval()
    row_ids, probabilities, labels = [], [], []
    with torch.no_grad():
        for batch in data_loader:
            ids = batch['ids'].to(device, dtype=torch.long)
            mask = batch['mask'].to(device, dtype=torch.long)
            token_type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            targets = batch['targets'].to(device, dtype=torch.float)

            logits = model(ids, mask, token_type_ids)

            row_ids += batch['rowid'].cpu().tolist()
            labels += targets.cpu().tolist()
            probabilities += torch.sigmoid(logits).cpu().tolist()
    return row_ids, probabilities, labels

In [10]:
from sklearn.model_selection import KFold
import pickle

# Position in the [related, unrelated] target vector -> label written to the
# results file (related -> 1, unrelated -> -1).
COLUMN_TO_LABEL = {0: 1, 1: -1}

kf = KFold(n_splits = numFolds, shuffle = True, random_state = 1234)

train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False,
                'num_workers': 0
                }

train_split = []
test_split = []
fold_predictions = []  # one frame of held-out predictions per fold

for fold, (train_index, test_index) in enumerate(kf.split(new_df), start=1):
    print('\nFold', fold)
    train_split.append(train_index)
    test_split.append(test_index)
    train_dataset = new_df.iloc[train_index].copy().reset_index()
    test_dataset =  new_df.iloc[test_index].copy().reset_index()

    # Report how balanced each side of the split is (counts, then proportions).
    for heading, frame in (('Training class distribution:', train_dataset),
                           ('Test class distribution:', test_dataset)):
        print(heading)
        distrib = frame['class'].value_counts()
        print(distrib)
        probs = distrib/sum(distrib)
        print(probs)

    train_dataset = train_dataset.drop(columns=['class'])
    test_dataset = test_dataset.drop(columns=['class'])

    print("FULL Dataset: {}".format(new_df.shape))
    print("TRAIN Dataset: {}".format(train_dataset.shape))
    print("TEST Dataset: {}".format(test_dataset.shape))

    training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
    testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)
    training_loader = DataLoader(training_set, **train_params)
    testing_loader = DataLoader(testing_set, **test_params)

    # A fresh model and optimizer per fold so no weights leak between folds.
    model = BERTClass()
    model.to(device)
    optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train(model, optimizer, epoch, training_loader)

    dlist, outputs, targets = validation(model, testing_loader)

    # Hard decision per row: position of the largest target / output value.
    pred = pd.DataFrame({
        'rowid': dlist,
        'truth': np.argmax(targets, axis=1),
        'predictions': np.argmax(outputs, axis=1),
    })
    fold_predictions.append(pred)

    # Per-fold metrics threshold each output independently at 0.5, so a row
    # counts as correct for accuracy only when both outputs match the target.
    outputs = np.array(outputs) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    cm = multilabel_confusion_matrix(targets, outputs)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    print(f"Confusion matrix:")
    print(cm)

    # NOTE: this pickles the whole module object, so loading it needs the same
    # BERTClass definition, and pickle files must only be loaded from a trusted
    # source. The format is kept so existing consumers of these files still work.
    with open(resultdir + 'model' + str(fold) + '_cv_vax','wb') as model_file:
        pickle.dump(model, model_file)

# Stack the folds once (instead of growing a frame inside the loop) and order
# the rows by their original position.
result = pd.concat(fold_predictions, ignore_index=True)
result = result.sort_values(by=['rowid']).set_index('rowid')
# Every row of new_df must have been held out exactly once.
assert result.index.is_unique and len(result) == len(new_df)

# Recode by explicit mapping and attach the tweet text by row id; the columns
# written are, in order, class, predicted, tweet.
result = pd.DataFrame({
    'class': result['truth'].map(COLUMN_TO_LABEL),
    'predicted': result['predictions'].map(COLUMN_TO_LABEL),
    'tweet': new_df['tweet'],
})
result.to_csv(resultdir + 'bert_results_vax.csv', index=False)

with open(resultdir + 'train_split_cv_vax','wb') as split_file:
    pickle.dump(train_split, split_file)

with open(resultdir + 'test_split_cv_vax','wb') as split_file:
    pickle.dump(test_split, split_file)


Fold 1
Training class distribution:
0    5392
1    4360
Name: class, dtype: int64
0    0.552912
1    0.447088
Name: class, dtype: float64
Test class distribution:
0    574
1    510
Name: class, dtype: int64
0    0.52952
1    0.47048
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9752, 4)
TEST Dataset: (1084, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max 

Epoch: 0, Loss:  0.7539483904838562
Epoch: 1, Loss:  0.013189608231186867
Epoch: 2, Loss:  0.008419126272201538
Epoch: 3, Loss:  0.006067648064345121
Epoch: 4, Loss:  0.06265126913785934
Epoch: 5, Loss:  0.007400728762149811
Epoch: 6, Loss:  0.004239530302584171
Epoch: 7, Loss:  0.0012353351339697838
Epoch: 8, Loss:  0.0009555773576721549
Epoch: 9, Loss:  0.00047716376138851047
Accuracy Score = 0.9898523985239852
F1 Score (Micro) = 0.9898523985239852
F1 Score (Macro) = 0.9898134835511339
Confusion matrix:
[[[570   4]
  [  7 503]]

 [[503   7]
  [  4 570]]]

Fold 2
Training class distribution:
0    5369
1    4383
Name: class, dtype: int64
0    0.550554
1    0.449446
Name: class, dtype: float64
Test class distribution:
0    597
1    487
Name: class, dtype: int64
0    0.550738
1    0.449262
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9752, 4)
TEST Dataset: (1084, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6488358378410339
Epoch: 1, Loss:  0.25727513432502747
Epoch: 2, Loss:  0.33283987641334534
Epoch: 3, Loss:  0.006529924925416708
Epoch: 4, Loss:  0.013260958716273308
Epoch: 5, Loss:  0.0006545749492943287
Epoch: 6, Loss:  0.0005049912724643946
Epoch: 7, Loss:  0.0006394694792106748
Epoch: 8, Loss:  0.00029857089975848794
Epoch: 9, Loss:  0.00029682149761356413
Accuracy Score = 0.988929889298893
F1 Score (Micro) = 0.988929889298893
F1 Score (Macro) = 0.988814710100812
Confusion matrix:
[[[591   6]
  [  6 481]]

 [[481   6]
  [  6 591]]]

Fold 3
Training class distribution:
0    5353
1    4399
Name: class, dtype: int64
0    0.548913
1    0.451087
Name: class, dtype: float64
Test class distribution:
0    613
1    471
Name: class, dtype: int64
0    0.565498
1    0.434502
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9752, 4)
TEST Dataset: (1084, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7504503726959229
Epoch: 1, Loss:  0.13572923839092255
Epoch: 2, Loss:  0.11758451908826828
Epoch: 3, Loss:  0.005898375064134598
Epoch: 4, Loss:  0.004119442775845528
Epoch: 5, Loss:  0.0009359029936604202
Epoch: 6, Loss:  0.000823172798845917
Epoch: 7, Loss:  0.000591269985307008
Epoch: 8, Loss:  0.0003145608934573829
Epoch: 9, Loss:  0.0004518660716712475
Accuracy Score = 0.9935424354243543
F1 Score (Micro) = 0.9935424354243543
F1 Score (Macro) = 0.9934344810877926
Confusion matrix:
[[[608   5]
  [  2 469]]

 [[469   2]
  [  5 608]]]

Fold 4
Training class distribution:
0    5367
1    4385
Name: class, dtype: int64
0    0.550349
1    0.449651
Name: class, dtype: float64
Test class distribution:
0    599
1    485
Name: class, dtype: int64
0    0.552583
1    0.447417
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9752, 4)
TEST Dataset: (1084, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7121485471725464
Epoch: 1, Loss:  0.0629415437579155
Epoch: 2, Loss:  0.009030823595821857
Epoch: 3, Loss:  0.015091138891875744
Epoch: 4, Loss:  0.0029725474305450916
Epoch: 5, Loss:  0.0029793090652674437
Epoch: 6, Loss:  0.7497513294219971
Epoch: 7, Loss:  0.0004933716263622046
Epoch: 8, Loss:  0.0013040483463555574
Epoch: 9, Loss:  0.0007441674824804068
Accuracy Score = 0.9907749077490775
F1 Score (Micro) = 0.9907749077490775
F1 Score (Macro) = 0.9906824183078446
Confusion matrix:
[[[591   8]
  [  2 483]]

 [[483   2]
  [  8 591]]]

Fold 5
Training class distribution:
0    5384
1    4368
Name: class, dtype: int64
0    0.552092
1    0.447908
Name: class, dtype: float64
Test class distribution:
0    582
1    502
Name: class, dtype: int64
0    0.5369
1    0.4631
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9752, 4)
TEST Dataset: (1084, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7428121566772461
Epoch: 1, Loss:  0.02162463776767254
Epoch: 2, Loss:  0.0059189279563724995
Epoch: 3, Loss:  0.003084505908191204
Epoch: 4, Loss:  0.001992512959986925
Epoch: 5, Loss:  0.833691418170929
Epoch: 6, Loss:  0.0018920109141618013
Epoch: 7, Loss:  0.0009755956125445664
Epoch: 8, Loss:  0.0015147097874432802
Epoch: 9, Loss:  0.0003533825511112809
Accuracy Score = 0.9916974169741697
F1 Score (Micro) = 0.9921622867680959
F1 Score (Macro) = 0.992153275310774
Confusion matrix:
[[[578   4]
  [  4 498]]

 [[497   5]
  [  4 578]]]

Fold 6
Training class distribution:
0    5333
1    4419
Name: class, dtype: int64
0    0.546862
1    0.453138
Name: class, dtype: float64
Test class distribution:
0    633
1    451
Name: class, dtype: int64
0    0.583948
1    0.416052
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9752, 4)
TEST Dataset: (1084, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6218694448471069
Epoch: 1, Loss:  0.03179900348186493
Epoch: 2, Loss:  0.2825593948364258
Epoch: 3, Loss:  0.005758652463555336
Epoch: 4, Loss:  0.0025708521716296673
Epoch: 5, Loss:  0.0019084358355030417
Epoch: 6, Loss:  0.0008333807345479727
Epoch: 7, Loss:  0.0024322629906237125
Epoch: 8, Loss:  0.0005052213091403246
Epoch: 9, Loss:  0.0004777576250489801
Accuracy Score = 0.9944649446494465
F1 Score (Micro) = 0.9944649446494465
F1 Score (Macro) = 0.9943115626410302
Confusion matrix:
[[[628   5]
  [  1 450]]

 [[450   1]
  [  5 628]]]

Fold 7
Training class distribution:
0    5375
1    4378
Name: class, dtype: int64
0    0.551112
1    0.448888
Name: class, dtype: float64
Test class distribution:
0    591
1    492
Name: class, dtype: int64
0    0.545706
1    0.454294
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9753, 4)
TEST Dataset: (1083, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7362067103385925
Epoch: 1, Loss:  0.028943557292222977
Epoch: 2, Loss:  0.12668374180793762
Epoch: 3, Loss:  0.0036054830998182297
Epoch: 4, Loss:  0.0017478989902883768
Epoch: 5, Loss:  0.023455841466784477
Epoch: 6, Loss:  0.002038294682279229
Epoch: 7, Loss:  0.32143735885620117
Epoch: 8, Loss:  0.0003479243896435946
Epoch: 9, Loss:  0.00017948159074876457
Accuracy Score = 0.9879963065558633
F1 Score (Micro) = 0.9879963065558633
F1 Score (Macro) = 0.9878757346767423
Confusion matrix:
[[[589   2]
  [ 11 481]]

 [[481  11]
  [  2 589]]]

Fold 8
Training class distribution:
0    5370
1    4383
Name: class, dtype: int64
0    0.5506
1    0.4494
Name: class, dtype: float64
Test class distribution:
0    596
1    487
Name: class, dtype: int64
0    0.550323
1    0.449677
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9753, 4)
TEST Dataset: (1083, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.6764918565750122
Epoch: 1, Loss:  0.024465691298246384
Epoch: 2, Loss:  0.009615588933229446
Epoch: 3, Loss:  0.06839737296104431
Epoch: 4, Loss:  0.0027164011262357235
Epoch: 5, Loss:  0.000872431555762887
Epoch: 6, Loss:  0.0009081641328521073
Epoch: 7, Loss:  0.0005737596075050533
Epoch: 8, Loss:  0.002161347074434161
Epoch: 9, Loss:  0.00040542270289734006
Accuracy Score = 0.9944598337950139
F1 Score (Micro) = 0.9944598337950139
F1 Score (Macro) = 0.9944031393409865
Confusion matrix:
[[[593   3]
  [  3 484]]

 [[484   3]
  [  3 593]]]

Fold 9
Training class distribution:
0    5371
1    4382
Name: class, dtype: int64
0    0.550702
1    0.449298
Name: class, dtype: float64
Test class distribution:
0    595
1    488
Name: class, dtype: int64
0    0.5494
1    0.4506
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9753, 4)
TEST Dataset: (1083, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7305500507354736
Epoch: 1, Loss:  0.020016146823763847
Epoch: 2, Loss:  0.011361181735992432
Epoch: 3, Loss:  0.015954483300447464
Epoch: 4, Loss:  0.0035238948184996843
Epoch: 5, Loss:  0.0017681794706732035
Epoch: 6, Loss:  0.003286840161308646
Epoch: 7, Loss:  0.001761268824338913
Epoch: 8, Loss:  0.000645893276669085
Epoch: 9, Loss:  0.0009307905565947294
Accuracy Score = 0.9824561403508771
F1 Score (Micro) = 0.9829257037378866
F1 Score (Macro) = 0.9827909499457617
Confusion matrix:
[[[588   7]
  [ 11 477]]

 [[476  12]
  [  7 588]]]

Fold 10
Training class distribution:
0    5380
1    4373
Name: class, dtype: int64
0    0.551625
1    0.448375
Name: class, dtype: float64
Test class distribution:
0    586
1    497
Name: class, dtype: int64
0    0.54109
1    0.45891
Name: class, dtype: float64
FULL Dataset: (10836, 4)
TRAIN Dataset: (9753, 4)
TEST Dataset: (1083, 4)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0, Loss:  0.7787419557571411
Epoch: 1, Loss:  0.012834059074521065
Epoch: 2, Loss:  0.007244109641760588
Epoch: 3, Loss:  0.5341516733169556
Epoch: 4, Loss:  0.007459256798028946
Epoch: 5, Loss:  0.002217660192400217
Epoch: 6, Loss:  0.0012661612126976252
Epoch: 7, Loss:  0.0008571565849706531
Epoch: 8, Loss:  0.0007006109226495028
Epoch: 9, Loss:  0.0002969239139929414
Accuracy Score = 0.989843028624192
F1 Score (Micro) = 0.989843028624192
F1 Score (Macro) = 0.9897691993902571
Confusion matrix:
[[[582   4]
  [  7 490]]

 [[490   7]
  [  4 582]]]


In [11]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

# Labels in `result`: -1 = unrelated, 1 = related. The order is passed to
# confusion_matrix explicitly so row/column 0 is always "unrelated" and
# row/column 1 is always "related".
class_labels = [(-1, 'Unrelated'), (1, 'Related')]

print('\nAccuracy:' + str(accuracy_score(result['class'], result['predicted'])))
cm = confusion_matrix(result['class'], result['predicted'],
                      labels=[code for code, _ in class_labels])

print(cm)
for position, (_, class_name) in enumerate(class_labels):
    print('Class ' + class_name + ':')
    # Rows of cm are true labels and columns are predictions, so the column
    # sum is the precision denominator and the row sum is the recall denominator.
    prec = cm[position][position]/cm[:,position].sum()
    recall = cm[position][position]/cm[position,:].sum()
    f1 = 2*prec*recall/(prec + recall)
    print('   Precision =', prec)
    print('   Recall =', recall)
    print('   F-measure =', f1)


Accuracy:0.9905869324473976
[[5918   48]
 [  54 4816]]
Class Anti:
   Precision = 0.9909578030810449
   Recall = 0.9919544083137781
   F-measure = 0.9914558552521361
Class Neutral:
   Precision = 0.9901315789473685
   Recall = 0.988911704312115
   F-measure = 0.9895212656667352
