# Install Transformers Library

In [None]:
!pip install transformers==3

Collecting transformers==3
  Downloading transformers-3.0.0-py3-none-any.whl (754 kB)
[?25l[K     |▍                               | 10 kB 23.0 MB/s eta 0:00:01[K     |▉                               | 20 kB 23.5 MB/s eta 0:00:01[K     |█▎                              | 30 kB 18.7 MB/s eta 0:00:01[K     |█▊                              | 40 kB 15.3 MB/s eta 0:00:01[K     |██▏                             | 51 kB 5.6 MB/s eta 0:00:01[K     |██▋                             | 61 kB 6.1 MB/s eta 0:00:01[K     |███                             | 71 kB 5.5 MB/s eta 0:00:01[K     |███▌                            | 81 kB 6.2 MB/s eta 0:00:01[K     |████                            | 92 kB 6.1 MB/s eta 0:00:01[K     |████▍                           | 102 kB 5.4 MB/s eta 0:00:01[K     |████▊                           | 112 kB 5.4 MB/s eta 0:00:01[K     |█████▏                          | 122 kB 5.4 MB/s eta 0:00:01[K     |█████▋                          | 133 kB 5.4 MB/s e

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import transformers
from transformers import AutoModel, BertTokenizerFast

# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on machines without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load Dataset

In [None]:
# Read the labelled training data into a DataFrame and preview the first rows.
# NOTE(review): the absolute "/content/..." path looks session-specific --
# adjust it when running in another environment.
df = pd.read_csv(filepath_or_buffer="/content/train_new_descriptorname.csv")
df.head(n=5)


Unnamed: 0,text,label
0,"Clostridium difficile Diarrhea,microbiology En...",C
1,"Abnormalities, Drug-Induced Pregnancy Complica...",B
2,Emigration and Immigration Tuberculin Test Tub...,B
3,"Fibromyalgia,therapy",A
4,"Anxiety,physiopathology Raynaud Disease,physio...",C


In [None]:

# Work on a copy, fold label value 8 into label 0, then write the remapped
# column back onto the original frame.
df_ = df.copy()
df_['label'] = df_['label'].replace(to_replace=8, value=0)
df['label'] = df_['label']

In [None]:
# (rows, columns) of the frame before any subsetting.
df.shape

(781, 2)

In [None]:
# Keep only the first 391 rows (positional slice) and confirm the new shape.
# NOTE(review): the reason for 391 is not stated in the notebook -- confirm.
# This overwrites `df`, so the full frame must be re-loaded to undo it.
df = df.iloc[:391]
print(df.shape)

(391, 2)


In [None]:
# Check the class distribution: with normalize=True each value is the
# fraction of rows carrying that label, sorted from most to least frequent.
df['label'].value_counts(normalize = True)

6    0.263427
7    0.248082
5    0.225064
3    0.084399
0    0.076726
4    0.040921
2    0.033248
1    0.028133
Name: label, dtype: float64

# Split train dataset into train, validation and test sets

In [None]:
# First cut: 70 % train / 30 % held out, stratified on the label so every
# class keeps its proportion in both parts.
train_text, temp_text, train_labels, temp_labels = train_test_split(
    df['text'],
    df['label'],
    test_size=0.3,
    stratify=df['label'],
    random_state=2018,
)

# Second cut: divide the held-out 30 % evenly into validation and test sets,
# again stratified, using the same seed for reproducibility.
val_text, test_text, val_labels, test_labels = train_test_split(
    temp_text,
    temp_labels,
    test_size=0.5,
    stratify=temp_labels,
    random_state=2018,
)

# Import BERT Model and BERT Tokenizer

In [None]:
# Load the pretrained 'bert-base-uncased' encoder; it is later wrapped by
# BERT_Arch and used as the feature extractor.
bert = AutoModel.from_pretrained('bert-base-uncased')

# Load the matching fast tokenizer from the same checkpoint name so the
# vocabulary lines up with the encoder.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [None]:
# Two toy sentences used only to illustrate what the tokenizer returns.
text = ["this is a bert model tutorial", "we will fine-tune a bert model"]

# Encode the batch. padding=True pads the shorter sentence to the longest one
# in the batch; token type ids are dropped because they are not used later.
sent_id = tokenizer.batch_encode_plus(text, padding=True, return_token_type_ids=False)

In [None]:
# Show the encoding: 'input_ids' holds the token ids per sentence and
# 'attention_mask' marks real tokens with 1 and padding with 0.
print(sent_id)

{'input_ids': [[101, 2023, 2003, 1037, 14324, 2944, 14924, 4818, 102, 0], [101, 2057, 2097, 2986, 1011, 8694, 1037, 14324, 2944, 102]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}


# Tokenization

In [None]:
# Count whitespace-separated words in every training text; the histogram of
# these counts guides the choice of the maximum sequence length.
seq_len = list(map(lambda sentence: len(sentence.split()), train_text))

pd.Series(seq_len).hist(bins=30)

NameError: ignored

In [None]:
# Fixed token length used as `max_length` when the train/val/test texts are
# tokenized; longer inputs are truncated and shorter ones are padded.
max_seq_len = 40

In [None]:
def _encode_split(texts):
    """Tokenize one split with the shared settings.

    Args:
        texts: pandas Series of raw strings (one of train/val/test text).

    Returns:
        The tokenizer's batch encoding with 'input_ids' and 'attention_mask',
        every sequence padded or truncated to exactly `max_seq_len` tokens.
    """
    return tokenizer.batch_encode_plus(
        texts.tolist(),
        max_length=max_seq_len,
        # `pad_to_max_length=True` is deprecated; with `max_length` set,
        # padding='max_length' is the equivalent supported spelling.
        padding='max_length',
        truncation=True,
        return_token_type_ids=False,
    )


# tokenize and encode sequences in the training, validation and test sets
tokens_train = _encode_split(train_text)
tokens_val = _encode_split(val_text)
tokens_test = _encode_split(test_text)

NameError: ignored

# Convert Integer Sequences to Tensors

In [None]:
def _as_tensors(encoded, labels):
    """Return (input_ids, attention_mask, labels) of one split as tensors."""
    return (
        torch.tensor(encoded['input_ids']),
        torch.tensor(encoded['attention_mask']),
        torch.tensor(labels.tolist()),
    )


# Same conversion for each split: token ids, attention mask, target labels.
train_seq, train_mask, train_y = _as_tensors(tokens_train, train_labels)
val_seq, val_mask, val_y = _as_tensors(tokens_val, val_labels)
test_seq, test_mask, test_y = _as_tensors(tokens_test, test_labels)

# Create DataLoaders

In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Number of samples per mini-batch for both loaders.
batch_size = 32

# Bundle (token ids, attention mask, label) triples per split.
train_data = TensorDataset(train_seq, train_mask, train_y)
val_data = TensorDataset(val_seq, val_mask, val_y)

# Training batches are drawn in random order; validation batches are read
# sequentially so evaluation order is fixed.
train_sampler = RandomSampler(train_data)
val_sampler = SequentialSampler(val_data)

# Loaders that yield the batches consumed by train() and evaluate().
train_dataloader = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
val_dataloader = DataLoader(val_data, batch_size=batch_size, sampler=val_sampler)

print("create DataLoaders")

create DataLoaders


# Freeze BERT Parameters

In [None]:
# Freeze every BERT parameter: with requires_grad=False no gradients are
# computed for the encoder, so only layers added on top of it can be trained.
for param in bert.parameters():
    param.requires_grad = False

# Define Model Architecture

In [None]:
class BERT_Arch(nn.Module):
    """Classification head on top of a BERT encoder.

    The encoder's pooled output is passed through
    Linear -> ReLU -> Dropout -> Linear -> LogSoftmax, so the module returns
    per-class log-probabilities suitable for `nn.NLLLoss`.

    Args:
        bert: encoder module called as `bert(sent_id, attention_mask=mask)`;
            the element at index 1 of its output is used as the pooled
            sentence representation.
        hidden_size: width of the encoder's pooled output (default 768).
        num_classes: number of target classes (default 8).
        dropout: dropout probability applied after the first dense layer.
    """

    def __init__(self, bert, hidden_size=768, num_classes=8, dropout=0.1):
        super(BERT_Arch, self).__init__()

        self.bert = bert

        # dropout layer
        self.dropout = nn.Dropout(dropout)

        # relu activation function
        self.relu = nn.ReLU()

        # dense layer 1
        self.fc1 = nn.Linear(hidden_size, 512)

        # dense layer 2 (output layer, one unit per class)
        self.fc2 = nn.Linear(512, num_classes)

        # log-softmax over the class dimension
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        """Return log-probabilities of shape (batch, num_classes).

        Args:
            sent_id: tensor of token ids, one row per sample.
            mask: attention mask tensor matching `sent_id`.
        """
        # Index instead of tuple-unpacking: this works for a plain tuple and
        # also for dict-like outputs, where unpacking would yield the keys.
        outputs = self.bert(sent_id, attention_mask=mask)
        cls_hs = outputs[1]

        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)

        # output layer followed by log-softmax
        x = self.fc2(x)
        return self.softmax(x)

In [None]:
# Wrap the pretrained encoder in the classification architecture and move the
# whole module to the selected device in one step.
model = BERT_Arch(bert).to(device)

In [None]:
# optimizer from hugging face transformers
from transformers import AdamW

# Define the optimizer: AdamW over all model parameters, learning rate 1e-3.
# The encoder parameters were frozen earlier (requires_grad=False), so they
# receive no gradients during training.
optimizer = AdamW(model.parameters(), lr = 1e-3)

# Find Class Weights

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Compute 'balanced' class weights (rarer classes get larger weights) from the
# training labels. `classes` and `y` are passed by keyword because current
# scikit-learn releases no longer accept them positionally.
class_wts = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)

print(class_wts)

[1.625      4.265625   3.79166667 1.48369565 3.10227273 0.55942623
 0.47395833 0.50183824]


In [None]:
# Class weights as a float tensor living on the training device.
weights = torch.tensor(class_wts, dtype=torch.float).to(device)
print(weights)

# Weighted negative log-likelihood loss.
cross_entropy = nn.NLLLoss(weight=weights)

# Number of passes over the training set.
epochs = 10

tensor([1.6250, 4.2656, 3.7917, 1.4837, 3.1023, 0.5594, 0.4740, 0.5018],
       device='cuda:0')


# Fine-Tune BERT

In [None]:
# function to train the model
def train():
  """Run one training epoch over `train_dataloader`.

  Uses the notebook-level `model`, `optimizer`, `cross_entropy` and `device`.

  Returns:
    (avg_loss, total_preds): the mean of the per-batch losses, and a numpy
    array with the model outputs for every sample seen, stacked along axis 0.
  """
  model.train()

  total_loss = 0

  # per-batch model outputs, concatenated at the end
  total_preds = []

  for step, batch in enumerate(train_dataloader):

    # progress update after every 50 batches
    if step % 50 == 0 and not step == 0:
      print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))

    # move the batch to the training device
    sent_id, mask, labels = [r.to(device) for r in batch]

    # clear gradients left over from the previous step
    model.zero_grad()

    # forward pass and loss
    preds = model(sent_id, mask)
    loss = cross_entropy(preds, labels)
    total_loss += loss.item()

    # backward pass
    loss.backward()

    # clip the gradient norm to 1.0 to guard against exploding gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

    optimizer.step()

    # detach from the graph and move to host memory before storing
    total_preds.append(preds.detach().cpu().numpy())

  # mean loss over the batches of this epoch
  avg_loss = total_loss / len(train_dataloader)

  # (num_batches, batch, classes) -> (num_samples, classes)
  total_preds = np.concatenate(total_preds, axis=0)

  return avg_loss, total_preds

In [None]:
# function for evaluating the model
def evaluate():
  """Compute loss and predictions on `val_dataloader` without updating weights.

  Uses the notebook-level `model`, `cross_entropy` and `device`.

  Returns:
    (avg_loss, total_preds): the mean of the per-batch validation losses, and
    a numpy array with the model outputs for every validation sample, stacked
    along axis 0.
  """
  print("\nEvaluating...")

  # deactivate dropout layers
  model.eval()

  total_loss = 0

  # per-batch model outputs, concatenated at the end
  total_preds = []

  for step, batch in enumerate(val_dataloader):

    # Progress update every 50 batches. The earlier elapsed-time computation
    # was removed: it referenced format_time/time/t0, none of which are
    # defined in this notebook, and its result was never used.
    if step % 50 == 0 and not step == 0:
      print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(val_dataloader)))

    # move the batch to the evaluation device
    sent_id, mask, labels = [t.to(device) for t in batch]

    # no gradients are needed for evaluation
    with torch.no_grad():
      preds = model(sent_id, mask)

      # validation loss between predicted and actual labels
      loss = cross_entropy(preds, labels)
      total_loss += loss.item()

      total_preds.append(preds.detach().cpu().numpy())

  # mean loss over the validation batches
  avg_loss = total_loss / len(val_dataloader)

  # (num_batches, batch, classes) -> (num_samples, classes)
  total_preds = np.concatenate(total_preds, axis=0)

  return avg_loss, total_preds

# Start Model Training

In [None]:
# Best validation loss seen so far; starts at +inf so the first epoch always
# produces a checkpoint.
best_valid_loss = float('inf')

# Per-epoch loss history.
train_losses, valid_losses = [], []

for epoch in range(epochs):

    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    # One pass over the training data, then score the validation data.
    # Both functions also return predictions, which are not needed here.
    train_loss = train()[0]
    valid_loss = evaluate()[0]

    # Checkpoint whenever the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')


 Epoch 1 / 10
tensor([[-2.1384, -2.1475, -2.0496, -2.2044, -1.9495, -1.9547, -1.8687, -2.4304],
        [-2.1272, -2.1307, -2.1464, -2.2591, -1.9292, -1.9686, -1.7978, -2.4063],
        [-2.1769, -2.1874, -2.0253, -2.1587, -1.9740, -2.0117, -1.8726, -2.2969],
        [-2.0961, -2.0737, -2.1624, -2.2536, -1.9807, -1.9493, -1.9072, -2.2787],
        [-2.0746, -2.2262, -2.0676, -2.2514, -2.0036, -2.0419, -1.9426, -2.0654],
        [-2.0556, -2.0554, -2.1774, -2.3211, -1.9623, -2.0021, -1.9262, -2.1967],
        [-2.0149, -2.0156, -2.2513, -2.2188, -1.9503, -2.0774, -1.8715, -2.3222],
        [-2.1747, -2.1401, -2.1345, -2.1838, -1.8760, -1.9926, -1.8794, -2.3477],
        [-2.1150, -2.1654, -2.1064, -2.2169, -1.9362, -2.0728, -1.8853, -2.1877],
        [-2.1354, -2.0921, -2.1524, -2.2686, -1.9251, -1.9908, -1.8285, -2.3477],
        [-2.1626, -2.1461, -2.1019, -2.2128, -1.9738, -1.9587, -1.9512, -2.1686],
        [-2.2423, -2.0492, -2.1364, -2.2687, -1.9048, -2.0185, -1.8119, -2.3191],
 

# Load Saved Model

In [None]:
# Load the weights of the best model (lowest validation loss).
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only machine.
path = 'saved_weights.pt'
model.load_state_dict(torch.load(path, map_location=device))


<All keys matched successfully>

# Get Predictions for Test Data

In [None]:
# Get predictions for the test data.
# Switch to eval mode explicitly so dropout is disabled here regardless of
# which cell touched the model last.
model.eval()
with torch.no_grad():
  preds = model(test_seq.to(device), test_mask.to(device))
  preds = preds.detach().cpu().numpy()

In [None]:
# Turn per-class scores into a predicted class index per sample, then report
# precision / recall / F1 against the true test labels.
# Note: `preds` is overwritten, so this cell is meant to run once per
# prediction pass.
preds = preds.argmax(axis=1)
print(classification_report(test_y, preds))

              precision    recall  f1-score   support

           0       0.07      0.50      0.12         4
           1       0.29      1.00      0.44         2
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00         3
           5       0.20      0.08      0.11        13
           6       0.36      0.27      0.31        15
           7       0.00      0.00      0.00        15

    accuracy                           0.15        59
   macro avg       0.11      0.23      0.12        59
weighted avg       0.15      0.15      0.13        59



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Confusion matrix: rows are the true test labels (test_y), columns are the
# predicted labels (preds). A class that is never predicted gets no column.
pd.crosstab(test_y, preds)

col_0,0,1,3,4,5,6
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2,1,0,0,0,1
1,0,2,0,0,0,0
2,1,0,0,1,0,0
3,3,1,0,1,0,0
4,1,1,0,0,1,0
5,5,0,1,1,1,5
6,5,1,1,2,2,4
7,12,1,0,0,1,1


In [None]:
!pip install pyirr

Collecting pyirr
  Downloading https://files.pythonhosted.org/packages/d6/fd/b7bd6f7c4034e2b2821039c486ca592fe5e1db04bab104dfa3fac9e3183f/pyirr-0.84.1.1.tar.gz
Building wheels for collected packages: pyirr
  Building wheel for pyirr (setup.py) ... [?25l[?25hdone
  Created wheel for pyirr: filename=pyirr-0.84.1.1-cp37-none-any.whl size=23161 sha256=cbdbe862556f9a0462f166c8b587642c8798e66f89a61f692951d0aeb693e50e
  Stored in directory: /root/.cache/pip/wheels/aa/fc/a6/f966d32825fcc5cb68c04c4f7fe0f57a6040004538b5651d13
Successfully built pyirr
Installing collected packages: pyirr
Successfully installed pyirr-0.84.1.1


In [None]:
from pyirr import read_data, intraclass_correlation

# Toy ratings matrix used to try out the API; the printed report treats it as
# 2 subjects (rows) rated by 3 raters (columns).
data = [[0,1,2],[0,1,2]]
#data = read_data("anxiety")  # loads example data
# Two-way model, "agreement" type intraclass correlation on the toy matrix.
intraclass_correlation(data, "twoway", "agreement")

  Fvalue = MSr / (a * MSc + b * MSe)
  v = (a * MSc + b * MSe)**2 / ((a * MSc)**2 / (nr - 1) + (b * MSe)**2 / ((ns - 1) * (nr - 1)))


          Intraclass Correlation Results          
Model: twoway
Type: agreement

Subjects = 2
Raters = 3
ICC(A,1) = 0.00

F-Test, H0: r0 = 0 ; H1 : r0 > 0
F(1.00,nan) = nan, p = nan

95%-Confidence Interval for ICC Population Values:
nan < ICC < nan

In [None]:
import sklearn
y1 = [2, 2, 3, 7, 5, 5, 3, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 6, 6, 2, 2, 6, 6, 6, 3, 3, 2, 5, 6, 6, 6, 6, 7, 7, 7, 7, 1, 1, 1, 2, 6, 6, 6, 3, 6, 7, 7, 7, 3, 3, 4, 3, 6, 6, 6, 6, 6, 3, 6, 6, 3, 3, 3, 3, 3, 7, 4, 7, 7, 7, 7, 8, 8, 8, 8, 8, 7, 7, 1, 1, 1, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 2, 6, 6, 6, 2, 3, 6, 6, 6, 6, 6, 7, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 7, 7, 7, 7, 7, 3, 3, 8, 8, 8, 7, 7, 7, 7, 3, 3, 7, 7, 6, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 7, 3, 7, 7, 7, 7, 6, 7, 7, 2, 8, 7, 7, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 3, 4, 3, 3, 3, 6, 6, 7, 7, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 2, 8, 7, 7, 7, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 8, 7, 7, 7, 7, 7, 2, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 1, 2, 4, 4, 4, 4, 4, 4, 7, 8, 6, 6, 6, 6, 6, 6, 6, 3, 3, 8, 8, 7, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 8, 7, 7, 7, 6, 6, 6, 6, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 8, 8, 8, 8, 2, 7, 8, 8, 8, 1, 1, 1, 2, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 3, 3, 3, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 3, 2, 5, 5, 2, 8, 2, 7, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 2, 6, 6, 6, 6, 6, 6, 7, 7, 3, 5, 7, 7, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 7, 5, 5, 5, 5, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 7, 7, 1, 1, 1, 3, 5, 7, 7, 8, 2, 6, 7, 7, 6, 6, 6, 6, 6, 6, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 5, 3, 6, 3, 7, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 8, 7, 8, 8, 1, 1, 1, 7, 7, 7, 8, 2, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 8, 8, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 2, 2, 2, 2, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 6, 6, 6, 5, 3, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 8, 3, 8, 8, 8, 2, 2, 2, 3, 2, 5, 5, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 1, 1, 1, 3, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 3, 6, 2, 3, 3, 6, 6, 6, 6, 3, 3, 3, 8, 8, 8, 3, 3, 8, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 6, 3, 6, 2, 2, 5, 6, 6, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 2, 2, 6, 3, 5, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 6, 6, 6, 7, 7, 7, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 6, 6, 7, 7, 7, 7, 3, 3, 3, 7, 3, 3, 3, 7, 7, 7, 7, 7, 8]
y2 = [2, 2, 3, 7, 5, 5, 3, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 6, 6, 2, 2, 6, 6, 6, 3, 3, 2, 5, 6, 6, 6, 6, 7, 7, 7, 7, 1, 1, 1, 2, 6, 6, 6, 3, 6, 7, 7, 7, 3, 3, 4, 3, 6, 6, 6, 6, 6, 3, 6, 6, 3, 3, 3, 3, 3, 7, 4, 7, 7, 7, 7, 8, 8, 8, 8, 8, 7, 7, 1, 1, 1, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 2, 6, 6, 6, 2, 3, 6, 6, 6, 6, 6, 7, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 7, 7, 7, 7, 7, 3, 3, 8, 8, 8, 7, 7, 7, 7, 3, 3, 3, 7, 7, 7, 7, 7, 7, 6, 2, 2, 2, 2, 2, 2, 2, 8, 7, 3, 7, 7, 7, 6, 6, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 7, 7, 8, 1, 1, 1, 2, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 6, 6, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 3, 7, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 8, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 7, 7, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5, 7, 7, 7, 7, 7, 7, 1, 1, 1, 2, 4, 4, 4, 4, 4, 4, 7, 2, 5, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 7, 5, 7, 7, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 5, 5, 5, 5, 3, 7, 7, 7, 7, 8, 3, 7, 7, 7, 7, 5, 5, 5, 6, 8, 8, 7, 7, 8, 8, 8, 7, 1, 1, 1, 5, 5, 5, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 6, 5, 5, 5, 5, 5, 5, 7, 6, 7, 7, 7, 3, 6, 7, 6, 7, 7, 7, 8, 8, 7, 7, 7, 7, 7, 2, 2, 4, 4, 4, 7, 8, 8, 5, 6, 6, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 7, 3, 5, 7, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 7, 5, 5, 5, 5, 5, 6, 6, 6, 6, 3, 3, 6, 6, 6, 6, 6, 6, 7, 6, 6, 3, 7, 7, 7, 7, 7, 2, 8, 7, 5, 5, 5, 5, 5, 5, 5, 7, 7, 1, 1, 1, 3, 7, 7, 7, 8, 3, 7, 7, 7, 3, 7, 5, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 3, 3, 6, 6, 6, 3, 7, 7, 3, 4, 7, 7, 6, 6, 6, 6, 6, 6, 7, 7, 6, 6, 6, 
6, 6, 6, 8, 8, 3, 3, 8, 1, 1, 1, 7, 7, 4, 8, 6, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 2, 2, 7, 7, 3, 3, 6, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 3, 3, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 2, 2, 4, 4, 3, 6, 6, 6, 3, 3, 3, 7, 8, 7, 7, 7, 7, 7, 1, 1, 1, 2, 3, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 7, 7, 7, 7, 8, 8, 3, 3, 7, 7, 5, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 7, 7, 7, 3, 7, 7, 5, 5, 5, 7, 7, 5, 5, 5, 5, 1, 1, 1, 1, 1, 2, 2, 2, 3, 5, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 7, 7, 7, 7, 7, 7, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 3, 3, 8, 7, 2, 5, 5, 5, 5, 7, 7, 5, 5, 5, 6, 6, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 8, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 8, 8, 8, 8, 8, 8]
# Import the function explicitly: a bare `import sklearn` does not guarantee
# that the `sklearn.metrics` submodule is loaded, so attribute access only
# worked because another cell had already imported from it.
from sklearn.metrics import cohen_kappa_score

# Cohen's kappa between the two label sequences y1 and y2.
cohen_kappa_score(y1, y2)

0.5345664312637979