In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
import csv
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import statistics

import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler


from keras_preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

import transformers
from transformers import get_linear_schedule_with_warmup
from seqeval.metrics import f1_score, accuracy_score

# from transformers import BertTokenizer, BertConfig
# from transformers import BertForTokenClassification, AdamW

from transformers import RobertaConfig, RobertaModel
from transformers import RobertaTokenizer, RobertaForTokenClassification, AutoTokenizer, AdamW

from transformers import AutoTokenizer, DistilBertForTokenClassification, AdamW

2023-08-03 16:55:53.838805: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Check GPU availability

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 2080 Ti'

# **Preprocessing data**

In [3]:
# Reading data
import json


#open the file, and format correctly
f =open('all.jsonl', 'r')
json_object = json.dumps(f.readlines(), indent=4)
f.close()

#save better format into file
p = open('sample.txt', 'w')
for i in json_object:
    p.write(i)
p.close()

#open new file, and save each 
j = open('sample.txt', 'r')
text = json.loads(j.read())
j.close()

#compile all json dicts into a list
info = []
for i in text:
    info.append(json.loads(str(i)))
    

In [4]:
# To use the BERT, you must use the BERT Tokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")


In [5]:
def degreekify(char):
    #char will be a character
    greek = {'α': '[alpha]', 'β':'[beta]', 'γ': '[gamma]', 'δ':'[delta]', 'ε': '[epsilon]', 'ζ':'[zeta]', 'η': '[eta]',
            'θ':'[theta]', 'ι': '[iota]', 'κ':'[kappa]', 'λ':'[lambda]', 'μ': '[mu]', 'ν':'[nu]', 'ξ':'[xi]', 'ο':'[omicron]', 'π':'[pi]', 'ρ':'[rho]',
            'σ': '[sigma]', 'τ': '[tau]', 'υ':'[upsilon]', 'φ':'[phi]', 'χ':'[chi]', 'ψ':'[psi]', 'ω':'[omega]' }
    if char in greek:
        return greek[char]
    else:
        return char

In [6]:
def pre_process(text, annotations):
    #text will be the straight sentence, info[i]['text']
    #annotations will be the list of labels, must be info[i]['annotations']
    
    text_dict = []
    
    for i in range(len(text)):
        text_dict.append(degreekify(text[i]))
    
    
    
    ann_indices = []
    def fun(x):
        return x['start_offset']
    annotations.sort(key=fun)
    
    
    
    if len(annotations)==0:
        ann_indices.append([[0, len(text)],0])
    else:
        ann_indices.append([[0, annotations[0]['start_offset']], 0])
        for i in range(len(annotations)-1):
            ann_indices.append([[annotations[i]['start_offset'], annotations[i]['end_offset']], data_tags.index(annotations[i]['label'])])
            ann_indices.append([[annotations[i]['end_offset'], annotations[i+1]['start_offset']], 0])
            
        ann_indices.append([[annotations[-1]['start_offset'], annotations[-1]['end_offset']], data_tags.index(annotations[-1]['label'])])
        ann_indices.append([[annotations[-1]['end_offset'], len(text)], 0])
         
    
    labels = []
    sentences = []
    for a in ann_indices:
        
        if a[0][1]-a[0][0] !=0:
            together = ''
            for i in range(a[0][0], a[0][1]):
                together += text_dict[i]
                
            toke = together.split()
            sentences.extend(toke)
            t = len(toke)
            if t != 0:
                temp = [data_tags[a[1]+1]] * t
                if a[1] != 0:
                    temp[0] = data_tags[a[1]]
                labels.extend(temp)
 
    return labels, sentences

In [7]:
def reduce(sent, label, slist, llist):
    lens = len(sent)
    if lens < 128:
        slist.append(sent)
        llist.append(label)
    else:
        t = lens//2
        return reduce(sent[:t], label[:t], slist, llist), reduce(sent[t:], label[t:], slist, llist)
    


In [26]:
#create labels
data_tags = ['ahhhhhhhhhhhhhhhhhhhh','0','Metal', 'M-cont' , 'Element', 'E-cont', 'Acid', 'A-cont', 'Yield' , 'Y-cont', 'Separation Method' , 'S-cont', 'Resin', 'R-cont', 'Method of Analysis', 'T-cont', 'pH', 'P-cont', 'Chemical Compound', 'H-cont', 'Organic solvent', 'O-cont', 'Element Group', 'G-cont', 'Inorganic Solvent', 'I-cont', 'Flowrate', 'F-cont', 'Acid Concentration', 'C-cont', 'Reagent', 'X-cont']

s_test, l_test = [], []
sentences, labels = [], []

for i in range(len(info)):
    l, s = pre_process(info[i]['text'], info[i]['entities'])
  
    if i % 5 == 0:
        reduce(s,l,s_test, l_test)
        
#         test_sent.append(s)
#         test_label.append(l)
    else:
        reduce(s,l,sentences, labels)
#         labels.append(l)
#         sentences.append(s)

data_tags = data_tags[1:]

In [9]:

# Determine the list of tags
tag_values = data_tags
print(tag_values)

tag_values.append("PAD")
print(tag_values)

tag2idx = {t: i for i, t in enumerate(tag_values)}
print(tag2idx)
     

['0', 'Metal', 'M-cont', 'Element', 'E-cont', 'Acid', 'A-cont', 'Yield', 'Y-cont', 'Separation Method', 'S-cont', 'Resin', 'R-cont', 'Method of Analysis', 'T-cont', 'pH', 'P-cont', 'Chemical Compound', 'H-cont', 'Organic solvent', 'O-cont', 'Element Group', 'G-cont', 'Inorganic Solvent', 'I-cont', 'Flowrate', 'F-cont', 'Acid Concentration', 'C-cont', 'Reagent', 'X-cont']
['0', 'Metal', 'M-cont', 'Element', 'E-cont', 'Acid', 'A-cont', 'Yield', 'Y-cont', 'Separation Method', 'S-cont', 'Resin', 'R-cont', 'Method of Analysis', 'T-cont', 'pH', 'P-cont', 'Chemical Compound', 'H-cont', 'Organic solvent', 'O-cont', 'Element Group', 'G-cont', 'Inorganic Solvent', 'I-cont', 'Flowrate', 'F-cont', 'Acid Concentration', 'C-cont', 'Reagent', 'X-cont', 'PAD']
{'0': 0, 'Metal': 1, 'M-cont': 2, 'Element': 3, 'E-cont': 4, 'Acid': 5, 'A-cont': 6, 'Yield': 7, 'Y-cont': 8, 'Separation Method': 9, 'S-cont': 10, 'Resin': 11, 'R-cont': 12, 'Method of Analysis': 13, 'T-cont': 14, 'pH': 15, 'P-cont': 16, 'Chemi

In [10]:
def tokenize_and_preserve_labels(sentence, text_labels):
    tokenized_sentence = []
    labels = []

    for word, label in zip(sentence, text_labels):

        # Tokenize the word and count # of subwords the word is broken into
        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)

        # Add the tokenized word to the final tokenized word list
        tokenized_sentence.extend(tokenized_word)

        # Add the same label to the new list of labels `n_subwords` times
        labels.extend([label] * n_subwords)

    return tokenized_sentence, labels


In [12]:
tokenized_texts_and_labels = [ tokenize_and_preserve_labels(sent, labs) for sent, labs in zip(sentences, labels)]

tokenized_texts = [token_label_pair[0] for token_label_pair in tokenized_texts_and_labels]
labels = [token_label_pair[1] for token_label_pair in tokenized_texts_and_labels]

In [13]:
# MAX_LEN is the maximum length of a sequence
MAX_LEN = 128 # 64 or 128 or ...
bs = 10 # batch size


In [14]:
# Use Padding to equalize the length of sentences
input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")
     

In [15]:
# Do not mask values that are zero
attention_masks = [[float(i != 0.0) for i in ii] for ii in input_ids]

In [16]:
# Split data to train and validation. %90 for train and %10 for validation
tr_inputs, val_inputs, tr_tags, val_tags = train_test_split(input_ids, tags,
                                                            random_state=2018, test_size=0.1)

# Each mask contains 10% of a sentence
tr_masks, val_masks, _, _ = train_test_split(attention_masks, input_ids,
                                             random_state=2018, test_size=0.1)

# The input of the BERT should be the tensors produced using PyTorch
# So convert all inputs and labels into torch tensors
tr_inputs = torch.tensor(tr_inputs)
val_inputs = torch.tensor(val_inputs)

tr_tags = torch.tensor(tr_tags)
val_tags = torch.tensor(val_tags)

tr_masks = torch.tensor(tr_masks)
val_masks = torch.tensor(val_masks)

# Create the DataLoader for our training set
train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=bs)

print(train_sampler)

# Create the DataLoader for our validation set
valid_data = TensorDataset(val_inputs, val_masks, val_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=bs)


<torch.utils.data.sampler.RandomSampler object at 0x7f043d15c2b0>


# **Build model**

In [17]:
# Load DistilBERT Model
from transformers import AutoTokenizer, AutoModelForTokenClassification
model = DistilBertForTokenClassification.from_pretrained(
    "distilbert-base-cased",
    num_labels=len(tag2idx), # The number of output labels
    output_attentions = False, # Whether the model returns attention weights.
    output_hidden_states = False # Whether the model returns all hidden-states.
    
)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertForTokenClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this 

In [18]:
# Tell PyTorch to run this model on the GPU
model.cuda();

In [19]:
FULL_FINETUNING = True
if FULL_FINETUNING:
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
else:
    param_optimizer = list(model.classifier.named_parameters())
    optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]

optimizer = AdamW(
    optimizer_grouped_parameters,
    lr= 4e-5, # learning-rate default is 3e-5
    eps=1e-8 # adam-epsilon default is 1e-8
)




In [20]:
epochs = 100

max_grad_norm = 1.0

# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps= 1000,
    num_training_steps=total_steps
)

# **Training**

In [21]:
def acc(print_labels, special):
    accuracy_list = []
    max_a = 0
    index_a = 1000000
    
    for i in range(len(sentences)):
       
        
        s, l = tokenize_and_preserve_labels(sentences[i], labels[i])
    
        
        temp1 = []
     
        for j in range(len(s)):
            t = tokenizer.convert_tokens_to_ids(s[j])
            temp1.append(t)
            
        tokenized_sentence = temp1
        input_ids = torch.tensor([tokenized_sentence]).cuda()

        with torch.no_grad():
            output = model(input_ids)

        label_indices = np.argmax(output[0].to('cpu').numpy(), axis=2)

        # join bpe split tokens
        tokens = tokenizer.convert_ids_to_tokens(input_ids.to('cpu').numpy()[0])
        new_tokens, new_labels = [], []
        for token, label_idx in zip(tokens, label_indices[0]):
            if token.startswith("##"):
                new_tokens[-1] = new_tokens[-1] + token[2:]
            else:
                new_labels.append(tag_values[label_idx])
                new_tokens.append(token)

        new_labels = new_labels[1:-1]
        a = accuracy_score(new_labels, labels[i])
        if a  >= max_a and i !=4:
            max_a = a
            index_a = i
          
        accuracy_list.append(a)
        if print_labels == True:
            print("___________________"+ str(i)+"___________________")
            print("\nPredict labels: ",new_labels)
            print("Actual labels: ",labels[i])
            print()
            print()
            if special == i:
                for j in range(len(sentences[i])):
                    print(sentences[i][j], new_labels[j])
    print(index_a, max_a)
    return statistics.mean(accuracy_list)
        


In [29]:
def acc_test(print_labels, special):
    accuracy_list = []
    max_a = 0
    index_a = 1000000
    
    for i in range(len(s_test)):
       
        
        s, l_test[i] = tokenize_and_preserve_labels(s_test[i], l_test[i])
      
        
        temp1 = []
     
        for j in range(len(s)):
            t = tokenizer.convert_tokens_to_ids(s[j])
            temp1.append(t)
            
        tokenized_sentence = temp1
        input_ids = torch.tensor([tokenized_sentence]).cuda()

        with torch.no_grad():
            output = model(input_ids)

        label_indices = np.argmax(output[0].to('cpu').numpy(), axis=2)

        # join bpe split tokens
        tokens = tokenizer.convert_ids_to_tokens(input_ids.to('cpu').numpy()[0])
        new_tokens, new_labels = [], []
        for token, label_idx in zip(tokens, label_indices[0]):
            if token.startswith("##"):
                new_tokens[-1] = new_tokens[-1] + token[2:]
            else:
                new_labels.append(tag_values[label_idx])
                new_tokens.append(token)

        new_labels = new_labels[1:-1]
        a = accuracy_score(new_labels, l_test[i])
        if a  >= max_a and i !=4:
            max_a = a
            index_a = i
          
        accuracy_list.append(a)
        if print_labels == True:
            print("___________________"+ str(i)+"___________________")
            print("\nPredict labels: ",new_labels)
            print("Actual labels: ",l_test[i])
            print()
            print()
            if special == i:
                for j in range(len(sent_test[i])):
                    print(s_test[i][j], new_labels[j])
    print(index_a, max_a)
    return statistics.mean(accuracy_list)
        


In [24]:
from torch import nn
from transformers import Trainer
w = [.99, 0.0008, .0008, .99,.99, .0319, .0319, .0239, .0239, .0736, .0736, .0135, .0135, .0526, .0526, .0083, .0083, .0463, .0436, .0324, .0324, .0324, .014, .014, .0386, .0386, .0004, .0004, .0293, .0293, .0421, .0421]
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get('logits')
        # compute custom loss
        loss_fct = nn.CrossEntropyLoss(weight=torch.tensor(w))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [25]:
## Store the average loss after each epoch so we can plot them.
import copy
loss_values, validation_loss_values = [], []

test_acc = []

max_test = 10

for e in trange(epochs, desc="Epoch"):
    
    
    
    # ========================================
    #               Training
    # ========================================
    # Perform one full pass over the training set.

    # Put the model into training mode.
    model.train()
    # Reset the total loss for this epoch.
    total_loss = 0

    # Training loop
    for step, batch in enumerate(train_dataloader):
        # add batch to gpu
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        # Always clear any previously calculated gradients before performing a backward pass.
        model.zero_grad()
        # forward pass
        # This will return the loss (rather than the model output)
        # because we have provided the `labels`.
        outputs = model(b_input_ids,
                        attention_mask=b_input_mask, labels=b_labels)
        # get the loss
        loss = outputs[0]
        # Perform a backward pass to calculate the gradients.
        loss.backward()
        # track train loss
        total_loss += loss.item()
        # Clip the norm of the gradient
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        # update parameters
        optimizer.step()
        # Update the learning rate.
        scheduler.step()

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_dataloader)
    print("\nAverage train loss: {}".format(avg_train_loss))

    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)


    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

    # Put the model into evaluation mode
    model.eval()
    # Reset the validation loss for this epoch.
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions , true_labels = [], []
    for batch in valid_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        # Telling the model not to compute or store gradients,
        # saving memory and speeding up validation
        with torch.no_grad():
            # Forward pass, calculate logit predictions.
            # This will return the logits rather than the loss because we have not provided labels.
            outputs = model(b_input_ids, 
                            attention_mask=b_input_mask, labels=b_labels)
        # Move logits and labels to CPU
        logits = outputs[1].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences.
        eval_loss += outputs[0].mean().item()
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.extend(label_ids)

    eval_loss = eval_loss / len(valid_dataloader)
    validation_loss_values.append(eval_loss)
    print("\nValidation loss: {}".format(eval_loss))
    pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels)
                                 for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
    valid_tags = [tag_values[l_i] for l in true_labels
                                  for l_i in l if tag_values[l_i] != "PAD"]

    print("Validation Accuracy: {}".format(accuracy_score(pred_tags, valid_tags)))
    #print("Validation F1-Score: {}".format(f1_score(pred_tags, valid_tags)))
    print()
    a = eval_loss
    test_acc.append(a)
    if a <= max_test:
        max_test = a
    elif  max_test * 1.2 >= a and max_test < a:
        torch.save(model, "end.ck")
torch.save(model, "end.ck")

Epoch:   1%|▍                                         | 1/100 [00:02<04:27,  2.70s/it]


Average train loss: 3.317275568842888

Validation loss: 2.8113037745157876
Validation Accuracy: 0.7840488824569867



Epoch:   2%|▊                                         | 2/100 [00:04<03:54,  2.39s/it]


Average train loss: 1.9013388951619465

Validation loss: 1.2142116924126942
Validation Accuracy: 0.7999678404888245



Epoch:   3%|█▎                                        | 3/100 [00:07<03:42,  2.30s/it]


Average train loss: 1.139299056182305

Validation loss: 0.8589433133602142
Validation Accuracy: 0.8179771667470654



Epoch:   4%|█▋                                        | 4/100 [00:09<03:36,  2.25s/it]


Average train loss: 0.8051323294639587

Validation loss: 0.6624458332856497
Validation Accuracy: 0.8408104196816208



Epoch:   5%|██                                        | 5/100 [00:11<03:31,  2.23s/it]


Average train loss: 0.6134635675698519

Validation loss: 0.5367327953378359
Validation Accuracy: 0.8573725679369674



Epoch:   6%|██▌                                       | 6/100 [00:13<03:28,  2.21s/it]


Average train loss: 0.492098360011975

Validation loss: 0.4577326675256093
Validation Accuracy: 0.8684675992924907



Epoch:   7%|██▉                                       | 7/100 [00:15<03:25,  2.21s/it]


Average train loss: 0.4120481501643856

Validation loss: 0.40803346534570056
Validation Accuracy: 0.8705579675188937



Epoch:   8%|███▎                                      | 8/100 [00:18<03:22,  2.20s/it]


Average train loss: 0.35028683580458164

Validation loss: 0.3755058782796065
Validation Accuracy: 0.8861553304389773



Epoch:   9%|███▊                                      | 9/100 [00:20<03:20,  2.20s/it]


Average train loss: 0.2966266019890706

Validation loss: 0.344523835927248
Validation Accuracy: 0.9030390738060782



Epoch:  10%|████                                     | 10/100 [00:22<03:17,  2.20s/it]


Average train loss: 0.2485733445112904

Validation loss: 0.3273644298315048
Validation Accuracy: 0.9023958835825695



Epoch:  11%|████▌                                    | 11/100 [00:24<03:15,  2.20s/it]


Average train loss: 0.2043715074347953

Validation loss: 0.29738567024469376
Validation Accuracy: 0.9101141662646728



Epoch:  12%|████▉                                    | 12/100 [00:26<03:13,  2.20s/it]


Average train loss: 0.17166476720012724

Validation loss: 0.2920200241108735
Validation Accuracy: 0.9139733076057244



Epoch:  13%|█████▎                                   | 13/100 [00:28<03:11,  2.20s/it]


Average train loss: 0.138975699354584

Validation loss: 0.28237280746301013
Validation Accuracy: 0.924264351181862


Average train loss: 0.11341438830519716

Validation loss: 0.2883855191369851
Validation Accuracy: 0.9216915902878277



Epoch:  14%|█████▋                                   | 14/100 [00:33<04:12,  2.94s/it]


Average train loss: 0.09380900021642447

Validation loss: 0.2877338131268819
Validation Accuracy: 0.9152596880527416



Epoch:  16%|██████▌                                  | 16/100 [00:40<04:19,  3.09s/it]


Average train loss: 0.07683654292486608

Validation loss: 0.27234124888976413
Validation Accuracy: 0.9252291365171249


Average train loss: 0.06015644397120923

Validation loss: 0.31680373785396415
Validation Accuracy: 0.9139733076057244



Epoch:  17%|██████▉                                  | 17/100 [00:45<04:55,  3.56s/it]


Average train loss: 0.055713200087969504

Validation loss: 0.2853130617489417
Validation Accuracy: 0.9196012220614247



Epoch:  18%|███████▍                                 | 18/100 [00:49<05:19,  3.90s/it]


Average train loss: 0.04857172578340396

Validation loss: 0.29598813503980637
Validation Accuracy: 0.9216915902878277



Epoch:  19%|███████▊                                 | 19/100 [00:54<05:33,  4.11s/it]


Average train loss: 0.0367399842167894

Validation loss: 0.2850442649796605
Validation Accuracy: 0.9306962534169481



Epoch:  20%|████████▏                                | 20/100 [00:59<05:39,  4.25s/it]


Average train loss: 0.034473230848864965

Validation loss: 0.290536396826307
Validation Accuracy: 0.9241035536259848



Epoch:  21%|████████▌                                | 21/100 [01:03<05:44,  4.37s/it]


Average train loss: 0.028904568374855444

Validation loss: 0.2947141534338395
Validation Accuracy: 0.9282842900787908



Epoch:  22%|█████████                                | 22/100 [01:08<05:48,  4.47s/it]


Average train loss: 0.024735976476222277

Validation loss: 0.3111361836393674
Validation Accuracy: 0.9276410998552822



Epoch:  23%|█████████▍                               | 23/100 [01:13<05:50,  4.55s/it]


Average train loss: 0.024253127892734483

Validation loss: 0.31943803156415623
Validation Accuracy: 0.9205660073966876



Epoch:  24%|█████████▊                               | 24/100 [01:17<05:49,  4.60s/it]


Average train loss: 0.02109784061516014

Validation loss: 0.30473141682644683
Validation Accuracy: 0.9265155169641421



Epoch:  26%|██████████▋                              | 26/100 [01:24<04:49,  3.91s/it]


Average train loss: 0.01776821607685027

Validation loss: 0.3310813618203004
Validation Accuracy: 0.9213699951760733


Average train loss: 0.014941107913424881

Validation loss: 0.30215706676244736
Validation Accuracy: 0.9282842900787908



Epoch:  27%|███████████                              | 27/100 [01:29<04:59,  4.11s/it]


Average train loss: 0.013566403038566932

Validation loss: 0.32095836630711955
Validation Accuracy: 0.9284450876346679



Epoch:  28%|███████████▍                             | 28/100 [01:34<05:10,  4.31s/it]


Average train loss: 0.010554777780877581

Validation loss: 0.30337207205593586
Validation Accuracy: 0.9318218363080881



Epoch:  29%|███████████▉                             | 29/100 [01:38<05:14,  4.43s/it]


Average train loss: 0.008756128462361326

Validation loss: 0.31448794063180685
Validation Accuracy: 0.9300530631934395



Epoch:  31%|████████████▋                            | 31/100 [01:45<04:25,  3.84s/it]


Average train loss: 0.00882063017343171

Validation loss: 0.3309570252895355
Validation Accuracy: 0.9253899340730021



Epoch:  32%|█████████████                            | 32/100 [01:48<03:48,  3.35s/it]


Average train loss: 0.008265875245948942

Validation loss: 0.3460728122542302
Validation Accuracy: 0.9255507316288792


Average train loss: 0.007494847850466613

Validation loss: 0.3231494377056758
Validation Accuracy: 0.9318218363080881



Epoch:  33%|█████████████▌                           | 33/100 [01:52<04:10,  3.74s/it]


Average train loss: 0.007668728959591438

Validation loss: 0.3190550170838833
Validation Accuracy: 0.9252291365171249



Epoch:  35%|██████████████▎                          | 35/100 [01:59<03:46,  3.49s/it]


Average train loss: 0.008013573904463556

Validation loss: 0.34536053116122883
Validation Accuracy: 0.9202444122849333



Epoch:  36%|██████████████▊                          | 36/100 [02:01<03:18,  3.11s/it]


Average train loss: 0.006374947259852585

Validation loss: 0.3506160471588373
Validation Accuracy: 0.9237819585142306



Epoch:  37%|███████████████▏                         | 37/100 [02:04<02:58,  2.84s/it]


Average train loss: 0.006309941214567516

Validation loss: 0.351751825461785
Validation Accuracy: 0.9252291365171249



Epoch:  38%|███████████████▌                         | 38/100 [02:06<02:44,  2.65s/it]


Average train loss: 0.005661416401077683

Validation loss: 0.3605959701041381
Validation Accuracy: 0.9300530631934395



Epoch:  39%|███████████████▉                         | 39/100 [02:08<02:33,  2.52s/it]


Average train loss: 0.005650195053021889

Validation loss: 0.3406499257932107
Validation Accuracy: 0.9236211609583534



Epoch:  40%|████████████████▍                        | 40/100 [02:10<02:25,  2.43s/it]


Average train loss: 0.00653675222080589

Validation loss: 0.3462403317292531
Validation Accuracy: 0.9282842900787908



Epoch:  41%|████████████████▊                        | 41/100 [02:12<02:19,  2.37s/it]


Average train loss: 0.005962164564455937

Validation loss: 0.344732320557038
Validation Accuracy: 0.9278018974111594



Epoch:  42%|█████████████████▏                       | 42/100 [02:15<02:14,  2.32s/it]


Average train loss: 0.005252041874579542

Validation loss: 0.34941117900113267
Validation Accuracy: 0.9300530631934395



Epoch:  43%|█████████████████▋                       | 43/100 [02:17<02:10,  2.29s/it]


Average train loss: 0.0052443106020897785

Validation loss: 0.3606766580293576
Validation Accuracy: 0.9315002411963338



Epoch:  44%|██████████████████                       | 44/100 [02:19<02:07,  2.27s/it]


Average train loss: 0.005580167716592162

Validation loss: 0.35067450689772767
Validation Accuracy: 0.9292490754140537



Epoch:  45%|██████████████████▍                      | 45/100 [02:21<02:04,  2.26s/it]


Average train loss: 0.005361496966240035

Validation loss: 0.36157263070344925
Validation Accuracy: 0.926354719408265



Epoch:  46%|██████████████████▊                      | 46/100 [02:24<02:01,  2.25s/it]


Average train loss: 0.0050661003721567495

Validation loss: 0.36206480860710144
Validation Accuracy: 0.9241035536259848



Epoch:  47%|███████████████████▎                     | 47/100 [02:26<01:58,  2.24s/it]


Average train loss: 0.0050110521484990995

Validation loss: 0.36663010840614635
Validation Accuracy: 0.9279626949670365



Epoch:  48%|███████████████████▋                     | 48/100 [02:28<01:56,  2.23s/it]


Average train loss: 0.004768080573436843

Validation loss: 0.37989936334391433
Validation Accuracy: 0.9287666827464223



Epoch:  49%|████████████████████                     | 49/100 [02:30<01:53,  2.23s/it]


Average train loss: 0.0046610291434869095

Validation loss: 0.3739423044025898
Validation Accuracy: 0.9282842900787908



Epoch:  49%|████████████████████                     | 49/100 [02:31<02:37,  3.09s/it]


KeyboardInterrupt: 

In [None]:
# Use plot styling from seaborn.
sns.set(style='darkgrid')

# Increase the plot size and font size.
sns.set(font_scale=1.5)
plt.rcParams["figure.figsize"] = (12,6)

# Plot the learning curve.
plt.plot(loss_values, 'b-o', label="training loss")
plt.plot(validation_loss_values, 'r-o', label="validation loss")

# Label the plot.
plt.title("Learning curve")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.show()

# Performance on test data

In [31]:
q = len(labels)-1
print(acc(False, q))
acc(True, q)

273 0.953125
0.6325384405682112
___________________0___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0']


___________________1___________________

Predict labels:  ['Metal', 'Metal', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid Concentration', 'C-cont', 'C-cont', '0', 'Separation Method', 'S-cont', 

___________________33___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Organic solvent', 'Organic solvent', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'pH', '0', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Yield', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['Organic solvent', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solve

___________________69___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'Separation Method', 'Separation Method', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'S-cont', 'S-cont', 'S-cont', 'S-cont', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'S-cont', 'S-cont', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', 'Separation Method', 'Separation Method', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Inorganic Solvent', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0'

___________________106___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', 'Reagent', 'Reagent', 'Reagent', 'Reagent', 'Reagent', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', 'Organic solvent', 'Organic solvent', 'Organic solvent', 'Organic solvent', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', 'Element', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', 'Separation Method', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', 'Reagent', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', 'Organic solvent', '0', '0', '

___________________144___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0', 'Element', 'Element', 'Element', 'Element', '0', 'Element', 'Element', 'Element', 'Element', '0', '0', 'Element', 'Element', 'Element', 'Element', '0', '0', 'Acid Concentration', 'Acid Concentration', 'Acid Concentration', 'C-cont', 'C-cont']
Actual labels:  ['0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', 'Element', '0', '0', 'Element', '0', '0', 'Acid Concentration', 'C-cont', 'C-cont', '0']


___________________145___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Resin', 'Resin', 'Resin', 'R-cont', 'R-cont', 'R-cont', 'R-cont', '0', '

___________________181___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Ele

___________________217___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Yield', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Yield', 'Yield', 'Yield', 'Yield', 'Yield', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', 'Yield', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y

___________________254___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid Concentration', 'Acid Concentration', 'Acid Concentration', 'C-cont', 'C-cont', '0', '0', '0', 'Element', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0', 'Element', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid Concentration', 'C-cont', 'C-cont', '0', '0', '0', 'Element', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0',

___________________291___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']


___________________292___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '

___________________327___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'S-cont', '0', '0', '0', 'Method of Analysis', 'T-cont', 'T-cont']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'

___________________364___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', 'H-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'

___________________400___________________

Predict labels:  ['0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0']
Actual labels:  ['0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Ele

___________________435___________________

Predict labels:  ['Element', 'Element', '0', '0', 'Yield', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['Element', '0', '0', 'Yield', 'Y-cont', 'Y-cont', 'Y-cont', 

___________________472___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0']


___________________473___________________

Predict labels:  ['Element', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '

___________________508___________________

Predict labels:  ['Inorganic Solvent', 'Inorganic Solvent', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', '0', 'pH', 'P-cont', 'P-cont', 'P-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Inorganic Solvent', 'Inorganic Solvent', 'Inorganic Solvent', 'Inorganic Solvent', 'Inorganic Solvent', 'I-cont', '0', 'Inorganic Solvent', 'Inorganic Solvent', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', '0', 'pH', 'P-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', 'Inorganic Solv

0.6323065523364936

In [30]:
t = len(label_test)-1
print(acc_test(False, t))
acc_test(True, t)

122 1.0
0.6840939282621535
___________________0___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', 'Acid', '0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', '0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', 'Acid', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', '0', 'Acid', 'A-cont', '0', '0', 'Separation Method', '0', 'Separation Method', '0', 'Organic solvent', 'Organic solvent', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', 'Acid', 'Acid', 'Acid', 'Acid

___________________31___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', '0', 'Separation Method', 'Separation Method', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '

___________________67___________________

Predict labels:  ['Element', 'Element', 'Element', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0', '0', 'Acid Concentration', 'Acid Concentration', 'Acid Concentration', 'C-cont', 'C-cont', '0', '0', 'Inorganic Solvent', 'Reagent', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', 'I-cont', '0', '0', '0', 'Inorganic Solvent', 'I-cont', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0',

___________________104___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', 'Separation Method', '0', 'Separation Method', 'Separation Method', 'Organic solvent', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', 'O-cont', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '

0.6810489315175969

In [32]:
model = torch.load("test.ck")
model.eval()


print(acc(False, q))
acc(True, q)

273 0.953125
0.6302567457326717
___________________0___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0']


___________________1___________________

Predict labels:  ['Metal', 'Metal', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid Concentration', 'C-cont', 'C-cont', '0', 'Separation Method', 'S-cont', 

___________________35___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', 'Acid', 'Acid', 'Acid', '0', 'Acid', 'Acid', 'Acid', 'Acid', 'Acid', 'A-cont', '0', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', '0', 'Acid', 'A-cont', '0', '0', '0', '0', 'Chemical Compound', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0

___________________73___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Separation Method', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'Separation Method', 'Separation Method', 'S-cont', 'S-cont', 'S-cont', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', 'H-cont', 'H-cont', 'H-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element']
Actual labels:  ['0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Separation Method', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'S-cont', 'S-cont', 'S-cont', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound'

___________________112___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0']


___________________113___________________

Predict labels:  ['0', 'Element', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Reagent', 'X-cont', '0', '0', '0', '0', '0', 'Separation Method', '

___________________151___________________

Predict labels:  ['0', '0', 'Acid Concentration', 'Acid Concentration', 'Acid Concentration', 'C-cont', '0', 'Acid Concentration', 'Acid Concentration', 'Acid Concentration', 'C-cont', 'C-cont', 'C-cont', 'C-cont', 'C-cont', 'C-cont', 'Chemical Compound', 'H-cont', '0', '0', '0', '0', '0', '0', 'Inorganic Solvent', 'Inorganic Solvent', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', '0', 'Element', '0', '0', '0']
Actual labels:  ['0', '0', '0', 'Acid Concentration', 'C-cont', '0', 'Acid Concentration', 'C-cont', 'C-cont', 'Chemical Compound', 'H-cont', '0', '0', '0', '0', '0', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', '0', '0', 'Element', '0', '0']


___________________152___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 

___________________189___________________

Predict labels:  ['0', 'Separation Method', 'Separation Method', 'Separation Method', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', 'S-cont', '0', '0', '0', '0', 'Element', '0', 'Separation Method', 'Separation Method', 'Separation Method', 'S-cont', '0', 'Separation Method', 'S-cont', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', 'Separation Method', 

___________________226___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'

___________________264___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 

___________________303___________________

Predict labels:  ['0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', 'Separation Method', 'Separation Method', 'Separation Method', 'Separation Method', 'S-cont', '0', 'Separation Method', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', 'Yield', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', 'Y-cont', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '

___________________340___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', 'S-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Chemical Compound', 'H-cont', '0', '0', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'

___________________379___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', 'O-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', 'Organic solvent', 'O-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0',

___________________415___________________

Predict labels:  ['I-cont', '0', '0', '0', '0', '0', 'Reagent', 'Reagent', 'Reagent', '0', '0', '0', '0', 'pH', 'P-cont', 'P-cont', 'P-cont', 'P-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Separation Method', 'Separation Method', 'Separation Method', 'S-cont', 'S-cont', '0', '0', '0', '0', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', '0', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Reagent', 'Reagent', 'Reagent', '0', '0', '0', '0', '0', 'Method of Analysis', 'T-cont', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'T-cont', 'T-cont', 'T-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Reagent', 'Reagent', 'Reagent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',

___________________452___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', '0', 'Element', '0', '0', 'Element', '0', 'Reagent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', '0', 'Element', '0', '0', 'Element', '0', 'Reagent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']


___________________453___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', 'Reagent', '0', 'Element', 'Element', 'Element', 

___________________490___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', 'T-cont', 'T-cont', 'T-cont', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', 'T-cont', '0', '0', '0', '0', 

___________________529___________________

Predict labels:  ['0', '0', '0', '0', 'Organic solvent', 'O-cont', 'O-cont', 'O-cont', '0', 'Inorganic Solvent', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', 'I-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'T-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'Method of Analysis', 'T-cont', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', 'Organic solvent', 'O-c

0.6302567457326717

In [33]:
print(acc_test(False, t))
acc_test(True, t)

122 1.0
0.6670805543160518
___________________0___________________

Predict labels:  ['0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', 'Acid', '0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', '0', '0', 'Separation Method', 'S-cont', '0', 'Separation Method', '0', '0', 'Element', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', '0', '0', '0', '0', 'Acid', 'A-cont', '0', 'Acid', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Acid', '0', 'Acid', 'A-cont', '0', '0', 'Separation Method', '0', 'Separation Method', '0', 'Organic solvent', 'Organic solvent', 'Organic solvent', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0

___________________36___________________

Predict labels:  ['0', 'Element', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'H-cont', 'H-cont', '0', '0', '0', 'Acid Concentration', 'Acid Concentration', 'Acid Concentration', 'C-cont', 'C-cont', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Resin', 'R-cont', 'R-cont', '0', '0', '0', '0', '0', '0', 'Acid', 'O-cont', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', 'Acid', '0', '0', '0', '0', '0', '0', 'Acid Concentration', 'C-cont', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', 'Element', '0', 'Element', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemical Compound', 'Chemica

___________________72___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 

___________________110___________________

Predict labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'Element', 'Element', 'Element', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
Actual labels:  ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',

0.6567944236603805