In [1]:
import os

os.environ["WANDB_MODE"]="disabled"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
#!export CUDA_VISIBLE_DEVICES=""

In [2]:
import argparse
import os
import pandas as pd
import numpy as np
from transformers import BertTokenizer,BertForSequenceClassification,Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
from sklearn import preprocessing
import torch.nn as nn
from sklearn.utils import compute_class_weight
from sklearn.metrics import f1_score, classification_report
import pickle



In [3]:
le = preprocessing.LabelEncoder()
# use this to map categories to integers.


In [4]:
  #load tokenizer 
tokenizer = BertTokenizer.from_pretrained('allenai/scibert_scivocab_uncased', do_lower_case=True, use_fast=True)

In [5]:
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

No GPU available, using the CPU instead.


functions and class definitions


In [6]:
class MulticlassDataset(Dataset):

    def __init__(self, encodings, labels):
      self.encodings = encodings
      self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)
        

In [7]:
def create_dataset(dataframe, tokenizer):
  MAX_LENGTH = 100 #raise from 60 to 80 to 100
  inputs = {
          "input_ids":[],
          "attention_mask":[]
        }
   

  # use this if concatenting text from all fields in the data (except the class field)
  features_columns =[x for x in dataframe.columns.values if x != 'iSampleMaterial']
  def create_concatenated_text(dataframe):
    """combine the columns text to create a single sentence"""
    ttext= [] #text that is a concatenation of all columns
    for _, row in dataframe.iterrows():
      row_value = row["text"]
      if row_value!="" and type(row_value)==str:
          combined += row_value   # +" , "
      ttext.append(combined)
    return ttext
  #sents = create_concatenated_text(dataframe)
    

  def getTrainText(dataframe, ttcol ):  # smr version-- pre concatenate training text in one column
    ttext= []
    for _, row in dataframe.iterrows():
      row_value = row[ttcol]
      ttext.append(str(row_value))
    return ttext

  sents = getTrainText(dataframe, traintextcol )
    
  for sent in sents:
    tokenized_input = tokenizer(sent,max_length=MAX_LENGTH, padding='max_length', truncation=True)
    inputs["input_ids"].append(torch.tensor(tokenized_input["input_ids"]))
    inputs["attention_mask"].append(torch.tensor(tokenized_input["attention_mask"]))
 
  print("torch tensor dataframe columns:", dataframe.columns.values)
  #print("dataframe['iSampleMaterial']: ",dataframe['iSampleMaterial'].values )
  labels = torch.tensor(dataframe['iSampleMaterial'].values.tolist())
    
  return MulticlassDataset(inputs,labels)

def get_class_weights(dataframe):
  """computes the class weight and returns a list to account for class imbalance """
  
    
  dataframe['iSampleMaterial'] = le.transform(dataframe.iSampleMaterial) 
  labels = torch.tensor(dataframe['iSampleMaterial'].values.tolist())
  #labels = torch.tensor(dataframe['iSampleMaterial'].values.tolist())
  label_le = le.classes_ 
  print ("np unique labels for weights:", np.unique(labels))  
  print ("le class labels: ",label_le)
  print ("labels.numpy:", labels.numpy)
    
  class_weights=compute_class_weight( class_weight ='balanced',classes = np.unique(labels),y = labels.numpy())
  #class_weights=compute_class_weight( class_weight ='balanced',classes = labels,y = labellist.numpy())

  total_class_weights =torch.tensor(class_weights,dtype=torch.float).to(device)
  return total_class_weights

def create_custom_trainer(class_weights):
  """creates custom trainer that accounts for class imbalance"""
  class CustomTrainer(Trainer):
      def compute_loss(self, model, inputs, return_outputs=False):
          labels = inputs.get("labels")
          # forward pass
          outputs = model(**inputs)
          logits = outputs.get("logits")
          # compute custom loss 
          loss_fct = nn.CrossEntropyLoss(weight=class_weights)
          loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
          return (loss, outputs) if return_outputs else loss
  return CustomTrainer





def train(selected_type, dataframe, tokenizer, batch_size, learning_rate, epochs,train_mode, output_dir):

  train_df, dev_df, test_df = preprocess(dataframe,selected_type)
  train_dataset = create_dataset(train_df, tokenizer)
  dev_dataset = create_dataset(dev_df,tokenizer)
  test_dataset = create_dataset(test_df,tokenizer)

  #load model
  model = BertForSequenceClassification.from_pretrained("allenai/scibert_scivocab_uncased", num_labels = len(le.classes_), )

  # Tell pytorch to run this model on the GPU.
  #desc = model.cuda()
  desc = model.to(device)

  training_args = TrainingArguments(
          output_dir= output_dir,     # output directory
          num_train_epochs=epochs,              # total number of training epochs
          per_device_train_batch_size=batch_size,  # batch size per device during training
          per_device_eval_batch_size=batch_size,   # batch size for evaluation
          learning_rate = learning_rate,
          warmup_steps=500,                # number of warmup steps for learning rate scheduler
          weight_decay=0.01, 
          load_best_model_at_end=True,            
          logging_dir=output_dir,            # directory for storing logs
          logging_steps=10,
          evaluation_strategy = "epoch", #To calculate metrics per epoch
          save_strategy = "epoch"
  )
  #get class weight
  class_weights = get_class_weights(train_df)
  CustomTrainer = create_custom_trainer(class_weights)

  if train_mode == "custom":
    trainer = CustomTrainer(model = model, args =training_args, train_dataset=train_dataset, eval_dataset=dev_dataset)
  else:
    trainer = Trainer(model = model, args =training_args, train_dataset=train_dataset, eval_dataset=dev_dataset)
  trainer.train()

In [8]:
def preprocess(dataframe, selected_material_type=None):
  #convert the dataframe labels accordingly by the material type
  # original preprocess from Sarah Song
  #if selected_material_type!="None":
  #  new_df = dataframe.copy()
  #  for _, row in new_df.iterrows():
  #    if row['iSampleMaterial'].split("_")[0] == selected_material_type:
  #      continue #leave the label
  #    else:
  #     row['iSampleMaterial']="None" #set none as label
  #else:
  new_df = dataframe.copy()   
  
  #convert labels into integers
  le.fit(new_df.iSampleMaterial)
  print(" number of labels: ", len(le.classes_))
  new_df['iSampleMaterial'] = le.transform(new_df.iSampleMaterial)
  
  #split data to training df, dev df, test df
  sample_size = 10000
  #fraction=sample_size/len(new_df)  # get about 500 samples
  #sel_len = sample_size
  train_df, dev_df, test_df =  np.split(new_df.sample(n=sample_size, random_state=42),[int(.6*sample_size), int(.8*sample_size)])

  train_df.to_csv('output/train_df.csv')
  dev_df.to_csv('output/dev_df.csv')
  test_df.to_csv('output/test_df.csv')
 
  return train_df, dev_df, test_df

In [9]:
def preprocess_2(dataframe):
    
  le.fit(dataframe.iSampleMaterial)
  print(" number of labels: ", len(le.classes_))
  print("label encoder classes:",le.classes_)
  rockint = le.transform(["mat:rock"])[0]
  #convert the dataframe labels accordingly by the material type
  # want to sample the different material types according to their frequency
  # generate separate dataframes for mineral, rock, sediment, rockorsediment, and soil

  # fraction of total for each class
  #min_n = .255
  #rock_n = .435
  #sed_n = .125
  #rocksed_n = .065
  #soil_n = .12

#try different distribution
  min_n = .205
  rock_n = .235
  sed_n = .225
  rocksed_n = .215
  soil_n = .12
  # total sample 
  sample_size = 1000
  #rand_state = int(42)
  rand_state = int(19)
  
  
  #split data to training df, val df, test df
  min_df = dataframe[dataframe["iSampleMaterial"]=="mat:mineral"].copy()   #flattened label version 
    # build the data frames
  print("min_df rowcount: ", len(min_df.index))
  this_n = int(round(min_n * sample_size, 0))  # weights='weight', axis=0,
  train_df_min, dev_df_min, test_df_min =  np.split(min_df.sample(n=this_n,   random_state=rand_state),[int(.6*this_n), int(.8*this_n)])
  print("finished min dataframe. this_n:", this_n, " split at:",int(.6*this_n), int(.8*this_n))

  rock_df = dataframe[dataframe["iSampleMaterial"]=="mat:rock"].copy()   #flattened label version 
    # build the data frames
  this_n = int(round(rock_n * sample_size, 0))  # weights='weight', axis=0,
  train_df_rock, dev_df_rock, test_df_rock =  np.split(rock_df.sample(n=this_n,   random_state=rand_state),[int(.6*this_n), int(.8*this_n)])
  print("finished rock dataframe. this_n:", this_n, " split at:",int(.6*this_n), int(.8*this_n))

    
  sed_df = dataframe[dataframe["iSampleMaterial"]=="mat:sediment"].copy()   #flattened label version 
    # build the data frames
  this_n = int(round(sed_n * sample_size, 0))  # weights='weight', axis=0,
  train_df_sed, dev_df_sed, test_df_sed =  np.split(sed_df.sample(n=this_n,   random_state=rand_state),[int(.6*this_n), int(.8*this_n)])
  print("finished sed dataframe. this_n:", this_n, " split at:",int(.6*this_n), int(.8*this_n))


  rocksed_df = dataframe[dataframe["iSampleMaterial"]=="mat:rockorsediment"].copy()   #flattened label version 
    # build the data frames
  this_n = int(round(rocksed_n * sample_size, 0)) # weights='weight', axis=0,
  train_df_rocksed, dev_df_rocksed, test_df_rocksed =  np.split(rocksed_df.sample(n=this_n,   random_state=rand_state),[int(.6*this_n), int(.8*this_n)])
  print("finished rocksed dataframe. this_n:", this_n, " split at:",int(.6*this_n), int(.8*this_n))


  soil_df = dataframe[dataframe["iSampleMaterial"]=="mat:soil"].copy()   #flattened label version 
    # build the data frames
  this_n = int(round(soil_n * sample_size, 0)) # weights='weight', axis=0,
  train_df_soil, dev_df_soil, test_df_soil =  np.split(soil_df.sample(n=this_n,   random_state=rand_state),[int(.6*this_n), int(.8*this_n)])
  print("finished soil dataframe. this_n:", this_n, " split at:",int(.6*this_n), int(.8*this_n))

    
  #train_df_soil.to_csv('output/train_df_soil.csv')
  #dev_df_soil.to_csv('output/dev_df_soil.csv')
  #test_df_soil.to_csv('output/test_df_soil.csv')
    
    
#intention is for final dataset for training to have sample_size records, distributed over the 5 classes based on the abundance of the class
#  and weighted according to the frequency distribution for the 154 IGSN registrants. Based on 
#  assumption that a given registrant will be documenting similar samples with similar conventions
    
# merge the training dataframes
  theframes = [train_df_min, train_df_rock, train_df_sed,train_df_rocksed,train_df_soil]
  train_df = pd.concat(theframes)
  train_df.sort_values(by='igsn', inplace=True )
  #convert labels into integers
  train_df['iSampleMaterial'] = le.transform(train_df.iSampleMaterial)
  
# merge the dev dataframes
  theframes = [dev_df_min, dev_df_rock, dev_df_sed,dev_df_rocksed,dev_df_soil]
  dev_df = pd.concat(theframes)
  dev_df.sort_values(by='igsn', inplace=True )
  #convert labels into integers
  dev_df['iSampleMaterial'] = le.transform(dev_df.iSampleMaterial)

# merge the training dataframes
  theframes = [test_df_min, test_df_rock, test_df_sed,test_df_rocksed,test_df_soil]
  test_df = pd.concat(theframes) 
  test_df.sort_values(by='igsn', inplace=True )
  #convert labels into integers
  test_df['iSampleMaterial'] = le.transform(test_df.iSampleMaterial)

  train_df.to_csv('output/train_df.csv')
  dev_df.to_csv('output/dev_df.csv')
  test_df.to_csv('output/test_df.csv')
    
  return train_df, dev_df, test_df

In [10]:
 # use dictionary of classes 
# SMR 2023-08-21

def preprocess_3(dataframe):
  # classname : samplesize 
  classdict = {
    "mat:rock" : 500,
    "mat:mineral" : 500,
    "mat:organicmaterial" : 500,
    "mat:sediment" : 500,
    "mat:soil" : 500,
    "mat:liquidwater" : 500,
    "mat:material" : 400,
    "mat:rockorsediment" : 400,
    "mat:mixedsoilsedimentrock" : 300,
    "mat:biogenicnonorganicmaterial" : 300,
    "mat:otheranthropogenicmaterial" : 200,
    "mat:particulate" : 200,
    "xxx" : 150,
    "mat:gas" : 200,
    "mat:anthropogenicmetal" : 50
  }

  classcol = "iSampleMaterial"
  rand_state = int(19)
  samplesize = int(0)
  classname = ""
    
  #empty data frames to accumulate results  
  work_df = pd.DataFrame()
  train_df  = pd.DataFrame() 
  dev_df = pd.DataFrame()
  test_df = pd.DataFrame()

  le.fit(dataframe.iSampleMaterial)
  print(" number of labels: ", len(le.classes_))
  print("label encoder classes:",le.classes_)
  print("transform classes:", le.fit_transform(le.classes_))
    
  for classname, samplesize in classdict.items() :
  #split data to training df, val df, test df
    #print("class:", classname, "  samplesize:",samplesize)
    work_df = dataframe[dataframe[classcol]==classname].copy()   #flattened label version 
        # build the data frames
    #print(classname, " rowcount: ", len(work_df.index))
    train_df_work, dev_df_work, test_df_work =  np.split(work_df.sample(n=samplesize,   random_state=rand_state),[int(.6*samplesize), int(.8*samplesize)])
    print("finished ",classname," dataframe. samplesize:", samplesize, " split at:",int(.6*samplesize), int(.8*samplesize))
     
    # merge into the output dataframes
    train_df = pd.concat([train_df_work, train_df]) 
    test_df = pd.concat([test_df_work, test_df]) 
    dev_df = pd.concat([dev_df_work, dev_df]) 
  
  
  #sort by igsn, convert labels to integers  
  train_df.sort_values(by='igsn', inplace=True )
  train_df['iSampleMaterial'] = le.transform(train_df.iSampleMaterial)
  test_df.sort_values(by='igsn', inplace=True )
  test_df['iSampleMaterial'] = le.transform(test_df.iSampleMaterial)
  dev_df.sort_values(by='igsn', inplace=True )
  dev_df['iSampleMaterial'] = le.transform(dev_df.iSampleMaterial)

  #write dataframes for reference
  train_df.to_csv('output/train_df.csv')
  dev_df.to_csv('output/dev_df.csv')
  test_df.to_csv('output/test_df.csv')
    
  return train_df, dev_df, test_df
    

 # Notes
 
 epochs 4, batch 20, lr_rat .007 worked best yet, with 500 samples; lowest loses at 3 epochs
 
 try adding axis = 0 in pandas sampling, I can't tell if its using weights. different results-- sed and rocksed are bad,
  others much better. Run again to see if the same... Get different results. The pandas sample is different, and that 
 
 impacts the results. 3 of 5 classes identified prttey well. Try raising sample to 1000.  Got matches on 4 classes, 
 good only on mineral, rock, and soil.
 
 try sampling w/o weights
  !! worked much better!!

 2023-08-15
 
 try 10 epochs (n-1000, .007, batch 20) to see if get any convergence,no convergence, 
        but recall and precision not bad
 
 try 3 epochs, rest same. Seemed to work about as well
 
 try 1 epoch-- seemed to work pretty much the same
 
 try 3 epoch, 40 batch, 5000 samples-- complete fail! 
 
 try 3 epoch, 100 batch, 5000 samples
 
 1 epoch 100 batch 5000, SS's preprocess. Doesn't get any rockSed, otherwise goot
 
 1 epoch 100 batch, 10000 sample, SS preprocess. didn't get any rockSed or soil...
 
 4 epoch 100 batch,  10000 sample, SMR preprocess.  TErrible. only got rock
 
 4 epoch 100 batch,  10000 sample, ss preprocess.  Terrible. only got rock
 
 3 epoch, 20 batch 10000 sample, ss preprocess rate.01 Bust only got rock
 
 3 epoch, 20 batch 10000 sample, smr preprocess rate.01 Bust only got rock, but some  convergences
 
 4 epoch,batch20,.007,1000 samples, change fractions in training data to favor sed,rocksed. 
    # good matches except rocksed, but some hits theretoo. Got convergence after the first epoch
 
 try more epochs
 
 8 epoch,batch20,.007,1000 samples, change fractions in training data to favor sed,rocksed. 
    # good result rand 42, 23

results vary slightly with different rand
 
 raise n to 2000, rand=73, bad result 
 
 try 4 epoch rand= 73
 
 2023-08-21
 implement new preprocess that builds training data by selecting the classes to sample and number of samples using a dictionary.  raise training text length from 80 to 100.  Training text from SESAR has been culled to remove long    sentences about sample locations, mostly from Alan Mansur, NMNH
  total sample is about 2000.
  
Results are useless.  Go back to 80 char length, and up the learning sample size, reduce epochs to 1


In [11]:
## Required parameters
nb_epochs = int(4)  #was 2, then 3, 
batch_size = int(20) #was 10, then 20, tried 30. 
lr_rate = float(0.007) #was.01

# material_type = str('')

#train_mode = str('FALSE')
train_mode = str('custom')
 #  Whether we account for class imbalance during training by using a custom trainer 
    # (custom) or not (none)                  
output_dir =str('output')
 #Output directory where the model checkpoint will be saved
    

In [12]:
#df = pd.read_csv("iSamplesMaterialTrainingSmall.csv")
#df = pd.read_csv("SESARTrainingiSamKeywords.csv", usecols=['igsn', 'traintext'],dtype={'igsn':str,'traintext':str})
#df = pd.read_csv("MaterialTypeData2023-08-07.csv") # only has rock, sediment, rocksed, soil, mineral

classcol = "iSampleMaterial"
traintextcol = 'traintext'
df = pd.read_csv("SESARTrainingiSamKeywords.csv", usecols=['igsn', classcol, traintextcol],dtype={'igsn':str, classcol:str, traintextcol:str})

df = df.fillna("")
#remove rows that do not have a class name or training text
df = df[df[classcol]!=""]
df = df[df[traintextcol]!=""]

#count tokens-- 
####################### SLOW-- this scans all records.
rowcount = 1
ratiosum = 0.0
maxratio = 0.0
for _, row in df.iterrows():
    sentence = row["text"]
    tokens = tokenizer.tokenize(sentence)
    token_count = len(tokens)
    senlen = len(sentence)
    ratio = token_count/senlen
    if ratio > maxratio:
        maxratio = ratio
    
    #print("Original sentence:", sentence)
    #print("Sentence len:", senlen, "; Number of tokens:", token_count, "; ratio:", ratio)
    #print("Number of tokens:", token_count)
    
    rowcount =rowcount + 1
    ratiosum = ratiosum + ratio
    #print("ratio:", ratio)
    
avrage = ratiosum/rowcount
print("Average ratio:", avrage, "; Max ratio:", maxratio)
print("row count: ", rowcount)

    

In [13]:
#load tokenizer
# train(material_type, df, tokenizer, batch_size,lr_rate, nb_epochs, train_mode, output_dir)

# insert train function in line here for debugging...
train_df, dev_df, test_df = preprocess(df)  #original function from Sarah Song
#train_df, dev_df, test_df = preprocess_2(df)  #steves update, only rock, mineral, rocksed, soil, sediment
#train_df, dev_df, test_df = preprocess_3(df)  #dictionary to set sample size for each class



 number of labels:  15


In [14]:

#print("train_df columns:", train_df.columns.values)
#print("train_df:", train_df.describe)
#train_df['iSampleMaterial'].values

train_dataset = create_dataset(train_df, tokenizer)
dev_dataset = create_dataset(dev_df,tokenizer)
test_dataset = create_dataset(test_df,tokenizer)

torch tensor dataframe columns: ['igsn' 'traintext' 'iSampleMaterial']
torch tensor dataframe columns: ['igsn' 'traintext' 'iSampleMaterial']
torch tensor dataframe columns: ['igsn' 'traintext' 'iSampleMaterial']


In [15]:
#print(len(le.classes_))
#print("transform classes:", le.fit_transform([1]))

In [16]:
# load model
model = BertForSequenceClassification.from_pretrained("allenai/scibert_scivocab_uncased", num_labels = len(le.classes_), )

Some weights of the model checkpoint at allenai/scibert_scivocab_uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification we

In [17]:
desc = model.to(device)
training_args = TrainingArguments(
          no_cuda = True,
          output_dir= output_dir,     # output directory
          num_train_epochs=nb_epochs,              # total number of training epochs
          per_device_train_batch_size=batch_size,  # batch size per device during training
          per_device_eval_batch_size=batch_size,   # batch size for evaluation
          learning_rate = lr_rate,
          warmup_steps=500,                # number of warmup steps for learning rate scheduler
          weight_decay=0.01, 
          load_best_model_at_end=True,            
          logging_dir=output_dir,            # directory for storing logs
          logging_steps=10,
          evaluation_strategy = "epoch", #To calculate metrics per epoch
          save_strategy = "epoch"
  )

In [18]:
class_weights = get_class_weights(df)
CustomTrainer = create_custom_trainer(class_weights)
if train_mode == "custom":

    trainer = CustomTrainer(model = model, args =training_args, train_dataset=train_dataset, eval_dataset=dev_dataset)
else:
    trainer = Trainer(model = model, args =training_args, train_dataset=train_dataset, eval_dataset=dev_dataset)

np unique labels for weights: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
le class labels:  ['mat:anthropogenicmetal' 'mat:biogenicnonorganicmaterial' 'mat:gas'
 'mat:liquidwater' 'mat:material' 'mat:mineral'
 'mat:mixedsoilsedimentrock' 'mat:organicmaterial'
 'mat:otheranthropogenicmaterial' 'mat:particulate' 'mat:rock'
 'mat:rockorsediment' 'mat:sediment' 'mat:soil' 'xxx']
labels.numpy: <built-in method numpy of Tensor object at 0x00000228C6640EF0>


In [19]:
#train_dataset.labels
#train_dataset.encodings

In [20]:
trainer.train()



Epoch,Training Loss,Validation Loss
1,4.3251,4.182779
2,7.1534,6.5085
3,3.9233,3.804018
4,2.5195,2.631948


in control.should_save. metrics: {'eval_loss': 4.182779312133789}
in control.should_save. metrics: {'eval_loss': 6.508499622344971}
in control.should_save. metrics: {'eval_loss': 3.804018259048462}
in control.should_save. metrics: {'eval_loss': 2.631948471069336}


TrainOutput(global_step=1200, training_loss=3.7530531819661457, metrics={'train_runtime': 14961.581, 'train_samples_per_second': 1.604, 'train_steps_per_second': 0.08, 'total_flos': 1233477028800000.0, 'train_loss': 3.7530531819661457, 'epoch': 4.0})

In [21]:
  #get class weight.  Only need if using customTrainer
#class_weights = get_class_weights(train_df)


In [22]:
print(torch.__version__)

2.0.1+cpu


In [23]:
  #conduct evaluation 
  keys = []
  precision = []
  recall = []
  f1 = []

In [24]:
logits = trainer.predict(test_dataset)[0] #get the logits 

test_pred = np.argmax(logits,axis=-1)
y_test= torch.tensor(test_df['iSampleMaterial'].values.tolist())

print (y_test)

res = classification_report(y_test,test_pred,output_dict=True)

tensor([ 5,  7, 10,  ...,  5, 10,  5])


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
#print(logits)
#print(test_dataset.__getitem__(3))

In [26]:
# print(test_dataset.encodings)

In [27]:
for key, score in res.items():
  if key.isdigit():
    keys.append((le.inverse_transform([int(key)])[0]))
    precision.append(round(score['precision'],2))
    recall.append(round(score['recall'],2))
    f1.append(round(score['f1-score'],2))
    print("%s \t\t\t %0.2f \t %0.2f \t %0.2f"% (le.inverse_transform([int(key)])[0],score['precision'], score['recall'], score['f1-score']))

mat:biogenicnonorganicmaterial 			 0.00 	 0.00 	 0.00
mat:gas 			 0.00 	 0.00 	 0.00
mat:liquidwater 			 0.00 	 0.00 	 0.00
mat:material 			 0.00 	 0.00 	 0.00
mat:mineral 			 0.00 	 0.00 	 0.00
mat:mixedsoilsedimentrock 			 0.00 	 0.00 	 0.00
mat:organicmaterial 			 0.13 	 1.00 	 0.23
mat:otheranthropogenicmaterial 			 0.00 	 0.00 	 0.00
mat:particulate 			 0.00 	 0.00 	 0.00
mat:rock 			 0.00 	 0.00 	 0.00
mat:rockorsediment 			 0.00 	 0.00 	 0.00
mat:sediment 			 0.00 	 0.00 	 0.00
mat:soil 			 0.00 	 0.00 	 0.00


In [28]:
#write the results to excel and save
result_df = pd.DataFrame(data=zip(keys,precision,recall,f1), columns=['label','precision','recall','f1'])
result_output_dir ="output/sesar_result.xlsx"
result_df.to_excel(result_output_dir)
print("Macro average: ",f1_score(y_test,test_pred,average='macro'))

Macro average:  0.017578524221571166
