# Goal: end-to-end inference and evaluation

Given a CSV file, make predictions, evaluate them, and write the results to a new CSV file.

In [1]:
import pandas as pd

# Location of the test split and the columns needed for inference.
# 'ships_idx' must stay in the list because the dataset records carry it.
data_path = "../make_test_csv/test.csv"
fields = ['ships_idx', 'tag_name', 'tag_description', 'thing', 'property']

# Options shared by both read attempts (built with a literal, not dict(...)).
read_options = {'skipinitialspace': True, 'usecols': fields}

# Read with the default encoding first; retry as ISO-8859-1 if decoding fails.
try:
    df = pd.read_csv(data_path, **read_options)
except UnicodeDecodeError:
    df = pd.read_csv(data_path, encoding='ISO-8859-1', **read_options)

# Discard every row that has a missing value in any of the loaded columns.
df = df.dropna()

# Store the text columns with pandas' dedicated string dtype.
selected_columns = ['thing', 'property', 'tag_description']
string_view = df[selected_columns].astype("string")
df[selected_columns] = string_view

In [2]:
# construct dataset
from datasets import Dataset

def process_df(df):
    """Turn each row of ``df`` into a ``{'translation': {...}}`` record.

    Args:
        df: DataFrame providing the columns 'ships_idx', 'tag_description',
            'thing' and 'property'.

    Returns:
        A list with one dict per row, in row order. Each dict has a single
        'translation' key whose value holds the ship index, the tag
        description, the marker-wrapped "thing/property" target string and
        the plain thing/property answers formatted as strings.
    """
    records = []
    for _, row in df.iterrows():
        thing = row['thing']
        prop = row['property']
        translation = {
            'ships_idx': row['ships_idx'],
            'tag_description': row['tag_description'],
            'thing_property': f"<THING_START>{thing}<THING_END><PROPERTY_START>{prop}<PROPERTY_END>",
            'answer_thing': f"{thing}",
            'answer_property': f"{prop}",
        }
        records.append({'translation': translation})

    return records

# Convert the cleaned frame into records, then wrap them in a Dataset.
test_records = process_df(df)
test_dataset = Dataset.from_list(test_records)

In [3]:
from transformers.pipelines.pt_utils import KeyDataset
from transformers import pipeline
from tqdm import tqdm

# Fine-tuned checkpoint that the pipeline loads as its model.
model_checkpoint = "train_tp_checkpoint_40/checkpoint-2320"

from transformers import AutoTokenizer

# Marker tokens that delimit the "thing" and "property" spans.
additional_special_tokens = ["<THING_START>", "<THING_END>", "<PROPERTY_START>", "<PROPERTY_END>"]

# Start from the t5-base tokenizer and register the marker tokens on it.
tokenizer = AutoTokenizer.from_pretrained("t5-base", return_tensors="pt")
tokenizer.add_special_tokens({"additional_special_tokens": additional_special_tokens})

# Translation pipeline on device 0; return_tensors=True makes it return
# token ids (read later as 'translation_token_ids') instead of decoded text.
pipe = pipeline(
    "translation_XX_to_YY",
    model=model_checkpoint,
    tokenizer=tokenizer,
    return_tensors=True,
    max_length=64,
    device=0,
)

# check what token-ids the special tokens are
# tokenizer.encode("<THING_START><THING_END><PROPERTY_START><PROPERTY_END>")


In [4]:
def extract_seq(tokens, start_value, end_value):
    """Return the items strictly between a start marker and its closing end marker.

    Args:
        tokens: List to search (here: a list of token ids).
        start_value: Marker that opens the span; its first occurrence is used.
        end_value: Marker that closes the span; the first occurrence located
            *after* the start marker is used.

    Returns:
        A new list holding the items between the two markers (possibly
        empty), or None when the start marker is absent or no end marker
        follows it.
    """
    try:
        start_id = tokens.index(start_value)
        # Begin the search right after the start marker: an end marker that
        # appears earlier in the sequence does not close this span and must
        # not be picked up (it would produce an empty or wrong slice).
        end_id = tokens.index(end_value, start_id + 1)
    except ValueError:
        # list.index raises ValueError when the value is not found.
        return None

    return tokens[start_id + 1:end_id]


In [5]:
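# Known issue: the generated output may lack a start or end marker token; extract_seq then returns None and the corresponding prediction stays None.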
def process_tensor_output(output):
    """Decode the predicted thing and property strings from one pipeline result.

    Args:
        output: One pipeline result; ``output[0]['translation_token_ids']``
            is read and converted to a plain list with ``.tolist()``.

    Returns:
        A tuple ``(p_thing, p_property)``. Each element is the decoded text
        found between its marker tokens, or None when ``extract_seq`` could
        not locate that marker pair.
    """
    tokens = output[0]['translation_token_ids'].tolist()

    # Resolve the marker ids through the tokenizer rather than hard-coding
    # 32100..32103, so the ids always agree with the vocabulary that is also
    # used for decoding below.
    thing_start, thing_end, property_start, property_end = tokenizer.convert_tokens_to_ids(
        ["<THING_START>", "<THING_END>", "<PROPERTY_START>", "<PROPERTY_END>"]
    )

    thing_seq = extract_seq(tokens, thing_start, thing_end)
    property_seq = extract_seq(tokens, property_start, property_end)

    # A missing marker pair leaves the corresponding prediction as None.
    p_thing = tokenizer.decode(thing_seq) if thing_seq is not None else None
    p_property = tokenizer.decode(property_seq) if property_seq is not None else None
    return p_thing, p_property

In [6]:
print("making inference on test set")

# Feed only the 'tag_description' field of each record to the pipeline.
description_inputs = KeyDataset(test_dataset["translation"], "tag_description")

# One (thing, property) pair per pipeline result, in dataset order.
decoded_pairs = [
    process_tensor_output(result)
    for result in tqdm(pipe(description_inputs, batch_size=256))
]

# Split the pairs into two parallel prediction lists.
p_thing_list = [pair[0] for pair in decoded_pairs]
p_property_list = [pair[1] for pair in decoded_pairs]
print("inference done")

making inference on test set


21140it [00:40, 525.73it/s]                   

inference done





In [8]:
# Ground-truth answers, pulled from the same records that were fed to the model.
reference_records = test_dataset["translation"]
answer_thing = [record['answer_thing'] for record in reference_records]
answer_property = [record['answer_property'] for record in reference_records]
def correctness_test(input, reference):
    """Compare two equally long sequences position by position.

    Args:
        input: Predicted values.
        reference: Expected values; must have the same length as ``input``.

    Returns:
        A list of booleans, True where the prediction equals the reference
        at the same position.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    assert(len(input) == len(reference))
    return [bool(input[pos] == reference[pos]) for pos in range(len(input))]

# Score the predictions against the ground-truth answers and report accuracy
# (fraction of exact matches) for each of the two targets.
thing_correctness = correctness_test(p_thing_list, answer_thing)
print("thing prediction accuracy", sum(thing_correctness)/len(thing_correctness))
property_correctness = correctness_test(p_property_list, answer_property)
print("property prediction accuracy", sum(property_correctness)/len(property_correctness))

# Collect predictions and per-row correctness flags into one frame.
# The mapping is deliberately NOT named `dict`: rebinding that name would
# shadow the builtin for every cell executed afterwards in this kernel.
prediction_columns = {'p_thing': p_thing_list,
                      'p_property': p_property_list,
                      'p_thing_correct': thing_correctness,
                      'p_property_correct': property_correctness}
df_pred = pd.DataFrame(prediction_columns)

thing prediction accuracy 0.4195364238410596
property prediction accuracy 0.38680227057710503


In [9]:
# Reload the dataset, this time with the wider set of columns listed in `fields`.
# NOTE(review): this cell reads "test.csv" while the first cell reads
# "../make_test_csv/test.csv" — confirm both paths point at the same rows.
data_path = "test.csv"

fields = ['thing',  'property', 'ships_idx', 'tag_name', 'equip_type_code', 'tag_description',
        'tx_period', 'tx_type', 'on_change_yn', 'scaling_const', 'signal_type', 'min',
        'max', 'unit', 'data_type', 'description', 'updated_time', 'status_code',
        'is_timeout']

# Read with the default encoding first; retry as ISO-8859-1 if decoding fails.
try:
    df_orig = pd.read_csv(data_path, usecols=fields, skipinitialspace=True)
except UnicodeDecodeError:
    df_orig = pd.read_csv(data_path, usecols=fields, skipinitialspace=True, encoding='ISO-8859-1')

# The predictions were produced from `df`, i.e. AFTER rows with missing values
# were dropped, whereas `df_pred` carries a fresh 0..n-1 index. Concatenating
# it as-is would pair predictions with the wrong rows of `df_orig` as soon as
# a single row had been dropped. Give the predictions the row labels of `df`
# so that pd.concat aligns them with the rows they were computed from; rows
# that were dropped before inference end up with empty prediction columns.
assert len(df_pred) == len(df), "predictions and filtered input rows are out of sync"
df_pred_aligned = df_pred.copy()
df_pred_aligned.index = df.index

# Combine the original columns with the aligned predictions and export.
df_final = pd.concat([df_orig, df_pred_aligned], axis=1)
df_final.to_csv('test_with_predictions.csv')