In [1]:
import pandas as pd
import json

In [2]:
# The three reviewer-evaluation workbooks, listed in CSM / HRSD / ITSM order so
# they unpack into the matching frame names below.
evaluation_workbooks = (
    'data/CSM Resolution Notes Evaluation Sept 7th.xlsx',
    'data/HRSD Resolution Notes Evaluation Sept 6th.xlsx',
    'data/ITSM Resolution Notes Evaluation Sept 7th.xlsx',
)
df_CSM, df_HRSD, df_ITSM = [pd.read_excel(workbook) for workbook in evaluation_workbooks]

In [3]:
# List the columns of the CSM evaluation sheet; the recorded output of this
# cell is exactly that column index. The previous contents were a `for` loop
# over df_CSM['Case Data'] with no body, which is a SyntaxError on a fresh run.
# TODO: reinstate the per-case loop once its body is actually written.
df_CSM.columns

Index(['Index', 'task_id', 'state', 'Case Data',
       'NOW LLM Generated Resolution Notes', 'Reviewer',
       'Resolution Notes is Perfect\n(Yes/No)',
       'Missed Important Details\n(1/2/3)', 'Had Made-up Details\n(1/2/3)',
       'Mixed up who said what\n(1/2/3)',
       'Had too much unnecessary info\n(1/2/3)', 'Golden Resolution Notes',
       'Reviewer's notes', 'Linguist Comments', 'Linguist'],
      dtype='object')

# Checking Resolution Notes for Defects

In [1]:
import os, pandas as pd, numpy as np, re, json
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, AutoConfig
import torch
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm
from preprocessing.preprocessing_pipeline import PreProcessingPipeline
from transformers import AutoTokenizer
from transformers import LogitsProcessorList
from custom_logits_processor import (NoRepeatNGramLogitsProcessor,)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint location of the model under evaluation (absolute path; it is passed
# to from_pretrained for both the model and the tokenizer further down).
model_id = '/mnt/atg_platform/models/now_llm_chat/v0.4.0-rc2'
# Directory handed to from_pretrained as `cache_dir` when the model is loaded.
cache = 'cache_model'
# Evaluation records; generate_summary reads the "prompt" column of this frame.
eval_data = pd.read_json('data/resolution_notes_0.4.0.json')
# Show which fields each record carries.
print(eval_data.columns)

Index(['task_id', 'context', 'prompt', 'response', 'golden resolution notes',
       'source_type', 'hallucination_summac_asym_precision',
       'hallucination_summac_sym_precision', 'completeness_summac_asym_recall',
       'completeness_summac_sym_recall', 'entity_hallucination_score'],
      dtype='object')


In [3]:
# Build the causal-LM from the checkpoint in half precision, place it on the
# GPU, then load the tokenizer from the same checkpoint.
model_load_options = dict(
    cache_dir=cache,
    trust_remote_code=True,
    use_cache=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
)
model_base = AutoModelForCausalLM.from_pretrained(model_id, **model_load_options)
model_base.to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [4]:
# Override the tokenizer's end-of-sequence id with 49155. The recorded output of
# this cell shows that this id resolves to the '<|end|>' token.
# generate_summary passes tokenizer.eos_token_id to generate(), so this is the
# token that stops generation there.
tokenizer.eos_token_id = 49155
# Display the resulting EOS token string as a sanity check.
tokenizer.eos_token

'<|end|>'

In [13]:
def generate_summary(model_base, tokenizer, eval_data,
                     output_column="response_with_eos_49155", seed=None):
    """Generate one sampled completion per row of ``eval_data`` and store it in place.

    For every row, the text in the ``"prompt"`` column is run through
    ``PreProcessingPipeline`` and tokenized, then passed to
    ``model_base.generate``. Only the newly generated tokens (everything after
    the prompt) are decoded, with special tokens stripped.

    Args:
        model_base: Causal language model exposing ``generate`` and
            ``parameters``; inputs are moved to whatever device its weights
            are on.
        tokenizer: Tokenizer matching ``model_base``. Its ``eos_token_id`` is
            used as the stop token for generation.
        eval_data: ``pandas.DataFrame`` with a ``"prompt"`` column. It is
            modified in place.
        output_column: Name of the column that receives the generated text.
            The default keeps the column name this function always used.
        seed: Optional integer passed to ``torch.manual_seed`` before
            generating. Sampling is enabled, so results differ from run to run
            unless a seed is given. ``None`` (default) leaves the RNG untouched.

    Returns:
        None. The generated texts are written to ``eval_data[output_column]``,
        one entry per row in row order.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Names of the preprocessing steps handed to the project pipeline; what
    # each step does is defined in preprocessing.preprocessing_pipeline.
    preprocessing_steps = ("remove_previous_summary", "truncate_input")
    no_repeat_ngram_size = 10

    # Follow the model's actual placement instead of assuming 'cuda:0'.
    device = next(model_base.parameters()).device

    outputs_response = []
    # `iterrows()` has no length, so give tqdm the total to get a real progress bar.
    for _, record in tqdm(eval_data.iterrows(), total=len(eval_data)):
        # The logits processor and the pipeline are project classes whose
        # internals are not visible here; they are rebuilt for every record
        # (as before) in case they keep per-input state.
        custom_logits_processors = LogitsProcessorList()
        custom_logits_processors.append(
            NoRepeatNGramLogitsProcessor(no_repeat_ngram_size, tokenizer)
        )
        pipeline = PreProcessingPipeline()

        # NOTE: the stored prompt is used as-is; no chat-template tags are
        # added around it here.
        inputs = pipeline.preprocess(
            record["prompt"], tokenizer, list(preprocessing_steps)
        ).preprocessed_input

        inputs_tokenized = tokenizer(inputs, padding=True, return_tensors="pt")
        inputs_tokenized = {k: v.to(device) for k, v in inputs_tokenized.items()}
        prompt_length = inputs_tokenized["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model_base.generate(
                input_ids=inputs_tokenized["input_ids"],
                attention_mask=inputs_tokenized["attention_mask"],
                max_new_tokens=500,
                temperature=0.3,
                num_beams=1,
                use_cache=True,
                do_sample=True,
                logits_processor=custom_logits_processors,
                num_return_sequences=1,
                repetition_penalty=1.05,
                eos_token_id=tokenizer.eos_token_id,
            )

        # generate() returns prompt + completion; keep only the completion.
        completion_ids = outputs[:, prompt_length:]
        decoded = tokenizer.batch_decode(
            completion_ids.detach().cpu().numpy(), skip_special_tokens=True
        )
        outputs_response.append(decoded[0])

    # add outputs to eval_data
    eval_data[output_column] = outputs_response


In [14]:
# Run generation over every evaluation record; the function writes its results
# into eval_data in place and returns nothing.
generate_summary(model_base, tokenizer, eval_data)

175it [05:39,  1.94s/it]


In [18]:
# Export the evaluation frame to Excel. Run this after generate_summary so the
# generated-response column it adds to eval_data is included in the sheet.
eval_data.to_excel('OUTPUTS/resolution_notes_0.4.0_with_eos_49155.xlsx')

In [17]:
# Write the same frame as JSON. DataFrame.to_dict() with no arguments yields a
# {column: {index: value}} mapping, which is what gets serialised here.
with open('OUTPUTS/resolution_notes_0.4.0_with_eos_49155.json','w') as f:
    json.dump(eval_data.to_dict(), f)

In [2]:
import json
source_path = 'OUTPUTS/10_13_2023/inputs_P1_outputs.json'
labelled_path = 'OUTPUTS/10_13_2023/inputs_P1_outputs_hal.json'

with open(source_path, 'r') as f:
    data = json.load(f)

# Give every record a default label of Hallucination = 1.
for item in data:
    item["Hallucination"] = 1

# Mode 'x' creates the file only when it does not exist yet. The labelled file
# is evidently corrected by hand after it is first written (the cell that reads
# it back reports records whose label is not 1), so re-running this cell with
# mode 'w' would silently wipe those manual labels. Delete the file explicitly
# to regenerate the defaults.
try:
    with open(labelled_path, 'x') as f:
        json.dump(data, f, indent=4)
except FileExistsError:
    print(f"{labelled_path} already exists; leaving it untouched")

In [3]:
import json
with open('OUTPUTS/10_13_2023/inputs_P1_outputs_hal.json', 'r') as f:
    data = json.load(f)

# Split the records by label: a value equal to 1 goes to `ones`, anything else
# goes to `zeros`.
ones = [item for item in data if item["Hallucination"] == 1]
zeros = [item for item in data if not (item["Hallucination"] == 1)]

# Report the size of each group, then the total.
for bucket in (ones, zeros, data):
    print(len(bucket))

102
14
116


In [5]:
import json
import pandas as pd
with open('OUTPUTS/10_16_2023/inputs_P1_outputs_RS2.json', 'r') as f:
    data = json.load(f)
df = pd.DataFrame(data)

# Empty columns appended to the sheet, in this order, for reviewers to fill in.
review_columns = (
    "COMPLETENESS Missed important details (1/2/3)",
    "HALLUCINATION Had made up details (1/2/3)",
    "Put details in wrong places (1/2/3)",
    "Had unnecessary info (1/2/3)",
    "Reviewer Notes (COMPLETENESS)",
    "Reviewer Notes (HALLUCINATION)",
)
for column in review_columns:
    df[column] = ""

df.to_excel('OUTPUTS/10_16_2023/inputs_P1_outputs_RS2.xlsx')