In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]='1'
import json

import torch
from torch.utils.data import Dataset
import itertools

import time
import torch
import sklearn
import numpy as np
import pandas as pd
import torch.nn as nn
from torch.cuda.amp import autocast
from dataclasses import dataclass
from transformers import AutoTokenizer, AutoModel, AutoConfig, DataCollatorForSeq2Seq
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding, AutoModelForSequenceClassification
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType 
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers import BitsAndBytesConfig, LlamaForCausalLM, LlamaModel, LlamaPreTrainedModel
from transformers.data.data_collator import pad_without_fast_tokenizer_warning
from transformers import set_seed


class CustomDataset(Dataset):
    def __init__(self, fname, tokenizer):
        IGNORE_INDEX=-100
        self.inp = []
        self.trg = []
        self.label = []

        PROMPT = '''You are an AI assistant that helps users analyze conversations and solve related problems. Please read the conversation carefully and select the most appropriate answer to the question based on the given options.'''
        answer_dict = {
            "": None,
            "inference_1": 0,
            "inference_2": 1,
            "inference_3": 2
        }

        with open(fname, "r", encoding='utf-8') as f:
            data = json.load(f)

        def make_chat(inp, idx):
            chat = ["[Conversation]"]
            for cvt in inp['conversation']:
                speaker = cvt['speaker']
                utterance = cvt['utterance']
                str(utterance).replace('name', '화자')
                chat.append(f"화자{speaker}: {utterance}")
            chat = "\n".join(chat)

            question = f"[Question]\n위 대화의 {inp['category']}"
            if (ord(inp['category'][-1]) - ord("가")) % 28 > 0:
                question += "으로"
            else:
                question = "로"
            question += " 올바른 지문은?"
                
            chat = chat + "\n\n" + question + "\n\n[Option]\n"
            chat += f"A. {inp[f'inference_{idx[0]}']}\n"
            chat += f"B. {inp[f'inference_{idx[1]}']}\n"
            chat += f"C. {inp[f'inference_{idx[2]}']}"

            return chat
        permutations = list(itertools.permutations([1,2,3]))
        for idx in permutations:
            for example in data:
                chat = make_chat(example["input"], idx)
                message = [
                    {"role": "system", "content": PROMPT},
                    {"role": "user", "content": chat},
                ]
         
                source = tokenizer.apply_chat_template(
                    message,
                    add_generation_prompt=True,
                    return_tensors="pt",
                )
    
                target = ""
                # if example["output"] == f"inference_{idx[0]}":
                #     target = f"A. {example['input'][f'inference_{idx[0]}']}{tokenizer.eos_token}"
                # elif example["output"] == f"inference_{idx[1]}":
                #     target = f"B. {example['input'][f'inference_{idx[1]}']}{tokenizer.eos_token}"
                # elif example["output"] == f"inference_{idx[2]}":
                #     target = f"C. {example['input'][f'inference_{idx[2]}']}{tokenizer.eos_token}"
                    
                target = tokenizer(target,
                          return_attention_mask=False,
                          add_special_tokens=False,
                          return_tensors="pt")
                target["input_ids"] = target["input_ids"].type(torch.int64)
    
                input_ids = torch.concat((source[0], target["input_ids"][0]))
                labels = torch.concat((torch.LongTensor([IGNORE_INDEX] * source[0].shape[0]), target["input_ids"][0]))
                self.inp.append(input_ids)
                self.label.append(labels)
                self.trg.append(answer_dict[example["output"]])

    def __len__(self):
        return len(self.inp)

    def __getitem__(self, idx):
        return self.inp[idx], self.label[idx]


class DataCollatorForSupervisedDataset(object):
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __call__(self, instances):
        input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels"))
        input_ids = torch.nn.utils.rnn.pad_sequence(
            [torch.tensor(ids) for ids in input_ids], batch_first=True, padding_value=self.tokenizer.pad_token_id
        )
        labels = torch.nn.utils.rnn.pad_sequence([torch.tensor(lbls) for lbls in labels], batch_first=True, padding_value=-100)
        return dict(
            input_ids=input_ids,
            labels=labels,
            attention_mask=input_ids.ne(self.tokenizer.pad_token_id),
        )

In [2]:
class Llama3ForSFT(LlamaPreTrainedModel):
    _tied_weights_keys = ["lm_head.weight"]
    def __init__(self, config):
        super().__init__(config)
        self.model = LlamaModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        self.post_init()

    def forward(
        self,
        input_ids= None,
        attention_mask= None,
        position_ids = None,
        past_key_values= None,
        inputs_embeds= None,
        labels= None,
        use_cache= None,
        output_attentions= None,
        output_hidden_states = None,
        return_dict= None,
        cache_position = None,
    ):
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            cache_position=cache_position,
        )
        hidden_states = outputs[0]
        if self.config.pretraining_tp > 1:
            lm_head_slices = self.lm_head.weight.split(self.vocab_size // self.config.pretraining_tp, dim=0)
            logits = [F.linear(hidden_states, lm_head_slices[i]) for i in range(self.config.pretraining_tp)]
            logits = torch.cat(logits, dim=-1)
        else:
            logits = self.lm_head(hidden_states)
        logits = logits.float()

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            # Flatten the tokens
            loss_fct = nn.CrossEntropyLoss()
            shift_logits = shift_logits.view(-1, self.config.vocab_size)
            shift_labels = shift_labels.view(-1)
            # Enable model parallelism
            shift_labels = shift_labels.to(shift_logits.device)

            fake_label_tokens_ids = torch.tensor([128250],device=shift_labels.device)
            label_tokens_ids = torch.tensor(LABEL_IDS,device=shift_labels.device)
#             index_mapping = {value.item(): idx for idx, value in enumerate(label_tokens_ids)}
#             true_labels = shift_labels[torch.isin(shift_labels, label_tokens_ids)]
#             true_labels = torch.tensor([index_mapping[label.item()] for label in true_labels], device=true_labels.device)
            true_logits = shift_logits[torch.isin(shift_labels, fake_label_tokens_ids)][:,label_tokens_ids]
#             loss = loss_fct(true_logits, true_labels)

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
        )

In [3]:

import argparse
import json
import tqdm

import torch
import numpy
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# fmt: off
number = 9
parser = argparse.ArgumentParser(prog="test", description="Testing about Conversational Context Inference.")

g = parser.add_argument_group("Common Parameter")
g.add_argument("--output", default=f'quan8_auto.json', type=str, required=False, help="output filename")
g.add_argument("--model_id", default='kihoonlee/STOCK_SOLAR-10.7B', type=str, required=False, help="huggingface model id")
g.add_argument("--tokenizer", type=str, help="huggingface tokenizer")
g.add_argument("--device", default='cuda', type=str, required=False, help="device to load the model")
# fmt: on


def main(args):
    # model = AutoModelForCausalLM.from_pretrained(
    #     args.model_id,
    #     torch_dtype=torch.bfloat16,
    #     device_map=args.device,
    #     use_cache=False,
    # )

    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        # bnb_4bit_use_double_quant=True,
        # bnb_4bit_quant_type="nf4",
        # bnb_4bit_compute_dtype=torch.bfloat16
    )
    
    model = AutoModelForCausalLM.from_pretrained(
    args.model_id,
    use_cache=False,
    device_map='auto',
    quantization_config=bnb_config,
)
    
    model = PeftModel.from_pretrained(model,
                                  #f'fold{number}/checkpoint-2040',
                                      'last_d/checkpoint-1890'
                                 )
    
    model.eval()

    if args.tokenizer == None:
        args.tokenizer = args.model_id
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)
    tokenizer.padding_side = 'right'
    tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n'  + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}"

    global LABEL_IDS
    LABEL_IDS= [tokenizer(i, add_special_tokens=False)['input_ids'][0] for i in ['A','B','C']]
    dataset = CustomDataset("test.json", tokenizer)

    answer_dict = {
        0: "inference_1",
        1: "inference_2",
        2: "inference_3",
    }

    with open("test.json", "r", encoding="utf-8") as f:
        result = json.load(f)

    answer = []
    with torch.no_grad():
        for idx in tqdm.tqdm(range(len(dataset))):
            inp, labels = dataset[idx]
            outputs = model(
                inp.to('cuda').unsqueeze(0),
                labels=labels.to('cuda')
            )
            logits = outputs.logits[:,-1].flatten()
            probs = (
                torch.nn.functional.softmax(
                    torch.tensor(
                        [
                            logits[tokenizer.vocab['A']],
                            logits[tokenizer.vocab['B']],
                            logits[tokenizer.vocab['C']],
                        ]
                    ),
                    dim=0,
                )
                .detach()
                .cpu()
                .to(torch.float32)
                .numpy()
            )
            answer.append(numpy.argmax(probs))
            
    answer = np.array(answer)
    answer = answer.reshape(6, len(answer)//6)
    for i, custom_dict in enumerate(list(itertools.permutations([0,1,2]))):
        custom_dict = {value: index for index, value in enumerate(custom_dict)}
        print(custom_dict)
        answer[i] =  np.array([custom_dict[value] for value in answer[i].tolist()])
        
    from scipy import stats
    mode_values = stats.mode(answer, axis=0).mode
    for idx, label in enumerate(mode_values):
        result[idx]["output"] = answer_dict[label]
        print(answer_dict[label])
    with open(args.output, "w", encoding="utf-8") as f:
        f.write(json.dumps(result, ensure_ascii=False, indent=4))


In [4]:
if __name__ == "__main__":
    exit(main(parser.parse_args([])))

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████████████████████████████████████████████████████████████████████████| 3630/3630 [32:55<00:00,  1.84it/s]

{0: 0, 1: 1, 2: 2}
{0: 0, 2: 1, 1: 2}
{1: 0, 0: 1, 2: 2}
{1: 0, 2: 1, 0: 2}
{2: 0, 0: 1, 1: 2}
{2: 0, 1: 1, 0: 2}
inference_2
inference_3
inference_2
inference_3
inference_1
inference_2
inference_2
inference_1
inference_2
inference_2
inference_3
inference_2
inference_1
inference_3
inference_3
inference_3
inference_2
inference_2
inference_2
inference_1
inference_1
inference_2
inference_1
inference_2
inference_3
inference_3
inference_1
inference_2
inference_1
inference_3
inference_1
inference_2
inference_3
inference_3
inference_2
inference_1
inference_3
inference_2
inference_2
inference_2
inference_1
inference_3
inference_1
inference_1
inference_1
inference_1
inference_3
inference_3
inference_2
inference_1
inference_1
inference_3
inference_1
inference_1
inference_2
inference_2
inference_3
inference_3
inference_2
inference_1
inference_1
inference_1
inference_2
inference_3
inference_2
inference_3
inference_3
inference_3
inference_2
inference_1
inference_3
inference_3
inference_1
inference_




In [5]:
# import json

# # 첫 번째 JSON 파일 읽기
# with open('train.json', 'r', encoding="utf-8") as file:
#     json_data1 = json.load(file)

# # 두 번째 JSON 파일 읽기
# with open('dev.json', 'r', encoding="utf-8") as file:
#     json_data2 = json.load(file)

# # 두 JSON 데이터를 병합
# merged_data = []
# for i in json_data1:
#     merged_data.append(i)

# print(len(merged_data))

# for i in json_data2:
#     merged_data.append(i)

# print(len(merged_data))

# with open('merge.json', "w", encoding="utf-8") as f:
#      f.write(json.dumps(merged_data, ensure_ascii=False, indent=4))
    
# print("두 JSON 파일이 성공적으로 병합되었습니다.")