In [2]:
!pip install -q datasets transformers evaluate peft trl bitsandbytes accelerate
!pip install --upgrade -q accelerate
!pip install -q python-Levenshtein
!pip install -q langchain langchain-openai

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip instal

In [6]:
import os
import torch
from datasets import load_from_disk, load_metric
from transformers import AutoTokenizer, HfArgumentParser, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

def get_model_id(model_type, run_name, project_name, checkpoint_id):
    return os.path.join(model_type, "model_output", project_name, run_name, checkpoint_id)

project_config = {
    "survey-json": {
        "project_name": "survey-json-model-inst",
        "train_dataset_path": "./datasets/survey_json_datasets_instruction_train",
        "test_dataset_path": "./datasets/survey_json_datasets_instruction_test",
    },
    "schema": {
        "project_name": "schema-model-inst",
        "train_dataset_path": "./datasets/schema_datasets/schema_data_train",
        "test_dataset_path": "./datasets/schema_datasets/schema_data_test"
    },
    "paraloq": {
        "project_name": "paraloq-model-inst",
        "train_dataset_path": "./datasets/paraloq/paraloq_data_train",
        "test_dataset_path": "./datasets/paraloq/paraloq_data_test"
    },
    "nous": {
        "project_name": "nous-model-inst",
        "train_dataset_path": "./datasets/nous/nous_data_train",
        "test_dataset_path": "./datasets/nous/nous_data_test"
    }
}

def load_model(model_id="gemma-2b"):
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        model_id, 
        quantization_config=quantization_config, 
        torch_dtype=torch.float16,
        # attn_implementation="flash_attention_2",
    )

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
    tokenizer.pad_token_id = tokenizer.eos_token_id
    return model, tokenizer

def load_project(project="schema", model_id="gemma-2b"):
        
    checkpoint_id = "checkpoint-600"
    
    model_ids = {
        "gemma-7b": get_model_id("gemma", "gemma-7b-qlora-inst", project_config[project]['project_name'], checkpoint_id),
        "gemma-2b": get_model_id("gemma", "gemma-2b-qlora-inst", project_config[project]['project_name'], checkpoint_id),
        "llama2-7b": get_model_id("llama2", "codellama2-7b-qlora-inst", project_config[project]['project_name'], checkpoint_id),
        "llama3-7b": get_model_id("llama3", "llama3-7b-qlora-inst", project_config[project]['project_name'], checkpoint_id),
        "phi-2": get_model_id("phi2", "phi-2-qlora-inst", project_config[project]['project_name'], checkpoint_id),
        "mistral-7b": get_model_id("mistral", "mistral-7b-qlora-inst", project_config[project]['project_name'], checkpoint_id)
    }
    test_dataset = load_from_disk(project_config[project]["test_dataset_path"])
    model, tokenizer = load_model(model_ids[model_id])
    return test_dataset, model, tokenizer

In [7]:
import json
from tqdm import tqdm

In [None]:
def get_predictions(test_dataset, model, tokenizer):
    accuracy_metric = load_metric("accuracy")

    failed_count = 0
    preds = []
    ground_truths = []
    for example in tqdm(test_dataset):    
        pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
        prompt, ground_truth = example['text'].split("[/INST]")
        prompt += "[/INST]"
        result = pipe(prompt, max_new_tokens=2088, do_sample=True)
        output = result[0]['generated_text']
        pred = output.split("[/INST]")[1].strip()
        ground_truth = ground_truth.strip()

        # overall_leven_accuracy = levenshtein_accuracy(pred, ground_truth)
        # sj_accuracy = survey_json_accuracy(pred, ground_truth)
        # accuracy = accuracy_metric.compute(predictions=[pred], references=[ground_truth])

        preds.append(pred)
        ground_truths.append(ground_truth)
        
    return preds, ground_truths

def run(project = "schema", model_id = "gemma-2b"):

    test_dataset, model, tokenizer = load_project(project=project, model_id=model_id)
    generated_responses, actual_responses = get_predictions(test_dataset, model, tokenizer)
    export_date = {
        "generated_responses": generated_responses,
        "actual_responses": actual_responses
    }

    # write to json file
    with open(f'./data/{project}_{model_id}_instruction_generation.json', 'w') as f:
        json.dump(export_date, f)
        
run_list = ["schema", "paraloq", "nous"]
model_list = ["gemma-2b", "gemma-7b", "llama2-7b", "phi-2", "mistral-7b", "llama3-7b"]
for project in run_list:
    for model_id in model_list:
        print(f"Running {project} with {model_id}")
        run(project, model_id)

In [10]:
from typing import List
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from utils import setup_api_key

setup_api_key()
    
chain_model = ChatOpenAI(temperature=0)

class Metric(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Metric)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | chain_model | parser

for pred, ground_truth in zip(preds, ground_truths):
    metric_query = f""
    chain.invoke({"query": metric_query})

NameError: name 'preds' is not defined