# P-Tuning a Model Using the NeMo Service


In [16]:
import json
import random
import pandas as pd
import os

from nemollm.api import NemoLLM
from llm_utils.nemo_service_models import NemoServiceBaseModel

---

## Load Data

In [4]:
# Read the labelled address pairs used for every split below.
df = pd.read_csv('/workspace/dli/2-PubMedQA/data_62124.csv')
print(df.shape)
# Keep the preview as the final expression so the notebook actually renders it;
# a bare `df.head()` in the middle of a cell is evaluated and thrown away.
df.head()

Unnamed: 0.1,Unnamed: 0,good address,compare address,label
0,0,14361 cupcake terrace,14361 cupofcake terrace,1
1,1,13823 principle terrace,2689 principal highway,0
2,2,908 toolbox street,1209 main highway,0
3,3,6785 wheel avenue,1809 main way,0
4,4,3944 electric screwdriver road,3944 elecc screwdriver,1


In [5]:
def generate_address_prompt(good_address, compare_address, numLabel):
    """Build one prompt/completion record for a pair of addresses.

    Args:
        good_address: First address, interpolated verbatim into the question.
        compare_address: Second address, interpolated verbatim into the question.
        numLabel: Numeric label; a value equal to 1 maps to "yes", anything
            else maps to "no".

    Returns:
        dict with two keys: "prompt" (objective text, question and answer cue,
        in that order) and "completion" ("yes" or "no").
    """
    # Fixed task description; the wording is part of the prompt and is kept verbatim.
    objective = (
        "OBJECTIVE: Some addresses look the same, but indicate different physical locations "
        "and therefore they should not be linked, one indication that two addresses should not "
        "be linked is they have different house numbers. Alternatively, some addresses are not "
        "exact matches but should be linked, they often dont match because of a mispelling in "
        "one of the street names.  For example, These pair of addresses should be matched: "
        "990 sizzling place and 990 sizlig place. while this pair of streets 67 metal way and "
        "87 petal drive indicate different address that should not be matched.\n"
    )
    question = f"Should the following two addresses be linked {good_address} and {compare_address} ?"
    completion = "yes" if numLabel == 1 else "no"

    return {
        "prompt": "".join([objective, f"QUESTION: {question}\n", "ANSWER (yes|no): "]),
        "completion": completion,
    }


## Format Data

In [6]:
# generate training data 

# Rows labelled 0-249 of `df` form the training split: one JSON object per line.
with open('/workspace/dli/2-PubMedQA/address-train-data.jsonl', 'w') as f:
    f.writelines(
        json.dumps(
            generate_address_prompt(
                df.loc[i, 'good address'],
                df.loc[i, 'compare address'],
                df.loc[i, 'label'],
            )
        ) + '\n'
        for i in range(250)
    )

In [7]:
# generate validation data 

# Rows labelled 250-294 of `df` form the validation split: one JSON object per line.
with open('/workspace/dli/2-PubMedQA/address-valid-data.jsonl', 'w') as f:
    f.writelines(
        json.dumps(
            generate_address_prompt(
                df.loc[i, 'good address'],
                df.loc[i, 'compare address'],
                df.loc[i, 'label'],
            )
        ) + '\n'
        for i in range(250, 295)
    )

In [8]:
# generate test data 

# Rows labelled 295-304 of `df` form the held-out test split: one JSON object per line.
with open('/workspace/dli/2-PubMedQA/address-test-data.jsonl', 'w') as f:
    f.writelines(
        json.dumps(
            generate_address_prompt(
                df.loc[i, 'good address'],
                df.loc[i, 'compare address'],
                df.loc[i, 'label'],
            )
        ) + '\n'
        for i in range(295, 305)
    )

---

## NeMo Service Mocking

There are two scenarios in this workshop where, instead of working directly with the real NeMo Service, you are going to interact with mocks. The first, which we will discuss here, is when uploading data files. The primary reason for this is that with many students interacting with the same NeMo Service account at the same time, keeping track of all your specific files will get cumbersome quickly.

Rather, we will provide mock functions that will simulate the real functions you would call if you were working with your own account.

---

## Upload Data to NeMo Service

Uploading data to the NeMo Service is straightforward. Typically you would create a `conn` object with the NeMo Service, as we did in the first notebook, and then use its `upload` method, passing it the path of the file you would like to upload. In our case, we will use a mock `upload` method that we have provided for you, and view the (mock) response that it generates.

In [10]:
# Credentials and endpoint are read from the environment; each is None when unset.
api_key, api_host = (os.environ.get(name) for name in ('NGC_API_KEY', 'API_HOST'))

In [11]:
# Service client configured with the key and host read from the environment.
conn = NemoLLM(api_key=api_key, api_host=api_host)

In [None]:
# Upload the training JSONL written earlier and keep the service response for inspection.
train_response = conn.upload('/workspace/dli/2-PubMedQA/address-train-data.jsonl')

In [None]:
# Upload the validation JSONL written earlier and keep the service response for inspection.
validation_response = conn.upload('/workspace/dli/2-PubMedQA/address-valid-data.jsonl')

In [None]:
train_response

In [None]:
{'id': 'c00eff6b-937c-47f1-9357-cf87af5972ea',
 'name': '/workspace/dli/2-PubMedQA/address-train-data.jsonl',
 'size': 232390,
 'number_of_samples': 150,
 'format': 'jsonl',
 'usage_category': 'dataset',
 'org_id': 'abcdefghijkl',
 'user_id': 'abcdefghijklmnopqrstuvwxyz',
 'ready_at': '0001-01-01T00:00:00Z',
 'created_at': '2024-05-28T16:46:48.519447Z'}

In [None]:
validation_response

In [None]:
{'id': 'c00eff6b-937c-47f1-9357-cf87af5972ea',
 'name': '/workspace/dli/2-PubMedQA/address-valid-data.jsonl',
 'size': 232390,
 'number_of_samples': 150,
 'format': 'jsonl',
 'usage_category': 'dataset',
 'org_id': 'abcdefghijkl',
 'user_id': 'abcdefghijklmnopqrstuvwxyz',
 'ready_at': '0001-01-01T00:00:00Z',
 'created_at': '2024-05-28T16:46:48.519447Z'}

## Create Customization (P-Tuned Model)

In [None]:
# Start a p-tuning customization on the 8B base model using the two uploaded datasets.
# The dataset ids come straight from the upload responses (their 'id' field); the
# previously referenced `training_dataset_file_id` / `validate_dataset_file_id`
# names were never defined in this notebook and raised NameError.
# NOTE(review): `PtuneableModels` is not imported in any visible cell - confirm its
# import (presumably from `llm_utils`) is added to the imports cell.
response_8b = conn.create_customization(
    model=PtuneableModels.gpt8b.value,
    name='address-8b-token-50-batch-8-epochs-3',
    description="P-tuning for custom address model.",
    batch_size=8,
    # NOTE(review): the customization name says "token-50" but 10 virtual tokens
    # are requested here - confirm which value is intended.
    num_virtual_tokens=10,
    validation_dataset_file_id=validation_response['id'],
    training_dataset_file_id=train_response['id'],
    epochs=3
)

## Evaluate Results 

In [None]:
# The test file is JSON Lines (one object per line), so it must be parsed line by
# line: json.load() on the whole file fails with "Extra data" at the second record.
# Each record is reduced to a (prompt, expected answer) pair, the shape the
# evaluation loop unpacks. `with` also guarantees the file handle is closed.
with open('/workspace/dli/2-PubMedQA/address-test-data.jsonl', 'r') as test_file:
    prompts_and_answers = [
        (record['prompt'], record['completion'])
        for record in (json.loads(line) for line in test_file if line.strip())
    ]

To use a p-tuned model with the NeMo Service, we simply need the `customization_id` returned when the customization was created. Once we have it, we can include it in calls to the base model that was p-tuned.

In [None]:
# Registry of models keyed by short name. It is created here because no earlier
# cell defines `llms`; assigning into it directly raised NameError on a fresh kernel.
# The customization_id selects the p-tuned variant of the 8B base model.
# NOTE(review): `PtuneableModels` is not imported in any visible cell - confirm.
llms = {
    'gpt8b': NemoServiceBaseModel(
        model=PtuneableModels.gpt8b.value,
        customization_id='1780214f-8582-4e1c-89cd-7488f089e167',
    )
}

In [None]:
# Query the p-tuned model once per test prompt and report whether its stripped
# reply exactly equals the expected answer.
for prompt, answer in prompts_and_answers:
    response = llms['gpt8b'].generate(prompt, tokens_to_generate=1).strip()
    correct = (response == answer)
    print(
        f'Response from model: {response}',
        f'Actual answer: {answer}',
        f'Response from model correct: {correct}\n',
        sep='\n',
    )

P-tuning for as few as 3 epochs on only 250 training samples should give better performance than prompt engineering alone.