# Log inference

In [20]:
import os
from dotenv import find_dotenv, load_dotenv
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

import openai
import wandb
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm import tqdm

from math import exp

In [2]:
# create eval job
project_name = "GPT-3 blog title"
run = wandb.init(project=project_name, job_type='eval')

[34m[1mwandb[0m: Currently logged in as: [33mbenneo[0m. Use [1m`wandb login --relogin`[0m to force relogin


retrive config parameteres from job file

In [3]:
# choose a fine-tuned model
artifact = run.use_artifact(
    f"benneo/{project_name}/fine_tune_details:v1", type="fine_tune_details"
)

all details of job is in metadata

In [5]:
artifact.metadata

{'id': 'ft-3Svmi2GPLKTLnZAQCmHlEYql',
 'model': 'babbage',
 'object': 'fine-tune',
 'status': 'succeeded',
 'created_at': 1669865236,
 'updated_at': 1669867527,
 'hyperparams': {'n_epochs': 4,
  'batch_size': 256,
  'prompt_loss_weight': 0.1,
  'classification_n_classes': 2,
  'learning_rate_multiplier': 0.1,
  'classification_positive_class': ' good',
  'compute_classification_metrics': True},
 'result_files': [{'id': 'file-mhWxYNRuWpzRWT3ZHAua58We',
   'bytes': 36754,
   'object': 'file',
   'status': 'processed',
   'purpose': 'fine-tune-results',
   'filename': 'compiled_results.csv',
   'created_at': 1669867527,
   'status_details': None}],
 'training_files': [{'id': 'file-eRRY7tNbGl7v8c7GLqDr3NRn',
   'bytes': 3014524,
   'object': 'file',
   'status': 'processed',
   'purpose': 'fine-tune',
   'filename': 'file',
   'created_at': 1669849602,
   'status_details': None}],
 'organization_id': 'org-Kz5UVJ3lj9OBEwe4ukIaOuoU',
 'fine_tuned_model': 'babbage:ft-personal:blog-title-score

add metadata to eval run config

In [6]:
wandb.config.update({k:artifact.metadata[k] for k in ['fine_tuned_model', 'model', 'hyperparams']})

access model id

In [7]:
fine_tuned_model = artifact.metadata['fine_tuned_model']
fine_tuned_model

'babbage:ft-personal:blog-title-scorer-2022-12-01-04-05-26'

retrive latest validation file

In [10]:
artifact_valid = run.use_artifact('benneo/GPT-3 blog title/medium_valid.jsonl:latest', type='validation_files')
valid_file = artifact_valid.get_path('prompts_prepared_valid.jsonl').download()
valid_file

'./artifacts/medium_valid.jsonl:v0/prompts_prepared_valid.jsonl'

In [11]:
df = pd.read_json(valid_file, orient='records', lines=True)
df

Unnamed: 0,prompt,completion
0,Title: Making serverless variables work for yo...,good
1,Title: The Beginners Guide to Similarity Match...,good
2,Title: How we crafted a career development fra...,good
3,Title: Photographers are obsolete. Are they? ->,good
4,Title: 5 Cool AI-Powered Drug Discovery Tools ->,good
...,...,...
995,Title: From Pandas to PySpark with Koalas ->,bad
996,Title: 7 Advanced Python Concepts You Might Wa...,bad
997,Title: How to Create a Generator in Python ->,bad
998,Title: Picking Your Next Meal Like a Data Scie...,bad


In [33]:
n_samples = 20
df = df.iloc[:n_samples]

In [24]:
r = openai.Completion.create(
    model=fine_tuned_model,
    prompt=df.iloc[1]["prompt"],
    temperature=0,  # must be 0
    max_tokens=1,  # must be 1
    logprobs=1,  # returns the probability
)
r

<OpenAIObject text_completion id=cmpl-6IWM6vM86v162P4sIgqiWRL8jZADO at 0x12e19acc0> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": {
        "text_offset": [
          64
        ],
        "token_logprobs": [
          -0.47429797
        ],
        "tokens": [
          " good"
        ],
        "top_logprobs": [
          {
            " good": -0.47429797
          }
        ]
      },
      "text": " good"
    }
  ],
  "created": 1669872298,
  "id": "cmpl-6IWM6vM86v162P4sIgqiWRL8jZADO",
  "model": "babbage:ft-personal:blog-title-scorer-2022-12-01-04-05-26",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 15,
    "total_tokens": 16
  }
}

In [27]:
pred = r["choices"][0]["text"]
print(pred)
class_prob = exp(r["choices"][0]["logprobs"]["token_logprobs"][0])
print(class_prob)

 good
0.6223217917138967


In [28]:
def get_pred(res):
    pred = res["choices"][0]["text"]
    class_prob = exp(res["choices"][0]["logprobs"]["token_logprobs"][0])

    if pred == " bad":
        class_prob = 1.0 - class_prob

    return pred, class_prob

In [34]:
df

Unnamed: 0,prompt,completion
0,Title: Making serverless variables work for yo...,good
1,Title: The Beginners Guide to Similarity Match...,good
2,Title: How we crafted a career development fra...,good
3,Title: Photographers are obsolete. Are they? ->,good
4,Title: 5 Cool AI-Powered Drug Discovery Tools ->,good
5,Title: Solving XOR with a single Perceptron ->,good
6,Title: A Beginners Guide: Cryptocurrency ->,good
7,Title: These are the highest paying freelance ...,good
8,Title: The Secret Behind How Chinese Startups ...,good
9,Title: Changing sync web server to async in 2 ...,good


In [31]:
data = []

for _, row in tqdm(df.iterrows()):
    prompt = row['prompt']
    r = openai.Completion.create(
        model=fine_tuned_model,
        prompt=prompt,
        temperature=0,  # must be 0
        max_tokens=1,  # must be 1
        logprobs=1,  # returns the probability
    )

    prompt = row['prompt']
    target = row['completion']
    pred, class_prob = get_pred(r)
    data.append([prompt, target, pred, class_prob])

10it [00:01,  5.29it/s]


In [40]:
pd.DataFrame(data)

Unnamed: 0,0,1,2,3
0,Title: Making serverless variables work for yo...,good,bad,0.491576
1,Title: The Beginners Guide to Similarity Match...,good,good,0.622322
2,Title: How we crafted a career development fra...,good,good,0.511328
3,Title: Photographers are obsolete. Are they? ->,good,good,0.502483
4,Title: 5 Cool AI-Powered Drug Discovery Tools ->,good,bad,0.411839
5,Title: Solving XOR with a single Perceptron ->,good,good,0.579067
6,Title: A Beginners Guide: Cryptocurrency ->,good,good,0.713198
7,Title: These are the highest paying freelance ...,good,good,0.542597
8,Title: The Secret Behind How Chinese Startups ...,good,good,0.635397
9,Title: Changing sync web server to async in 2 ...,good,good,0.501522


In [35]:
prediction_table = wandb.Table(columns=['prompt', 'target', 'pred', 'good_prob'], data=data)

In [37]:
wandb.log({'predictions': prediction_table})

In [38]:
wandb.finish()