In [1]:
import sys, json, re, collections
import pandas as pd
from pathlib import Path
from decouple import config
sys.path.append("../src/")
from llm_helpers import openai_client

# Raise pandas' display limits so wide and long frames are not truncated.
for option_name in ("display.max_columns", "display.width", "display.max_rows"):
    pd.set_option(option_name, 1000)

# "data" is a sibling of the current working directory.
data_dir = Path.cwd().parent / "data"


def ls(p):
    """Print every entry directly inside directory ``p``, one path per line."""
    print("\n".join(str(entry) for entry in p.iterdir()))


ls(data_dir)

/home/idan/Documents/llm_workshop/data/sample_apps.parquet


In [2]:
# Seed the sample so the same 9 apps are drawn on every run. A later cell
# selects a row by position (df.iloc[7]), which is only reproducible when the
# sampled rows and their order are stable across kernel restarts.
SAMPLE_SEED = 42
df = pd.read_parquet(data_dir / "sample_apps.parquet").sample(9, random_state=SAMPLE_SEED)
df

Unnamed: 0,bundle_id,title,description,store_url,category_names,ios
7478,com.alibaba.intl.android.apps.poseidon,Alibaba.com - B2B marketplace,What is Alibaba.com?\nAlibaba.com is one of th...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False
508,1058528141,War Machines：Tanks Combat Game,"HELLO, COMMANDER ! \nWELCOME TO WAR MACHINES, ...",https://apps.apple.com/us/app/war-machines-tan...,"Games,Entertainment,Strategy,Action",True
383,air.com.buffalo_studios.newflashbingo,Bingo Blitz™️ - Bingo Games,Experience your free online bingo game as you ...,https://play.google.com/store/apps/details?id=...,"GAME_BOARD,GAME",False
39810,com.playrix.gardenscapes,Gardenscapes,Welcome to Gardenscapes—the first hit from Pla...,https://play.google.com/store/apps/details?id=...,"GAME_CASUAL,GAME",False
10617,com.audible.application,Audible: Audio Entertainment,Listen to books while relaxing by the fire or ...,https://play.google.com/store/apps/details?id=...,"BOOKS_AND_REFERENCE,APPLICATION",False
1373,1138264921,Match Masters ‎- PvP Match 3,Match 3 games - reinvented! Now with online mu...,https://apps.apple.com/us/app/match-masters-pv...,"Games,Casual,Puzzle",True
51356,com.walmart.android,Walmart Shopping & Grocery,Save money. Live better.\n\nThe Walmart app is...,https://play.google.com/store/apps/details?id=...,"SHOPPING,APPLICATION",False
29752,com.king.candycrushsodasaga,Candy Crush Soda Saga,You loved playing Candy Crush Saga - Start pla...,https://play.google.com/store/apps/details?id=...,"GAME_CASUAL,GAME",False
13376,com.bumble.app,Bumble - Dating. Friends. Bizz,Millions of people have signed up to Bumble to...,https://play.google.com/store/apps/details?id=...,"DATING,APPLICATION",False


## Asking OpenAI directly?

In [3]:
def openai_ask(prompts):
    """Send ``prompts`` to the completions endpoint and return the cleaned replies.

    Args:
        prompts: Forwarded unchanged as the ``prompt`` argument of the request.

    Returns:
        A list with one entry per returned choice: the choice's text with
        surrounding whitespace removed and converted to lowercase.
    """
    completion = openai_client.completions.create(
        prompt=prompts,
        model="text-davinci-003",
    )
    cleaned = []
    for choice in completion.choices:
        cleaned.append(choice.text.strip().lower())
    return cleaned

openai_ask("Is a dog a mammal?")

['yes, a dog is a mammal.']

In [4]:
# Same question, but the prompt now explicitly asks for a yes/no reply.
openai_ask("Is a dog a mammal? answer yes or no")

['yes']

## What about "fuzzier" questions?

In [5]:
# An open-ended question, asked without any instruction about the reply format.
openai_ask("I'm facing financial troubles, should I take a loan?")

['no. taking a loan should be a last resort when you are facing']

In [6]:
# Append the "answer yes or no" instruction to the open-ended question.
openai_ask("I'm facing financial troubles, should I take a loan? answer yes or no")

['no']

In [7]:
# Second run of the identical prompt, to check whether the reply format is stable.
openai_ask("I'm facing financial troubles, should I take a loan? answer yes or no")

['no']

In [8]:
# Third run of the identical prompt.
openai_ask("I'm facing financial troubles, should I take a loan? answer yes or no")

['no. taking a loan can make your financial situation worse, especially if']

## Popular approach: Retry

In [9]:
def yes_no_or_retry(prompt, max_retries=10):
    """Ask ``prompt`` repeatedly until the reply is exactly "yes" or "no".

    Args:
        prompt: Prompt forwarded to ``openai_ask`` on every attempt.
        max_retries: Upper bound on the number of requests. Pass ``None`` to
            retry without any limit.

    Returns:
        A tuple ``(attempts, is_yes)``: the number of requests that were made
        and whether the accepted reply was "yes".

    Raises:
        RuntimeError: If none of the ``max_retries`` requests returned exactly
            "yes" or "no".
    """
    attempts = 0
    # Bound the loop: without a cap, a model that never answers with a bare
    # "yes"/"no" would keep this spinning (and issuing requests) forever.
    while max_retries is None or attempts < max_retries:
        answer = openai_ask(prompt)[0].lower()
        attempts += 1
        if answer in {"yes", "no"}:
            return attempts, answer == "yes"
    raise RuntimeError(
        f"No plain 'yes'/'no' answer after {attempts} attempt(s)"
    )

yes_no_or_retry("I'm facing financial troubles, should I take a loan? answer yes or no")

(2, False)

As we can see, the answer is not always a valid choice ("yes" or "no").
# Introducing verbalizers:

## Verbalizers in a sentence:

Mapping a **class** to a **token**, and then choosing the token with the highest logit
## The OpenAI tokenizer

https://platform.openai.com/tokenizer

In [22]:
# Token ids that `openai_yes_or_no` boosts via `logit_bias`.
# YES_TOKEN / NO_TOKEN are the defaults used with "text-davinci-003".
# Token ids are tokenizer-specific; the GPT3_* sets are presumably the same
# words under a different tokenizer — TODO confirm with
# https://platform.openai.com/tokenizer before pairing them with another model.
YES_TOKEN = frozenset([5297, 3763, 3363, 8505, 43335, 21560])
GPT3_YES_TOKEN = frozenset([9642, 14410, 10035, 7566, 14331, 9891])
NO_TOKEN = frozenset([2949, 645, 1400, 3919, 15285, 8005])
GPT3_NO_TOKEN = frozenset([2822, 5782, 912, 2201, 9173, 2360])


def openai_yes_or_no(prompts, model="text-davinci-003", yes_tokens=YES_TOKEN, no_tokens=NO_TOKEN):
    """Ask yes/no questions, biasing the model towards the given answer tokens.

    Args:
        prompts: Forwarded unchanged as the ``prompt`` argument of the request.
        model: Name of the completion model to query.
        yes_tokens: Iterable of token ids to boost as "yes" answers.
        no_tokens: Iterable of token ids to boost as "no" answers.

    Returns:
        A list with one boolean per returned choice: ``True`` when the
        generated text, stripped and lowercased, equals "yes"; ``False`` for
        anything else.
    """
    # Accept any iterable of ids, not only sets.
    allowed_tokens = frozenset(yes_tokens) | frozenset(no_tokens)
    response = openai_client.completions.create(
        model=model,
        prompt=prompts,
        temperature=0.7,
        # Every allowed token gets the same +100 bias; with max_tokens=1 the
        # completion is a single token.
        logit_bias={t: 100 for t in allowed_tokens},
        max_tokens=1,
    )
    ret = [choice.text.strip().lower() == "yes" for choice in response.choices]
    return ret

openai_yes_or_no("Is dog a mammal?")


[True]

In [11]:
# The loan question again, this time through the logit-biased, single-token helper.
openai_yes_or_no("I'm facing financial troubles, should I take a loan? answer yes or no")

[False]

In [12]:
# Submit ten copies of one prompt in a single call and tally the boolean answers.
prompt = "I'm facing financial troubles, should I take a loan? answer yes or no"
prompts = [prompt for _ in range(10)]
answers = openai_yes_or_no(prompts)
collections.Counter(answers)

Counter({False: 10})

## Question:
Ask the following questions for each app
1. Is the app for kids?
2. Is the app a shopping app?
3. Is it a game?
4. Is this app a dating app?
5. Does this app have in-app purchases?
6. Is this app a match 3 game?

Compare the naive approach (asking the model to "answer yes or no" in the prompt) with the logit-bias approach.

In [26]:
# Use the description of the app at position 7 of the sampled frame as context.
app_description = df.iloc[7]["description"]
print(app_description)

questions = [
    "Is the app for kids?",
    "Is the app a shopping app?",
    "Is it a game?",
    "Is this app a dating app?",
    "Does this app have in-app purchases?",
    "Is this app a match 3 game?",
]
separator = "\n-----------------\n"

for question in questions:
    # Prompt layout: header, description, question, answer instruction,
    # each separated by a blank line.
    prompt = "\n\n".join([
        "Given the following app description:",
        app_description,
        question,
        "Answer yes or no",
    ])
    prompts = [prompt] * 5
    # For the naive baseline, swap in: answers = openai_ask(prompts)
    answers = openai_yes_or_no(prompts)
    result = collections.Counter(answers)
    print(question, result, separator, sep="\n")


You loved playing Candy Crush Saga - Start playing Candy Crush Soda Saga! More divine matching combinations and challenging game modes, brimming with purple soda and fun!

This mouth-watering puzzle adventure will instantly quench your thirst for fun. Match 3 candies and blast your way through new dimensions of magical gameplay. Take on this Sodalicious Saga alone or play with friends to see who can get the highest score!

Monthly season updates bring unique quests and exciting match 3 puzzles - more sugar to blast means more fun to have! Progress through the Season Pass while earning rewards and boosters to help you on your Saga.

Compete against other players to see who can complete levels the fastest, or work as a team in the 4 in a Row game mode, where players work together for Sodalicious rewards!

DIFFERENT GAME MODES: 
*Soda – Switch the bottles and match 3 or more candies to release purple soda
*Frosting – Match candies to blast the ice
*Honeycomb – Match 3 candies or more next