In [None]:
%pip install huggingface_hub transformers

In [None]:
from huggingface_hub import AsyncInferenceClient
from transformers import LlamaTokenizerFast

# --- Run configuration -------------------------------------------------
BATCH_SIZE = 512  # passed to the Semaphore further down to cap concurrent requests
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"
ENDPOINT_URL = "http://64.156.70.185:18683"  # address of the inference server
REQUEST_TIMEOUT = 120  # forwarded to the client as `timeout`

# The tokenizer is loaded from MODEL_ID; it is used to render the chat template.
tokenizer = LlamaTokenizerFast.from_pretrained(MODEL_ID)
# Async client pointed at the endpoint above rather than at a hub model id.
client = AsyncInferenceClient(model=ENDPOINT_URL, timeout=REQUEST_TIMEOUT)


In [None]:
def get_prompt(search_string):
    """Build the plain-text question asked about one search query.

    Args:
        search_string: The search query text to embed in the question.

    Returns:
        The prompt string with ``search_string`` quoted inside it.
    """
    return f'Here is a recent Google search: "{search_string}". What could the user be referring to with this query? What seems to be their intent?'


def get_templated_prompt(search_string):
    """Wrap the prompt for ``search_string`` in the tokenizer's chat template.

    The prompt is sent as a single ``user`` message. ``tokenize=False`` makes
    the tokenizer return text instead of token ids, and
    ``add_generation_prompt=True`` is passed so the template ends where the
    model's reply is expected to start.

    Args:
        search_string: The search query text.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` returns for that message.
    """
    messages = [{"role": "user", "content": get_prompt(search_string)}]
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

In [None]:
# Mount Google Drive at /content/drive so the data files under
# /content/drive/MyDrive/... used by later cells are reachable.
# NOTE: `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
df = pd.read_json("/content/drive/MyDrive/Colab Notebooks/data/MyActivity.json")

# Rows without a title cannot be turned into a prompt; drop them up front so the
# boolean mask below never contains missing values.
df = df.dropna(subset=['title'])

# Non-search activity is identified by the verb the title STARTS with.
# `str.match` anchors at the beginning of the string, so a real search whose
# text merely contains one of these words (e.g. "Searched for Used cars") is
# kept. The previous substring test (`str.contains`) discarded such rows.
is_non_search = df['title'].str.match(r'(?:Visited|Viewed|Defined|Used)\b', na=False)

df = (
    df[~is_non_search]
    # Strip the activity prefix and normalise case so duplicates compare equal.
    .assign(
        title=lambda frame: frame['title']
        .str.replace('^Searched for ', '', regex=True)
        .str.lower()
    )
    # Keep the first occurrence of each query in file order.
    .drop_duplicates(subset='title', keep='first')
    # Parse timestamps, then order the queries chronologically.
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .sort_values(by='time')
)



In [None]:
from tqdm.asyncio import tqdm
from asyncio import Semaphore

# Shared by every `generate` call: at most BATCH_SIZE holders at any one time.
semaphore = Semaphore(value=BATCH_SIZE)

In [None]:
async def generate(prompt):
    """Request a completion for ``prompt`` from the inference endpoint.

    The call is made while holding the module-level ``semaphore``, so the
    number of requests in flight is bounded by that semaphore's size.

    Args:
        prompt: The text passed to ``client.text_generation``.

    Returns:
        The value returned by ``client.text_generation``, or the sentinel
        string ``"ERROR"`` if the request raised an ``Exception``.
    """
    # `async with` acquires the semaphore and guarantees its release on every
    # exit path, replacing the manual acquire / finally-release pair.
    async with semaphore:
        try:
            return await client.text_generation(
                prompt,
                temperature=0.1,
                top_p=0.95,
                repetition_penalty=1.2,
                top_k=50,
                max_new_tokens=1024,
            )
        except Exception as e:
            # Best-effort: one failed request must not abort the whole batch.
            # The exception type is printed as well because some exceptions
            # (timeouts in particular) have an empty message.
            print(f"{type(e).__name__}: {e}")
            return "ERROR"

In [None]:
# One chat-templated prompt per search title, in DataFrame row order.
prompts = [get_templated_prompt(title) for title in df["title"].values]

# Calling `generate` only creates the coroutine objects; nothing runs until
# they are awaited.
tasks = [generate(prompt) for prompt in prompts]

In [None]:
outputs = []
for result in await tqdm.gather(*tasks, total=len(tasks), smoothing=0):
    outputs.append(result)

df["intent"] = outputs

In [None]:
# Write the query, its timestamp and the generated intent to Drive.
intent_csv_path = "/content/drive/MyDrive/Colab Notebooks/data/intent.csv"
exported_columns = ["title", "time", "intent"]
df.to_csv(intent_csv_path, columns=exported_columns)

In [None]:
# Figures used as inputs to the price-per-token estimate in the next cell.
df_size = 59_235  # presumably the number of prompts processed in the run - TODO confirm
average_output_tokens = 512  # assumed tokens generated per prompt - TODO confirm against real outputs
runpod_h100_hourly_cost = 4.69  # presumably USD per hour for the rented GPU - verify
running_time_hrs = 0.42  # 25:26 (25 min 26 s is about 0.42 h)

In [None]:
# Price per 1,000 generated tokens: run cost (hourly rate x hours), scaled by
# 1000, divided by the total number of output tokens.
total_output_tokens = df_size * average_output_tokens
scaled_run_cost = 1000 * runpod_h100_hourly_cost * running_time_hrs
pp_k_tokens = scaled_run_cost / total_output_tokens
pp_k_tokens

In [None]:
# Baseline price per 1,000 tokens to compare against.
# NOTE(review): presumably the hosted GPT-3.5 API price in USD - confirm the source.
gpt3_pp_k_tokens = 0.0020
# The original name was misspelled ("gtp3"); keep it as an alias so any cell
# that still refers to it keeps working.
gtp3_pp_k_tokens = gpt3_pp_k_tokens
gpt3_pp_k_tokens / pp_k_tokens # 30x cheaper!