In [None]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
import time

def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring masked positions.

    Args:
        model_output: Indexable model result whose element 0 holds the
            per-token embeddings (assumed (batch, seq_len, dim) - TODO confirm).
        attention_mask: Mask tensor that is broadcast over the last
            dimension of the embeddings; zero entries are excluded.

    Returns:
        Tensor of the masked sum over dimension 1 divided by the mask count.
        The count is clamped to at least 1e-9, so a fully masked row yields
        zeros instead of a division by zero.
    """
    hidden_states = model_output[0]
    # Stretch the mask to the embeddings' shape so it can weight every feature.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_sum = torch.sum(hidden_states * mask, 1)
    token_count = torch.clamp(mask.sum(1), min=1e-9)
    return masked_sum / token_count

# The embedding model is paired with the bert-base-uncased tokenizer here
# (presumably following the model card's usage example - verify if the model changes).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
# trust_remote_code=True runs Python code shipped in the model repository;
# only keep it enabled for repositories you trust.
model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)
# Inference mode. The trailing semicolon keeps the notebook from echoing the
# model repr, without rebinding `_` (IPython's "previous output" variable).
model.eval();

In [None]:
def embed_texts(passages):
    """Embed a batch of texts as L2-normalised, mean-pooled vectors.

    Args:
        passages: Text input accepted by the module-level ``tokenizer``
            (the notebook passes a list of strings).

    Returns:
        Tensor with one row per passage; each row is normalised to unit
        L2 norm along dimension 1.
    """
    # Pad to the longest passage in the batch and truncate over-long ones.
    batch = tokenizer(passages, padding=True, truncation=True, return_tensors='pt')

    # Forward pass only: no gradients are needed for embedding.
    with torch.no_grad():
        outputs = model(**batch)

    pooled = mean_pooling(outputs, batch['attention_mask'])
    return F.normalize(pooled, p=2, dim=1)

In [None]:
def similarity(veca, vecb):
    """Return the cosine similarity of two numeric sequences.

    Both inputs are walked once, in lockstep; if their lengths differ, the
    extra elements of the longer one are ignored (``zip`` semantics).

    Args:
        veca: Iterable of numbers (a list, or a 1-D tensor as in this notebook).
        vecb: Iterable of numbers, paired element-wise with ``veca``.

    Returns:
        dot(veca, vecb) / sqrt(|veca|^2 * |vecb|^2). When either vector has
        zero magnitude (including empty input) the cosine is undefined and
        0.0 is returned instead of raising ZeroDivisionError or yielding NaN.
    """
    maga, magb = 0, 0
    dotprod = 0
    for a, b in zip(veca, vecb):
        maga += a * a
        magb += b * b
        dotprod += a * b
    mag = (maga * magb) ** 0.5
    # Guard the degenerate case: a zero vector has no direction.
    if mag == 0:
        return 0.0
    return dotprod / mag

In [None]:
# Embed two differently worded versions of the same complaint and show the raw vectors.
x = embed_texts([
    "My arm really hurts!",
    "my arm is absolutely killing me.",
])
print(x)

In [None]:
# Cosine similarity between the two sentence embeddings (rows 0 and 1 of `x`).
# As the cell's last expression, the result is displayed as the cell output.
similarity(x[0], x[1])

# Exciting! Now let's do something useful...

For the sake of this blog post, I thought it would be fun to create a "prick detector".

unfriendly, prick, fuckhead, asshole, twat

Draft prompt: "Generate 30 messages from someone who is being an asshole."


## Making Fake Data with Ollama
Ollama exposes an OpenAI-compatible API: <https://ollama.com/blog/openai-compatibility>

I use mistral-openorca because it is conveniently uncensored, so it can create mean messages!


In [None]:
from openai import OpenAI

# Point the OpenAI SDK at the local Ollama server's /v1 endpoint.
# The SDK requires an api_key argument, but the value is not used here.
client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

def get_completion(messages, model="mistral-openorca"):
    """Request a chat completion from the module-level ``client``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts forming
            the conversation so far.
        model: Name of the model to query. Defaults to the model this
            notebook was written against, so existing calls are unchanged.

    Returns:
        The response object returned by ``client.chat.completions.create``.
    """
    return client.chat.completions.create(
        model=model,
        messages=messages,
    )

def unpack(resp):
    """Return the message text of the first choice in a chat completion response."""
    first_choice = resp.choices[0]
    return first_choice.message.content

In [None]:
# One-shot prompt: before the real request, the conversation contains one worked
# example (a user request plus a hand-written assistant reply as a numbered list).
# Showing the model an example tends to make its output conform better to the
# format we want.
# Turn order: system instruction -> example request -> example reply -> real request.
messages = [
        {"role": "system", "content": "You are a helpful assistant, which generates fake messages."},
        {"role": "user", "content": "Generate three messages from someone who is being helpful."},
        {"role": "assistant", "content": "1. I'd love to help! Just tell me what you need and I'll get right to it.\n2. No problem at all mate, just send me the files and I'll start editing them, just to confirm, you do want me to check only for typos and grammatical errors?\n3. Absolutely, I'll just get the car from the garage, I'd be more than happy to drive you to the hospital."},
        {"role": "user", "content": "Generate 30 messages from someone who is being toxic."}
    ]

# Time the request with perf_counter(): it is monotonic and high-resolution,
# whereas time.time() can jump if the system clock is adjusted mid-request.
start = time.perf_counter()
response = get_completion(messages)
stop = time.perf_counter()
elapsed = stop-start
# Rough throughput figure based on the token total reported in response.usage.
print(round(response.usage.total_tokens/elapsed,2), "tokens per second")

print()
# Show the generated messages themselves, not just the speed.
print(unpack(response))

So we can generate fake data — how very exciting!

**TODO:** finish this thought (the draft breaks off at "At this point").

## Let's Create a Classifier!



In [None]:


# Example with streaming:

def get_completion_stream(messages, model="mistral-openorca"):
    """Stream a chat completion, echoing text as it arrives.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts forming
            the conversation so far.
        model: Name of the model to query. Defaults to the model this
            notebook was written against, so existing calls are unchanged.

    Returns:
        The full generated text, i.e. every streamed fragment concatenated.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
        n=1,
        stream=True
    )

    pieces = []
    for chunk in response:
        # Some chunks carry no choices at all; skip them instead of indexing [0].
        if not chunk.choices:
            continue
        t = chunk.choices[0].delta.content
        # The delta may have no text (content is None, e.g. on the closing
        # chunk); concatenating None would raise TypeError.
        if not t:
            continue
        pieces.append(t)
        print(t, end='', flush=True)

    return "".join(pieces)


In [None]:
# Stream a completion for the one-shot prompt. The text is printed as it arrives;
# because the call is the cell's last expression, the returned string is displayed
# as well (append `;` to the line to suppress that second copy).
get_completion_stream(messages)

