In [None]:
!pip install llama-cpp-python==0.2.82 -q -q -q

In [None]:
!wget -q https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf?download=true -O model.gguf

In [None]:
import pandas as pd
from llama_cpp import Llama

In [None]:

emails_df = pd.read_csv('data/email_categories_data.csv')

print("Preview of our email dataset:")
emails_df.head(2)

Preview of our email dataset:


Unnamed: 0,email_id,email_content,expected_category
0,1,Urgent: Server Maintenance Required\nOur main ...,Priority
1,2,50% Off Spring Collection!\nDon't miss our big...,Promotions


In [None]:

model_path = "model.gguf"

In [None]:

llm = Llama(model_path=model_path)

llama_model_loader: loaded meta data with 20 key-value pairs and 201 tensors from model.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = py007_tinyllama-1.1b-chat-v0.3
llama_model_loader: - kv   2:                       llama.context_length u32              = 2048
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 2048
llama_model_loader: - kv   4:                          llama.block_count u32              = 22
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 5632
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 64
llama_model_loader: - kv   7:                 llama.attention.head_count u32             

In [7]:
prompt = """ You are an email classifier. 
Your task is to assign each email to exactly one of the following categories:
- Priority: Important or urgent emails that require immediate attention.
- Updates: Informational emails such as notifications, cancellations, or reminders.
- Promotions: Marketing or sales-related emails such as discounts, special offers, or deals.

Classify each email by reading its subject and body. 
Respond with ONLY the category name (Priority, Updates, or Promotions). Do not include extra text or other categories, even if they reflect more closely the email topic.

Examples:

Example 1:
Urgent: Password Reset Required
Your account security requires immediate attention. Please reset your password within 24 hours.
Priority

Example 2:
Special Offer - 50% Off Everything!
Don't miss our biggest sale of the year. Everything must go!
Promotions

Example 3:
Canceled Event - Team Meeting
This event has been canceled and removed from your calendar.
Updates

Example 4:
"""

In [None]:

def process_message(llm, message, prompt):
    """Process a message and return the response"""
    input_prompt = f"{prompt} {message}"
    response = llm(
        input_prompt,
        max_tokens=5,
        temperature=0
    )
    
    return response['choices'][0]['text'].strip()

In [None]:

test_emails = emails_df.head(2)

results = []
for idx, row in test_emails.iterrows():
    email_content = row['email_content']
    expected_category = row['expected_category']
    
    result = process_message(llm, email_content, prompt)
    
    results.append({
        'email_content': email_content,
        'expected_category': expected_category,
        'model_output': result
    })


llama_print_timings:        load time =   18964.43 ms
llama_print_timings:      sample time =       0.77 ms /     5 runs   (    0.15 ms per token,  6493.51 tokens per second)
llama_print_timings: prompt eval time =   18964.20 ms /   290 tokens (   65.39 ms per token,    15.29 tokens per second)
llama_print_timings:        eval time =     411.62 ms /     4 runs   (  102.90 ms per token,     9.72 tokens per second)
llama_print_timings:       total time =   19379.71 ms /   294 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =   18964.43 ms
llama_print_timings:      sample time =       0.77 ms /     5 runs   (    0.15 ms per token,  6493.51 tokens per second)
llama_print_timings: prompt eval time =    2797.08 ms /    30 tokens (   93.24 ms per token,    10.73 tokens per second)
llama_print_timings:        eval time =     411.97 ms /     4 runs   (  102.99 ms per token,     9.71 tokens per second)
llama_print_timings:       total time =    3211.39 ms /    34 

In [None]:

results_df = pd.DataFrame(results)

result1 = results_df['model_output'].iloc[0]
result2 = results_df['model_output'].iloc[1]

print(f"Result 1: `{result1}`\nResult 2: `{result2}`")

Result 1: `Priority`
Result 2: `Promotions`
