### Import Required Packages

In [26]:
from huggingface_hub import InferenceClient
import os
import pandas as pd

### Intialize Inference Client

In [27]:
client = InferenceClient(
    provider="nscale",
    api_key=os.environ["HF_TOKEN"],
)

### Batch Prompt with Review, Features, and Policy Label

In [28]:
def generate_batch_prompt_with_policy(reviews, features_list, policy_labels):
    # Start with the few-shot examples
    prompt = """
    Here are a few examples of reviews classified by type:

    Example 1: 
    Review: "The food was absolutely DISGUSTING and the service was HORRIBLE! NEVER COMING BACK!!!"
    Features: Review Length: 65, Sentiment: -0.9, Confidence Score: 0.95, Relevancy Score: 0.85, Caps Ratio: 0.35, Policy Label: compliant
    Label: Rant

    Example 2:
    Review: "🔥 LIMITED TIME OFFER 🔥 Get 50% OFF with code PROMO2024! Visit our website now!"
    Features: Review Length: 55, Sentiment: 0.1, Confidence Score: 0.6, Relevancy Score: 0.3, Caps Ratio: 0.25, Policy Label: non-compliant
    Label: Ad

    Example 3:
    Review: "The steak was perfectly cooked and the wine pairing was excellent. Service was attentive but not intrusive."
    Features: Review Length: 85, Sentiment: 0.85, Confidence Score: 0.92, Relevancy Score: 0.95, Caps Ratio: 0.0, Policy Label: compliant
    Label: Valid

    Example 4:
    Review: "I think it might rain tomorrow. Should I bring an umbrella?"
    Features: Review Length: 45, Sentiment: 0.0, Confidence Score: 0.4, Relevancy Score: 0.1, Caps Ratio: 0.0, Policy Label: compliant
    Label: Irrelevant

    Example 5:
    Review: "OMG this place changed my LIFE! The ambiance, the food, everything was PERFECT! 😍✨"
    Features: Review Length: 40, Sentiment: 0.95, Confidence Score: 0.88, Relevancy Score: 0.8, Caps Ratio: 0.2, Policy Label: compliant
    Label: Emotional

    Example 6:
    Review: "Check out my YouTube channel for more reviews! Like and subscribe!"
    Features: Review Length: 50, Sentiment: 0.2, Confidence Score: 0.5, Relevancy Score: 0.2, Caps Ratio: 0.15, Policy Label: non-compliant
    Label: Spam


    Now, classify the following reviews based on their content, features, and policy labels. Possible labels are: Valid, Rant, Ad, Spam, Irrelevant.
    """

    # Add reviews with their features and policy labels
    for i, review in enumerate(reviews):
        features = features_list[i]
        policy_label = policy_labels[i]
        prompt += f"""
        
        Review: "{review}"
        Features: Review Length: {features['review_length']}, Sentiment: {features['sentiment']}, Confidence Score: {features['confidence_score']}, Relevancy Score: {features['relevancy_score']}, Caps Ratio: {features['all_caps_ratio']}, Policy Label: {policy_label}
        Label:
        """

    return prompt

### Classify Reviews in Batches

In [29]:
def classify_batch_with_qwen(reviews, features_list, policy_labels):
    prompt = generate_batch_prompt_with_policy(reviews, features_list, policy_labels)
    response = client.chat.completions.create(
        model="Qwen/Qwen3-4B-Instruct-2507",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    labels = [choice.message['content'].strip() for choice in response.choices]
    return labels

def label_reviews_batch(df):
    reviews = df['cleaned_review_text'].tolist()
    features_list = df[['review_length', 'sentiment', 'confidence_score', 'relevancy_score', 'all_caps_ratio']].to_dict(orient='records')
    policy_labels = df['policy_label'].tolist()
    
    # Get batch labels
    labels = classify_batch_with_qwen(reviews, features_list, policy_labels)
    
    # Assign predicted labels to the DataFrame
    df['predicted_label'] = labels
    return df

### Execute on a small batch

In [30]:
data_path = '../data/featured/reviews_with_features.csv'
df = pd.read_csv(data_path).head(20)

labeled_data_batch = label_reviews_batch(df)

# Save the results to a CSV file
labeled_data_batch.to_csv('labeled_reviews_batch_with_qwen.csv', index=False)

ValueError: Length of values (1) does not match length of index (20)