In [None]:
!pip install groq



In [None]:
import pandas as pd
import requests
import numpy as np
import os
import concurrent.futures

from groq import Groq
from concurrent.futures import ThreadPoolExecutor, as_completed
from sklearn.metrics import classification_report

In [None]:
from google.colab import userdata

In [None]:
# Groq API client used by classify_text_groq; the key is fetched through
# google.colab's userdata under the name GROQ_API_KEY rather than hardcoded.
client = Groq(api_key=userdata.get('GROQ_API_KEY'))

In [None]:
# Few-shot prompt template for classification: lists the five target
# categories, gives one worked example per category, and ends with a
# `{text}` placeholder that classify_text_groq fills via str.format().
# Because str.format() is used, any literal braces added to this template
# would have to be doubled ({{ }}).
few_shot_prompt = """
Classify the following text into one of these categories:
Healthcare Providers, Patients, Payers and Insurers, Pharmaceutical Representatives, Regulators.

Examples:
1. Text: "The doctor prescribed a new medication for my condition."
   Category: Healthcare Providers
2. Text: "I recently paid for my health insurance premium."
   Category: Payers and Insurers
3. Text: "The FDA has approved a new drug for cancer treatment."
   Category: Regulators
4. Text: "I am worried about my medication side effects."
   Category: Patients
5. Text: "Our pharmaceutical company is releasing a new pain reliever."
   Category: Pharmaceutical Representatives

Classify this:
Text: "{text}"
Category:
"""

In [None]:
# classify text using the Groq API
def classify_text_groq(input_text, *, model="llama3-8b-8192", temperature=0.5, max_tokens=100):
    """Classify one text into a category using the Groq chat API.

    The text is substituted into the module-level ``few_shot_prompt`` and sent
    through the module-level ``client``.

    Args:
        input_text: Text to classify; inserted into the prompt's ``{text}`` slot.
        model: Groq model identifier passed to the completion call.
        temperature: Sampling temperature passed to the completion call. Lower
            values should make repeated runs return more consistent labels.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The predicted category as a single-line string: the first line of
        whatever follows the last "Category:" marker in the reply (or of the
        whole reply when the marker is absent), with surrounding quotes,
        markdown markers and periods removed. Returns "" when the reply
        carries no text.

    Raises:
        Any exception raised by ``client.chat.completions.create`` propagates
        unchanged; nothing is caught here.
    """
    prompt = few_shot_prompt.format(text=input_text)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an expert text classifier."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # The SDK types message content as optional, so guard against None
    # instead of letting .strip() raise AttributeError.
    output = (response.choices[0].message.content or "").strip()

    # Keep only what follows the last "Category:" marker, if the model echoed it.
    tail = output.split("Category:")[-1].strip()
    if not tail:
        return ""

    # Models often append an explanation after the label; only the first line
    # is the category. Also drop wrapping quotes / markdown / trailing period
    # so the label compares cleanly against the expected category names.
    return tail.splitlines()[0].strip(" \t\"'*`.")

In [None]:
def classify_data_concurrently(data, max_workers=1):
    """Label every row of ``data`` by running classify_text_groq on its "text" column.

    Args:
        data: pandas DataFrame with a "text" column. It is modified in place:
            a "predicted_label" column is added (or overwritten).
        max_workers: Number of worker threads. The default of 1 issues the
            requests one at a time; raise it to classify rows in parallel
            (the API's rate limits presumably bound how far this can go).

    Returns:
        The same DataFrame object that was passed in, now carrying the
        "predicted_label" column.

    Raises:
        KeyError: if ``data`` has no "text" column.
        ValueError: if ``max_workers`` is not positive (from ThreadPoolExecutor).
        Any exception raised by classify_text_groq for a row propagates.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order, so labels line up with
        # rows no matter which request finishes first.
        results = list(executor.map(classify_text_groq, data["text"]))

    data["predicted_label"] = results
    return data

In [None]:
# Load the evaluation set from the working directory.
data = pd.read_csv("healthcare_actor_data_eval.csv")

# Classify every row. classify_data_concurrently adds the "predicted_label"
# column to `data` in place and returns the same object, so `labeled_data`
# and `data` refer to one DataFrame.
labeled_data = classify_data_concurrently(data)

# Persist the labeled rows (without the index column).
labeled_data.to_csv("labeled_healthcare_data.csv", index=False)

# Quick look at the first rows to sanity-check the predictions.
print(labeled_data.head())