# Pre-processing text data

In [None]:
# Load the test dataset, keeping only the free-text column and the two label
# columns; rows missing any of the three values are dropped.
test_df = pd.read_csv("/content/drive/MyDrive/test.csv")
test_df = test_df[['crimeaditionalinfo', 'category', 'sub_category']].dropna()

# Combine category and sub_category into a single target string for evaluation.
# Vectorized concatenation is used instead of a row-wise `apply(axis=1)`: it
# avoids one Python call per row and still produces a proper (empty) column
# when no rows survive `dropna()`. `astype(str)` mirrors the string conversion
# an f-string would apply to non-string labels.
test_df['target'] = (
    "category: " + test_df['category'].astype(str)
    + " sub_category: " + test_df['sub_category'].astype(str)
)

# Tokenize the test dataset
def preprocess_test_data(examples):
    """Tokenize the ``crimeaditionalinfo`` entries of ``examples``.

    The module-level ``tokenizer`` is asked to truncate and pad to
    ``max_length=128`` and to return PyTorch tensors; its result is returned
    unchanged.
    """
    texts = examples['crimeaditionalinfo']
    tokenizer_options = dict(
        max_length=128,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    return tokenizer(texts, **tokenizer_options)

# Tokenize every test text in one call; the helper only reads the
# 'crimeaditionalinfo' key of the mapping it is given.
test_inputs = preprocess_test_data(
    dict(crimeaditionalinfo=test_df["crimeaditionalinfo"].tolist())
)

# Prediction and accuracy

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to calculate metrics in batches
def calculate_metrics_in_batches(
    model,
    tokenizer,
    test_df,
    batch_size=8,
    device='cuda',
    *,
    max_input_length=128,
    max_output_length=64,
    num_beams=4,
):
    """Generate predictions for ``test_df`` in batches and score them.

    Each batch of ``crimeaditionalinfo`` texts is tokenized, passed through
    ``model.generate`` with beam search, decoded back to text, and compared
    (after stripping surrounding whitespace) against the ``target`` column by
    exact string match.

    Args:
        model: Object exposing ``to(device)``, ``eval()`` and ``generate(...)``.
        tokenizer: Callable tokenizer that also provides ``batch_decode``.
        test_df: DataFrame with ``crimeaditionalinfo`` and ``target`` columns.
        batch_size: Number of rows tokenized and generated per step.
        device: Device the model and inputs are moved to. If a CUDA device is
            requested but CUDA is unavailable, evaluation falls back to CPU
            with a warning instead of failing.
        max_input_length: Truncation/padding length for the input texts.
        max_output_length: ``max_length`` passed to ``model.generate``.
        num_beams: Beam width passed to ``model.generate``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and F1
        are weighted averages over the labels.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``test_df`` is empty.
    """
    import warnings  # local import so the block does not rely on a file-level one

    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    # len() rather than truthiness: the truth value of a DataFrame is ambiguous.
    if len(test_df) == 0:
        raise ValueError("test_df is empty; there is nothing to evaluate")

    if torch.device(device).type == "cuda" and not torch.cuda.is_available():
        warnings.warn(
            "CUDA was requested but is not available; evaluating on CPU instead.",
            RuntimeWarning,
            stacklevel=2,
        )
        device = "cpu"

    model.to(device)
    model.eval()  # Ensure the model is in evaluation mode

    # Convert the columns once instead of slicing the frame on every iteration.
    texts = test_df['crimeaditionalinfo'].tolist()
    true_labels = [target.strip() for target in test_df['target'].tolist()]
    pred_labels = []

    for start in range(0, len(texts), batch_size):
        inputs = tokenizer(
            texts[start:start + batch_size],
            max_length=max_input_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        ).to(device)

        # No gradients are needed for generation.
        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=max_output_length,
                num_beams=num_beams,
                early_stopping=True
            )

        # Decode generated token ids back to text, dropping special tokens.
        predictions = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        pred_labels.extend(pred.strip() for pred in predictions)

    # Generated text is free-form, so some labels will have no predicted or no
    # true samples. zero_division=0 yields the same value scikit-learn uses by
    # default in that case, without emitting a warning per metric call.
    averaging = {"average": "weighted", "zero_division": 0}
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, **averaging)
    recall = recall_score(true_labels, pred_labels, **averaging)
    f1 = f1_score(true_labels, pred_labels, **averaging)

    return accuracy, precision, recall, f1

# Usage: evaluate on the test set and report each metric to two decimals.
accuracy, precision, recall, f1 = calculate_metrics_in_batches(
    model,
    tokenizer,
    test_df,
    batch_size=8,
    device='cuda',
)
print(
    "Test Metrics:",
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)
