In [None]:
!pip install python-Levenshtein

In [None]:
!pip install jiwer pandas

In [None]:
import pandas as pd
import jiwer
# Read the CSV export into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="/content/750_test_2025-06-17_15-51-29 - Sheet1.csv"
)
# Leave head() as the cell's final expression so the notebook renders the preview
df.head()

In [None]:
import pandas as pd
from collections import Counter
from jiwer import wer, cer

# Load your CSV (assumes df is already defined; update if needed)
# df = pd.read_csv("your_file.csv")  # Uncomment and change if needed

# Extract ground truth texts.
# fillna("") must run before astype(str): astype(str) alone renders a missing
# cell as the literal string "nan", which is non-empty and would therefore be
# scored as if it were a real transcript instead of being filtered out.
ground_truths = df["Ground Truth"].fillna("").astype(str).tolist()

# List of model columns
models = ["Predictions"]  # Update this if using multiple models

# Store results (one row per evaluated model)
results = []

# Define custom metric function
def compute_word_metrics(ground_truths, predictions):
    """Return corpus-level word precision, recall and F1 as percentages.

    Each ground-truth/prediction pair is split on whitespace and compared as a
    bag of words (a multiset), so repeated words count as many times as they
    occur. Comparing plain sets would hide dropped or hallucinated duplicates:
    "a a a" versus "a" would score as a perfect match.

    Counts are accumulated over all pairs before the ratios are taken
    (micro-averaging). Matching is case-sensitive and ignores word order.

    Args:
        ground_truths: Iterable of reference strings.
        predictions: Iterable of predicted strings, aligned with
            ``ground_truths``; extra items on the longer side are ignored
            because the two are zipped together.

    Returns:
        A ``(precision, recall, f1_score)`` tuple scaled to 0-100. A ratio
        whose denominator is zero is reported as 0.
    """
    tp, fp, fn = 0, 0, 0

    for gt, pred in zip(ground_truths, predictions):
        gt_counts = Counter(gt.split())      # Word -> occurrences in reference
        pred_counts = Counter(pred.split())  # Word -> occurrences in prediction

        # Counter "&" keeps the minimum count per word; "-" keeps only the
        # positive surplus, so the three quantities partition all tokens.
        tp += sum((gt_counts & pred_counts).values())  # True Positives
        fp += sum((pred_counts - gt_counts).values())  # False Positives
        fn += sum((gt_counts - pred_counts).values())  # False Negatives

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision * 100, recall * 100, f1_score * 100

# Loop through models, scoring each prediction column against the ground truth
for model in models:
    # fillna("") must run before astype(str): astype(str) alone renders a
    # missing cell as the literal string "nan", which is non-empty and would
    # slip past the emptiness filter below and be scored as a real prediction.
    predictions = df[model].fillna("").astype(str).tolist()

    # Filter out pairs where either side is empty or whitespace-only.
    # NOTE: dropped rows are excluded from every metric below, including the
    # accuracy denominator.
    filtered_pairs = [
        (gt, pred)
        for gt, pred in zip(ground_truths, predictions)
        if gt.strip() and pred.strip()
    ]

    if not filtered_pairs:
        print(f"Skipping model '{model}' due to empty or invalid data.")
        continue

    ground_truths_cleaned, predictions_cleaned = zip(*filtered_pairs)

    # Compute WER and CER (jiwer expects lists; zip() produced tuples)
    wer_value = wer(list(ground_truths_cleaned), list(predictions_cleaned))
    cer_value = cer(list(ground_truths_cleaned), list(predictions_cleaned))

    # Compute Accuracy: percentage of pairs whose strings match exactly
    exact_matches = sum(gt == pred for gt, pred in zip(ground_truths_cleaned, predictions_cleaned))
    accuracy = (exact_matches / len(ground_truths_cleaned)) * 100

    # Compute Precision, Recall, and F1-score (word level, as percentages)
    precision, recall, f1_score = compute_word_metrics(ground_truths_cleaned, predictions_cleaned)

    # Append results. WER/CER are stored exactly as jiwer returns them, while
    # the remaining columns are scaled to 0-100.
    results.append([model, wer_value, cer_value, accuracy, precision, recall, f1_score])

# Tabulate the collected rows (one per evaluated model) and print the summary
results_df = pd.DataFrame(
    data=results,
    columns=[
        "Model",
        "WER",
        "CER",
        "Accuracy",
        "Precision",
        "Recall",
        "F1-score",
    ],
)
print(results_df)
