In [None]:
import pandas as pd
import json
import numpy as np
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa

# Locations of the two annotation exports (replace with your own file paths)
file_path_1, file_path_2 = 'file1.csv', 'file2.csv'

# Read each export into its own DataFrame, in the same order as the paths
df1, df2 = (pd.read_csv(csv_path) for csv_path in (file_path_1, file_path_2))

# Extract POS tags from the JSON-like strings
def extract_labels(pos_tag_column):
    """Parse a column of JSON annotation strings into per-row label lists.

    Each string item is decoded with ``json.loads`` and is expected to hold a
    list of annotation entries (dicts). For every entry, the first element of
    its ``'labels'`` value is taken as that entry's label.

    Args:
        pos_tag_column: Iterable of cell values (for example a pandas Series).

    Returns:
        A list with one element per input item, in input order. Each element
        is a list of labels. Rows that are not strings, fail to decode, or do
        not decode to a list yield an empty list. Entries with a missing or
        empty ``'labels'`` value contribute ``'UNKNOWN'``.
    """
    pos_tags = []
    for item in pos_tag_column:
        # Non-string cells (e.g. NaN from an empty CSV cell) carry no annotation
        if not isinstance(item, str):
            pos_tags.append([])
            continue

        try:
            annotation = json.loads(item)
        except json.JSONDecodeError:
            print(f"Error decoding JSON: {item}")
            pos_tags.append([])  # Keep row alignment even when parsing fails
            continue

        # Valid JSON that is not a list (null, number, object, ...) cannot be
        # walked entry by entry, so treat it like a row with no annotation.
        if not isinstance(annotation, list):
            pos_tags.append([])
            continue

        labels = []
        for entry in annotation:
            # Guard against non-dict entries and missing/empty 'labels' values,
            # which would otherwise raise instead of using the default.
            if isinstance(entry, dict) and entry.get('labels'):
                labels.append(entry['labels'][0])  # Only the first label is used
            else:
                labels.append('UNKNOWN')
        pos_tags.append(labels)
    return pos_tags

# Extract the POS tags for both annotators
pos_tags_1 = extract_labels(df1['pos_tag'])
pos_tags_2 = extract_labels(df2['pos_tag'])

# Rows are paired by position. Keep only pairs where both annotators produced
# the same, non-zero number of labels, since Cohen's kappa needs aligned items.
valid_pairs = [
    (tags_1, tags_2)
    for tags_1, tags_2 in zip(pos_tags_1, pos_tags_2)
    if len(tags_1) == len(tags_2) and len(tags_1) > 0
]

# Report how many rows were excluded so silent data loss is visible
total_rows = max(len(pos_tags_1), len(pos_tags_2))
skipped_rows = total_rows - len(valid_pairs)
if skipped_rows:
    print(f"Skipped {skipped_rows} of {total_rows} rows (empty, unparsable, or mismatched lengths).")

# If there are no valid pairs left, print a warning and stop the computation
if not valid_pairs:
    print("No valid pairs of annotations found.")
else:
    # Calculate Cohen's Kappa separately for each valid row
    cohen_kappa_scores = [cohen_kappa_score(tags_1, tags_2) for tags_1, tags_2 in valid_pairs]

    # cohen_kappa_score can return NaN when a row contains a single distinct
    # label (kappa is undefined there). A plain mean would then be NaN for the
    # whole dataset, so undefined rows are excluded from the average.
    undefined_scores = int(np.isnan(np.asarray(cohen_kappa_scores, dtype=float)).sum())
    if undefined_scores:
        print(f"Ignored {undefined_scores} row(s) with an undefined kappa score.")

    # Calculate the average Cohen's Kappa score over the defined rows
    average_cohen_kappa = np.nanmean(cohen_kappa_scores)
    print(f"Average Cohen's Kappa: {average_cohen_kappa}")

Average Cohen's Kappa: 0.9716324637779374


In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.inter_rater import fleiss_kappa

# Step 1: Load the three CSV files, one DataFrame each
file1, file2, file3 = (
    pd.read_csv(csv_name)
    for csv_name in ('file1_cv.csv', 'file2_cv.csv', 'file3_cv.csv')
)

# Step 2: Pull the 'truck_label' column out of each frame
labels_1, labels_2, labels_3 = (
    frame['truck_label'] for frame in (file1, file2, file3)
)

# Step 3: Combine the labels into a single DataFrame (one column per rater)
data = pd.DataFrame({
    'Rater1': labels_1,
    'Rater2': labels_2,
    'Rater3': labels_3
})

# Step 4: Map labels to numerical values (Truck = 1, No Truck = 0).
# An element-wise map is used instead of DataFrame.replace, which emits a
# deprecation warning about silent downcasting in recent pandas versions.
# Values that are not in the mapping pass through unchanged, as with replace.
label_to_code = {'Truck': 1, 'No Truck': 0}
data = data.apply(lambda ratings: ratings.map(lambda label: label_to_code.get(label, label)))

# Fleiss' kappa assumes every subject was rated by the same number of raters.
# A row with a missing or unrecognised label would contribute fewer ratings,
# so such rows are skipped (and reported) rather than silently undercounted.
complete_rows = data.isin([0, 1]).all(axis=1)
skipped_rows = int((~complete_rows).sum())
if skipped_rows:
    print(f"Skipping {skipped_rows} row(s) with missing or unrecognised labels.")
data = data.loc[complete_rows].astype(int)

# Step 5: Create the matrix for Fleiss' kappa
# One row per subject, one column per category (0, 1), holding the number of
# raters that chose that category.
category_counts = pd.DataFrame({code: data.eq(code).sum(axis=1) for code in (0, 1)})

# Step 6: Calculate Fleiss' kappa
if category_counts.empty:
    print("No fully rated rows found; Fleiss' kappa cannot be computed.")
else:
    kappa = fleiss_kappa(category_counts.values)
    print(f"Fleiss' kappa: {kappa}")


Fleiss' kappa: 0.8136645962732917


  data = data.replace({'Truck': 1, 'No Truck': 0})
