In [1]:
import numpy as np
import os
import shutil
import json

In [2]:
# Prepare the data
data_folder = '../data'
image_formats = ['.jpg', '.jpeg', '.png', '.bmp']
output_folder = os.path.join(data_folder, '_preped')
os.makedirs(output_folder, exist_ok=True)

for foldername in os.listdir(data_folder):
    folder_path = os.path.join(data_folder, foldername)
    if os.path.isdir(folder_path) and foldername != '_preped':
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            if os.path.isfile(file_path):
                name, extension = os.path.splitext(filename)
                if extension in image_formats:
                    new_filename = f"{foldername}_{name}{extension}"
                    new_file_path = os.path.join(output_folder, new_filename)
                    shutil.copy2(file_path, new_file_path)

In [3]:
label_file = ''
# Create labels folder and save modified JSON
labels_folder = '../data/_labels'
os.makedirs(labels_folder, exist_ok=True)
for foldername in os.listdir(data_folder):
    folder_path = os.path.join(data_folder, foldername)
    if os.path.isdir(folder_path) and foldername != '_labels': 
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            if os.path.isfile(file_path):
                name, extension = os.path.splitext(filename)
                if extension == '.json':
                    label_file = file_path
                    break
                else:
                    label_file = 'not found'
        if label_file == 'not found':
            print(f"No label file found in folder: {foldername}")
            continue

        print(f"Label file found: {label_file}")
        with open(label_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        cnt = 0
        # Process each entry
        for entry in data:
            if 'file_upload' in entry:
                # Remove labelstudio hash prefix
                parts = entry['file_upload'].split('-', 1)  # Split only on first '-'
                if len(parts) > 1:
                    entry['file_upload'] = f"{foldername}_{parts[1]}"
                    cnt += 1

        # Get original filename and create new path
        original_filename = os.path.basename(label_file)
        new_label_path = os.path.join(labels_folder, original_filename)

        # Save the modified JSON
        with open(new_label_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)
        print(f"Total entries updated: {cnt}")

Label file found: ../data\B8V41Y\b8v41y.json
Total entries updated: 20
Label file found: ../data\C6037J\C6037J.json
Total entries updated: 34
Label file found: ../data\consensus\b8v41y.json
Total entries updated: 57
Label file found: ../data\D6AE9F\D6AE9F.json
Total entries updated: 22
No label file found in folder: ECSGGY
Label file found: ../data\FGWUFP\FGWUFP.json
Total entries updated: 20
Label file found: ../data\FO6K58\FO6K58_labels.json
Total entries updated: 32
No label file found in folder: GI9Y8B
Label file found: ../data\GK1XQ4\project-1-at-2025-10-15-23-46-9d203653.json
Total entries updated: 52
Label file found: ../data\H51B9J\H51B9J.json
Total entries updated: 23
Label file found: ../data\ITWQ3V\ITWQ3V.json
Total entries updated: 23
Label file found: ../data\NC1O2T\hf_labels_export.json
Total entries updated: 20
Label file found: ../data\NX9GA4\NX9GA4_ankles_labeled.json
Total entries updated: 20
Label file found: ../data\ODZF0M\project-2-at-2025-10-16-02-08-8ee4fdfa.json

In [4]:
#Match the file names with the labels
image_names = list(os.listdir(output_folder))
data_ready = []
for label_filename in os.listdir(labels_folder):
    label_path = os.path.join(labels_folder, label_filename)
    with open(label_path, 'r', encoding='utf-8') as f:
        labels = json.load(f)
    for entry in labels:
        if 'file_upload' in entry:
            if entry['file_upload'] in image_names:
                result = entry['annotations'][0].get('result')
                if len(result) > 0:
                    label = result[0].get('value').get('choices')[0]
                    data_ready.append((entry['file_upload'], label))
print(f"Total matched entries: {len(data_ready)}")

Total matched entries: 242


In [11]:
# Get the majority class
labels = [label for _, label in data_ready]
# Reaname the 3 wrong labels:
for i in range(len(labels)):
    if labels[i] == 'neutral': labels[i] = '2_Neutralis'
    elif labels[i] == 'pronation': labels[i] = '1_Pronacio'
    elif labels[i] == 'supination': labels[i] = '3_Szupinacio'

unique_labels, counts = np.unique(labels, return_counts=True)
majority_class = unique_labels[np.argmax(counts)]
print(f"Majority class: {majority_class}")

Majority class: 1_Pronacio


In [12]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
# Baseline: Always predict the majority class
def baseline_predict(data):
    return [majority_class] * len(data)

# Evaluate baseline accuracy
true_labels = labels
predicted_labels = baseline_predict(data_ready)
accuracy = np.mean([true == pred for true, pred in zip(true_labels, predicted_labels)])
precision = precision_score(true_labels, predicted_labels, average='weighted')
recall = recall_score(true_labels, predicted_labels, average='weighted')
f1 = f1_score(true_labels, predicted_labels, average='weighted')

print(f"Baseline accuracy: {accuracy * 100:.2f}%")
print(f"Baseline precision: {precision * 100:.2f}%")
print(f"Baseline recall: {recall * 100:.2f}%")

# For detailed per-class metrics
print("\nDetailed Classification Report:")
print(classification_report(true_labels, predicted_labels))


Baseline accuracy: 43.39%
Baseline precision: 18.83%
Baseline recall: 43.39%

Detailed Classification Report:
              precision    recall  f1-score   support

  1_Pronacio       0.43      1.00      0.61       105
 2_Neutralis       0.00      0.00      0.00        98
3_Szupinacio       0.00      0.00      0.00        39

    accuracy                           0.43       242
   macro avg       0.14      0.33      0.20       242
weighted avg       0.19      0.43      0.26       242



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [31]:
from collections import Counter

# Read what picture are in consensus text file
consensus_file_path =  os.path.join(data_folder, 'consensus/anklealign-consensus.txt')
with open(consensus_file_path, 'r', encoding='utf-8') as f:
    consensus_images = f.read().splitlines()

img_names = []
# Get every image name from the consensus file
for img in consensus_images:
    parts = img.split('\\')
    if len(parts) > 1:
        img_names.append(parts[-1])

# Count occurrences of each image name
img_counts = Counter(img_names)

# Keep only images that appear exactly once
unique_consensus_image_names = [img for img, count in img_counts.items() if count == 1]

print(f"Total images in consensus: {len(img_names)}")
print(f"Unique images (appearing exactly once): {len(unique_consensus_image_names)}")
print(f"Duplicate images removed: {len(img_names) - len(unique_consensus_image_names)}")

unique_consensus_images = []

# Rename the images as the prepared data
for img in consensus_images:
    parts = img.split('\\')
    if len(parts) > 1 and parts[2] in unique_consensus_image_names:
        unique_consensus_images.append(f"{parts[1]}_{parts[2]}")
print(len(unique_consensus_images))


Total images in consensus: 57
Unique images (appearing exactly once): 49
Duplicate images removed: 8
49


In [None]:
# Match the consensus images with the prepared data
matched_consensus = []
for img in unique_consensus_images:
    for data_img, _ in data_ready:
        if img == data_img:
            matched_consensus.append((data_img))

print(f"Total matched consensus entries: {len(matched_consensus)}")

train_data = [(img, label) for img, label in data_ready if img not in matched_consensus]
test_data = unique_consensus_images

Total matched consensus entries: 23
