In [15]:
from dotenv import dotenv_values
import yaml
import pandas as pd

# Parse the local .env file and pull out the Hugging Face access token.
# The subscript raises KeyError when HF_TOKEN is not among the parsed values.
env_values = dotenv_values(".env")
hf_token = env_values['HF_TOKEN']

In [3]:
from huggingface_hub import HfApi, list_models, ModelCard

# Build the Hub client used by the later cells.
# The endpoint can also be a Private Hub endpoint; the token is not persisted on the machine.
hf_api = HfApi(endpoint="https://huggingface.co", token=hf_token)


In [42]:
def _to_float(value):
    """Best-effort conversion of a metric value to ``float``.

    Returns ``None`` when ``value`` is neither a number nor a string that
    parses as one, so the caller can skip it instead of crashing.
    """
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        # Metric values written as quoted YAML scalars arrive as strings, e.g. "0.95".
        try:
            return float(value.strip())
        except ValueError:
            return None
    return None


def extract_accuracy(results):
    """Return the first usable ``accuracy`` metric found in ``results``.

    Args:
        results: iterable of model-index result mappings; each may carry a
            ``"metrics"`` list of mappings with ``"type"`` and ``"value"`` keys.
            ``None`` is treated as empty.

    Returns:
        The accuracy as a ``float``. A list value contributes its first
        element. Numeric strings are parsed. Metrics whose value cannot be
        read as a number are skipped, and ``0.0`` is returned when no usable
        accuracy metric exists.
    """
    for result in results or []:
        if not isinstance(result, dict):
            continue
        # ``metrics: null`` in the card yields None here; treat it as "no metrics".
        for metric in result.get("metrics") or []:
            if not isinstance(metric, dict) or metric.get("type") != "accuracy":
                continue
            value = metric.get("value")
            if isinstance(value, list):
                value = value[0] if value else None
            accuracy = _to_float(value)
            if accuracy is not None:
                return accuracy
    return 0.0

def extract_dataset(results):
    """Return the first non-empty dataset name found in ``results``.

    Args:
        results: iterable of model-index result mappings; each may carry a
            ``"dataset"`` entry. ``None`` is treated as empty.

    Returns:
        The ``"name"`` of the first dataset mapping that has a non-empty one.
        A dataset given as a bare string is returned as-is. ``None`` is
        returned when no result provides a usable name.
    """
    for result in results or []:
        if not isinstance(result, dict):
            continue
        dataset = result.get("dataset")
        # Some cards may give the dataset as a plain string rather than a mapping.
        if isinstance(dataset, str) and dataset:
            return dataset
        if isinstance(dataset, dict):
            name = dataset.get("name")
            # Keep looking when this entry has no name; a later result may have one.
            if name:
                return name
    return None


def is_valid_card_data(card_data):
    """Check a card's ``model-index`` and pull out accuracy and dataset name.

    The card is valid when ``model-index`` is a non-empty list, every entry
    has a non-empty ``results`` list, and every result is a mapping that
    contains the ``task``, ``dataset`` and ``metrics`` keys. Anything
    malformed (wrong container types included) is reported as invalid rather
    than raising.

    Args:
        card_data: parsed model-card metadata, expected to be a ``dict``.

    Returns:
        ``(True, accuracy, dataset_name)`` for a valid card, where the two
        values come from ``extract_accuracy`` and ``extract_dataset`` applied
        to the results of all entries; ``(False, None, None)`` otherwise.
    """
    invalid = (False, None, None)
    necessary_keys = ("task", "dataset", "metrics")

    if not isinstance(card_data, dict):
        return invalid
    model_index = card_data.get("model-index")
    if not isinstance(model_index, list) or not model_index:
        return invalid

    # Gather results from every entry so the extractors see all of them,
    # not just the results of whichever entry the loop visited last.
    all_results = []
    for entry in model_index:
        results = entry.get("results") if isinstance(entry, dict) else None
        if not isinstance(results, list) or not results:
            return invalid
        for result in results:
            if not isinstance(result, dict):
                return invalid
            if not all(key in result for key in necessary_keys):
                return invalid
        all_results.extend(results)
    return True, extract_accuracy(all_results), extract_dataset(all_results)


In [45]:
# Request image-classification models from the Hub, asking for card metadata too.
models = hf_api.list_models(task="image-classification", cardData=True)

processed_models, accuracy_values, datasets = [], [], []
for model in models:
    if not model.card_data:
        continue
    # Re-parse the card metadata from its string form with the YAML loader
    # (presumably str() renders it as YAML text — confirm against huggingface_hub).
    card_data = yaml.safe_load(str(model.card_data))
    is_valid, accuracy, dataset = is_valid_card_data(card_data)
    if not is_valid:
        continue
    # Keep the parsed metadata on the model object itself.
    model.card_data = card_data
    processed_models.append(model)
    accuracy_values.append(accuracy or 0)
    datasets.append(dataset or None)

# One row per accepted model, highest accuracy first.
df = pd.DataFrame(
    {'model': processed_models, 'accuracy': accuracy_values, 'dataset': datasets}
).sort_values(by='accuracy', ascending=False)


Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not

In [47]:
# Write the sorted table to disk in two formats: CSV and pickle.
csv_path, pkl_path = "img_cls_models.csv", "img_cls_models.pkl"
df.to_csv(csv_path)
df.to_pickle(pkl_path)

In [50]:
# One sub-frame per dataset name, with the grouping column dropped from each.
# Rows whose dataset is missing are left out (groupby defaults to dropna=True).
grouped_dfs = dict(
    (name, group.drop(columns='dataset')) for name, group in df.groupby('dataset')
)

# List the dataset names that ended up with a group.
for dataset_name, data in grouped_dfs.items():
    print(f"Dataset: {dataset_name}")


Dataset: ./data/games-ad-0306
Dataset: ./mgr/dataset/HF_DS
Dataset: 1aurent/Kather-texture-2016
Dataset: 1aurent/LC25000
Dataset: Beans
Dataset: Bird Species
Dataset: CIFAR-10
Dataset: CIFAR-100
Dataset: CIFAR100
Dataset: Camelyon16[Meta]
Dataset: CelebA-faces
Dataset: Cifar10
Dataset: Cifar100
Dataset: Dataset_points_durs_v1
Dataset: Dog Food
Dataset: Falah/Alzheimer_MRI
Dataset: FastJobs/Visual_Emotional_Analysis
Dataset: HumanEval
Dataset: Human_Action_Recognition
Dataset: Indian-Food-Images
Dataset: JLB-JLB/seizure_eeg_greyscale_224x224_6secWindow
Dataset: KTH-TIPS2-b
Dataset: MNIST
Dataset: Matthijs/snacks
Dataset: New Plant Diseases Dataset
Dataset: RiniPL/Dementia_Dataset
Dataset: SVHN
Dataset: TCGA-BRCA
Dataset: action_class
Dataset: agent_action_class
Dataset: amazonian_fish_classifier_data
Dataset: arabic-handwritten-characters
Dataset: bazyl/GTSRB
Dataset: beans
Dataset: bird-data
Dataset: bird_species_dataset
Dataset: blurry images
Dataset: brain-tumor-collection
Dataset: b

In [13]:
# Look models up by name and show the datasets declared on the first match's card.
# Indexing [0] raises IndexError when the search returns nothing.
cheese = list(hf_api.list_models(model_name="yolov8n-chest-xray-classification", cardData=True))
cheese[0].card_data.datasets

['keremberke/chest-xray-classification']