In [1]:
import os

from dotenv import dotenv_values

# Resolve the Hugging Face access token used by the Hub client.
# Lookup order: the local .env file first, then the process environment, so a
# token exported in the shell still works when .env does not define HF_TOKEN.
_dotenv = dotenv_values(".env")
if "HF_TOKEN" in _dotenv:
    hf_token = _dotenv["HF_TOKEN"]
elif "HF_TOKEN" in os.environ:
    hf_token = os.environ["HF_TOKEN"]
else:
    # Same exception type as a plain dict lookup, but with an actionable message.
    raise KeyError(
        "HF_TOKEN is not defined in .env or in the process environment"
    )

In [2]:
from huggingface_hub import HfApi, list_models

# Hub client reused by the following cells.
# - endpoint: the public Hub URL; can be a Private Hub endpoint instead.
# - token: passed in explicitly; it is not persisted on the machine.
hf_api = HfApi(endpoint="https://huggingface.co", token=hf_token)


In [3]:
import pandas as pd

# Ask the Hub client for the model listing and tabulate it, one row per model.
models = hf_api.list_models()
df = pd.DataFrame(data=models)

# Keep two on-disk copies of the table: a CSV and a pickle.
# to_csv is called with its defaults, so the row index is written as well.
df.to_csv(path_or_buf="models.csv")
df.to_pickle(path="models.pkl")

In [4]:
import pandas as pd

# Reload the model table from models.pkl, the pickle written when `df` was saved.
# Pickle files can execute code on load: only read files produced locally.
data = pd.read_pickle(filepath_or_buffer="models.pkl")
data

Unnamed: 0,id,author,sha,created_at,last_modified,private,gated,disabled,downloads,likes,...,pipeline_tag,mask_token,card_data,widget_data,model_index,config,transformers_info,siblings,spaces,safetensors
0,albert/albert-base-v1,,,2022-03-02 23:29:04+00:00,,False,,,15973,6,...,fill-mask,,,,,,,,,
1,albert/albert-base-v2,,,2022-03-02 23:29:04+00:00,,False,,,2704000,88,...,fill-mask,,,,,,,,,
2,albert/albert-large-v1,,,2022-03-02 23:29:04+00:00,,False,,,1764,2,...,fill-mask,,,,,,,,,
3,albert/albert-large-v2,,,2022-03-02 23:29:04+00:00,,False,,,423476,13,...,fill-mask,,,,,,,,,
4,albert/albert-xlarge-v1,,,2022-03-02 23:29:04+00:00,,False,,,1372,3,...,fill-mask,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620295,happylayers/sc17,,,2024-04-25 05:40:01+00:00,,False,,,0,0,...,text-generation,,,,,,,,,
620296,wsincos/sd-class-butterflies-64,,,2024-04-25 05:41:11+00:00,,False,,,0,0,...,,,,,,,,,,
620297,unclecode/tinyllama-function-call-16bit-250424,,,2024-04-25 05:42:25+00:00,,False,,,0,0,...,text-generation,,,,,,,,,
620298,koesn/Mistral-7B-Instruct-v0.2-GGUF,,,2024-04-25 05:43:20+00:00,,False,,,0,0,...,,,,,,,,,,


In [5]:
# List models matching the "squad" filter and print the first hit as a sample.
# NOTE(review): the printed record matches on a plain "squad" tag; if the intent
# is "models trained on the squad dataset", a "dataset:squad" filter is
# presumably what is wanted — confirm before relying on this listing.
try:
    # ModelFilter is not used in this cell. It is deprecated upstream and
    # missing from newer huggingface_hub releases, where a hard import would
    # stop the cell with ImportError before the query ever runs.
    from huggingface_hub import ModelFilter
except ImportError:
    ModelFilter = None

datasets = hf_api.list_models(filter="squad")
for dataset in datasets:
    print(dataset)
    break  # one record is enough to inspect the ModelInfo structure

ModelInfo(id='Shavrina/RusEnQA', author='Shavrina', sha='4f3582e6af0c44983206c3099f9d3ee35fc6d350', created_at=datetime.datetime(2022, 3, 2, 23, 29, 5, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2021, 11, 18, 13, 37, 56, tzinfo=datetime.timezone.utc), private=False, gated=False, disabled=None, downloads=0, likes=0, library_name=None, tags=['PyTorch', 'Transformers', 'gpt2', 'squad', 'lm-head', 'casual-lm', 'text2text-generation', 'ru', 'en', 'region:us'], pipeline_tag='text2text-generation', mask_token=None, card_data=None, widget_data=None, model_index=None, config=None, transformers_info=None, siblings=[RepoSibling(rfilename='.gitattributes', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='README.md', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='deepspeed_config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='merges.txt', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='mp_rank_00_model_states.pt', size=None, bl

In [6]:
# Fetch the public list of all datasets from the Hub and tabulate it.
dataset_list = hf_api.list_datasets()
dataset_df = pd.DataFrame(data=dataset_list)

# Store the table on disk as both CSV and pickle.
dataset_df.to_csv(path_or_buf="datasets.csv")
dataset_df.to_pickle(path="datasets.pkl")

In [7]:
# Show the datasets table assembled by the cell that calls list_datasets().
dataset_df

Unnamed: 0,id,author,sha,created_at,last_modified,private,gated,disabled,downloads,likes,paperswithcode_id,tags,card_data,siblings
0,acronym_identification,,15ef643450d589d5883e289ffadeb03563e80a9e,2022-03-02 23:29:22+00:00,2024-01-09 11:39:57+00:00,False,False,False,632,18,acronym-identification,"[task_categories:token-classification, annotat...",,
1,ade_corpus_v2,,4ba01c71687dd7c996597042449448ea312126cf,2022-03-02 23:29:22+00:00,2024-01-09 11:42:58+00:00,False,False,False,2056,23,,"[task_categories:text-classification, task_cat...",,
2,UCLNLP/adversarial_qa,UCLNLP,c2d5f738db1ad21a4126a144dfbb00cb51e0a4a9,2022-03-02 23:29:22+00:00,2023-12-21 14:20:00+00:00,False,False,False,536,32,adversarialqa,"[task_categories:question-answering, task_ids:...",,
3,aeslc,,2305f2e63b68056f9b9037a3805c8c196e0d5581,2022-03-02 23:29:22+00:00,2024-01-09 11:49:13+00:00,False,False,False,2341,10,aeslc,"[task_categories:summarization, annotations_cr...",,
4,afrikaans_ner_corpus,,445834a997dce8b40e1d108638064381de80c497,2022-03-02 23:29:22+00:00,2024-01-09 11:51:47+00:00,False,False,False,346,4,,"[task_categories:token-classification, task_id...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136464,susnatak/Healthcare_17k,susnatak,09dfad6d85e11266d8cc5581b9fa37a700d85aa1,2024-04-25 05:29:58+00:00,2024-04-25 05:30:04+00:00,False,False,False,0,0,,"[croissant, region:us]",,
136465,TobiRobi03/chatbot-finetune,TobiRobi03,fec1f0baf804fdc3546be2be358bb0b6ba1f054b,2024-04-25 05:35:18+00:00,2024-04-25 05:39:54+00:00,False,False,False,0,0,,[region:us],,
136466,japanese-asr/whisper_transcriptions.reazonspee...,japanese-asr,a4a06e2c0994eb0970a85e201d2dcff4dbfda335,2024-04-25 05:38:00+00:00,2024-04-25 05:43:46+00:00,False,False,False,0,0,,"[croissant, region:us]",,
136467,opensporks/stocknewseventssentiment-snes-10,opensporks,e22be961d8976ab54be6033dbe83bb9070a541c1,2024-04-25 05:41:19+00:00,2024-04-25 05:41:25+00:00,False,False,False,0,0,,"[license:cc0-1.0, croissant, region:us]",,
