<a href="https://colab.research.google.com/github/hamidahoderinwale/model_metadata_analyses/blob/main/scraping_hf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Script 3
!pip install huggingface_hub
!pip install adapters
import requests
import pandas as pd
import datetime
import json
import csv
from huggingface_hub import HfApi
from bs4 import BeautifulSoup
from adapters import list_adapters
from huggingface_hub import hf_hub_download
from adapters import AutoAdapterModel
import re



# Initialize API
api = HfApi()

# Function to validate Hugging Face model URL
def validate_hf_model_url(url):
    pattern = r"^https://huggingface.co/([\w\-]+)/([\w\-]+)$"
    match = re.match(pattern, url)
    return match.groups() if match else None

# page with model finetunes
def get_finetuned_models_page(model_org, model_name):
    all_model_links = []  # Store all links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:finetune:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_model_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_model_links

# model card data
def get_model_card(model_id):
    try:
        # Try to download model card if available
        readme_path = hf_hub_download(repo_id=model_id, filename='README.md')
        with open(readme_path, 'r', encoding='utf-8') as f:
            card_content = f.read()
        return card_content
    except Exception as e:
        print(f"  Could not download model card: {str(e)[:100]}...")
        return ""


# Function to get parent model
def get_parent_model(model_url):
    return model_url.split(f"/{model_name}/")[0]

# Truncate metadata
def filter_metadata(json_metadata):
            keys_to_keep = ["modelId", "sha", "tags", "downloads", "pipeline_tag"]
            return {k: json_metadata.get(k) for k in keys_to_keep if k in json_metadata}
            filtered_metadata = filter_metadata(api.model_info(model_id).__dict__)

# Get adapter models
def get_adapter_models_page(model_org, model_name):
    all_adapter_links = []  # Store all adapter links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:adapter:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_adapter_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_adapter_links

# Recursive DFS (depth-first search) for finding fine-tunes
def dfs_finetunes(model_url, visited, depth=0, results=None):
       if results is None:
           results = []

       if model_url in visited:
           return results
       visited.add(model_url)

       validated = validate_hf_model_url(model_url)
       if not validated:
           print(f"Invalid URL skipped: {model_url}")
           model_url = "N/A"
           return results

       model_org, model_name = validated
       model_id = f"{model_org}/{model_name}"


       print(f"\n{'  ' * depth}Fetching metadata for: {model_id}")
       try:
           model_metadata = api.model_info(model_id).__dict__
           json_metadata = json.dumps(model_metadata, default=str)
           model_card = get_model_card(model_id)

       except Exception as e:
           print(f"Error fetching metadata: {e}")
           return results

       finetune_links = get_finetuned_models_page(model_org, model_name)
       # Removing Duplicate Children
       finetune_links = list(set(finetune_links))
       print(f"{'  ' * depth}Found {len(finetune_links)} fine-tunes at depth {depth}.")
       adapter_links = get_adapter_models_page(model_org, model_name)
       print(f"{'  ' * depth}Found {len(adapter_links)} adapter models for {model_id}.")

       results.append({
           "model_id": model_id,
           "card": model_card,
           "metadata": json_metadata,
           "depth": depth,
           "children": finetune_links,
           "children_count": len(finetune_links),
           "adapters": adapter_links,
           "adapters_count": len(adapter_links),
           "parent_model": get_parent_model(model_url)
       })

       for link in finetune_links:
             dfs_finetunes(link, visited, depth + 1, results)
       return results

# Timestamp for the run
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Function to save results as JSON
def save_json(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.json"
    data = {
        "models": results
    }
    with open(filename, "w") as f:
        json.dump(data, f, indent=4, default=str)
    print(f"Results saved to {filename}")

# Function to save results as CSV
''' def save_csv(results, model_name):
    filename = f"{model_name}_{timestamp}_finetunes.csv"
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["model_id", "depth", "children_count", "children", "metadata"])
        writer.writeheader()
        for entry in results:
            # Ensure metadata is a JSON string
            if isinstance(entry["metadata"], dict):
                entry["metadata"] = json.dumps(entry["metadata"], indent=2, default=str)
            # Join children list as a string
            entry["children"] = ", ".join(entry["children"])
            writer.writerow(entry)
    print(f"Results saved to {filename}") '''

# Function to save results as CSV (pandas)
def save_csv(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.csv"
    df = pd.DataFrame(results)
    df.to_csv(filename, index=True)
    print(f"Results saved to {filename}")

# Main execution
if __name__ == "__main__":
    model_url = input("Enter the Hugging Face model URL: ").strip()
    visited = set()
    results = dfs_finetunes(model_url, visited)

    if results:
        model_name = results[0]["model_id"].split("/")[-1]  # Extract model name for file naming
        save_json(results, model_name)
        save_csv(results, model_name)
    else:
        print("No fine-tuned models found.")

'''Links for testing: https://huggingface.co/NousResearch/DeepHermes-3-Mistral-24B-Preview (3 fine-tunes at depth 0, 1 fine-tune at depth 1 for 'AlSamCur123/DeepHermes-3-Mistral-24BContinuedFine')
https://huggingface.co/perplexity-ai/r1-1776 (11 fine-tunes at depth 0)'''

Enter the Hugging Face model URL: https://huggingface.co/deepseek-ai/DeepSeek-R1

Fetching metadata for: deepseek-ai/DeepSeek-R1
Found 297 fine-tunes at depth 0.
Found 121 adapter models for deepseek-ai/DeepSeek-R1.


NameError: name 'model_name' is not defined

In [None]:
# Script 3
!pip install huggingface_hub
!pip install adapters
import requests
import pandas as pd
import datetime
import json
import csv
from huggingface_hub import HfApi
from bs4 import BeautifulSoup
from adapters import list_adapters
from huggingface_hub import hf_hub_download
from adapters import AutoAdapterModel
import re



# Initialize API
api = HfApi()

# Function to validate Hugging Face model URL
def validate_hf_model_url(url):
    pattern = r"^https://huggingface.co/([\w\-]+)/([\w\-]+)$"
    match = re.match(pattern, url)
    return match.groups() if match else None

# page with model finetunes
def get_finetuned_models_page(model_org, model_name):
    all_model_links = []  # Store all links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:finetune:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_model_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_model_links

# model card data
def get_model_card(model_id):
    try:
        # Try to download model card if available
        readme_path = hf_hub_download(repo_id=model_id, filename='README.md')
        with open(readme_path, 'r', encoding='utf-8') as f:
            card_content = f.read()
        return card_content
    except Exception as e:
        print(f"  Could not download model card: {str(e)[:100]}...")
        return ""


# Function to get parent model
def get_parent_model(model_url):
    return model_url.split(f"/{model_name}/")[0]

# Truncate metadata
def filter_metadata(json_metadata):
            keys_to_keep = ["modelId", "sha", "tags", "downloads", "pipeline_tag"]
            return {k: json_metadata.get(k) for k in keys_to_keep if k in json_metadata}
            filtered_metadata = filter_metadata(api.model_info(model_id).__dict__)

# Get adapter models
def get_adapter_models_page(model_org, model_name):
    all_adapter_links = []  # Store all adapter links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:adapter:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_adapter_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_adapter_links

# Recursive DFS (depth-first search) for finding fine-tunes
def dfs_finetunes(model_url, visited, depth=0, results=None):
       if results is None:
           results = []

       if model_url in visited:
           return results
       visited.add(model_url)

       validated = validate_hf_model_url(model_url)
       if not validated:
           print(f"Invalid URL skipped: {model_url}")
           model_url = "N/A"
           return results

       model_org, model_name = validated
       model_id = f"{model_org}/{model_name}"


       print(f"\n{'  ' * depth}Fetching metadata for: {model_id}")
       try:
           model_metadata = api.model_info(model_id).__dict__
           json_metadata = json.dumps(model_metadata, default=str)
           model_card = get_model_card(model_id)

       except Exception as e:
           print(f"Error fetching metadata: {e}")
           return results

       finetune_links = get_finetuned_models_page(model_org, model_name)
       # Removing Duplicate Children
       finetune_links = list(set(finetune_links))
       print(f"{'  ' * depth}Found {len(finetune_links)} fine-tunes at depth {depth}.")
       adapter_links = get_adapter_models_page(model_org, model_name)
       print(f"{'  ' * depth}Found {len(adapter_links)} adapter models for {model_id}.")

       results.append({
           "model_id": model_id,
           "card": model_card,
           "metadata": json_metadata,
           "depth": depth,
           "children": finetune_links,
           "children_count": len(finetune_links),
           "adapters": adapter_links,
           "adapters_count": len(adapter_links),
           "parent_model": get_parent_model(model_url)
       })

       for link in finetune_links:
             dfs_finetunes(link, visited, depth + 1, results)
       return results

# Timestamp for the run
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Function to save results as JSON
def save_json(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.json"
    data = {
        "models": results
    }
    with open(filename, "w") as f:
        json.dump(data, f, indent=4, default=str)
    print(f"Results saved to {filename}")

# Function to save results as CSV
''' def save_csv(results, model_name):
    filename = f"{model_name}_{timestamp}_finetunes.csv"
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["model_id", "depth", "children_count", "children", "metadata"])
        writer.writeheader()
        for entry in results:
            # Ensure metadata is a JSON string
            if isinstance(entry["metadata"], dict):
                entry["metadata"] = json.dumps(entry["metadata"], indent=2, default=str)
            # Join children list as a string
            entry["children"] = ", ".join(entry["children"])
            writer.writerow(entry)
    print(f"Results saved to {filename}") '''

# Function to save results as CSV (pandas)
def save_csv(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.csv"
    df = pd.DataFrame(results)
    df.to_csv(filename, index=True)
    print(f"Results saved to {filename}")

# Main execution
if __name__ == "__main__":
    model_url = input("Enter the Hugging Face model URL: ").strip()
    visited = set()
    results = dfs_finetunes(model_url, visited)

    if results:
        model_name = results[0]["model_id"].split("/")[-1]  # Extract model name for file naming
        save_json(results, model_name)
        save_csv(results, model_name)
    else:
        print("No fine-tuned models found.")

'''Links for testing: https://huggingface.co/NousResearch/DeepHermes-3-Mistral-24B-Preview (3 fine-tunes at depth 0, 1 fine-tune at depth 1 for 'AlSamCur123/DeepHermes-3-Mistral-24BContinuedFine')
https://huggingface.co/perplexity-ai/r1-1776 (11 fine-tunes at depth 0)'''

Enter the Hugging Face model URL: https://huggingface.co/deepseek-ai/DeepSeek-R1

Fetching metadata for: deepseek-ai/DeepSeek-R1
Found 297 fine-tunes at depth 0.
Found 121 adapter models for deepseek-ai/DeepSeek-R1.


NameError: name 'model_name' is not defined

In [None]:
# Script 3
!pip install huggingface_hub
!pip install adapters
import requests
import pandas as pd
import datetime
import json
import csv
from huggingface_hub import HfApi
from bs4 import BeautifulSoup
from adapters import list_adapters
from huggingface_hub import hf_hub_download
from adapters import AutoAdapterModel
import re



# Initialize API
api = HfApi()

# Function to validate Hugging Face model URL
def validate_hf_model_url(url):
    pattern = r"^https://huggingface.co/([\w\-]+)/([\w\-]+)$"
    match = re.match(pattern, url)
    return match.groups() if match else None

# page with model finetunes
def get_finetuned_models_page(model_org, model_name):
    all_model_links = []  # Store all links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:finetune:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_model_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_model_links

# model card data
def get_model_card(model_id):
    try:
        # Try to download model card if available
        readme_path = hf_hub_download(repo_id=model_id, filename='README.md')
        with open(readme_path, 'r', encoding='utf-8') as f:
            card_content = f.read()
        return card_content
    except Exception as e:
        print(f"  Could not download model card: {str(e)[:100]}...")
        return ""


# Function to get parent model
def get_parent_model(model_url):
    return model_url.split(f"/{model_name}/")[0]

# Truncate metadata
def filter_metadata(json_metadata):
            keys_to_keep = ["modelId", "sha", "tags", "downloads", "pipeline_tag"]
            return {k: json_metadata.get(k) for k in keys_to_keep if k in json_metadata}
            filtered_metadata = filter_metadata(api.model_info(model_id).__dict__)

# Get adapter models
def get_adapter_models_page(model_org, model_name):
    all_adapter_links = []  # Store all adapter links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:adapter:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_adapter_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_adapter_links

# Recursive DFS (depth-first search) for finding fine-tunes
def dfs_finetunes(model_url, visited, depth=0, results=None):
       if results is None:
           results = []

       if model_url in visited:
           return results
       visited.add(model_url)

       validated = validate_hf_model_url(model_url)
       if not validated:
           print(f"Invalid URL skipped: {model_url}")
           model_url = "N/A"
           return results

       model_org, model_name = validated
       model_id = f"{model_org}/{model_name}"


       print(f"\n{'  ' * depth}Fetching metadata for: {model_id}")
       try:
           model_metadata = api.model_info(model_id).__dict__
           json_metadata = json.dumps(model_metadata, default=str)
           model_card = get_model_card(model_id)

       except Exception as e:
           print(f"Error fetching metadata: {e}")
           return results

       finetune_links = get_finetuned_models_page(model_org, model_name)
       # Removing Duplicate Children
       finetune_links = list(set(finetune_links))
       print(f"{'  ' * depth}Found {len(finetune_links)} fine-tunes at depth {depth}.")
       adapter_links = get_adapter_models_page(model_org, model_name)
       print(f"{'  ' * depth}Found {len(adapter_links)} adapter models for {model_id}.")

       results.append({
           "model_id": model_id,
           "card": model_card,
           "metadata": json_metadata,
           "depth": depth,
           "children": finetune_links,
           "children_count": len(finetune_links),
           "adapters": adapter_links,
           "adapters_count": len(adapter_links),
           "parent_model": get_parent_model(model_url)
       })

       for link in finetune_links:
             dfs_finetunes(link, visited, depth + 1, results)
       return results

# Timestamp for the run
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Function to save results as JSON
def save_json(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.json"
    data = {
        "models": results
    }
    with open(filename, "w") as f:
        json.dump(data, f, indent=4, default=str)
    print(f"Results saved to {filename}")

# Function to save results as CSV
''' def save_csv(results, model_name):
    filename = f"{model_name}_{timestamp}_finetunes.csv"
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["model_id", "depth", "children_count", "children", "metadata"])
        writer.writeheader()
        for entry in results:
            # Ensure metadata is a JSON string
            if isinstance(entry["metadata"], dict):
                entry["metadata"] = json.dumps(entry["metadata"], indent=2, default=str)
            # Join children list as a string
            entry["children"] = ", ".join(entry["children"])
            writer.writerow(entry)
    print(f"Results saved to {filename}") '''

# Function to save results as CSV (pandas)
def save_csv(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.csv"
    df = pd.DataFrame(results)
    df.to_csv(filename, index=True)
    print(f"Results saved to {filename}")

# Main execution
if __name__ == "__main__":
    model_url = input("Enter the Hugging Face model URL: ").strip()
    visited = set()
    results = dfs_finetunes(model_url, visited)

    if results:
        model_name = results[0]["model_id"].split("/")[-1]  # Extract model name for file naming
        save_json(results, model_name)
        save_csv(results, model_name)
    else:
        print("No fine-tuned models found.")

'''Links for testing: https://huggingface.co/NousResearch/DeepHermes-3-Mistral-24B-Preview (3 fine-tunes at depth 0, 1 fine-tune at depth 1 for 'AlSamCur123/DeepHermes-3-Mistral-24BContinuedFine')
https://huggingface.co/perplexity-ai/r1-1776 (11 fine-tunes at depth 0)'''

Enter the Hugging Face model URL: https://huggingface.co/deepseek-ai/DeepSeek-R1

Fetching metadata for: deepseek-ai/DeepSeek-R1
Found 297 fine-tunes at depth 0.
Found 121 adapter models for deepseek-ai/DeepSeek-R1.


NameError: name 'model_name' is not defined

# Script 1
Takes in a (validated) model url and output its metadata

In [37]:
# Script 1: takes input model url, validates url, and gives model metadata
!pip install validators
from huggingface_hub import HfApi
import huggingface_hub as hf
import validators
import json
import csv

hf_api = HfApi()

def input_url():
    while True:
        input_model_url = input("Enter model URL: ")
        print(f"You entered: {input_model_url}")

        if validators.url(input_model_url) and "huggingface.co" in input_model_url:
                # Extract the model ID from the URL
                model_id = input_model_url.split("huggingface.co/")[-1]
                model_info = hf_api.model_info(model_id) # Get model info: https://huggingface.co/docs/huggingface_hub/v0.29.2/en/package_reference/hf_api#huggingface_hub.ModelInfo
                model_card = hf.hf_hub_download(models[0].modelId, 'README.md')

                metadata = { # Moved metadata assignment inside the if block
                    "model_id": model_id,
                    "model_info": model_info.__dict__
                }
                return metadata # Return is now inside if block
                json_output = json.dumps(metadata, indent=4, default=str)
                with open('model_info.json', 'w') as json_file:
                 json_file.write(json_output)
                print(json_output)
        else:
            print("Invalid URL. Please enter a valid Hugging Face model URL.")
            # Optionally: You could return None or an empty dictionary here
            # to indicate an invalid URL
# To test: https://huggingface.co/deepseek-ai/DeepSeek-R1

input_url()


Collecting validators
  Downloading validators-0.34.0-py3-none-any.whl.metadata (3.8 kB)
Downloading validators-0.34.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m781.3 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: validators
Successfully installed validators-0.34.0
Enter model URL: https://huggingface.co/deepseek-ai/DeepSeek-R1
You entered: https://huggingface.co/deepseek-ai/DeepSeek-R1


TypeError: 'generator' object is not subscriptable

# Script 2
Takes in a (validated) model and outputs the children models/fine-tunes

In [None]:
# Script 2
  # 1. Take link as input (format check). This is the "main model"
  # 2. Give the link to the page with the fine-tunes for the inputted model

from huggingface_hub import HfApi
import requests
from bs4 import BeautifulSoup
import re

# Initialize API
api = HfApi()

# Function to validate Hugging Face model URL
def validate_hf_model_url(url):
    pattern = r"^https://huggingface.co/([\w\-]+)/([\w\-]+)$"
    match = re.match(pattern, url)
    if match:
        return match.groups()  # Returns (org/user, model_name)
    return None

# Function to find fine-tuned models
def get_finetuned_models_page(model_org, model_name):
    search_url = f"https://huggingface.co/models?search={model_name}"
    response = requests.get(search_url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        model_links = [
            a["href"] for a in soup.find_all("a", href=True)
            if model_name.lower() in a["href"].lower()
        ]
        return [f"https://huggingface.co{link}" for link in model_links if model_org not in link]

    return []

# Main execution
model_url = input("Enter the Hugging Face model URL: ").strip()

validated = validate_hf_model_url(model_url)
if validated:
    org, model_name = validated
    finetune_links = get_finetuned_models_page(org, model_name)

    if finetune_links:
        print("Fine-tuned models found:")
        for link in finetune_links:
            print(link)
    else:
        print("No fine-tuned models found for this model.")
else:
    print("Invalid Hugging Face model URL format.")



# Script 3
## Search steps overview
- `dfs_finetunes` we take the `model_url` as input. Alternatively, we can add this var as an argument.
- We go layer-by-layer and find the children of the current model (i.e. the fine-tunes of a model)
- We call the `dfs_funetunes` function recursively and store the models that have been "visited" to avoid duplicates.
- We have a dictionary of information that we store about the "current model" and have the information stored in respective columns (model_id, card, metadata, depth, children (list of model links), children count).
- We have a `results` list that has the information about all the models and their fine-tunes.

In [5]:
# Script 3
!pip install huggingface_hub
!pip install adapters
!pip install backoff # handle rate-limiting
import requests
import pandas as pd
import datetime
import json
import csv
from huggingface_hub import HfApi
from bs4 import BeautifulSoup
from adapters import list_adapters
from huggingface_hub import hf_hub_download
from adapters import AutoAdapterModel
import re
import backoff



# Initialize API
api = HfApi()

# Function to validate Hugging Face model URL
def validate_hf_model_url(url):
    pattern = r"^https://huggingface.co/([\w\-]+)/([\w\-]+)$"
    match = re.match(pattern, url)
    return match.groups() if match else None

# page with model finetunes
def get_finetuned_models_page(model_org, model_name):
    all_model_links = []  # Store all links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:finetune:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_model_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_model_links

# model card data
@backoff.on_exception(backoff.expo,
                      requests.exceptions.RequestException,
                      max_tries=5)
def get_model_card(model_id):
    try:
        # Try to download model card if available
        readme_path = hf_hub_download(repo_id=model_id, filename='README.md')
        with open(readme_path, 'r', encoding='utf-8') as f:
            card_content = f.read()
        return card_content
    except Exception as e:
        print(f"  Could not download model card: {str(e)[:100]}...")
        return ""

# Truncate metadata
def filter_metadata(json_metadata):
            keys_to_keep = ["modelId", "sha", "tags", "downloads", "pipeline_tag"]
            return {k: json_metadata.get(k) for k in keys_to_keep if k in json_metadata}
            filtered_metadata = filter_metadata(api.model_info(model_id).__dict__)

# Get adapter models
@backoff.on_exception(backoff.expo,
                      requests.exceptions.RequestException,
                      max_tries=5)
def get_adapter_models_page(model_org, model_name):
    all_adapter_links = []  # Store all adapter links across pages
    page_num = 0
    while True:
        search_url = f"https://huggingface.co/models?other=base_model:adapter:{model_org}/{model_name}&p={page_num}"
        response = requests.get(search_url)
        if response.status_code != 200:
            break  # Exit if page not found

        soup = BeautifulSoup(response.text, "html.parser")
        model_divs = soup.find_all("div", class_="w-full truncate")
        if not model_divs:
            break  # Exit if no more models on the page

        for div in model_divs:
            header = div.find("header")
            if header:
                model_link = header.get("title")
                if model_link:
                    all_adapter_links.append(f"https://huggingface.co/{model_link}")

        page_num += 1  # Move to the next page

    return all_adapter_links

# Recursive DFS (depth-first search) for finding fine-tunes
@backoff.on_exception(backoff.expo,
                      (requests.exceptions.RequestException,
                       huggingface_hub.utils.HfHubHTTPError),
                      max_tries=5)
def dfs_finetunes(model_url, visited, depth=0, results=None):
       if results is None:
           results = []

       if model_url in visited:
           return results
       visited.add(model_url)

       validated = validate_hf_model_url(model_url)
       if not validated:
           print(f"Invalid URL skipped: {model_url}")
           model_url = "N/A"
           return results

       model_org, model_name = validated
       model_id = f"{model_org}/{model_name}"


       print(f"\n{'  ' * depth}Fetching metadata for: {model_id}")
       try:
           model_metadata = api.model_info(model_id).__dict__
           json_metadata = json.dumps(model_metadata, default=str)
           model_card = get_model_card(model_id)

       except Exception as e:
           print(f"Error fetching metadata: {e}")
           return results

       finetune_links = get_finetuned_models_page(model_org, model_name)
       # Removing Duplicate Children
       finetune_links = list(set(finetune_links))
       print(f"{'  ' * depth}Found {len(finetune_links)} fine-tunes at depth {depth}.")
       adapter_links = get_adapter_models_page(model_org, model_name)
       print(f"{'  ' * depth}Found {len(adapter_links)} adapter models for {model_id}.")

       results.append({
           "model_id": model_id,
           "card": model_card,
           "metadata": json_metadata,
           "depth": depth,
           "children": finetune_links,
           "children_count": len(finetune_links),
           "adapters": adapter_links,
           "adapters_count": len(adapter_links)
       })

       for link in finetune_links:
             dfs_finetunes(link, visited, depth + 1, results)
       return results

# Timestamp for the run
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Function to save results as JSON
def save_json(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.json"
    data = {
        "models": results
    }
    with open(filename, "w") as f:
        json.dump(data, f, indent=4, default=str)
    print(f"Results saved to {filename}")

# Function to save results as CSV
''' def save_csv(results, model_name):
    filename = f"{model_name}_{timestamp}_finetunes.csv"
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["model_id", "depth", "children_count", "children", "metadata"])
        writer.writeheader()
        for entry in results:
            # Ensure metadata is a JSON string
            if isinstance(entry["metadata"], dict):
                entry["metadata"] = json.dumps(entry["metadata"], indent=2, default=str)
            # Join children list as a string
            entry["children"] = ", ".join(entry["children"])
            writer.writerow(entry)
    print(f"Results saved to {filename}") '''

# Function to save results as CSV (pandas)
def save_csv(results, model_name):
    filename = f"{model_name}_finetunes_{timestamp}.csv"
    df = pd.DataFrame(results)
    df.to_csv(filename, index=True)
    print(f"Results saved to {filename}")

# Main execution
@backoff.on_exception(backoff.expo,
                      ValueError,
                      max_time=lookup_max_time)

if __name__ == "__main__":
    model_url = input("Enter the Hugging Face model URL: ").strip()
    visited = set()
    results = dfs_finetunes(model_url, visited)

    if results:
        model_name = results[0]["model_id"].split("/")[-1]  # Extract model name for file naming
        save_json(results, model_name)
        save_csv(results, model_name)
    else:
        print("No fine-tuned models found.")

'''Links for testing: https://huggingface.co/NousResearch/DeepHermes-3-Mistral-24B-Preview (3 fine-tunes at depth 0, 1 fine-tune at depth 1 for 'AlSamCur123/DeepHermes-3-Mistral-24BContinuedFine')
https://huggingface.co/perplexity-ai/r1-1776 (11 fine-tunes at depth 0)'''

Enter the Hugging Face model URL: https://huggingface.co/deepseek-ai/DeepSeek-R1

Fetching metadata for: deepseek-ai/DeepSeek-R1
Found 297 fine-tunes at depth 0.
Found 121 adapter models for deepseek-ai/DeepSeek-R1.

  Fetching metadata for: nvidia/DeepSeek-R1-FP4


README.md:   0%|          | 0.00/4.94k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for nvidia/DeepSeek-R1-FP4.

  Fetching metadata for: harshw030/sameeraAI


README.md:   0%|          | 0.00/284 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for harshw030/sameeraAI.

  Fetching metadata for: maersee3423423/statuetka


README.md:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for maersee3423423/statuetka.

  Fetching metadata for: Duckets/Duckbot1


README.md:   0%|          | 0.00/209 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Duckets/Duckbot1.

  Fetching metadata for: Prarabdha/law_gpt


README.md:   0%|          | 0.00/5.25k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Prarabdha/law_gpt.

  Fetching metadata for: Kelly70/Kelly


README.md:   0%|          | 0.00/277 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Kelly70/Kelly.

  Fetching metadata for: Szilard12/UNITY


README.md:   0%|          | 0.00/127 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Szilard12/UNITY.

  Fetching metadata for: desmond-initiative/news_api_context


README.md:   0%|          | 0.00/203 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for desmond-initiative/news_api_context.

  Fetching metadata for: raajveers/youtube-title-gen


README.md:   0%|          | 0.00/5.28k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for raajveers/youtube-title-gen.

  Fetching metadata for: CynthiaAAAA/deepseek-chat


README.md:   0%|          | 0.00/13.9k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for CynthiaAAAA/deepseek-chat.

  Fetching metadata for: clgingeniero/sammarty


README.md:   0%|          | 0.00/162 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for clgingeniero/sammarty.

  Fetching metadata for: ashad846004/DeepSeek-R1-Medical-COT


README.md:   0%|          | 0.00/4.67k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for ashad846004/DeepSeek-R1-Medical-COT.

  Fetching metadata for: lorenzzzo/lorezAI


README.md:   0%|          | 0.00/137 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for lorenzzzo/lorezAI.

  Fetching metadata for: sarthak156/anichat


README.md:   0%|          | 0.00/211 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for sarthak156/anichat.

  Fetching metadata for: vataAiTech/songSystem


README.md:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for vataAiTech/songSystem.

  Fetching metadata for: YooJeahkhn/YooJeahkhn


README.md:   0%|          | 0.00/117 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for YooJeahkhn/YooJeahkhn.

  Fetching metadata for: Fr0sT-FLAB/SolidityGPT


README.md:   0%|          | 0.00/933 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Fr0sT-FLAB/SolidityGPT.

  Fetching metadata for: Pim-mobile/Our-Pim


README.md:   0%|          | 0.00/248 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Pim-mobile/Our-Pim.

  Fetching metadata for: Al-rahman/Deepseek


README.md:   0%|          | 0.00/266 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Al-rahman/Deepseek.

  Fetching metadata for: jatin183/Celci


README.md:   0%|          | 0.00/95.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for jatin183/Celci.

  Fetching metadata for: margerz156/margthink


README.md:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for margerz156/margthink.

  Fetching metadata for: buyun/test-model


README.md:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for buyun/test-model.

  Fetching metadata for: Futuresony/Future_pics_26-01-2025


README.md:   0%|          | 0.00/241 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Futuresony/Future_pics_26-01-2025.

  Fetching metadata for: drperkybottom/DeepLerting-LLM


README.md:   0%|          | 0.00/171 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for drperkybottom/DeepLerting-LLM.

  Fetching metadata for: Minnus/rtrancit


README.md:   0%|          | 0.00/213 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Minnus/rtrancit.

  Fetching metadata for: chitdev/deepseek-r1-distill-7b


README.md:   0%|          | 0.00/123 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for chitdev/deepseek-r1-distill-7b.

  Fetching metadata for: usersomethingelze/birdinyourear


README.md:   0%|          | 0.00/175 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for usersomethingelze/birdinyourear.

  Fetching metadata for: Tackit/Flensburg


README.md:   0%|          | 0.00/202 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Tackit/Flensburg.

  Fetching metadata for: FernDelga/CorpoBotdelFer


README.md:   0%|          | 0.00/180 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for FernDelga/CorpoBotdelFer.

  Fetching metadata for: devayanihodgir/Resume_Analyzer


README.md:   0%|          | 0.00/218 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for devayanihodgir/Resume_Analyzer.

  Fetching metadata for: devl-8980-sn/india_legal_QA_deepseek


README.md:   0%|          | 0.00/235 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for devl-8980-sn/india_legal_QA_deepseek.

  Fetching metadata for: xiaoyuboi/test-model


README.md:   0%|          | 0.00/2.00k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for xiaoyuboi/test-model.

  Fetching metadata for: niloyda/AnythingChatBot


README.md:   0%|          | 0.00/278 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for niloyda/AnythingChatBot.

  Fetching metadata for: saleh1977/nexta-9101


README.md:   0%|          | 0.00/489 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for saleh1977/nexta-9101.

  Fetching metadata for: ayeshawtahir/pharmacopeia


README.md:   0%|          | 0.00/201 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for ayeshawtahir/pharmacopeia.

  Fetching metadata for: sarvar3697/sarvar_2


README.md:   0%|          | 0.00/335 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for sarvar3697/sarvar_2.

  Fetching metadata for: feitap/exp


README.md:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for feitap/exp.

  Fetching metadata for: marlono/test


README.md:   0%|          | 0.00/133 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for marlono/test.

  Fetching metadata for: Adamastor/bully


README.md:   0%|          | 0.00/119 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Adamastor/bully.

  Fetching metadata for: zain10000/ChatBot


README.md:   0%|          | 0.00/5.26k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for zain10000/ChatBot.

  Fetching metadata for: karim8955/mate


README.md:   0%|          | 0.00/5.50k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for karim8955/mate.

  Fetching metadata for: usamaaleem99tech/DeepSeek-R1-Medical


README.md:   0%|          | 0.00/275 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for usamaaleem99tech/DeepSeek-R1-Medical.

  Fetching metadata for: rshaikh22/coachcarellm


README.md:   0%|          | 0.00/130 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for rshaikh22/coachcarellm.

  Fetching metadata for: DangChuVM/Model


README.md:   0%|          | 0.00/128 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for DangChuVM/Model.

  Fetching metadata for: YuRiVeRTi/VQ1


README.md:   0%|          | 0.00/5.87k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for YuRiVeRTi/VQ1.

  Fetching metadata for: rehamhisham/saas


README.md:   0%|          | 0.00/248 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for rehamhisham/saas.
Invalid URL skipped: https://huggingface.co/nicogptai/omega.1-2

  Fetching metadata for: guanglian/test


README.md:   0%|          | 0.00/6.12k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for guanglian/test.

  Fetching metadata for: wsxdyzx2025/weigb


README.md:   0%|          | 0.00/267 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for wsxdyzx2025/weigb.

  Fetching metadata for: farypor/seoaigen


README.md:   0%|          | 0.00/115 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for farypor/seoaigen.

  Fetching metadata for: seenutheleo/imdb-model


README.md:   0%|          | 0.00/93.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for seenutheleo/imdb-model.

  Fetching metadata for: silence09/DeepSeek-R1-3layers


README.md:   0%|          | 0.00/2.07k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for silence09/DeepSeek-R1-3layers.

  Fetching metadata for: PARSIS/Moshaver


README.md:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for PARSIS/Moshaver.

  Fetching metadata for: yifan-playground/deepseek-r1


README.md:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for yifan-playground/deepseek-r1.

  Fetching metadata for: curryNI/huaiqing_ml_model


README.md:   0%|          | 0.00/93.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for curryNI/huaiqing_ml_model.
Invalid URL skipped: https://huggingface.co/raulmoraless/Raul.IA

  Fetching metadata for: kkangnom/test


README.md:   0%|          | 0.00/45.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for kkangnom/test.

  Fetching metadata for: huihui-ai/DeepSeek-R1-Pruned-Coder-411B


README.md:   0%|          | 0.00/3.92k [00:00<?, ?B/s]

  Found 2 fine-tunes at depth 1.
  Found 0 adapter models for huihui-ai/DeepSeek-R1-Pruned-Coder-411B.

    Fetching metadata for: mradermacher/DeepSeek-R1-Pruned-Coder-411B-i1-GGUF


README.md:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

    Found 0 fine-tunes at depth 2.
    Found 0 adapter models for mradermacher/DeepSeek-R1-Pruned-Coder-411B-i1-GGUF.

    Fetching metadata for: mradermacher/DeepSeek-R1-Pruned-Coder-411B-GGUF


README.md:   0%|          | 0.00/11.4k [00:00<?, ?B/s]

    Found 0 fine-tunes at depth 2.
    Found 0 adapter models for mradermacher/DeepSeek-R1-Pruned-Coder-411B-GGUF.
Invalid URL skipped: https://huggingface.co/OmarGX/Omar.Gx

  Fetching metadata for: CyrusXtovia/MetLawBot


README.md:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for CyrusXtovia/MetLawBot.

  Fetching metadata for: GeorgeWeasley84/convert-case


README.md:   0%|          | 0.00/130 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for GeorgeWeasley84/convert-case.

  Fetching metadata for: Ai1God/Godboy


README.md:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Ai1God/Godboy.

  Fetching metadata for: GalaxyPoo/Mine


README.md:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for GalaxyPoo/Mine.

  Fetching metadata for: Aspenini/Backwards-AI


README.md:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Aspenini/Backwards-AI.

  Fetching metadata for: tempbggff/test


README.md:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for tempbggff/test.

  Fetching metadata for: Withersen/AIArtCreator


README.md:   0%|          | 0.00/76.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Withersen/AIArtCreator.

  Fetching metadata for: coralgables/crypto


README.md:   0%|          | 0.00/115 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for coralgables/crypto.

  Fetching metadata for: ExplodeMediaG/011_search-model


README.md:   0%|          | 0.00/246 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for ExplodeMediaG/011_search-model.

  Fetching metadata for: Awaiz031/Awaizahmad


README.md:   0%|          | 0.00/241 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Awaiz031/Awaizahmad.

  Fetching metadata for: Yadav009/Aiclothchange


README.md:   0%|          | 0.00/206 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Yadav009/Aiclothchange.

  Fetching metadata for: zedx1/BlueAI


README.md:   0%|          | 0.00/274 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for zedx1/BlueAI.

  Fetching metadata for: SirFestus/Text-To-Text


README.md:   0%|          | 0.00/405 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for SirFestus/Text-To-Text.

  Fetching metadata for: michaelngangom/dummy-bank


README.md:   0%|          | 0.00/183 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for michaelngangom/dummy-bank.

  Fetching metadata for: Albert9527/model-demo


README.md:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Albert9527/model-demo.

  Fetching metadata for: Hamzillo/Lolo


README.md:   0%|          | 0.00/199 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Hamzillo/Lolo.

  Fetching metadata for: Nerker/Rdrffg


README.md:   0%|          | 0.00/140 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Nerker/Rdrffg.

  Fetching metadata for: zonnell/discord


README.md:   0%|          | 0.00/95.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for zonnell/discord.

  Fetching metadata for: Mylamoore040/Myla


README.md:   0%|          | 0.00/301 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Mylamoore040/Myla.

  Fetching metadata for: Lotusaihk/lotusaihk


README.md:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Lotusaihk/lotusaihk.

  Fetching metadata for: Vepa1979/turkmence


README.md:   0%|          | 0.00/188 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Vepa1979/turkmence.

  Fetching metadata for: raghu1155/DeepSeek-R1-Codegeneration-COT


README.md:   0%|          | 0.00/5.41k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for raghu1155/DeepSeek-R1-Codegeneration-COT.

  Fetching metadata for: tornado4651/test


README.md:   0%|          | 0.00/223 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for tornado4651/test.

  Fetching metadata for: Raymondjoe007/thor


README.md:   0%|          | 0.00/247 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Raymondjoe007/thor.

  Fetching metadata for: ManishDipole/Demo


README.md:   0%|          | 0.00/286 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for ManishDipole/Demo.

  Fetching metadata for: lilmos/twins-ai


README.md:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for lilmos/twins-ai.

  Fetching metadata for: FarhanisGoingTomakeaAi/NiteTalkbot


README.md:   0%|          | 0.00/134 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for FarhanisGoingTomakeaAi/NiteTalkbot.

  Fetching metadata for: mradermacher/DeepSeek-R1-GGUF


README.md:   0%|          | 0.00/12.8k [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for mradermacher/DeepSeek-R1-GGUF.

  Fetching metadata for: Dimaswa/openrail


README.md:   0%|          | 0.00/219 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Dimaswa/openrail.

  Fetching metadata for: fematt/telebot


README.md:   0%|          | 0.00/45.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for fematt/telebot.

  Fetching metadata for: Owen14gjqwertkeyboard/LibrarianAI


README.md:   0%|          | 0.00/77.0 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for Owen14gjqwertkeyboard/LibrarianAI.

  Fetching metadata for: yookidz/my-code-Llama


README.md:   0%|          | 0.00/129 [00:00<?, ?B/s]

  Found 0 fine-tunes at depth 1.
  Found 0 adapter models for yookidz/my-code-Llama.

  Fetching metadata for: tonybb815/Tiny
Error fetching metadata: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/models/tonybb815/Tiny

  Fetching metadata for: Haryni/model
Error fetching metadata: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/models/Haryni/model

  Fetching metadata for: Klanik58/Devrim_DSE
Error fetching metadata: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/models/Klanik58/Devrim_DSE

  Fetching metadata for: samaraamfetamina/frai
Error fetching metadata: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/models/samaraamfetamina/frai

  Fetching metadata for: djibhefihnserfnh/vxfvf
Error fetching metadata: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/models/djibhefihnserfnh/vxfvf

  Fetching metadata for: kalleopinheiro/deepseek
Error fetching metadata: 429 

"Links for testing: https://huggingface.co/NousResearch/DeepHermes-3-Mistral-24B-Preview (3 fine-tunes at depth 0, 1 fine-tune at depth 1 for 'AlSamCur123/DeepHermes-3-Mistral-24BContinuedFine')\nhttps://huggingface.co/perplexity-ai/r1-1776 (11 fine-tunes at depth 0)"

# Scripts 4 (Tree Viz)

Iteratively produces model tree datasets when given a list of models.

In [None]:
# 4.1 Phylogenetic tree

!pip install Bio
!pip install pandas
import pandas as pd
import ast
import matplotlib.pyplot as plt
from Bio import Phylo
from Bio.Phylo.BaseTree import Clade, Tree

# Load and prepare tree
df = pd.read_csv("/content/DeepSeek-R1_finetunes_20250408_202441.csv")
df['children'] = df['children'].apply(ast.literal_eval)

def build_phylo_tree_from_dfs(df):
    clade_map = {row['model_id']: Clade(name=row['model_id']) for _, row in df.iterrows()}
    parent_links = {}
    for _, row in df.iterrows():
        parent = row['model_id']
        for child_url in row['children']:
            child = '/'.join(child_url.split("/")[-2:])
            parent_links[child] = parent
    all_models = set(df['model_id'])
    child_models = set(parent_links.keys())
    root_model_id = list(all_models - child_models)[0]
    for child, parent in parent_links.items():
        if child in clade_map and parent in clade_map:
            clade_map[parent].clades.append(clade_map[child])
    return Tree(root=clade_map[root_model_id])

tree = build_phylo_tree_from_dfs(df)

# Plotting with labels on branches
fig = plt.figure(figsize=(24, 36))
ax = fig.add_subplot(1, 1, 1)

# Draw the tree without axes
Phylo.draw(tree, axes=ax, do_show=False)

# Remove all axes elements
ax.set_axis_off()
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)

# Ensure labels are positioned away from branch lines
for label in ax.texts:
    pos = label.get_position()
    label.set_position((pos[0], pos[1]))
    label.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='none', pad=3))

plt.tight_layout()
plt.savefig('clean_phylogenetic_tree.png', dpi=200, bbox_inches='tight')
plt.show()

In [None]:
# 4.1 Networkx (in progress)
!apt install graphviz libgraphviz-dev
!pip install pygraphviz
import pandas as pd
import ast
import matplotlib.pyplot as plt
import networkx as nx

# Load and prepare the dataset
df = pd.read_csv("/content/DeepSeek-R1_finetunes_20250408_202441.csv")
df['children'] = df['children'].apply(ast.literal_eval)

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges
for _, row in df.iterrows():
    parent = row['model_id']
    G.add_node(parent)
    for child_url in row['children']:
        child = '/'.join(child_url.split("/")[-2:])
        G.add_node(child)
        G.add_edge(parent, child)

# Find root node
root = [n for n, d in G.in_degree() if d == 0][0]

# Create a hierarchical layout
pos = nx.nx_agraph.graphviz_layout(G, prog='dot', root=root)

# Create figure
plt.figure(figsize=(16, 24))

# Draw nodes
nx.draw_networkx_nodes(G, pos, node_size=10, node_color='white', edgecolors='black')

# Draw edges
nx.draw_networkx_edges(G, pos, arrows=False)

# Draw labels, rotating only non-root nodes
label_pos = {k: (v[0], v[1]) for k, v in pos.items()}
for node, (x, y) in label_pos.items():
    if node != root:  # Rotate labels for non-root nodes
        plt.text(x, y, s=node, rotation=90, ha='center', va='center', fontsize=9)
    else:  # Keep root label horizontal
        plt.text(x, y, s=node, ha='center', va='center', fontsize=9)

# Remove axes
plt.axis('off')
plt.tight_layout()

# Save the figure
plt.savefig("mistral_finetune_tree_networkx.png", dpi=150, bbox_inches='tight')
plt.show()

print("Tree visualization saved to mistral_finetune_tree_networkx.png")

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
graphviz is already the newest version (2.42.2-6ubuntu0.1).
The following additional packages will be installed:
  libgail-common libgail18 libgtk2.0-0 libgtk2.0-bin libgtk2.0-common
  libgvc6-plugins-gtk librsvg2-common libxdot4
Suggested packages:
  gvfs
The following NEW packages will be installed:
  libgail-common libgail18 libgraphviz-dev libgtk2.0-0 libgtk2.0-bin
  libgtk2.0-common libgvc6-plugins-gtk librsvg2-common libxdot4
0 upgraded, 9 newly installed, 0 to remove and 30 not upgraded.
Need to get 2,434 kB of archives.
After this operation, 7,681 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libgtk2.0-common all 2.24.33-2ubuntu2.1 [125 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libgtk2.0-0 amd64 2.24.33-2ubuntu2.1 [2,038 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libgail18 

FileNotFoundError: [Errno 2] No such file or directory: '/content/DeepSeek-R1_finetunes_20250408_202441.csv'

In [42]:
# 4.3 library (in progress)
!pip install ete3
!pip install tree
!pip install PyQt5
import pandas as pd
import ast
from ete3 import Tree, faces, AttrFace, TreeStyle


# Load and prepare the dataset
df = pd.read_csv("/content/DeepSeek-R1_finetunes_20250408_202441.csv")
df['children'] = df['children'].apply(ast.literal_eval)

# Function to build the ete3 tree recursively
def build_ete3_tree(df, parent_node=None, parent_name=None):
    if parent_node is None:
        # Create the root node if it's the first call
        root_name = df.loc[df['depth'] == 0, 'model_id'].iloc[0]
        tree = Tree(name=root_name)
        parent_node = tree
    else:
        tree = parent_node  # Continue building the existing tree

    # Find children of the current parent
    children_rows = df.loc[df['model_id'] == parent_name]
    if not children_rows.empty:
        children_urls = children_rows['children'].iloc[0]
        for child_url in children_urls:
            child_name = '/'.join(child_url.split("/")[-2:])
            child_node = tree.add_child(name=child_name)
            build_ete3_tree(df, child_node, child_name)  # Recursive call
    return tree

# Build the tree
tree = build_ete3_tree(df)

# Style the tree
ts.show_leaf_name = False  # Hide leaf names
ts.mode = "c"  # Circular tree layout
ts.root_opening_factor = 1  # Adjust root opening for circular layout

# Style the nodes
nst = NodeStyle()
nst["size"] = 0  # Hide node circles
nst["hz_line_width"] = 2
nst["vt_line_width"] = 2

for n in tree.traverse():
    n.set_style(nst)
    # Add model name as a branch label
    name_face = TextFace(n.name, fsize=10, fgcolor="black")
    n.add_face(name_face, column=0, position="branch-right")

# Render and save the tree
tree.render("mistral_finetune_tree_ete3.png", tree_style=ts, dpi=300)
print("Tree visualization saved to mistral_finetune_tree_ete3.png")

Collecting ete3
  Downloading ete3-3.1.3.tar.gz (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ete3
  Building wheel for ete3 (setup.py) ... [?25l[?25hdone
  Created wheel for ete3: filename=ete3-3.1.3-py3-none-any.whl size=2273786 sha256=11e90a6cb410702e112dd5bb1b8cef8bb0a9090cbb05bb8d9a45eb80da1f6665
  Stored in directory: /root/.cache/pip/wheels/dd/a8/60/0a29caa9f8ceb7316704be63c1578ab13c36668abb646366ac
Successfully built ete3
Installing collected packages: ete3
Successfully installed ete3-3.1.3
Collecting tree
  Downloading Tree-0.2.4.tar.gz (6.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting svgwrite (from tree)
  Downloading svgwrite-1.4.3-py3-none-any.whl.metadata (8.8 kB)
Downloading svgwrite-1.4.3-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0

FileNotFoundError: [Errno 2] No such file or directory: '/content/DeepSeek-R1_finetunes_20250408_202441.csv'

# Script 6

Get top 1000 models from the model-hub ordered by the number of likes

In [None]:
!pip install huggingface_hub
import pandas as pd
import datetime
import csv
from huggingface_hub import HfApi

api = HfApi()
time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# documentation: https://huggingface.co/docs/hub/en/api, gets top 1000 models sorted by likes in descending order
models = api.list_models(sort="likes", direction="-1", limit=1000)

df = pd.DataFrame(models)
models_file = df.to_csv(f"{time}_model_list.csv", index="false")
print("Models saved to CSV")


# Script 7

This script allows you to iteratively run the dfs function for multiple models given as a CSV

In [None]:
# Multi-model runner for HuggingFace model analysis (sampled from top 10 models)
import requests
import pandas as pd
import datetime
import json
import csv
from huggingface_hub import HfApi
from bs4 import BeautifulSoup
from adapters import list_adapters
from huggingface_hub import hf_hub_download
from adapters import AutoAdapterModel
import re

# Multi-model runner for HuggingFace model analysis (sampled from top 10 models)
import requests
import pandas as pd
import datetime
import json
import csv
import os
import time
from huggingface_hub import HfApi
from bs4 import BeautifulSoup
import re


# Make sure these are imported or defined in the same file or via `import script3`
#from /content/script3.py import dfs_finetunes, save_json, save_csv  # Import from script3.py
# Assuming script3.py is in the same directory, and dfs_finetunes, save_json, and save_csv are defined there
from script3 import dfs_finetunes, save_json, save_csv


# Timestamp to share across all model saves
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

def process_models_from_csv(csv_path, output_dir="results", limit=10):
    """
    Processes top N models from a CSV by calling `dfs_finetunes` for each.
    Constructs model URLs from the 'id' column and saves results in JSON/CSV.
    """
    os.makedirs(output_dir, exist_ok=True)
    df = pd.read_csv(csv_path)

    if 'id' not in df.columns:
        raise ValueError("CSV must contain a column named 'id'.")

    df = df.head(limit)
    total_models = len(df)

    for idx, row in df.iterrows():
        model_id = row['id'].strip()
        model_url = f"https://huggingface.co/{model_id}"
        print(f"\n[{idx + 1}/{total_models}] Running analysis for: {model_url}")

        try:
            visited = set()
            results = dfs_finetunes(model_url, visited)

            if results:
                model_name = model_id.split("/")[-1]
                json_path = os.path.join(output_dir, f"{model_name}_finetunes_{timestamp}.json")
                csv_path = os.path.join(output_dir, f"{model_name}_finetunes_{timestamp}.csv")

                save_json(results, model_name)
                save_csv(results, model_name)

                print(f"Completed: {model_name}")
            else:
                print(f"No results for {model_id}")

            time.sleep(2)  # Rate limit buffer

        except Exception as e:
            print(f"Error with {model_id}: {e}")

    print(f"\nFinished processing {total_models} models. Saved to '{output_dir}'.")

if __name__ == "__main__":
    csv_input = input("Path to CSV with model IDs: ").strip()
    process_models_from_csv(csv_input)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

def process_models_from_csv(csv_path, output_dir="results", limit=10):
    """
    Processes top N models from a CSV by calling `dfs_finetunes` for each.
    Constructs model URLs from the 'id' column and saves results in JSON/CSV.
    """
    os.makedirs(output_dir, exist_ok=True)
    df = pd.read_csv(csv_path)

    if 'id' not in df.columns:
        raise ValueError("CSV must contain a column named 'id'.")

    df = df.head(limit)
    total_models = len(df)

    for idx, row in df.iterrows():
        model_id = row['id'].strip()
        model_url = f"https://huggingface.co/{model_id}"
        print(f"\n[{idx + 1}/{total_models}] Running analysis for: {model_url}")

        try:
            visited = set()
            results = dfs_finetunes(model_url, visited)

            if results:
                model_name = model_id.split("/")[-1]
                json_path = os.path.join(output_dir, f"{model_name}_finetunes_{timestamp}.json")
                csv_path = os.path.join(output_dir, f"{model_name}_finetunes_{timestamp}.csv")

                save_json(results, model_name)
                save_csv(results, model_name)

                print(f"Completed: {model_name}")
            else:
                print(f"No results for {model_id}")

            time.sleep(2)  # Rate limit buffer

        except Exception as e:
            print(f"Error with {model_id}: {e}")

    print(f"\nFinished processing {total_models} models. Saved to '{output_dir}'.")

if __name__ == "__main__":
    csv_input = input("Path to CSV with model IDs: ").strip()
    process_models_from_csv(csv_input)

