In [2]:
import json
import requests
import pandas as pd
from typing import List, Dict, Any, Optional
import os
import csv
from urllib.parse import quote
import tempfile
from urllib.parse import urlparse
import time

In [None]:
# Pulling down all hardwar projects with git repositories from Hackaday.io
class hackClient:
    def __init__(self, api_key: str):
        self.base_url = "https://dev.hackaday.io/v2"
        self.api_key = api_key

    def get_hardware_projects(self, search_term: str):
        time.sleep(0.5)

        hardware_url = f"{self.base_url}/search?search_term={search_term}&limit=100&api_key={self.api_key}"
        try:
            response = requests.get(hardware_url)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching metadata for {search_term}: {e}")
            return None

    def get_repo_links(self, project_id: str) -> List[str]:
        time.sleep(0.5)

        links_url = f"{self.base_url}/projects/{project_id}/links?api_key={self.api_key}"
        try:
            response = requests.get(links_url)
            response.raise_for_status()
            links_data = response.json()


            repo_links = [
                link.get("url") for link in links_data
                if any(domain in link.get("url", "") for domain in ["github.com", "gitlab.com"])
            ]
            return repo_links
        except requests.exceptions.RequestException as e:
            print(f"Error fetching links for project {project_id}: {e}")
            return []


def main():
    api_key = "API_KEY"
    client = hackClient(api_key)

    search_term = "hardware"
    hardware_projects = client.get_hardware_projects(search_term)

    if hardware_projects is not None:
        df = pd.DataFrame(hardware_projects.get("results", []))  
        github_links_list = []

        for index, row in df.iterrows():
            project_id = row.get('rid') 
            if project_id:
                github_links = client.get_repo_links(str(project_id))
                github_links_list.append(", ".join(github_links) if github_links else "")
            else:
                github_links_list.append("")

        df["github_links"] = github_links_list
        df = df[df["github_links"] != ""]  

        print("Hardware projects with GitHub links:")
        print(df[["title", "github_links"]].head())

        # Save to CSV
        temp_dir = tempfile.gettempdir()
        csv_path = os.path.join(temp_dir, "hardware_projects_git_links.csv")
        df.to_csv(csv_path, index=False)
        print(f"\nSaved to: {csv_path}")
    else:
        print("Failed to retrieve hardware projects.")


if __name__ == "__main__":
    main()

Hardware projects with GitHub links:
                                title  \
3                Polymorphic Hardware   
5        VirtualXT Hardware Validator   
6    Turnado Hardware MIDI Controller   
8  Simplify Embedded hardware designs   
9     Hardware 2FA TOTP authenticator   

                                        github_links  
3  https://github.com/PolymorphicLabs/PMLDot, htt...  
5  https://github.com/andreas-jonsson/virtualxt/t...  
6  https://github.com/liamlacey/Turnado-Hardware-...  
8  https://github.com/makarandkapoor/Circuit_Tree...  
9              https://github.com/itzandroidtab/totp  

Saved to: /var/folders/zk/g1hxt5814gvfmcc5h9n90jgc0000gn/T/hardware_projects_git_links.csv


In [None]:
# Pulling down all of the hardware projects and implementing pagination
class hackClient:
    def __init__(self, api_key: str):
        self.base_url = "https://dev.hackaday.io/v2"
        self.api_key = api_key

    def get_hardware_projects(self, search_term: str, limit: int = 100) -> List[Dict]:
        all_projects = []
        offset = 0

        while True:
            time.sleep(2)
            hardware_url = (
                f"{self.base_url}/search?search_term={search_term}"
                f"&limit={limit}&offset={offset}&api_key={self.api_key}"
            )

            try:
                response = requests.get(hardware_url)
                response.raise_for_status()
                data = response.json()

                # Prefer "projects", fall back to "results"
                projects = data.get("results", [])

                if not projects:
                    break  # No more projects, exit loop

                all_projects.extend(projects)
                offset += limit  # Move to next page
            except requests.exceptions.RequestException as e:
                print(f"Fetching projects with offset: {offset}")
                print(f"Error fetching page at offset {offset}: {e}")

                break

        return all_projects

    def get_repo_links(self, project_id: str) -> List[str]:
        time.sleep(2)  # Increase sleep to be API-friendly
        links_url = f"{self.base_url}/projects/{project_id}/links?api_key={self.api_key}"

        try:
            response = requests.get(links_url)
            response.raise_for_status()
            links_data = response.json()

        # Filter for GitHub and GitLab links
            repo_links = [
                link.get("url") for link in links_data
                if any(domain in link.get("url", "") for domain in ["github.com", "gitlab.com"])
            ]
            return repo_links

        except requests.exceptions.HTTPError as e:
            if response.status_code == 429:
                print(f"Rate limit hit for project {project_id}. Sleeping for 10 seconds...")
                time.sleep(10)
                # Retry once after waiting
                return self.get_repo_links(project_id)
            else:
                print(f"HTTP error fetching links for project {project_id}: {e}")
                return []

        except requests.exceptions.RequestException as e:
            print(f"General request error fetching links for project {project_id}: {e}")
            return []


def main():
    api_key = "ce9f1cdaa76cd5b5"
    client = hackClient(api_key)

    search_term = "hardware"
    hardware_projects = client.get_hardware_projects(search_term)

    if hardware_projects:
        # Filter to only dict entries to avoid DataFrame issues
        df = pd.DataFrame([proj for proj in hardware_projects if isinstance(proj, dict)])
        print("Columns in DataFrame:", df.columns.tolist())

        github_links_list = []
        for index, row in df.iterrows():
            project_id = row.get('rid') or row.get('id')
            if project_id:
                repo_links = client.get_repo_links(str(project_id))
                github_links_list.append(", ".join(repo_links) if repo_links else "")
            else:
                github_links_list.append("")

        df["github_links"] = github_links_list
        df = df[df["github_links"] != ""]

        print("Hardware projects with GitHub or GitLab links:")
        name_col = "title" if "title" in df.columns else "name"
        print(df[[name_col, "github_links"]].head())

        temp_dir = tempfile.gettempdir()
        csv_path = os.path.join(temp_dir, "hardware_projects_git_links.csv")
        df.to_csv(csv_path, index=False)
        print(f"\nSaved to: {csv_path}")
    else:
        print("Failed to retrieve hardware projects.")


if __name__ == "__main__":
    main()

Fetching projects with offset: 0
Error fetching page at offset 0: 429 Client Error: Too Many Requests for url: https://dev.hackaday.io/v2/search?search_term=hardware&limit=100&offset=0&api_key=ce9f1cdaa76cd5b5
Failed to retrieve hardware projects.
