**Query:** What are the most popular Python repositories that have been created in the last year?

In [1]:
import requests

# Headers sent with every GitHub API request: ask for GitHub's JSON media
# type and name the API version the responses should follow.
headers = {"Accept": "application/vnd.github+json"}
headers["X-GitHub-Api-Version"] = "2022-11-28"

ModuleNotFoundError: No module named 'requests'

In [None]:
def run_query(url, timeout=10):
    """Run a GitHub search query and return the list of repo dicts.

    Args:
        url: Full search URL, including the query string.
        timeout: Seconds to wait on the server before giving up. Forwarded
            to requests.get so a stalled connection cannot hang the cell.

    Returns:
        The list stored under 'items' in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (for example when rate limited or when the query is rejected).
    """
    print(f"Query URL: {url}")
    r = requests.get(url, headers=headers, timeout=timeout)
    print(f"Status code: {r.status_code}")

    # Error responses do not carry 'total_count' or 'items'. Stop here with
    # an HTTP error instead of an unrelated KeyError a few lines later.
    r.raise_for_status()

    # Convert the response object to a dictionary.
    response_dict = r.json()

    # Show basic information about the query results.
    print(f"Total repositories: {response_dict['total_count']}")

    # The API reports 'incomplete_results'; flip it so True means complete.
    complete_results = not response_dict['incomplete_results']
    print(f"Complete results: {complete_results}")

    # Pull the dictionaries for each repository returned.
    repo_dicts = response_dict['items']
    print(f"Repositories returned: {len(repo_dicts)}")

    return repo_dicts

In [None]:
def summarize_repos(repos):
    """Print a short, human-readable summary of each repository.

    Each entry in `repos` is indexed by 'name', 'stargazers_count',
    'owner' -> 'login', 'description', and 'html_url'.
    """
    for repo in repos:
        # Read every field before printing, so a missing key raises before
        # any part of this repo's summary is written.
        name, stars, owner, description, link = (
            repo['name'],
            repo['stargazers_count'],
            repo['owner']['login'],
            repo['description'],
            repo['html_url'],
        )

        summary_lines = [
            f"\nRepository: {name} ({stars})",
            f"  Owner: {owner}",
            f"  Description: {description}",
            f"  Repository: {link}",
        ]
        print("\n".join(summary_lines))

In [None]:
# Search for Python repositories with more than 1000 stars that were
# created between 2022-06-01 and 2023-06-01, most-starred first.
url = (
    "https://api.github.com/search/repositories"
    "?q=language:python+stars:>1000"
    "+created:2022-06-01..2023-06-01"
    "&sort=stars&order=desc"
)

repo_dicts = run_query(url)
summarize_repos(repo_dicts)

**Query:** What are the most popular Python repositories that have been created in the last year, that aren't focused on AI?

In [None]:
# Same search as before, with NOT keywords added to the query to exclude
# repositories matching common AI terms.
url = (
    "https://api.github.com/search/repositories"
    "?q=language:python+stars:>1000"
    "+NOT+gpt+NOT+llama+NOT+chat+NOT+llm+NOT+diffusion"
    "+created:2022-06-01..2023-06-01"
    "&sort=stars&order=desc"
)

repo_dicts = run_query(url)
summarize_repos(repo_dicts)

Filter out even more AI-related repositories.

In [None]:
def prune_repos(repos):
    """Return the repos whose name, owner, and description avoid AI terms.

    Matching is a case-insensitive substring test against a fixed list of
    terms. Repos with no description are dropped as well. A one-line count
    of kept vs. total repos is printed before returning.
    """
    ai_terms = (
        'gpt', 'llama', 'chat', 'llm', 'diffusion', 'alpaca',
        ' ai', 'ai ', 'ai-', '-ai', 'openai', 'whisper',
        'rlhf', 'language model', 'langchain', 'transformer', 'gpu',
        'copilot', 'deep', 'embedding', 'model', 'pytorch',
    )

    def mentions_ai(text):
        """Return True if any AI term occurs in the lowercased text."""
        lowered = text.lower()
        for term in ai_terms:
            if term in lowered:
                return True
        return False

    kept = []
    for repo in repos:
        # Name first, then owner; the owner is only looked up when the
        # name did not already rule the repo out.
        if mentions_ai(repo['name']) or mentions_ai(repo['owner']['login']):
            continue

        # A missing or empty description also disqualifies the repo.
        description = repo['description']
        if not description or mentions_ai(description):
            continue

        kept.append(repo)

    print(f"Keeping {len(kept)} of {len(repos)} repos.")
    return kept

In [None]:
# Drop AI-related repos from the latest query results, then show the rest.
pruned_repos = prune_repos(repos=repo_dicts)
summarize_repos(repos=pruned_repos)

Request 100 repo dicts per page, so there are more to work with before pruning.

In [None]:
# Same filtered search, now asking for the first page at 100 results per
# page, so pruning has a larger pool to work from.
url = (
    "https://api.github.com/search/repositories"
    "?q=language:python+stars:>1000"
    "+NOT+gpt+NOT+llama+NOT+chat+NOT+llm+NOT+diffusion"
    "+created:2022-06-01..2023-06-01"
    "&sort=stars&order=desc"
    "&per_page=100&page=1"
)

repo_dicts = run_query(url)
pruned_repos = prune_repos(repo_dicts)
summarize_repos(pruned_repos)