In [1]:
# GitHub Lists Configuration
# This file contains the dictionary of GitHub starred lists to be used as tags

GITHUB_LISTS = {
    "stack": {
        "url": "https://github.com/stars/Veatec22/lists/stack",
        "description": "Core development stack and essential tools"
    },
    "nice-to-have": {
        "url": "https://github.com/stars/Veatec22/lists/nice-to-have", 
        "description": "Useful tools and libraries for future consideration"
    },
    "future-ideas": {
        "url": "https://github.com/stars/Veatec22/lists/future-ideas",
        "description": "Innovative projects and experimental technologies"
    },
    "ignore": {
        "url": "https://github.com/stars/Veatec22/lists/ignore",
        "description": "Repositories to ignore"
    }
    
}

# List of tag names for easy iteration
TAG_NAMES = list(GITHUB_LISTS.keys())

# Default sheet tab name for the unified starred data
STARRED_SHEET_TAB = "starred"

In [2]:
#!/usr/bin/env python3
"""
Test script to verify MotherDuck connection
"""

import os
import duckdb
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

MOTHERDUCK_TOKEN = os.getenv('MOTHERDUCK_TOKEN')
MOTHERDUCK_DB = os.getenv('MOTHERDUCK_DB', 'github')

def test_motherduck_connection():
    """Test the MotherDuck connection"""
    print("🧪 Testing MotherDuck connection...")
    
    try:
        # Try to connect to MotherDuck
        if MOTHERDUCK_TOKEN:
            connection_string = f'md:{MOTHERDUCK_DB}?motherduck_token={MOTHERDUCK_TOKEN}'
            print(f"🔐 Using token authentication for database: {MOTHERDUCK_DB}")
        else:
            connection_string = f'md:{MOTHERDUCK_DB}'
            print(f"🌐 Using browser authentication for database: {MOTHERDUCK_DB}")
        
        conn = duckdb.connect(connection_string)
        print("✅ Successfully connected to MotherDuck!")
        
        # Test basic functionality
        result = conn.execute("SELECT 1 as test_value").fetchone()
        print(f"✅ Basic query test: {result}")
        
        # Show databases
        print("\n📊 Available databases:")
        databases = conn.execute("SHOW DATABASES").fetchall()
        for db in databases:
            print(f"  - {db[0]}")
        
        # Check if our tables exist
        print(f"\n📋 Tables in {MOTHERDUCK_DB} database:")
        try:
            tables = conn.execute("SHOW TABLES").fetchall()
            if tables:
                for table in tables:
                    print(f"  - {table[0]}")
            else:
                print("  No tables found - run the sync scripts to populate data")
        except Exception as e:
            print(f"  Could not list tables: {e}")
        
        conn.close()
        print("\n🎉 MotherDuck connection test completed successfully!")
        return True
        
    except Exception as e:
        print(f"❌ Error testing MotherDuck connection: {e}")
        return False

if __name__ == '__main__':
    success = test_motherduck_connection()
    if not success:
        exit(1)

🧪 Testing MotherDuck connection...
🌐 Using browser authentication for database: github
✅ Successfully connected to MotherDuck!
✅ Basic query test: (1,)

📊 Available databases:
  - github
  - md_information_schema

📋 Tables in github database:
  No tables found - run the sync scripts to populate data

🎉 MotherDuck connection test completed successfully!


In [3]:
#!/usr/bin/env python3
"""
Unified Starred GitHub Repositories Fetcher
Fetches starred repositories from GitHub API and merges with curated list tags
Creates a comprehensive portfolio view combining detailed repo data with tag organization
Now uses MotherDuck (cloud DuckDB) for data storage
"""

import os
import sys
import time
import duckdb
from datetime import datetime
from collections import defaultdict
import requests
from bs4 import BeautifulSoup
import pandas as pd
from dotenv import load_dotenv

# === CONFIGURATION ===
load_dotenv()
GHUB_TOKEN = os.getenv('GHUB_TOKEN')
MOTHERDUCK_TOKEN = os.getenv('MOTHERDUCK_TOKEN')
MOTHERDUCK_DB = os.getenv('MOTHERDUCK_DB', 'github')  # Default database name

# GitHub API endpoints
API_STARRED_URL = 'https://api.github.com/user/starred'
API_RELEASES_URL = 'https://api.github.com/repos/{owner}/{repo}/releases/latest'
API_TOPICS_URL = 'https://api.github.com/repos/{owner}/{repo}/topics'

auth_headers = {
    'Authorization': f'token {GHUB_TOKEN}',
    'Accept': 'application/vnd.github.v3+json'
}

topics_headers = {
    'Authorization': f'token {GHUB_TOKEN}',
    'Accept': 'application/vnd.github.mercy-preview+json'  # Needed to access topics
}

def get_motherduck_connection():
    """Get connection to MotherDuck"""
    try:
        if MOTHERDUCK_TOKEN:
            connection_string = f'md:{MOTHERDUCK_DB}?motherduck_token={MOTHERDUCK_TOKEN}'
        else:
            # Use browser-based authentication
            connection_string = f'md:{MOTHERDUCK_DB}'
        
        conn = duckdb.connect(connection_string)
        print(f"✅ Connected to MotherDuck database: {MOTHERDUCK_DB}")
        return conn
    except Exception as e:
        print(f"❌ Error connecting to MotherDuck: {e}")
        raise

def get_starred_repos():
    """Fetch all starred repositories from GitHub API"""
    print("🔍 Fetching starred repositories...")
    starred = []
    page = 1

    while True:
        response = requests.get(
            API_STARRED_URL, 
            headers=auth_headers, 
            params={'per_page': 100, 'page': page}
        )
        
        if response.status_code != 200:
            print(f"Error fetching starred repos: {response.status_code} - {response.text}")
            break

        data = response.json()
        if not data:
            break

        starred.extend(data)
        page += 1
        print(f"📦 Fetched page {page-1} ({len(data)} repos)")

    print(f"✅ Total starred repositories: {len(starred)}")
    return starred

def get_last_release_date(owner, repo):
    """Get the last release date for a repository"""
    url = API_RELEASES_URL.format(owner=owner, repo=repo)
    response = requests.get(url, headers=auth_headers)

    if response.status_code == 200:
        return response.json().get("published_at")
    elif response.status_code == 404:
        return "No releases"
    else:
        return f"Error: {response.status_code}"

def get_repo_topics(owner, repo):
    """Get topics for a repository"""
    url = API_TOPICS_URL.format(owner=owner, repo=repo)
    response = requests.get(url, headers=topics_headers)

    if response.status_code == 200:
        return response.json().get('names', [])
    else:
        return []

def scrape_github_list(list_url, tag_name):
    """Scrape a single GitHub list and return repository names with tag"""
    print(f"🔍 Scraping list '{tag_name}': {list_url}")
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        response = requests.get(list_url, headers=headers)
        if response.status_code != 200:
            print(f"❌ Failed to load page for {tag_name}: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        repo_blocks = soup.select('div#user-list-repositories > div.border-bottom')

        repo_names = []
        for block in repo_blocks:
            name_tag = block.select_one('h3 a')
            if not name_tag:
                continue
                
            full_name = name_tag['href'].strip('/')
            repo_names.append(full_name)

        print(f"✅ Found {len(repo_names)} repositories in '{tag_name}' list")
        return repo_names
        
    except Exception as e:
        print(f"❌ Error scraping {tag_name}: {str(e)}")
        return []

def get_curated_tags():
    """Fetch all curated lists and create a mapping of repo names to tags"""
    print("🏷️ Fetching curated list tags...")
    repo_tags = defaultdict(set)
    ignore_repos = set()
    
    for tag_name in TAG_NAMES:
        list_config = GITHUB_LISTS[tag_name]
        list_url = list_config['url']
        
        repo_names = scrape_github_list(list_url, tag_name)
        
        if tag_name == 'ignore':
            # Special handling for ignore list
            ignore_repos.update(repo_names)
            print(f"🚫 Added {len(repo_names)} repositories to ignore list")
        else:
            for repo_name in repo_names:
                repo_tags[repo_name].add(tag_name)
        
        # Be nice to GitHub
        time.sleep(1)
    
    print(f"✅ Collected tags for {len(repo_tags)} repositories")
    print(f"🚫 Ignoring {len(ignore_repos)} repositories")
    return repo_tags, ignore_repos

def process_repositories(repos, repo_tags, ignore_repos):
    """Process repositories and gather additional data, merging with curated tags"""
    print("🔄 Processing repositories and gathering additional data...")
    data = []

    for i, repo in enumerate(repos):
        full_name = repo['full_name']
        
        # Skip ignored repositories
        if full_name in ignore_repos:
            print(f"🚫 Skipping ignored repository: {full_name}")
            continue
            
        owner, repo_name = full_name.split('/')
        
        print(f"📊 Processing {full_name} ({i+1}/{len(repos)})")
        
        # Get additional data
        last_release = get_last_release_date(owner, repo_name)
        topics = get_repo_topics(owner, repo_name)
        
        # Get curated tags for this repo
        curated_tags = list(repo_tags.get(full_name, set()))
        
        # Combine GitHub topics and curated tags
        all_tags = topics + curated_tags
        
        data.append({
            'name': full_name,
            'description': repo.get('description', ''),
            'stars': repo['stargazers_count'],
            'forks': repo['forks_count'],
            'language': repo.get('language', 'Unknown'),
            'url': repo['html_url'],
            'last_release': last_release,
            'topics': ", ".join(topics),
            'curated_tags': ", ".join(sorted(curated_tags)),
            'all_tags': ", ".join(sorted(all_tags)),
            'tags_count': len(all_tags),
            'is_curated': len(curated_tags) > 0,
            'created_at': repo['created_at'],
            'updated_at': repo['updated_at'],
            'pushed_at': repo.get('pushed_at', ''),
            'open_issues': repo.get('open_issues_count', 0),
            'archived': repo.get('archived', False),
            'fork': repo.get('fork', False),
            'fetched_at': datetime.now().isoformat()
        })

        time.sleep(0.1)

    print(f"✅ Processed {len(data)} repositories (filtered out {len([r for r in repos if r['full_name'] in ignore_repos])} ignored)")
    return pd.DataFrame(data)

def upload_to_motherduck(df, table_name="starred"):
    """Upload DataFrame to MotherDuck"""
    print(f"📤 Uploading to MotherDuck: {table_name}")
    
    try:
        conn = get_motherduck_connection()
        
        # Create table if it doesn't exist and insert data
        conn.execute(f"DROP TABLE IF EXISTS {table_name}")
        conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM df")
        
        # Verify upload
        result = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()
        row_count = result[0]
        
        print(f"✅ Uploaded {row_count} rows to MotherDuck table: {table_name}")
        
        conn.close()
        return True
        
    except Exception as e:
        print(f"❌ Error uploading to MotherDuck: {type(e).__name__}: {e}")
        raise

def main():
    """Main execution function"""
    print("🚀 Starting unified starred repositories sync with MotherDuck...")
    print(f"⏰ Started at: {datetime.now().isoformat()}")
    
    try:
        # Fetch starred repositories
        repos = get_starred_repos()
        
        if not repos:
            print("⚠️ No starred repositories found")
            return
        
        # Get curated tags from lists and ignore list
        repo_tags, ignore_repos = get_curated_tags()
        
        # Process repositories with merged data
        df = process_repositories(repos, repo_tags, ignore_repos)
        
        # Upload to MotherDuck
        upload_to_motherduck(df)
        
        # Print summary
        curated_count = len(df[df['is_curated'] == True])
        languages_count = len(df['language'].unique())
        
        print(f"\n📈 Portfolio Summary:")
        print(f"   • Total starred repositories: {len(df)}")
        print(f"   • Curated repositories: {curated_count}")
        print(f"   • Programming languages: {languages_count}")
        print(f"   • Total stars accumulated: {df['stars'].sum():,}")
        print(f"   • Ignored repositories: {len(ignore_repos)}")
        
        # Show curated tag distribution
        if curated_count > 0:
            print(f"   • Curated tag distribution:")
            for tag in [t for t in TAG_NAMES if t != 'ignore']:
                tag_count = len(df[df['curated_tags'].str.contains(tag, na=False)])
                if tag_count > 0:
                    print(f"     - {tag}: {tag_count} repos")
        
        print(f"\n🎉 Successfully synced unified starred repositories to MotherDuck!")
        
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        sys.exit(1)
    
    print(f"✅ Completed at: {datetime.now().isoformat()}")

if __name__ == '__main__':
    main()

🚀 Starting unified starred repositories sync with MotherDuck...
⏰ Started at: 2025-07-04T13:49:48.864583
🔍 Fetching starred repositories...
📦 Fetched page 1 (63 repos)
✅ Total starred repositories: 63
🏷️ Fetching curated list tags...
🔍 Scraping list 'stack': https://github.com/stars/Veatec22/lists/stack
✅ Found 22 repositories in 'stack' list
🔍 Scraping list 'nice-to-have': https://github.com/stars/Veatec22/lists/nice-to-have
✅ Found 4 repositories in 'nice-to-have' list
🔍 Scraping list 'future-ideas': https://github.com/stars/Veatec22/lists/future-ideas
✅ Found 27 repositories in 'future-ideas' list
🔍 Scraping list 'ignore': https://github.com/stars/Veatec22/lists/ignore
✅ Found 3 repositories in 'ignore' list
🚫 Added 3 repositories to ignore list
✅ Collected tags for 53 repositories
🚫 Ignoring 3 repositories
🔄 Processing repositories and gathering additional data...
📊 Processing fastai/fastai (1/63)
🚫 Skipping ignored repository: sherlock-project/sherlock
🚫 Skipping ignored repositor

In [6]:
'''
GitHub Repository Recommender
Recommends GitHub repositories based on user's starred repositories topics.
Optimized for speed and relevance.
Now uses MotherDuck (cloud DuckDB) for data storage.
'''

import os
import sys
import time
import duckdb
import requests
import pandas as pd
from dotenv import load_dotenv
from collections import Counter
from tqdm import tqdm

# Import our lists configuration for ignore list
# === CONFIGURATION ===
load_dotenv()
GHUB_TOKEN = os.getenv('GHUB_TOKEN')
MOTHERDUCK_TOKEN = os.getenv('MOTHERDUCK_TOKEN')
MOTHERDUCK_DB = os.getenv('MOTHERDUCK_DB', 'github')  # Default database name

auth_headers = {
    'Authorization': f'token {GHUB_TOKEN}',
    'Accept': 'application/vnd.github.v3+json'
}

def get_motherduck_connection():
    """Get connection to MotherDuck"""
    try:
        if MOTHERDUCK_TOKEN:
            connection_string = f'md:{MOTHERDUCK_DB}?motherduck_token={MOTHERDUCK_TOKEN}'
        else:
            # Use browser-based authentication
            connection_string = f'md:{MOTHERDUCK_DB}'
        
        conn = duckdb.connect(connection_string)
        print(f"✅ Connected to MotherDuck database: {MOTHERDUCK_DB}")
        return conn
    except Exception as e:
        print(f"❌ Error connecting to MotherDuck: {e}")
        raise

def get_starred_repos_from_motherduck():
    """Read starred repositories from MotherDuck"""
    print(f"📊 Reading starred repositories from MotherDuck")
    
    try:
        conn = get_motherduck_connection()
        
        # Get starred repositories data
        query = "SELECT * FROM starred"
        df = conn.execute(query).df()
        
        conn.close()
        
        print(f"✅ Read {len(df)} rows from starred table")
        return df
        
    except Exception as e:
        print(f"❌ Error reading from MotherDuck: {type(e).__name__}: {e}")
        raise

def get_ignore_repos():
    """Get the list of repositories to ignore from the ignore list"""
    print("🚫 Getting ignore list...")
    
    try:
        # Import the scraper function from starred_fetcher
        
        ignore_config = GITHUB_LISTS.get('ignore')
        if not ignore_config:
            print("⚠️ No ignore list found in configuration")
            return set()
        
        ignore_repos = scrape_github_list(ignore_config['url'], 'ignore')
        ignore_set = set(ignore_repos)
        
        print(f"🚫 Found {len(ignore_set)} repositories to ignore")
        return ignore_set
        
    except Exception as e:
        print(f"❌ Error getting ignore list: {e}")
        return set()

def extract_topic_frequencies(df_starred):
    """Extract and count topic frequencies from starred repositories"""
    print("🔍 Analyzing topics from starred repositories...")
    
    all_topics = []
    starred_repo_full_names = set()
    
    for _, row in df_starred.iterrows():
        # Collect starred repo full names for filtering (owner/repo format)
        if pd.notna(row.get('name')):
            starred_repo_full_names.add(row['name'])
        
        # Extract topics
        topics_str = row.get('topics', '')
        if pd.notna(topics_str) and topics_str.strip():
            topics = [t.strip().lower() for t in topics_str.split(',') if t.strip()]
            all_topics.extend(topics)
    
    # Count topic frequencies
    topic_counter = Counter(all_topics)
    
    print(f"📈 Found {len(topic_counter)} unique topics from {len(df_starred)} starred repos")
    print(f"🔍 Tracking {len(starred_repo_full_names)} starred repositories for filtering")
    for topic, count in topic_counter.most_common(10):
        print(f"  • {topic}: {count} repos")
    
    return topic_counter, starred_repo_full_names

def search_repositories_by_topic(topic, min_stars=1000, max_results=50):
    """Search GitHub repositories by a specific topic"""
    print(f"🔎 Searching for topic: '{topic}' (min {min_stars} stars)")
    
    repositories = []
    per_page = 100
    max_pages = (max_results // per_page) + 1
    
    for page in range(1, max_pages + 1):
        query = f"topic:{topic} stars:>={min_stars}"
        params = {
            'q': query,
            'sort': 'stars',
            'order': 'desc',
            'per_page': per_page,
            'page': page
        }
        
        try:
            response = requests.get('https://api.github.com/search/repositories', 
                                  headers=auth_headers, params=params)
            
            if response.status_code != 200:
                print(f"⚠️ API error for topic '{topic}': {response.status_code}")
                break
            
            data = response.json()
            items = data.get('items', [])
            
            if not items:
                break
                
            repositories.extend(items)
            print(f"  📦 Page {page}: {len(items)} repos (total: {len(repositories)})")
            
            # Stop if we have enough results
            if len(repositories) >= max_results:
                repositories = repositories[:max_results]
                break
                
            # Rate limiting
            time.sleep(0.1)
            
        except Exception as e:
            print(f"❌ Error searching for topic '{topic}': {e}")
            break
    
    print(f"✅ Found {len(repositories)} repositories for topic '{topic}'")
    return repositories

def get_recommendations(topic_counter, starred_repo_full_names, ignore_repos, min_stars=1000, max_per_topic=50):
    """Get repository recommendations based on topic frequencies"""
    print("🎯 Generating recommendations based on topic analysis...")
    
    all_recommendations = {}
    filtered_count = 0
    ignored_count = 0
    
    # Process topics in order of frequency (most common first)
    for topic, frequency in tqdm(topic_counter.most_common(), desc="Processing topics"):
        # print(f"\n--- Processing topic: '{topic}' (appears in {frequency} starred repos) ---")
        
        repos = search_repositories_by_topic(topic, min_stars, max_per_topic)
        
        for repo in repos:
            repo_id = repo['id']
            repo_full_name = repo['full_name']  # This is "owner/repo" format
            
            # Skip if already starred (check against full_name)
            if repo_full_name in starred_repo_full_names:
                filtered_count += 1
                print(f"  🔄 Skipping already starred: {repo_full_name}")
                continue
            
            # Skip if in ignore list
            if repo_full_name in ignore_repos:
                ignored_count += 1
                print(f"  🚫 Skipping ignored repository: {repo_full_name}")
                continue
            
            # If we haven't seen this repo before, add it
            if repo_id not in all_recommendations:
                all_recommendations[repo_id] = {
                    'repo_data': repo,
                    'topic_matches': [],
                    'total_frequency': 0
                }
            
            # Add this topic match
            all_recommendations[repo_id]['topic_matches'].append(topic)
            all_recommendations[repo_id]['total_frequency'] += frequency
    
    print(f"\n✅ Found {len(all_recommendations)} unique recommendations")
    print(f"🔄 Filtered out {filtered_count} already-starred repositories")
    print(f"🚫 Filtered out {ignored_count} ignored repositories")
    return all_recommendations

def format_recommendations(recommendations_dict):
    """Format recommendations into a DataFrame sorted by relevance and stars"""
    print("📊 Formatting and ranking recommendations...")
    
    formatted_recommendations = []
    
    for repo_id, data in recommendations_dict.items():
        repo = data['repo_data']
        topic_matches = data['topic_matches']
        total_frequency = data['total_frequency']
        
        formatted_recommendations.append({
            'name': repo['full_name'],
            'description': repo.get('description', ''),
            'stars': repo['stargazers_count'],
            'forks': repo['forks_count'],
            'language': repo.get('language', 'Unknown'),
            'url': repo['html_url'],
            'topics': ', '.join(repo.get('topics', [])),
            'matched_topics': ', '.join(topic_matches),
            'topic_frequency_score': total_frequency,
            'num_topic_matches': len(topic_matches)
        })
    
    df = pd.DataFrame(formatted_recommendations)
    
    # Sort by topic frequency score (descending) and then by stars (descending)
    df = df.sort_values(['topic_frequency_score', 'stars'], ascending=[False, False])
    
    print(f"🏆 Top 10 recommendations:")
    for i, row in df.head(10).iterrows():
        print(f"  {i+1}. {row['name']} ({row['stars']:,} ⭐) - Score: {row['topic_frequency_score']}")
        print(f"     Matched topics: {row['matched_topics']}")
    
    return df

def upload_recommendations_to_motherduck(df, table_name='recommendations'):
    """Upload DataFrame to MotherDuck"""
    print(f"📤 Uploading recommendations to MotherDuck: {table_name}")
    
    try:
        conn = get_motherduck_connection()
        
        # Create table if it doesn't exist and insert data
        conn.execute(f"DROP TABLE IF EXISTS {table_name}")
        conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM df")
        
        # Verify upload
        result = conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()
        row_count = result[0]
        
        print(f"✅ Uploaded {row_count} rows to MotherDuck table: {table_name}")
        
        conn.close()
        return True
        
    except Exception as e:
        print(f"❌ Error uploading to MotherDuck: {type(e).__name__}: {e}")
        raise

def main():
    """Main execution function"""
    print("🚀 Starting optimized repository recommendation sync with MotherDuck...")
    
    try:
        # Read starred repositories from MotherDuck
        df_starred = get_starred_repos_from_motherduck()
        
        if df_starred.empty:
            print("⚠️ No starred repositories found in MotherDuck. Cannot generate recommendations.")
            return
        
        # Get ignore list
        ignore_repos = get_ignore_repos()
        
        # Extract topic frequencies and starred repo names
        topic_counter, starred_repo_full_names = extract_topic_frequencies(df_starred)
        
        if not topic_counter:
            print("⚠️ No topics found in starred repositories. Cannot generate recommendations.")
            return
        
        # Get recommendations based on topics
        recommendations_dict = get_recommendations(topic_counter, starred_repo_full_names, ignore_repos)
        
        if not recommendations_dict:
            print("⚠️ No recommendations found.")
            return
        
        # Format and sort recommendations
        df_recommendations = format_recommendations(recommendations_dict)
        
        # Upload recommendations to MotherDuck
        upload_recommendations_to_motherduck(df_recommendations)
        
        print("🎉 Successfully generated and uploaded repository recommendations!")
        print(f"📈 Generated {len(df_recommendations)} recommendations based on {len(topic_counter)} topics")
        print(f"🚫 Ignored {len(ignore_repos)} repositories from ignore list")
        
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        sys.exit(1)

if __name__ == '__main__':
    main() 

🚀 Starting optimized repository recommendation sync with MotherDuck...
📊 Reading starred repositories from MotherDuck
✅ Connected to MotherDuck database: github
✅ Read 60 rows from starred table
🚫 Getting ignore list...
🔍 Scraping list 'ignore': https://github.com/stars/Veatec22/lists/ignore
✅ Found 3 repositories in 'ignore' list
🚫 Found 3 repositories to ignore
🔍 Analyzing topics from starred repositories...
📈 Found 391 unique topics from 60 starred repos
🔍 Tracking 60 starred repositories for filtering
  • python: 35 repos
  • machine-learning: 25 repos
  • data-science: 20 repos
  • deep-learning: 14 repos
  • data-analysis: 12 repos
  • hacktoberfest: 11 repos
  • data-visualization: 9 repos
  • llm: 7 repos
  • ai: 7 repos
  • analytics: 7 repos
🎯 Generating recommendations based on topic analysis...


Processing topics:   0%|          | 0/391 [00:00<?, ?it/s]

🔎 Searching for topic: 'python' (min 1000 stars)


Processing topics:   0%|          | 1/391 [00:03<22:49,  3.51s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'python'
  🔄 Skipping already starred: tensorflow/tensorflow
  🔄 Skipping already starred: pytorch/pytorch
  🚫 Skipping ignored repository: home-assistant/core
  🔄 Skipping already starred: microsoft/ML-For-Beginners
  🔄 Skipping already starred: apache/superset
  🚫 Skipping ignored repository: sherlock-project/sherlock
  🔄 Skipping already starred: keras-team/keras
  🔄 Skipping already starred: scikit-learn/scikit-learn
  🔄 Skipping already starred: scrapy/scrapy
  🔄 Skipping already starred: Avik-Jain/100-Days-Of-ML-Code
  🔄 Skipping already starred: pandas-dev/pandas
  🔄 Skipping already starred: aymericdamien/TensorFlow-Examples
🔎 Searching for topic: 'machine-learning' (min 1000 stars)


Processing topics:   1%|          | 2/391 [00:06<20:47,  3.21s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'machine-learning'
  🔄 Skipping already starred: tensorflow/tensorflow
  🔄 Skipping already starred: pytorch/pytorch
  🔄 Skipping already starred: microsoft/ML-For-Beginners
  🔄 Skipping already starred: keras-team/keras
  🔄 Skipping already starred: scikit-learn/scikit-learn
  🔄 Skipping already starred: mlabonne/llm-course
  🔄 Skipping already starred: Avik-Jain/100-Days-Of-ML-Code
  🔄 Skipping already starred: aymericdamien/TensorFlow-Examples
  🔄 Skipping already starred: streamlit/streamlit
  🔄 Skipping already starred: gradio-app/gradio
  🔄 Skipping already starred: ray-project/ray
  🔄 Skipping already starred: AMAI-GmbH/AI-Expert-Roadmap
  🔄 Skipping already starred: fastai/fastai
🔎 Searching for topic: 'data-science' (min 1000 stars)


Processing topics:   1%|          | 3/391 [00:10<22:14,  3.44s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'data-science'
  🔄 Skipping already starred: microsoft/ML-For-Beginners
  🔄 Skipping already starred: apache/superset
  🔄 Skipping already starred: keras-team/keras
  🔄 Skipping already starred: scikit-learn/scikit-learn
  🔄 Skipping already starred: pandas-dev/pandas
  🔄 Skipping already starred: streamlit/streamlit
  🔄 Skipping already starred: gradio-app/gradio
  🔄 Skipping already starred: ray-project/ray
  🔄 Skipping already starred: AMAI-GmbH/AI-Expert-Roadmap
  🔄 Skipping already starred: ml-tooling/best-of-ml-python
  🔄 Skipping already starred: sinaptik-ai/pandas-ai
  🔄 Skipping already starred: ydataai/ydata-profiling
🔎 Searching for topic: 'deep-learning' (min 1000 stars)


Processing topics:   1%|          | 4/391 [00:12<19:56,  3.09s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'deep-learning'
  🔄 Skipping already starred: tensorflow/tensorflow
  🔄 Skipping already starred: pytorch/pytorch
  🔄 Skipping already starred: keras-team/keras
  🔄 Skipping already starred: Avik-Jain/100-Days-Of-ML-Code
  🔄 Skipping already starred: aymericdamien/TensorFlow-Examples
  🔄 Skipping already starred: streamlit/streamlit
  🔄 Skipping already starred: gradio-app/gradio
  🔄 Skipping already starred: ray-project/ray
  🔄 Skipping already starred: AMAI-GmbH/AI-Expert-Roadmap
  🔄 Skipping already starred: fastai/fastai
🔎 Searching for topic: 'data-analysis' (min 1000 stars)


Processing topics:   1%|▏         | 5/391 [00:16<20:47,  3.23s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'data-analysis'
  🔄 Skipping already starred: apache/superset
  🔄 Skipping already starred: scikit-learn/scikit-learn
  🔄 Skipping already starred: pandas-dev/pandas
  🔄 Skipping already starred: metabase/metabase
  🔄 Skipping already starred: streamlit/streamlit
  🔄 Skipping already starred: gradio-app/gradio
  🔄 Skipping already starred: AMAI-GmbH/AI-Expert-Roadmap
  🔄 Skipping already starred: ml-tooling/best-of-ml-python
  🔄 Skipping already starred: sinaptik-ai/pandas-ai
  🚫 Skipping ignored repository: dataease/dataease
  🔄 Skipping already starred: Kanaries/pygwalker
  🔄 Skipping already starred: ydataai/ydata-profiling
🔎 Searching for topic: 'hacktoberfest' (min 1000 stars)


Processing topics:   2%|▏         | 6/391 [00:19<20:48,  3.24s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'hacktoberfest'
  🚫 Skipping ignored repository: home-assistant/core
  🔄 Skipping already starred: sdmg15/Best-websites-a-programmer-should-visit
  🚫 Skipping ignored repository: sherlock-project/sherlock
  🔄 Skipping already starred: scrapy/scrapy
🔎 Searching for topic: 'data-visualization' (min 1000 stars)


Processing topics:   2%|▏         | 7/391 [00:23<21:25,  3.35s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'data-visualization'
  🔄 Skipping already starred: apache/superset
  🔄 Skipping already starred: metabase/metabase
  🔄 Skipping already starred: streamlit/streamlit
  🔄 Skipping already starred: gradio-app/gradio
  🔄 Skipping already starred: ml-tooling/best-of-ml-python
  🔄 Skipping already starred: sinaptik-ai/pandas-ai
  🚫 Skipping ignored repository: dataease/dataease
  🔄 Skipping already starred: Avaiga/taipy
🔎 Searching for topic: 'llm' (min 1000 stars)


Processing topics:   2%|▏         | 8/391 [00:26<20:41,  3.24s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'llm'
  🔄 Skipping already starred: mlabonne/llm-course
  🔄 Skipping already starred: mendableai/firecrawl
  🔄 Skipping already starred: ray-project/ray
  🔄 Skipping already starred: gitleaks/gitleaks
  🔄 Skipping already starred: sinaptik-ai/pandas-ai
🔎 Searching for topic: 'ai' (min 1000 stars)


Processing topics:   2%|▏         | 9/391 [00:29<21:33,  3.39s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'ai'
  🔄 Skipping already starred: supabase/supabase
  🔄 Skipping already starred: mendableai/firecrawl
  🔄 Skipping already starred: AMAI-GmbH/AI-Expert-Roadmap
🔎 Searching for topic: 'analytics' (min 1000 stars)


Processing topics:   3%|▎         | 10/391 [00:32<20:59,  3.31s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'analytics'
  🔄 Skipping already starred: apache/superset
  🔄 Skipping already starred: metabase/metabase
  🔄 Skipping already starred: duckdb/duckdb
  🔄 Skipping already starred: getredash/redash
  🔄 Skipping already starred: dbt-labs/dbt-core
  🔄 Skipping already starred: modin-project/modin
🔎 Searching for topic: 'pandas' (min 1000 stars)


Processing topics:   3%|▎         | 11/391 [00:35<19:39,  3.10s/it]

  📦 Page 1: 99 repos (total: 99)
✅ Found 50 repositories for topic 'pandas'
  🔄 Skipping already starred: pandas-dev/pandas
  🔄 Skipping already starred: sinaptik-ai/pandas-ai
  🔄 Skipping already starred: Kanaries/pygwalker
  🔄 Skipping already starred: dask/dask
  🔄 Skipping already starred: ydataai/ydata-profiling
  🔄 Skipping already starred: modin-project/modin
🔎 Searching for topic: 'pytorch' (min 1000 stars)


Processing topics:   3%|▎         | 12/391 [00:38<18:28,  2.93s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'pytorch'
  🔄 Skipping already starred: keras-team/keras
  🔄 Skipping already starred: ray-project/ray
  🔄 Skipping already starred: fastai/fastai
  🔄 Skipping already starred: ml-tooling/best-of-ml-python
🔎 Searching for topic: 'tensorflow' (min 1000 stars)


Processing topics:   3%|▎         | 13/391 [00:41<19:00,  3.02s/it]

  📦 Page 1: 100 repos (total: 100)
✅ Found 50 repositories for topic 'tensorflow'
  🔄 Skipping already starred: tensorflow/tensorflow
  🔄 Skipping already starred: keras-team/keras
  🔄 Skipping already starred: aymericdamien/TensorFlow-Examples
  🔄 Skipping already starred: ray-project/ray
  🔄 Skipping already starred: ml-tooling/best-of-ml-python
🔎 Searching for topic: 'scikit-learn' (min 1000 stars)


Processing topics:   4%|▎         | 14/391 [00:44<18:22,  2.92s/it]

  📦 Page 1: 91 repos (total: 91)
✅ Found 50 repositories for topic 'scikit-learn'
  🔄 Skipping already starred: microsoft/ML-For-Beginners
  🔄 Skipping already starred: Avik-Jain/100-Days-Of-ML-Code
  🔄 Skipping already starred: ml-tooling/best-of-ml-python
  🔄 Skipping already starred: dask/dask
  🔄 Skipping already starred: autogluon/autogluon
🔎 Searching for topic: 'business-intelligence' (min 1000 stars)


Processing topics:   4%|▍         | 15/391 [00:45<15:32,  2.48s/it]

  📦 Page 1: 30 repos (total: 30)
✅ Found 30 repositories for topic 'business-intelligence'
  🔄 Skipping already starred: apache/superset
  🔄 Skipping already starred: metabase/metabase
  🔄 Skipping already starred: getredash/redash
  🚫 Skipping ignored repository: dataease/dataease
  🔄 Skipping already starred: dbt-labs/dbt-core
  🔄 Skipping already starred: evidence-dev/evidence
🔎 Searching for topic: 'gpu' (min 1000 stars)


Processing topics:   4%|▍         | 15/391 [00:48<20:15,  3.23s/it]


KeyboardInterrupt: 