In [16]:
# NOTE: GitHubRecommender is defined in a different cell of this notebook (the one
# containing the class definition); that cell must be executed first, otherwise the
# next line raises NameError on a fresh kernel.
recommender = GitHubRecommender()
# First load a database
# (load_profiles prints an error and returns False on failure; the result is not checked here)
recommender.load_profiles('github_profiles.json')
# Get recommendations for one user, which will compare against other profiles
# (result is a list of dicts with 'username', 'similarity' and 'repositories'; empty on failure)
recommendations = recommender.get_recommendations('404avinotfound')

In [13]:
import json
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import requests
from bs4 import BeautifulSoup

class GitHubRecommender:
    """Recommend repositories owned by GitHub profiles similar to a given user.

    A "database" is a JSON file holding a list of profile dicts with the keys
    'username', 'bio', 'languages' (list of str) and 'repositories' (list of
    dicts with 'name', 'description' and 'language').  The queried user is
    scraped live from github.com, both sides are turned into sentence
    embeddings, and stored profiles are ranked by cosine similarity.
    """

    # Seconds to wait for each GitHub page before giving up (avoids hanging forever).
    REQUEST_TIMEOUT = 10
    # Only the first repositories listed on the user's page are considered.
    MAX_SCRAPED_REPOS = 10

    def __init__(self, profiles_file=None):
        """Load the embedding model; profiles are loaded later via load_profiles().

        Args:
            profiles_file: Optional path remembered as the current database.
                It is only stored here, not read.
        """
        self.model = SentenceTransformer('all-mpnet-base-v2')
        self.profiles_file = profiles_file
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        # Profiles will be loaded when a database is selected
        self.stored_profiles = []
        # (profiles list, embeddings) pair for the currently loaded database
        self._embedding_cache = None

    def load_profiles(self, profiles_file):
        """Load the database of profiles from a JSON file.

        The previously loaded database is kept untouched when loading fails.

        Args:
            profiles_file: Path to a JSON file containing a list of profile dicts.

        Returns:
            True when the profiles were loaded, False otherwise (an error
            message is printed in that case).
        """
        try:
            # JSON is UTF-8 by specification; do not depend on the platform default.
            with open(profiles_file, 'r', encoding='utf-8') as f:
                profiles = json.load(f)
        except FileNotFoundError:
            print(f"Error: {profiles_file} not found!")
            return False
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"Error: {profiles_file} is not a valid JSON file!")
            return False
        except OSError as e:
            # e.g. the path is a directory or is not readable
            print(f"Error: could not read {profiles_file}: {e}")
            return False

        # Any *.json file can be selected, so make sure it really is a profile list.
        if not isinstance(profiles, list) or not all(isinstance(p, dict) for p in profiles):
            print(f"Error: {profiles_file} does not contain a list of profiles!")
            return False

        self.stored_profiles = profiles
        self.profiles_file = profiles_file
        self._embedding_cache = None  # embeddings belong to the previous database
        return True

    def list_available_databases(self):
        """List all JSON files in the current directory, sorted by name.

        Sorting keeps the numbering shown to the user stable between calls
        (os.listdir returns entries in arbitrary order).
        """
        return sorted(f for f in os.listdir() if f.endswith('.json'))

    def scrape_github_profile(self, username):
        """Scrape basic GitHub profile and repository information.

        Args:
            username: GitHub login to look up.

        Returns:
            A dict with 'username', 'bio', 'repositories' and 'languages', or
            None when the pages could not be fetched or parsed (for example an
            unknown user, which GitHub answers with HTTP 404).
        """
        profile_url = f'https://github.com/{username}'
        repos_url = f'https://github.com/{username}?tab=repositories'

        try:
            # Get profile page; a non-2xx answer must not be parsed as a profile
            profile_page = requests.get(
                profile_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
            profile_page.raise_for_status()
            profile_soup = BeautifulSoup(profile_page.text, 'html.parser')

            # Get repositories page
            repos_page = requests.get(
                repos_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
            repos_page.raise_for_status()
            repos_soup = BeautifulSoup(repos_page.text, 'html.parser')

            # Extract basic profile info
            bio_div = profile_soup.find('div', {'class': 'p-note'})
            bio = bio_div.text.strip() if bio_div else ""

            # Extract repositories
            repos = []
            # dict used as an insertion-ordered set: a plain set would make the
            # language order (and therefore the embedding text) vary between runs
            languages = {}
            repo_list = repos_soup.find_all('li', {'class': 'source'})

            for repo in repo_list[:self.MAX_SCRAPED_REPOS]:
                name_elem = repo.find('a', {'itemprop': 'name codeRepository'})
                if not name_elem:
                    continue

                # NOTE(review): 'pinned-item-desc' looks like the pinned-repo markup of
                # the overview page; the repositories tab appears to use
                # itemprop="description" instead, so fall back to that. Confirm
                # against the live page markup.
                desc_elem = (
                    repo.find('p', {'class': 'pinned-item-desc'})
                    or repo.find('p', {'itemprop': 'description'})
                )
                lang_elem = repo.find('span', {'itemprop': 'programmingLanguage'})

                name = name_elem.text.strip()
                description = desc_elem.text.strip() if desc_elem else ""
                language = lang_elem.text.strip() if lang_elem else ""

                if language:
                    languages[language] = None

                repos.append({
                    'name': name,
                    'description': description,
                    'language': language
                })

            return {
                'username': username,
                'bio': bio,
                'repositories': repos,
                'languages': list(languages)
            }

        except Exception as e:
            # Best effort: any network or parsing problem is reported, not raised.
            print(f"Error scraping profile for {username}: {e}")
            return None

    @staticmethod
    def _profile_text(profile_data):
        """Render a profile dict as the text that gets embedded.

        Missing keys and None values are treated as empty so that incomplete
        database entries do not raise or inject the word "None".
        """
        repositories = profile_data.get('repositories') or []
        languages = profile_data.get('languages') or []
        repo_summary = ' '.join(
            f"{r.get('name') or ''}: {r.get('description') or ''}" for r in repositories
        )
        lines = [
            f"User {profile_data.get('username') or ''}",
            f"Bio: {profile_data.get('bio') or ''}",
            f"Languages: {', '.join(languages)}",
            f"Repositories: {repo_summary}",
        ]
        # Keep the exact layout (leading newline, 8-space indent, trailing indent)
        # of the original template so embeddings of complete profiles are unchanged.
        indent = ' ' * 8
        return '\n' + ''.join(f"{indent}{line}\n" for line in lines) + indent

    def create_profile_embedding(self, profile_data):
        """Create text embedding for a profile.

        Args:
            profile_data: Profile dict (see class docstring for the keys).

        Returns:
            The embedding vector produced by the model for this profile.
        """
        return self.model.encode([self._profile_text(profile_data)])[0]

    def _get_stored_embeddings(self):
        """Return embeddings of the loaded profiles, computing them once per database.

        The cache is reused while `stored_profiles` is the same list object with
        the same length; in-place edits of individual entries are not detected.
        """
        cache = getattr(self, '_embedding_cache', None)
        if (
            cache is not None
            and cache[0] is self.stored_profiles
            and len(cache[1]) == len(self.stored_profiles)
        ):
            return cache[1]

        texts = [self._profile_text(profile) for profile in self.stored_profiles]
        # One batched encode call instead of one model call per profile
        embeddings = np.asarray(self.model.encode(texts))
        self._embedding_cache = (self.stored_profiles, embeddings)
        return embeddings

    def get_recommendations(self, username, num_recommendations=5):
        """Get project recommendations for a given username.

        Args:
            username: GitHub login whose profile is scraped and compared.
            num_recommendations: Maximum number of similar profiles to return.

        Returns:
            A list (most similar first) of dicts with 'username', 'similarity'
            (float) and 'repositories' (at most three).  The list is empty when
            no database is loaded, the username is blank, the limit is not
            positive, or the profile could not be scraped.  A stored profile
            with the same username as the query is never recommended.
        """
        if not self.stored_profiles:
            print("No database loaded! Please select a database first.")
            return []

        username = (username or '').strip()
        # A non-positive limit used to return every profile (slice [-0:]); a blank
        # name would scrape the GitHub home page instead of a profile.
        if not username or num_recommendations <= 0:
            return []

        # Scrape the user's profile
        user_profile = self.scrape_github_profile(username)
        if not user_profile:
            return []

        # Create embedding for user profile
        user_embedding = self.create_profile_embedding(user_profile)

        # Embeddings for stored profiles (cached per loaded database)
        stored_embeddings = self._get_stored_embeddings()

        # Calculate similarities
        similarities = cosine_similarity([user_embedding], stored_embeddings)[0]

        # Walk profiles from most to least similar, skipping the user themselves
        # (GitHub logins are case-insensitive).
        wanted = username.lower()
        recommendations = []
        for idx in np.argsort(similarities)[::-1]:
            similar_profile = self.stored_profiles[idx]
            if str(similar_profile.get('username') or '').lower() == wanted:
                continue
            recommendations.append({
                'username': similar_profile.get('username', ''),
                'similarity': float(similarities[idx]),
                'repositories': (similar_profile.get('repositories') or [])[:3]
            })
            if len(recommendations) == num_recommendations:
                break

        return recommendations

def select_database(recommender):
    """Interactively pick a profile database and load it into `recommender`.

    The JSON files reported by `recommender.list_available_databases()` are
    shown as a numbered menu.  The user may type a number, 'c' to enter a
    custom path, or 'q' to give up.  The menu is shown again after any
    invalid answer or failed load.

    Returns:
        True once a database was loaded successfully; False when the user
        quits or when no JSON database is available.
    """
    while True:
        print("\nAvailable databases:")
        available = recommender.list_available_databases()

        if not available:
            print("No JSON databases found in the current directory!")
            return False

        # Numbered menu, starting at 1
        print("\n".join(f"{number}. {name}" for number, name in enumerate(available, 1)))

        print("\nEnter the number of the database you want to use")
        print("Or enter 'q' to quit, 'c' to use custom path")

        answer = input("Your choice: ").strip().lower()

        if answer == 'q':
            return False

        if answer == 'c':
            path = input("Enter the full path to your JSON database: ").strip()
            if recommender.load_profiles(path):
                print(f"Successfully loaded database: {path}")
                return True
            continue

        # Anything else must be a menu number
        try:
            position = int(answer) - 1
            if position < 0 or position >= len(available):
                print("Invalid selection! Please try again.")
            elif recommender.load_profiles(available[position]):
                print(f"Successfully loaded database: {available[position]}")
                return True
        except ValueError:
            print("Invalid input! Please enter a number, 'q', or 'c'.")

def main():
    """Run the interactive command-line session of the recommender.

    A database must be chosen first; afterwards a three-entry menu is shown
    repeatedly until the user exits:
      1 - ask for a GitHub username and print the recommended repositories,
      2 - choose a different database,
      3 - leave the program.
    """
    # Initialize recommender
    engine = GitHubRecommender()

    print("Welcome to GitHub Project Recommender!")

    # Nothing can be recommended without a database
    if not select_database(engine):
        print("No database selected. Exiting...")
        return

    menu_lines = (
        "\nOptions:",
        "1. Get recommendations",
        "2. Change database",
        "3. Exit",
    )

    while True:
        for line in menu_lines:
            print(line)

        selection = input("Enter your choice (1-3): ").strip()

        if selection == '3':
            print("Thank you for using GitHub Project Recommender!")
            break

        if selection == '2':
            if not select_database(engine):
                print("Database selection cancelled.")
            continue

        if selection != '1':
            print("Invalid choice! Please enter 1, 2, or 3.")
            continue

        # Option 1: look up a user and show what similar profiles own
        handle = input("\nEnter a GitHub username: ").strip()
        print(f"\nFinding recommendations for {handle}...")
        results = engine.get_recommendations(handle)

        if not results:
            print("Could not find recommendations. Please check the username and try again.")
            continue

        print("\nRecommended Projects:")
        for entry in results:
            print(f"\nFrom {entry['username']} (Similarity: {entry['similarity']:.2f}):")
            for project in entry['repositories']:
                print(f"- {project['name']}")
                # The description line is skipped when it is empty
                if project['description']:
                    print(f"  Description: {project['description']}")
                print(f"  Language: {project['language']}")

# Start the interactive session when this code is executed as the main program.
# NOTE: main() blocks on input() prompts, so executing this cell in a notebook
# requires someone to answer them (see the captured output that follows).
if __name__ == '__main__':
    main()

Welcome to GitHub Project Recommender!

Available databases:
1. github_profiles.json
2. search_feedback.json

Enter the number of the database you want to use
Or enter 'q' to quit, 'c' to use custom path
Successfully loaded database: github_profiles.json

Options:
1. Get recommendations
2. Change database
3. Exit

Finding recommendations for chanakya2006...

Recommended Projects:

From django (Similarity: 0.54):
- django
  Description: The Web framework for perfectionists with deadlines.
  Language: Python
- channels
  Description: Developer-friendly asynchronous Django
  Language: Python

From tensorflow (Similarity: 0.45):
- tensorflow-docs
  Description: TensorFlow documentation
  Language: Jupyter Notebook
- tensorflow
  Description: An Open Source Machine Learning Framework for Everyone
  Language: Python

From pallets (Similarity: 0.44):
- flask
  Description: The Python micro framework for building web applications
  Language: Python
- jinja
  Description: A template engine for 