In [1]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import webscraping as wb

# Initialize webscraper
obj = wb.webscrape("chromedriver.exe")

# Get repository URLs
github = "https://github.com"
repo_list = obj.get_repo_names_from_target_name("chanakya2006")
repo_urls = [github + repo for repo in repo_list]

# Get commit data for each repository
commit_data = []
for i, repo_url in enumerate(repo_urls):
    commits = obj.get_commits_from_repo_url(repo_url)
    # Extract repository name from URL
    repo_name = repo_url.split('/')[-1]
    
    # For each contributor in the commits
    for contributor, commit_count in commits.items():
        commit_data.append({
            'developer': contributor.strip('/'),
            'project': repo_name,
            'commits': commit_count
        })
    print(f"Processed {i + 1} of {len(repo_urls)} repositories")

# Create DataFrame
df = pd.DataFrame(commit_data)

# Define weights    
w_commits = 4.0

# Compute interaction scores
df['interaction'] = w_commits * df['commits']

# Create user-item matrix
user_item_matrix = df.pivot_table(
    index='developer', 
    columns='project', 
    values='commits', 
    fill_value=0
)

# Convert to sparse matrix
sparse_matrix = csr_matrix(user_item_matrix.values)

# Compute cosine similarity between developers
similarities = cosine_similarity(sparse_matrix)

# Convert to DataFrame for readability
similarity_df = pd.DataFrame(
    similarities, 
    index=user_item_matrix.index, 
    columns=user_item_matrix.index
)

def recommend_projects(target_developer, user_item_matrix, similarity_df, top_n=2):
    if target_developer not in user_item_matrix.index:
        return f"Developer {target_developer} not found."
    
    # Get similar developers
    similar_devs = similarity_df[target_developer].drop(target_developer).sort_values(ascending=False)

    # Get projects the target developer has already interacted with
    seen_projects = set(user_item_matrix.loc[target_developer][user_item_matrix.loc[target_developer] > 0].index)

    # Get projects from similar developers
    recommendations = {}
    for similar_dev, similarity_score in similar_devs.items():
        similar_projects = user_item_matrix.loc[similar_dev][user_item_matrix.loc[similar_dev] > 0].index
        for project in similar_projects:
            if project not in seen_projects:
                recommendations[project] = recommendations.get(project, 0) + similarity_score

    # Sort recommendations by strength and return top N
    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [project for project, score in sorted_recommendations[:top_n]]

# Example usage
print("\nUser-Item Matrix:")
print(user_item_matrix)

print("\nSimilarity Matrix:")
print(similarity_df)

# Get recommendations for a specific developer
target_dev = user_item_matrix.index[0]  # Use first developer as example
print(f"\nRecommended projects for {target_dev}:")
print(recommend_projects(target_dev, user_item_matrix, similarity_df))

Processed 1 of 15 repositories
Processed 2 of 15 repositories
Processed 3 of 15 repositories
Processed 4 of 15 repositories
Processed 5 of 15 repositories
Processed 6 of 15 repositories
Processed 7 of 15 repositories
Processed 8 of 15 repositories
Processed 9 of 15 repositories
Processed 10 of 15 repositories
Processed 11 of 15 repositories
Processed 12 of 15 repositories
Processed 13 of 15 repositories
Processed 14 of 15 repositories
Processed 15 of 15 repositories

User-Item Matrix:
project         fitness_api  github-repo-recommendation-on-basis-of-profile  \
developer                                                                     
404avinotfound          0.0                                             2.0   
IamHV856156             0.0                                             0.0   
Jaiveer2525             0.0                                             0.0   
Parulsri1616            0.0                                             1.0   
Pixeler5diti            0.0         