In [56]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import webscraping as wb

# Initialize webscraper
obj = wb.webscrape("chromedriver.exe")

# Get repository URLs
github = "https://github.com"
str_l = "chanakya2006"
repo_list = obj.get_repo_names_from_target_name(str_l)
repo_urls = [github + repo for repo in repo_list]



# Get commit data for each repository
commit_data = []
for i, repo_url in enumerate(repo_urls):
    commits = obj.get_commits_from_repo_url(repo_url)
    # Extract repository name from URL
    repo_name = repo_url.split('/')[-1]
    
    # For each contributor in the commits
    for contributor, commit_count in commits.items():
        commit_data.append({
            'developer': contributor.strip('/'),
            'project': repo_url,
            'commits': commit_count
        })
    print(f"Processed {i + 1} of {len(repo_urls)} repositories")

# Create DataFrame
df = pd.DataFrame(commit_data)

# Define weights    
w_commits = 4.0

# Compute interaction scores
df['interaction'] = w_commits * df['commits']



# Create user-item matrix
user_item_matrix = df.pivot_table(
    index='developer', 
    columns='project', 
    values='commits', 
    fill_value=0
)



# Convert to sparse matrix
sparse_matrix = csr_matrix(user_item_matrix.values)


# Compute cosine similarity between developers
similarities = cosine_similarity(sparse_matrix)


# Convert to DataFrame for readability
similarity_df = pd.DataFrame(
    similarities, 
    index=user_item_matrix.index, 
    columns=user_item_matrix.index
)
#why am i doing this ;_;

# for i in user_item_matrix.index:
    # if i != str:
    #     l = []
    #     print(obj.get_repo_names_from_target_name(i))
    #     l.append(obj.get_repo_names_from_target_name(i))
    #     #l.pop(str_l)
    #     print(l)



# sys rec for 1st iteration
def recommend_projects(target_developer, user_item_matrix, similarity_df, top_n=2):
    if target_developer not in user_item_matrix.index:
        return f"Developer {target_developer} not found."
    
    # Get similar developers
    similar_devs = similarity_df[target_developer].drop(target_developer).sort_values(ascending=False)

    # Get projects the target developer has already interacted with
    seen_projects = set(user_item_matrix.loc[target_developer][user_item_matrix.loc[target_developer] > 0].index)

    # Get projects from similar developers
    recommendations = {}
    for similar_dev, similarity_score in similar_devs.items():
        similar_projects = user_item_matrix.loc[similar_dev][user_item_matrix.loc[similar_dev] > 0].index
        for project in similar_projects:
            if project not in seen_projects:
                recommendations[project] = recommendations.get(project, 0) + similarity_score

    # Sort recommendations by strength and return top N
    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [project for project, score in sorted_recommendations[:top_n]]




# print(user_item_matrix)




#Second Iteration Function:-
only_index = user_item_matrix.index.drop(str_l)
print(only_index)

index_till_5 = only_index[:5]

def sirf_index_5():
    global i
    for i in index_till_5:
        print(i)
        target_names = obj.get_repo_names_from_target_name(i)
        print(target_names[:11])
sirf_index_5()

i

#2nd user matrix
user_item_matrix_2 = df.pivot_table(
    index = only_index[:5],
    columns= i[:5],
    values='commits'
)

# Convert to sparse matrix
sparse_matrix_2 = csr_matrix(user_item_matrix_2.values)

# Compute cosine similarity between developers
similarities_2 = cosine_similarity(sparse_matrix_2)

# Convert to DataFrame for readability
similarity_df_2 = pd.DataFrame(
    similarities_2, 
    index=user_item_matrix_2.index, 
    columns=user_item_matrix_2.index
)



# rec sys for 2nd iteration 
def recommend_projects(target_deve, user_item_matrix_2, similarity_df_2, top_n_2=4):
    if target_deve not in user_item_matrix_2.index:
        return f"Developer {target_deve} not found."
    
    # Get similar developers
    similar_devs_2 = similarity_df[target_deve].drop(target_deve).sort_values(ascending=False)

    # Get projects the target developer has already interacted with
    seen_projects_2 = set(user_item_matrix.loc[target_deve][user_item_matrix.loc[target_deve] > 0].index)

    # Get projects from similar developers
    recommendations = {}
    for similar_dev_2, similarity_score_2 in similar_devs_2.items():
        similar_projects = user_item_matrix_2.loc[similar_dev_2][user_item_matrix_2.loc[similar_dev_2] > 0].index
        for project_2 in similar_projects:
            if project_2 not in seen_projects_2:
                recommendations[project_2] = recommendations.get(project_2, 0) + similarity_score_2

    # Sort recommendations by strength and return top N
    sorted_recommendations_2 = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [project_2 for project_2, score_2 in sorted_recommendations_2[:top_n_2]]

# Example usage
print("\nUser-Item Matrix:")
print(user_item_matrix_2)

print("\nSimilarity Matrix:")
print(similarity_df_2)

# Get recommendations for a specific developer
target_dev = input("Enter your username: ")  # Use first developer as example
print(target_dev)
print(f"\nRecommended projects for {target_dev}:")
print(recommend_projects(target_dev, user_item_matrix_2, similarity_df_2))



Processed 1 of 15 repositories
Processed 2 of 15 repositories
Processed 3 of 15 repositories
Processed 4 of 15 repositories
Processed 5 of 15 repositories
Processed 6 of 15 repositories
Processed 7 of 15 repositories
Processed 8 of 15 repositories
Processed 9 of 15 repositories
Processed 10 of 15 repositories
Processed 11 of 15 repositories
Processed 12 of 15 repositories
Processed 13 of 15 repositories
Processed 14 of 15 repositories
Processed 15 of 15 repositories
Index(['404avinotfound', 'IamHV856156', 'Jaiveer2525', 'Parulsri1616',
       'Pixeler5diti', 'V8V88V8V88', 'mexanik619', 'shivam8112005'],
      dtype='object', name='developer')
404avinotfound
['/404avinotfound/Coding-and-Decoding', '/404avinotfound?tab=overview&from=2025-02-01&to=2025-02-22', '/404avinotfound?tab=overview&from=2024-12-01&to=2024-12-31', '/pkprajapati7402/AyurvediCure', '/404avinotfound/Coding-and-Decoding', '/404avinotfound/Coding-and-Decoding']
IamHV856156
['/IamHV856156/IamHV856156', 'mailto:Vashisht.p

ValueError: Grouper and axis must be same length

In [49]:
# print(user_item_matrix)
# print(user_item_matrix.index)
# print(user_item_matrix.index.drop(str_l))
# only_index = user_item_matrix.index.drop(str_l)
# print(only_index)

# index_till_5 = only_index[:5]

# def sirf_index():
#     for i in index_till_5:
#         print(i)
#         target_names = obj.get_repo_names_from_target_name(i)
#         print(target_names[:11])

# sirf_index()
# so above snippet would initiate 2nd iteration of the collaborative filtering
#i dont fucking now why its not working ;_;
#fuck this
#UPDATE: Its Working!!!!
#Had to prune Extra Users cuz of mem leaks and mem hogs 

print(user_item_matrix.columns)

sirf_index_5()

Index(['https://github.com/chanakya2006/fitness_api',
       'https://github.com/chanakya2006/github-repo-recommendation-on-basis-of-profile',
       'https://github.com/chanakya2006/python',
       'https://github.com/chanakya2006/thefossclub.github.io',
       'https://github.com/thefossclub/thefossclub.github.io'],
      dtype='object', name='project')
404avinotfound
['/404avinotfound/Coding-and-Decoding', '/404avinotfound?tab=overview&from=2025-02-01&to=2025-02-22', '/404avinotfound?tab=overview&from=2024-12-01&to=2024-12-31', '/pkprajapati7402/AyurvediCure', '/404avinotfound/Coding-and-Decoding', '/404avinotfound/Coding-and-Decoding']
IamHV856156
['/IamHV856156/IamHV856156', 'mailto:Vashisht.project@protonmail.com', '/IamHV856156/IamHV856156', '/IamHV856156/Footstep-Energy-Generator', '/IamHV856156/thefossclub.github.io', '/thefossclub/thefossclub.github.io', '/IamHV856156/thefossclub.github.io/stargazers', '/IamHV856156?tab=overview&from=2025-02-01&to=2025-02-22', '/IamHV856156?t