In [1]:
# importing dependencies
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import webscraping as wb
import time 

In [2]:
# Initialize webscraper
obj = wb.webscrape("chromedriver.exe")

# Get repository URLs
github = "https://github.com"
str_l = "chanakya2006"
repo_list = obj.get_repo_names_from_target_name(str_l)
repo_urls = [github + repo for repo in repo_list]



# Get commit data for each repository
commit_data = []
for i, repo_url in enumerate(repo_urls):
    commits = obj.get_commits_from_repo_url(repo_url)
    # Extract repository name from URL
    repo_name = repo_url.split('/')[-1]
    
    # For each contributor in the commits
    for contributor, commit_count in commits.items():
        commit_data.append({
            'developer': contributor.strip('/'),
            'project': repo_url,
            'commits': commit_count
        })
    print(f"Processed {i + 1} of {len(repo_urls)} repositories")



Processed 1 of 9 repositories
Processed 2 of 9 repositories
Processed 3 of 9 repositories
Processed 4 of 9 repositories
Processed 5 of 9 repositories
Processed 6 of 9 repositories
Processed 7 of 9 repositories
Processed 8 of 9 repositories
Processed 9 of 9 repositories


In [3]:
# Create DataFrame
df = pd.DataFrame(commit_data)

# Define weights    
w_commits = 4.0

# Compute interaction scores
df['interaction'] = w_commits * df['commits']



# Create user-item matrix
user_item_matrix = df.pivot_table(
    index='developer', 
    columns='project', 
    values='commits', 
    fill_value=0
)



# Convert to sparse matrix
sparse_matrix = csr_matrix(user_item_matrix.values)


# Compute cosine similarity between developers
similarities = cosine_similarity(sparse_matrix)


# Convert to DataFrame for readability
similarity_df = pd.DataFrame(
    similarities, 
    index=user_item_matrix.index, 
    columns=user_item_matrix.index
)
#why am i doing this ;_;

# for i in user_item_matrix.index:
    # if i != str:
    #     l = []
    #     print(obj.get_repo_names_from_target_name(i))
    #     l.append(obj.get_repo_names_from_target_name(i))
    #     #l.pop(str_l)
    #     print(l)





In [4]:
# sys rec for 1st iteration
def recommend_projects(target_developer, user_item_matrix, similarity_df, top_n=2):
    if target_developer not in user_item_matrix.index:
        return f"Developer {target_developer} not found."
    
    # Get similar developers
    similar_devs = similarity_df[target_developer].drop(target_developer).sort_values(ascending=False)

    # Get projects the target developer has already interacted with
    seen_projects = set(user_item_matrix.loc[target_developer][user_item_matrix.loc[target_developer] > 0].index)

    # Get projects from similar developers
    recommendations = {}
    for similar_dev, similarity_score in similar_devs.items():
        similar_projects = user_item_matrix.loc[similar_dev][user_item_matrix.loc[similar_dev] > 0].index
        for project in similar_projects:
            if project not in seen_projects:
                recommendations[project] = recommendations.get(project, 0) + similarity_score

    # Sort recommendations by strength and return top N
    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [project for project, score in sorted_recommendations[:top_n]]

In [5]:
# Supposed 2nd part 
u = df[df["developer"] != str_l]["developer"].unique() 
#Second Iteration Function:-
only_index = user_item_matrix.index.drop(str_l)
print(only_index)

index_till_5 = only_index[:5]


print("Target names with No database target value in form of database:- ", u)
def repo_from_target_2nd_iteration():    
    for i in u:
        print("i = ",i)
        z = obj.get_repo_names_from_target_name(i)
        time.sleep(15)
        print(z)
        print(i)

#Series to list converter:-
l = []
for i in user_item_matrix.index: 
    l.append(i)
l.remove(str_l)
print("CONVERTED LIST BE LIKE:- ",l)


#2nd user matrix
# user_item_matrix_2 = df.pivot_table(
#     index = u,
#     columns= repo_from_target_2nd_iteration(),
#     values='commits'
# )

# 2nd alternative for above
user_item_matrix_2 = df[df["developer"].isin(u)].pivot_table(
    index="developer",  # Ensure this matches df's column
    columns=repo_from_target_2nd_iteration(),
    values="commits"
)


# Convert to sparse matrix
sparse_matrix_2 = csr_matrix(user_item_matrix_2.values)

# Compute cosine similarity between developers
similarities_2 = cosine_similarity(sparse_matrix_2)

# Convert to DataFrame for readability
similarity_df_2 = pd.DataFrame(
    similarities_2, 
    index=user_item_matrix_2.index, 
    columns=user_item_matrix_2.index
)

Index(['404avinotfound', 'IamHV856156', 'Jaiveer2525', 'Parulsri1616',
       'Pixeler5diti', 'V8V88V8V88', 'mexanik619', 'shivam8112005'],
      dtype='object', name='developer')
Target names with No database target value in form of database:-  ['V8V88V8V88' 'Pixeler5diti' 'IamHV856156' 'Jaiveer2525' 'shivam8112005'
 '404avinotfound' 'mexanik619' 'Parulsri1616']
CONVERTED LIST BE LIKE:-  ['404avinotfound', 'IamHV856156', 'Jaiveer2525', 'Parulsri1616', 'Pixeler5diti', 'V8V88V8V88', 'mexanik619', 'shivam8112005']
i =  V8V88V8V88
['/V8V88V8V88?tab=overview&from=2020-12-01&to=2020-12-31', '/V8V88V8V88/face-recognition/stargazers', '/V8V88V8V88/DeepSeek-R1', '/V8V88V8V88/thefossclub-app', '/thefossclub/Passvyn', '/V8V88V8V88/dotfiles', '/V8V88V8V88/Fedorable', '/V8V88V8V88/MarkVue', '/V8V88V8V88/simple-round-robin-scheduler', '/V8V88V8V88/Rust-RISCV-Compiler', '/V8V88V8V88?tab=overview&from=2018-12-01&to=2018-12-31', '/V8V88V8V88/zed', '/V8V88V8V88/SkibidiSpeak', '/thefossclub/thefossclub.

In [11]:
# rec sys for 2nd iteration 
def recommend_projects(target_deve, user_item_matrix_2, similarity_df_2, top_n_2=4):
    if target_deve not in user_item_matrix_2.index:
        return f"Developer {target_deve} not found."
    
    # Get similar developers
    similar_devs_2 = similarity_df[target_deve].drop(target_deve).sort_values(ascending=False)


    # Get projects the target developer has already interacted with
    seen_projects_2 = set(user_item_matrix.loc[target_deve][user_item_matrix.loc[target_deve] > 0].index)

    # Get projects from similar developers
    recommendations_2 = {}
    for similar_dev_2, similarity_score_2 in similar_devs_2.items():
        similar_projects_2 = user_item_matrix_2.loc[similar_dev_2][user_item_matrix_2.loc[similar_dev_2] > 0].index
        for project_2 in similar_projects_2:
            if project_2 not in seen_projects_2:
                recommendations_2[project_2] = recommendations_2.get(project_2, 0) + similarity_score_2

    # Sort recommendations by strength and return top N
    sorted_recommendations_2 = sorted(recommendations_2.items(), key_2=lambda x: x[1], reverse=True)
    return [project_2 for project_2, score_2 in sorted_recommendations_2[:top_n_2]]

In [12]:
#results
# Example usage
print("\nUser-Item Matrix:")
print(user_item_matrix_2)

print("\nSimilarity Matrix:")
print(similarity_df_2)

# Get recommendations for a specific developer
target_dev = input("Enter your username: ")  # Use first developer as example
print(target_dev)
print(f"\nRecommended projects for {target_dev}:")
print(recommend_projects(target_dev, user_item_matrix_2, similarity_df_2))


User-Item Matrix:
                commits
developer              
404avinotfound      5.0
IamHV856156         6.0
Jaiveer2525         1.0
Parulsri1616        7.0
Pixeler5diti        5.0
V8V88V8V88         12.5
mexanik619          6.0
shivam8112005       1.0

Similarity Matrix:
developer       404avinotfound  IamHV856156  Jaiveer2525  Parulsri1616  \
developer                                                                
404avinotfound             1.0          1.0          1.0           1.0   
IamHV856156                1.0          1.0          1.0           1.0   
Jaiveer2525                1.0          1.0          1.0           1.0   
Parulsri1616               1.0          1.0          1.0           1.0   
Pixeler5diti               1.0          1.0          1.0           1.0   
V8V88V8V88                 1.0          1.0          1.0           1.0   
mexanik619                 1.0          1.0          1.0           1.0   
shivam8112005              1.0          1.0          1.

In [14]:
#TEST CELL

# print(user_item_matrix)
# print(user_item_matrix.index)
# print(user_item_matrix.index.drop(str_l))
# only_index = user_item_matrix.index.drop(str_l)
# print(only_index)

# index_till_5 = only_index[:5]

# def sirf_index():
#     for i in index_till_5:
#         print(i)
#         target_names = obj.get_repo_names_from_target_name(i)
#         print(target_names[:11])

# sirf_index()
# so above snippet would initiate 2nd iteration of the collaborative filtering
#i dont fucking now why its not working ;_;
#fuck this
#UPDATE: Its Working!!!!
#Had to prune Extra Users cuz of mem leaks and mem hogs 

# for 1st iteration --> 'project' = repo_url   2nd iteration ---> 'project' = repo_url(but for ones )

# print(u)
# for i in u:
#     print(i)
#     repo_link = obj.get_repo_names_from_target_name(i[:5])

# print(repo_link)

print(user_item_matrix)
print(user_item_matrix_2)

repo_from_target_2nd_iteration()

project         https://github.com/chanakya2006/fitness_api  \
developer                                                     
404avinotfound                                          0.0   
IamHV856156                                             0.0   
Jaiveer2525                                             0.0   
Parulsri1616                                            0.0   
Pixeler5diti                                            0.0   
V8V88V8V88                                              0.0   
chanakya2006                                            2.0   
mexanik619                                              0.0   
shivam8112005                                           0.0   

project         https://github.com/chanakya2006/github-repo-recommendation-on-basis-of-profile  \
developer                                                                                        
404avinotfound                                                5.0                                
IamHV856156 

  """


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"tag name","selector":"turbo-frame"}
  (Session info: chrome=133.0.6943.127); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF680536F15+28773]
	(No symbol) [0x00007FF6804A2600]
	(No symbol) [0x00007FF680338FAA]
	(No symbol) [0x00007FF68038F286]
	(No symbol) [0x00007FF68038F4BC]
	(No symbol) [0x00007FF6803E2A27]
	(No symbol) [0x00007FF6803B728F]
	(No symbol) [0x00007FF6803DF6F3]
	(No symbol) [0x00007FF6803B7023]
	(No symbol) [0x00007FF68037FF5E]
	(No symbol) [0x00007FF6803811E3]
	GetHandleVerifier [0x00007FF68088425D+3490733]
	GetHandleVerifier [0x00007FF68089BA43+3586963]
	GetHandleVerifier [0x00007FF68089147D+3544525]
	GetHandleVerifier [0x00007FF6805FC9DA+838442]
	(No symbol) [0x00007FF6804AD04F]
	(No symbol) [0x00007FF6804A9614]
	(No symbol) [0x00007FF6804A97B6]
	(No symbol) [0x00007FF680498CE9]
	BaseThreadInitThunk [0x00007FF88E3D257D+29]
	RtlUserThreadStart [0x00007FF88F9AAA58+40]
