In [13]:
import json
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances

In [22]:
with open('user_response.json', 'r') as json_file:
    jsondata = json.load(json_file)
    print("Original data:", jsondata)

Original data: [{'PreferredLanguage': 'English', 'TrackLearned': 'Full-stack', 'TechnologyLearned': '.NET Core', 'TrackLevel': 'Intermediate', 'ReferralSource': 'Social media', 'EmploymentStatus': 'Employed', 'BasicProgrammingLanguagesKnowledge': 'C#, JavaScript', 'ProficientProgrammingLanguages': 'C#', 'PreferredLearningStyle': 'Videos', 'LearningFrequency': 'Daily', 'PreferredCommunicationMethod': 'Chat', 'TechnologyOfInterest': 'React', 'ShiftingFromAnotherCareer': False, 'WeeklyHoursDedicatedToLearningAndCollaboration': 15, 'MotivationForLearningAndCollaboration': 'Career advancement', 'GoalsOnThePlatform': 'Learning new skills', 'ComfortLevelWithRemoteWorkOrCollaboration': True, 'ProjectTypeInterest': 'Freelance', 'UserId': 'e46d51bf-3680-4e38-9ee9-9721787317b4'}, {'PreferredLanguage': 'Arabic', 'TrackLearned': 'Frontend', 'TechnologyLearned': 'React', 'TrackLevel': 'Advanced', 'ReferralSource': 'Friend', 'EmploymentStatus': 'Unemployed', 'BasicProgrammingLanguagesKnowledge': 'HTM

In [27]:
le = LabelEncoder()
data = pd.DataFrame(jsondata).dropna().copy()
data.head(5)

Unnamed: 0,PreferredLanguage,TrackLearned,TechnologyLearned,TrackLevel,ReferralSource,EmploymentStatus,BasicProgrammingLanguagesKnowledge,ProficientProgrammingLanguages,PreferredLearningStyle,LearningFrequency,PreferredCommunicationMethod,TechnologyOfInterest,ShiftingFromAnotherCareer,WeeklyHoursDedicatedToLearningAndCollaboration,MotivationForLearningAndCollaboration,GoalsOnThePlatform,ComfortLevelWithRemoteWorkOrCollaboration,ProjectTypeInterest,UserId
0,English,Full-stack,.NET Core,Intermediate,Social media,Employed,"C#, JavaScript",C#,Videos,Daily,Chat,React,False,15,Career advancement,Learning new skills,True,Freelance,e46d51bf-3680-4e38-9ee9-9721787317b4
1,Arabic,Frontend,React,Advanced,Friend,Unemployed,"HTML, CSS, JavaScript",JavaScript,Books,Weekly,Email,Vue.js,True,10,Skill development,Finding job opportunities,True,Open-source,6a42e8d3-3e26-41b7-9a5c-0a1f1f2a5ec0
2,English,Data Science,Python,Beginner,Search engine,Student,Python,,Tutorials,Daily,Video call,Machine Learning,False,20,Personal interest,Networking,True,Personal,7e95cf13-2f18-45af-90d1-02e37cb0e83c
3,English,Backend,.NET Core,Intermediate,Social media,Employed,"C#, SQL",C#,Videos,Weekly,Chat,ASP.NET Core,False,15,Skill development,Learning new skills,True,Freelance,c7f17e01-7a4b-4722-9857-8ef972e6c971
4,English,Full-stack,Angular,Advanced,Friend,Employed,"JavaScript, TypeScript, HTML, CSS","JavaScript, TypeScript",Videos,Daily,Chat,Node.js,False,10,Skill development,Networking,True,Freelance,8e7cb4ae-daae-4969-8f05-35af9ff47d0d


In [28]:
global recommendations, k
def recommend_collaborators(user_id, data, k=5):

    recommendations = {}
    user_data = data[data["UserId"] == user_id]
    if user_data.empty:
        print(f"User with ID {user_id} not found in data.")  # error message, if not necessary make it return a check for the system.
        return recommendations 
    
    user_technology = user_data["TechnologyOfInterest"].tolist()
    
    for technology in user_technology:
        filtered_users = data[data['TechnologyLearned'] == technology]
        filtered_users = filtered_users[filtered_users['UserId'] != user_id]

        if filtered_users.empty:
            recommendations[technology] = "No users found for this technology" # error message, if not necessary make it return a zero.. or a FALSE logic
            continue

        encoded_data = filtered_users.select_dtypes(include=[object]).apply(le.fit_transform)
        similarity_matrix = euclidean_distances(encoded_data)
        similar_users = filtered_users["UserId"].iloc[similarity_matrix[0].argsort()[:k]]
        recommendations[technology] = similar_users.tolist()

    return recommendations

#example
recommend_collaborators('f63b22fc-2de8-41a1-a45a-c34da9e547a5', data)

{'Angular': ['8e7cb4ae-daae-4969-8f05-35af9ff47d0d',
  'e17bda85-98e1-4dc5-a7e5-3fd790116fcf',
  '15c4437b-c205-4e82-9874-15d222f0e60a',
  '02cc0270-0aaf-4cd9-9937-fccf5cd2afcb']}

In [29]:
#another_version
global recommendations, k
def recommend_collaborators(user_id, k=5):

    recommendations = {} 
    if user_id not in data['UserId'].tolist():
        print(f"User with ID {user_id} not found in data.")
        return recommendations  # Empty dictionary

    user_technology = data[data["UserId"] == user_id][["TechnologyOfInterest"]]
    for technology in user_technology["TechnologyOfInterest"].explode().unique():
        filtered_users = data[data['TechnologyLearned'] == technology]
        filtered_users = filtered_users[filtered_users['UserId'] != user_id]

        if len(filtered_users) == 0:
            recommendations[technology] = "No users found for this technology"
            continue

        filtered = filtered_users.drop(columns=['TechnologyLearned', 'TechnologyOfInterest']).copy()
        filtered['UserId'] = filtered['UserId'].astype(str)  # Ensure string user IDs
        encoded_data = filtered.drop(columns=['UserId'])
        encoded_data = encoded_data.apply(le.fit_transform, axis=0)
        similarity_matrix = euclidean_distances(encoded_data)
        similar_df = pd.DataFrame({'UserId': filtered['UserId'], 'Score': similarity_matrix[0]})
        similar_df = similar_df.sort_values(by='Score', ascending=True)
        similar_df = similar_df['UserId'].tolist()  # Select only User IDs
        recommendations.setdefault(technology, similar_df)[:k] = similar_df[:k]

    return recommendations

# Example usage
recommend_collaborators('02cc0270-0aaf-4cd9-9937-fccf5cd2afcb')

{'React': ['6a42e8d3-3e26-41b7-9a5c-0a1f1f2a5ec0',
  'f63b22fc-2de8-41a1-a45a-c34da9e547a5']}