In [1]:
# Calculate document vectors (average of word vectors)
def average_word_vectors(words, model, num_features):
    """Return the mean embedding of the tokens in ``words`` known to ``model``.

    Args:
        words: Iterable of tokens to look up.
        model: Object exposing a ``wv`` attribute that supports ``token in wv``
            and ``wv[token]`` (e.g. a trained gensim ``Word2Vec`` model).
        num_features: Length of the vector returned when averaging; must match
            the length of the vectors stored in ``model.wv``.

    Returns:
        A 1-D array of length ``num_features``. Tokens missing from
        ``model.wv`` are ignored; if none of the tokens are known, the
        result is an all-zero float32 vector.
    """
    keyed_vectors = model.wv
    known_tokens = [token for token in words if token in keyed_vectors]

    total = np.zeros((num_features,), dtype="float32")
    if not known_tokens:
        # Nothing to average: hand back the zero vector unchanged.
        return total

    # Accumulate one vector at a time, in input order.
    for token in known_tokens:
        total = total + keyed_vectors[token]

    return total / len(known_tokens)


In [16]:
import numpy as np
from gensim.models import Word2Vec

# Sample data: every record is a plain dict whose skills are lists of string
# tokens (a few tokens are multi-word, e.g. "software engineering").
user_profiles = [
    {
        "mandatory_skills": ["data", "science", "machine", "learning", "software engineering"],
        "expertise_level": "intermediate",
        "contributor_type": "developer",  # illustrative contributor type
    },
    {
        "mandatory_skills": ["biology", "research", "genetics", "lab"],
        "expertise_level": "beginner",
        "contributor_type": "researcher",
    },
    {
        "mandatory_skills": ["environment", "sustainability", "climate", "change"],
        "expertise_level": "expert",
        "contributor_type": "environmentalist",
    },
]

# Projects carry an additional "good_to_have_skills" list on top of the
# fields that user profiles have.
project_descriptions = [
    {
        "mandatory_skills": ["data", "science", "machine", "learning"],
        "good_to_have_skills": ["data", "science", "machine", "learning", "software engineering"],
        "expertise_level": "intermediate",
        "contributor_type": "developer",
    },
    {
        "mandatory_skills": ["genomics", "study", "gene", "expression"],
        "good_to_have_skills": ["genome sequencing", "biological research"],
        "expertise_level": "beginner",
        "contributor_type": "researcher",
    },
    {
        "mandatory_skills": ["sustainable", "solutions", "carbon", "offset"],
        "good_to_have_skills": ["renewable energy", "environmental policy"],
        "expertise_level": "expert",
        "contributor_type": "environmentalist",
    },
]

# Numeric score assigned to each expertise label.
expertise_level_mapping = {"beginner": 0.2, "intermediate": 0.5, "expert": 0.8}

# Train Word2Vec model
# Each skills list is passed to Word2Vec as one "sentence", so every list
# entry (including multi-word entries such as 'software engineering') is
# treated as a single vocabulary token.
all_good_to_have_skills = [project['good_to_have_skills'] for project in project_descriptions]
all_mandatory_skills = [profile['mandatory_skills'] for profile in user_profiles + project_descriptions]

# A fixed seed plus a single worker thread removes thread-scheduling jitter so
# the similarities printed below are repeatable within a kernel session.
# Per the gensim docs, reproducibility across interpreter launches additionally
# requires setting the PYTHONHASHSEED environment variable.
model = Word2Vec(
    all_mandatory_skills + all_good_to_have_skills,
    vector_size=100,
    window=5,
    min_count=1,  # the corpus is tiny, so keep every token
    sg=0,         # CBOW
    seed=42,
    workers=1,
)

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1: First vector (array-like).
        vec2: Second vector (array-like), same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``. If either vector has zero
        norm the similarity is undefined; ``0.0`` is returned in that case
        instead of dividing by zero and propagating NaN.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # An all-zero vector (e.g. an average over no known words) has no
        # direction, so treat it as having no similarity to anything.
        return 0.0
    return np.dot(vec1, vec2) / norm_product

# Threshold for considering expertise level similarity
# NOTE(review): nothing in the matching loop below reads this value; it is
# kept only in case another cell depends on the name.
similarity_threshold = 0.3


def _skills_similarity(user_skill_set, user_vector, project_skills, num_features):
    """Score how well a user's skills cover one of a project's skill lists.

    Returns 1.0 when every entry of ``project_skills`` is in
    ``user_skill_set``; otherwise the cosine similarity between
    ``user_vector`` and the averaged word vector of ``project_skills``.
    """
    if set(project_skills).issubset(user_skill_set):
        return 1.0
    project_vector = average_word_vectors(project_skills, model, num_features)
    return cosine_similarity(user_vector, project_vector)


# Take the embedding width from the trained model instead of repeating a literal.
vector_size = model.wv.vector_size

# Iterate through user profiles and project descriptions
for i, user_profile in enumerate(user_profiles):
    # Everything that depends only on the user is computed once per user.
    user_skills = user_profile['mandatory_skills']
    user_skill_set = set(user_skills)
    user_vector = average_word_vectors(user_skills, model, vector_size)
    # Unknown expertise labels fall back to 0.0.
    user_expertise = expertise_level_mapping.get(user_profile['expertise_level'], 0.0)

    for j, project_description in enumerate(project_descriptions):
        print(f"User Profile {i+1} vs. Project {j+1}:")

        # Only pairs with the same contributor type are scored.
        if user_profile['contributor_type'] != project_description['contributor_type']:
            print("Contributor types do not match, skipping other similarities.")
            print()
            continue

        # Project good-to-have skills vs. the user's skills.
        good_to_have_skills_similarity = _skills_similarity(
            user_skill_set, user_vector, project_description['good_to_have_skills'], vector_size
        )

        # Project mandatory skills vs. the user's skills.
        mandatory_skills_similarity = _skills_similarity(
            user_skill_set, user_vector, project_description['mandatory_skills'], vector_size
        )

        # Map expertise levels to numerical values
        project_expertise = expertise_level_mapping.get(project_description['expertise_level'], 0.0)

        # Identical levels score 1; the score drops by the gap between the mapped values.
        expertise_level_similarity = 1 - abs(user_expertise - project_expertise)

        # Print the similarities for each aspect
        print(f"Mandatory Skills Similarity: {mandatory_skills_similarity:.2f}")
        print(f"Good-to-Have Skills Similarity: {good_to_have_skills_similarity:.2f}")
        print(f"Expertise Level Similarity: {expertise_level_similarity:.2f}")

        # Unweighted sum of the three aspect scores.
        combined_similarity = mandatory_skills_similarity + good_to_have_skills_similarity + expertise_level_similarity
        print(f"Combined Similarity: {combined_similarity:.2f}")
        print()  # Add a newline for clarity


User Profile 1 vs. Project 1:
Mandatory Skills Similarity: 0.87
Good-to-Have Skills Similarity: 0.07
Expertise Level Similarity: 0.95
Combined Similarity: 1.89

User Profile 1 vs. Project 2:
Contributor types do not match, skipping other similarities.

User Profile 1 vs. Project 3:
Contributor types do not match, skipping other similarities.

User Profile 2 vs. Project 1:
Contributor types do not match, skipping other similarities.

User Profile 2 vs. Project 2:
Skills similarity below threshold, expertise level similarity not considered.

User Profile 2 vs. Project 3:
Contributor types do not match, skipping other similarities.

User Profile 3 vs. Project 1:
Contributor types do not match, skipping other similarities.

User Profile 3 vs. Project 2:
Contributor types do not match, skipping other similarities.

User Profile 3 vs. Project 3:
Mandatory Skills Similarity: -0.11
Good-to-Have Skills Similarity: 0.51
Expertise Level Similarity: 0.98
Combined Similarity: 1.37

