In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment,
# then read the connection settings used to build the Neo4j driver.
# os.getenv yields None for any variable that is not set.
load_dotenv()
uri, user, password = (os.getenv(key) for key in ('uri', 'user', 'password'))

## Tech skill similarity score function

In [114]:
import os
from neo4j import GraphDatabase


# Notebook-wide Neo4j driver used by run_query; `uri`, `user` and `password`
# are the values read from the environment in the first cell.
driver = GraphDatabase.driver(uri, auth=(user, password))


# Cypher templates. The framework name is always supplied through the `$name`
# query parameter instead of being spliced into the text, so quotes or Cypher
# syntax inside a name cannot change (or break) the statement.
_PREREQUISITES_QUERY = '''
MATCH (n:Framework {name: $name})-[:REQUIRES*1..3]->(b)
RETURN DISTINCT b.name AS name
'''

_SIMILAR_QUERY = '''
MATCH (n:Framework {name: $name})-[:RELATED_TO*1..3]-(b)
RETURN DISTINCT b.name AS name
'''

_DOMAIN_QUERY = '''
MATCH (n:Framework {name: $name})-[:IN_DOMAIN]->(b)
RETURN DISTINCT b.name AS name
'''


def run_query(query, parameters=None):
    """Run a Cypher query on the shared ``driver`` and return its records.

    Args:
        query: Cypher text, optionally containing ``$placeholders``.
        parameters: Optional dict mapping placeholder names to values.
            Omitting it keeps the original single-argument behaviour.

    Returns:
        A list with every record produced by the query.
    """
    with driver.session() as session:
        # Collect the records before the session context exits.
        return list(session.run(query, parameters))


def _fetch_names(query, node_name):
    """Run ``query`` with ``$name`` bound to ``node_name``; return the ``name`` column."""
    return [record["name"] for record in run_query(query, {"name": node_name})]


def fetch_prerequisite_nodes(node_name):
    """Return names of nodes reachable from the Framework via 1-3 outgoing REQUIRES hops."""
    return _fetch_names(_PREREQUISITES_QUERY, node_name)


def fetch_similar_nodes(node_name):
    """Return names of nodes within 1-3 RELATED_TO hops (either direction) of the Framework."""
    return _fetch_names(_SIMILAR_QUERY, node_name)


def fetch_domain_name(node_name):
    """Return names of nodes the Framework points to via IN_DOMAIN (empty list if none)."""
    return _fetch_names(_DOMAIN_QUERY, node_name)

def compare_node_domains(domain_lst_1, domain_lst_2):
    """Tell whether two domain lists have at least one domain in common.

    Returns False when either list is empty or None; otherwise True exactly
    when some entry of ``domain_lst_1`` also appears in ``domain_lst_2``
    (identical lists are therefore a match as well).
    """
    # Guard clause: nothing to compare unless both sides carry domains.
    if not (domain_lst_1 and domain_lst_2):
        return False
    return not set(domain_lst_1).isdisjoint(domain_lst_2)


def calculate_similarity_score(skills1, skills2):
    """Score how well the skills in ``skills1`` are matched by ``skills2``.

    For each skill in ``skills1``:

    * present verbatim in ``skills2``: +1 to the numerator and denominator;
    * otherwise every skill in ``skills2`` is checked, first rule that applies:
        1. skill1 is a prerequisite of skill2: +1 / +1, and the scan of
           ``skills2`` stops for this skill1;
        2. skill1 is among skill2's similar nodes: +0.5 / +1;
        3. the two skills share a domain: counted, no credit yet;
        4. skill2 is a prerequisite of skill1: +0.5 / +1;
      after the scan, two or more same-domain hits add +0.4 / +1.

    Graph lookups are memoised for the duration of the call and evaluated
    lazily, so each (lookup kind, skill) pair hits the database at most once
    instead of once per skills1/skills2 combination.

    Args:
        skills1: Iterable of skill names to be scored (names must be hashable).
        skills2: Collection of skill names to compare against.

    Returns:
        ``numerator / denominator`` rounded to two decimals, or ``0`` when
        ``skills1`` is empty.
    """
    full_credit = 1
    similar_credit = 0.5
    dependency_credit = 0.5
    same_domain_credit = 0.4
    same_domain_threshold = 2

    prerequisites_cache = {}
    similar_cache = {}
    domain_cache = {}

    def _lookup(cache, fetch, skill):
        # Query the graph only the first time a skill is seen in this call.
        if skill not in cache:
            cache[skill] = fetch(skill)
        return cache[skill]

    numerator_count = 0
    denominator_count = 0

    for skill1 in skills1:
        if skill1 in skills2:
            numerator_count += full_credit
            denominator_count += 1
            continue

        same_domain_count = 0
        skill1_prerequisite_nodes = _lookup(
            prerequisites_cache, fetch_prerequisite_nodes, skill1)
        skill1_domain = _lookup(domain_cache, fetch_domain_name, skill1)

        for skill2 in skills2:
            if skill1 in _lookup(prerequisites_cache, fetch_prerequisite_nodes, skill2):
                numerator_count += full_credit
                denominator_count += 1
                break
            elif skill1 in _lookup(similar_cache, fetch_similar_nodes, skill2):
                numerator_count += similar_credit
                denominator_count += 1
            elif compare_node_domains(
                    skill1_domain, _lookup(domain_cache, fetch_domain_name, skill2)):
                same_domain_count += 1
            elif skill2 in skill1_prerequisite_nodes:
                numerator_count += dependency_credit
                denominator_count += 1

        if same_domain_count >= same_domain_threshold:
            numerator_count += same_domain_credit
            denominator_count += 1

        # NOTE(review): this guard only fires while nothing has been counted
        # yet, so an unmatched skill adds to the denominator only when it
        # precedes every matched one -- the score depends on the order of
        # skills1. Kept as-is to preserve existing scores; confirm the intent.
        if denominator_count == 0:
            denominator_count += 1

    if denominator_count == 0:
        # Only reachable when skills1 is empty.
        return 0
    return round(numerator_count / denominator_count, 2)


In [123]:
# Demo run: score a single skill against a four-item skill list.
skills1 = ['React']
skills2 = [
    'Ember.js',
    'Backbone.js',
    'Vue.js',
    'JavaScript',
]

score = calculate_similarity_score(skills1, skills2)
print(f'Score: {score}')

Score: 0.45


In [125]:
# Close the shared Neo4j driver created earlier in the notebook.
driver.close()

In [None]:
# Skillset examples. Each pair rebinds skills1/skills2, so only the last pair
# is still bound after the cell has run.

skills1, skills2 = ['Express.js', 'JavaScript', 'TypeScript'], ['NestJS', 'TypeScript']

skills1, skills2 = ['Django', 'Python'], ['Flask']

skills1, skills2 = ['React'], ['Ember.js', 'Backbone.js', 'Vue.js']

skills1, skills2 = ['React', 'JavaScript'], ['Ember.js', 'Backbone.js', 'Vue.js']