In [1]:
from langchain_community.graphs import Neo4jGraph
neo4j_graph = Neo4jGraph(url="bolt://localhost:8687", username="neo4j", password="quanghung2004")

In [2]:
from database.ChromaDB import ChromaDB
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

In [3]:
model_name = 'BAAI/bge-small-en-v1.5'
model = SentenceTransformerEmbeddings(model_name=model_name)
directory = 'temp'

db_cache_city = ChromaDB(data_path = f'{directory}/city.db', model=model)
db_cache_role = ChromaDB(data_path = f'{directory}/role.db', model=model)
db_cache_language = ChromaDB(data_path = f'{directory}/language.db', model=model)
db_cache_institution = ChromaDB(data_path = f'{directory}/institution.db', model=model)
db_cache_education = ChromaDB(data_path = f'{directory}/education.db', model=model)
db_cache_major = ChromaDB(data_path = f'{directory}/major.db', model=model)
db_cache_skills = ChromaDB(data_path = f'{directory}/skills.db', model=model)
db_cache_programming = ChromaDB(data_path = f'{directory}/programming.db', model=model)

In [4]:
class UnionFind:
    def __init__(self, num_nodes):
        self.parent = [i for i in range(num_nodes)]
        self.rank = [0 for i in range(num_nodes)]
    def find(self, x):
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]
    def union(self, x, y):
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x != root_y:
            if self.rank[root_x] > self.rank[root_y]:
                self.parent[root_y] = root_x
            else:
                self.parent[root_x] = root_y
                if self.rank[root_x] == self.rank[root_y]:
                    self.rank[root_y] += 1
    def add_node(self, x, y):
        self.union(x, y)
        
    def get_clusters(self):
        clusters = {}
        for i in range(len(self.parent)):
            root = self.find(i)
            if root not in clusters:
                clusters[root] = []
            clusters[root].append(i)
        return clusters
        
        

In [5]:
cities = neo4j_graph.query(
    """
    MATCH (c:City)
    RETURN c.name as name
    """
)
cities = [city["name"] for city in cities]

In [6]:
cities

['ha noi',
 'ho chi minh',
 'can tho',
 'richardson',
 'da nang',
 'bien hoa',
 'quang ninh',
 'charlotte',
 'thu duc',
 'viet nam',
 'district 5',
 'yên nghĩa']

In [7]:
languages = neo4j_graph.query(
    """
    MATCH (c:Language)
    RETURN c.name as name
    """
)
languages = [language["name"] for language in languages]

In [8]:
institution = neo4j_graph.query(
    """
    MATCH (c:Institution)
    RETURN distinct(c.name) as name
    """
)
institution = [ins["name"] for ins in institution]

In [9]:
len(institution)

115

In [10]:
skills = neo4j_graph.query(
    """
    MATCH (c:Skill)
    RETURN Distinct(c.name) as name
    """
)
skills = [skill["name"] for skill in skills]

In [11]:
len(skills)

543

In [12]:
education = neo4j_graph.query(
    """
    MATCH (c:Academic)
    RETURN c.name as name
    """
)
education = [edu["name"] for edu in education]

In [13]:
programming = neo4j_graph.query(
    """
    MATCH (p:ProgrammingLanguage)
    RETURN p.name as name
    """
)

programming = [p["name"] for p in programming]

In [14]:
len(programming)

166

In [15]:
roles = neo4j_graph.query(
    """
    MATCH (p:Role)
    RETURN p.name as name
    """
)

roles = [p["name"] for p in roles]

In [16]:
len(roles)

293

In [17]:
majors = neo4j_graph.query(
    """
    MATCH (p:Major)
    RETURN p.name as name
    """
)

majors = [p["name"] for p in majors]

In [18]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def analyze_similarity(texts, db_cache, threshold = 0.9):
    text_id = dict()
    similarity = dict()
    i = 0
    for text in texts:
        text_id[text] = i
        i += 1
    score = np.zeros((len(texts), len(texts)))
    for i in range(len(texts)):
        text = texts[i]
        result = db_cache.similarity_search_with_relevance_scores(text, len(texts))
        for item in result:
            score[i , text_id[item[0].page_content]] = item[1]
            if i != text_id[item[0].page_content]:
                if item[1] > threshold:
                    if text not in similarity:
                        similarity[text] = []
                    similarity[text].append(item[0].page_content)
    
    # sns.heatmap(score, annot=True, cmap="viridis", cbar=True)
    # plt.xticks(np.arange(len(texts)), texts, rotation=90)
    # plt.yticks(np.arange(len(texts)), texts, rotation=0)
    # plt.show()
    return score, similarity
    

In [19]:
def cluster_changes(role_similar):
    role_id = dict()
    node_role = dict()
    id = 0
    items = []
    for k, v in role_similar.items():
        if k not in role_id:
            role_id[k] = id
            items.append(k)
            id += 1
        for item in v:
            if item not in role_id:
                role_id[item] = id
                items.append(item)
                id += 1
                
        node_role[role_id[k]] = [role_id[item] for item in v]
        
    uf = UnionFind(len(role_id))
    for k, v in node_role.items():
        for item in v:
            uf.add_node(k, item)

    cluster = uf.get_clusters()
    cluster_text = dict()
    for k, v in cluster.items():
        cluster_text[items[k]] = [items[item] for item in v]
    return cluster_text


In [20]:
city_score, city_similar = analyze_similarity(cities, db_cache_city, 0.8)

In [21]:
city_change = cluster_changes(city_similar)
city_change

{}

In [22]:
education_score, education_similarity = analyze_similarity(education, db_cache_education)

In [23]:
education_similarity

{}

In [28]:
role_score, role_similar = analyze_similarity(roles, db_cache_role, 0.86)

In [29]:
role_change = cluster_changes(role_similar)
role_change



{'operations manager': ['operations officer',
  'operations manager',
  'operations management'],
 'backend developer ': ['backend developer',
  'backend developer ',
  'back end developer',
  'back-end developer',
  'backend java web developer',
  'node.js backend developer',
  'back-end development',
  'back end web developer',
  'back-end programming'],
 'full stack developer': ['full-stack developer',
  'full stack developer',
  'full-stack web developer',
  'full stack web developer',
  'full-stack java web developer'],
 'event hosting & organization': ['event organization',
  'event hosting & organization',
  'event organizer'],
 'content creator': ['content creation', 'content creator'],
 'social media manager': ['social media management', 'social media manager'],
 'project manager': ['project management',
  'project manager',
  'project manager leader',
  'project leader'],
 'content writing': ['content writer', 'content writing'],
 'hr manager': ['hr admin manager', 'hr manage

In [38]:
programming_score, programming_similar = analyze_similarity(programming, db_cache_programming,0.80)

In [39]:
change_programming = cluster_changes(programming_similar)

In [40]:
change_programming

{'bootstrap 5.0': ['bootstrap 5', 'bootstrap 5.0', 'bootstrap'],
 'angular': ['angular2', 'angular', 'angular 12'],
 'php laravel': ['laravel', 'php laravel', 'laravel framework'],
 'css3': ['css', 'css3'],
 'postgressql': ['postgresql', 'postgressql', 'postgres'],
 'apache kafka': ['kafka', 'apache kafka'],
 'java spring boot': ['spring boot',
  'java spring boot',
  'springboot',
  'java springboot'],
 'sql': ['sql server', 'sql', 'ms sql'],
 'asp.net core mvc': ['asp.net core',
  'asp.net core mvc',
  '.net core',
  'asp.net core api',
  'asp.net web api',
  'asp.net mvc'],
 'c#.net': ['c#', 'c#.net', '.net'],
 'tailwindcss framework': ['tailwindcss',
  'tailwindcss framework',
  'tailwind css'],
 'ajax jquery': ['jquery', 'ajax jquery', 'ajax'],
 'firebase firestore': ['firebase', 'firebase firestore'],
 'apache spark': ['pyspark',
  'apache pyspark',
  'apache hadoop',
  'apache spark',
  'spark'],
 'redux': ['redux toolkit', 'redux', 'redux saga'],
 'flutter': ['dart', 'flutter']

In [41]:
languages_score, languages_similar = analyze_similarity(languages, db_cache_language, 0.9)

In [42]:
institution_score, institution_similar = analyze_similarity(institution, db_cache_institution, 0.92)

In [43]:
institution_similar

{'fpt university ho chi minh city': ['fpt university ho chi minh'],
 'hcmc university of technology': ['hcmc university of technology and education'],
 'ho chi minh city open university': ['open university ho chi minh city'],
 'national economics university': ['national economic university',
  'national economics university (neu)'],
 'national economic university': ['national economics university'],
 'fpt polytechnic': ['fpt polytechnic college'],
 'finance academy': ['academy of finance'],
 'ho chi minh city university of technology': ['ho chi minh city university of technology and education',
  'ho chi minh university of technology'],
 'fpt polytechnic college': ['fpt polytechnic', 'polytechnic fpt colleges'],
 'university of information technology - vietnam national university': ['university of information technology - vietnam national university ho chi minh city'],
 'saigon university': ['the saigon international university'],
 'ho chi minh city university of technology and educati

In [44]:
institution_similar = {'fpt university ho chi minh city': ['fpt university ho chi minh'],
 'ho chi minh city open university': ['open university ho chi minh city'],
 'national economics university': ['national economic university',
  'national economics university (neu)'],
 'national economic university': ['national economics university'],
 'fpt polytechnic': ['fpt polytechnic college'],
 'finance academy': ['academy of finance'],
 'ho chi minh city university of technology': [
  'ho chi minh university of technology'],
 'fpt polytechnic college': ['fpt polytechnic', 'polytechnic fpt colleges'],
 'university of information technology - vietnam national university': ['university of information technology - vietnam national university ho chi minh city'],
 'national economics university (neu)': ['national economics university'],
 'fpt university ho chi minh': ['fpt university ho chi minh city'],
 'polytechnic fpt colleges': ['fpt polytechnic college'],
 'university of information technology - vietnam national university ho chi minh city': ['university of information technology - vietnam national university'],
 'ho chi minh university of technology': ['ho chi minh city university of technology'],
 'academy of finance': ['finance academy'],
 'open university ho chi minh city': ['ho chi minh city open university']}

In [45]:
change_institution = cluster_changes(institution_similar)

In [46]:
change_institution

{'fpt university ho chi minh': ['fpt university ho chi minh city',
  'fpt university ho chi minh'],
 'open university ho chi minh city': ['ho chi minh city open university',
  'open university ho chi minh city'],
 'national economic university': ['national economics university',
  'national economic university',
  'national economics university (neu)'],
 'fpt polytechnic college': ['fpt polytechnic',
  'fpt polytechnic college',
  'polytechnic fpt colleges'],
 'academy of finance': ['finance academy', 'academy of finance'],
 'ho chi minh university of technology': ['ho chi minh city university of technology',
  'ho chi minh university of technology'],
 'university of information technology - vietnam national university ho chi minh city': ['university of information technology - vietnam national university',
  'university of information technology - vietnam national university ho chi minh city']}

In [49]:
majors_score, majors_similar = analyze_similarity(majors, db_cache_major, 0.83)

In [50]:
change_major = cluster_changes(majors_similar)

In [52]:
change_major

{'information and communication technology': ['information technology',
  'information and communication technology'],
 'business administrative': ['business administration',
  'business administrative'],
 'computer science and data science': ['computer science',
  'computer science and data science'],
 'international business - foreign trade': ['international business',
  'international business - foreign trade',
  'international business administration',
  'international business marketing',
  'international bussiness'],
 'data science in economics and business': ['data science in economic and business',
  'data science in economics and business',
  'data science and artificial intelligence'],
 'business analytics and marketing': ['business analytics',
  'business analytics and marketing'],
 'computer programming - mobile devices': ['mobile device computer programming',
  'computer programming - mobile devices',
  'computer programming for mobile devices',
  'mobile programming'],
 '

In [53]:
skills_score, skills_similar = analyze_similarity(skills, db_cache_skills, 0.81)

In [55]:
change_skills = cluster_changes(skills_similar)

In [56]:
change_skills

{'content writing': ['writing', 'content writing', 'seo content writing'],
 'data analysis': ['analysis',
  'data analysis',
  'statistical analysis',
  'financial analysis',
  'business analysis',
  'system analysis',
  'regression analysis',
  'data analysis skills',
  'data analytics',
  'financial statement analysis'],
 'teamwork skills': ['teamwork',
  'teamwork skills',
  'team work',
  'teamwork abilities',
  'good teamwork'],
 'organization': ['work organization', 'organization'],
 'task management': ['task coordination',
  'task management',
  'functional coordination'],
 'team leadership': ['team management',
  'team leadership',
  'automation team management',
  'leadership'],
 'legal compliance': ['compliance', 'legal compliance'],
 'analytical thinking': ['analytical skills', 'analytical thinking'],
 'communicating': ['communication',
  'communicating',
  'effective communication',
  'business communication'],
 'scripting': ['scriptwriting', 'scripting'],
 'video recording

In [57]:
query = """
        MATCH (a: ProgrammingLanguage {name: $form.node_val1}), (b: ProgrammingLanguage {name: $form.node_val2})

            MATCH (other)-[r:PROGRAMMING]->(b)
            MERGE (other)-[r2:PROGRAMMING]->(a)
            SET r2 = r
        
        DETACH delete b
        return r2.exp
        """
# neo4j_graph.query(query)

In [92]:
# form = {
#     # "node_type": "Award",
#     "node_val1": "spring boot",
#     "node_val2": "java spring boot"
#     # "relationship": "AWARD"
# }
# neo4j_graph.query(query, params={"form": form})

In [58]:
change_institution

{'fpt university ho chi minh': ['fpt university ho chi minh city',
  'fpt university ho chi minh'],
 'open university ho chi minh city': ['ho chi minh city open university',
  'open university ho chi minh city'],
 'national economic university': ['national economics university',
  'national economic university',
  'national economics university (neu)'],
 'fpt polytechnic college': ['fpt polytechnic',
  'fpt polytechnic college',
  'polytechnic fpt colleges'],
 'academy of finance': ['finance academy', 'academy of finance'],
 'ho chi minh university of technology': ['ho chi minh city university of technology',
  'ho chi minh university of technology'],
 'university of information technology - vietnam national university ho chi minh city': ['university of information technology - vietnam national university',
  'university of information technology - vietnam national university ho chi minh city']}

In [60]:
i = 0
for keep, changes in change_institution.items():
    for change in changes:
        
        query = """
        MATCH (a: Institution  {name: $form.node_val1}), (b: Institution  {name: $form.node_val2})

            MATCH (other)-[r:STUDY]->(b)
            MERGE (other)-[r2:STUDY]->(a)
            SET r2 = r
            DELETE r
        
        DETACH delete b
        return a
        """
        
        if keep != change:
            form = {

                "node_val1": keep,
                "node_val2": change,

            }
            # print(keep, change)
            result = neo4j_graph.query(query, params={"form": form})
            print(result)
            i+=1

[]
[]
[]
[]
[]
[]
[]
[]
[]


In [61]:
i = 0
for keep, changes in change_programming.items():
    for change in changes:
        
        query = """
        MATCH (a: ProgrammingLanguage {name: $form.node_val1}), (b: ProgrammingLanguage {name: $form.node_val2})

            MATCH (other)-[r:PROGRAMMING]->(b)
            MERGE (other)-[r2:PROGRAMMING]->(a)
            SET r2 = r
            DELETE r
        
        DETACH delete b
        return a.name, r2.exp
        """

            
        
        if keep != change:
            form = {

                "node_val1": keep,
                "node_val2": change,

            }
            result = neo4j_graph.query(query, params={"form": form})
            print(result)
            i+=1

[{'a.name': 'bootstrap 5.0', 'r2.exp': 1}, {'a.name': 'bootstrap 5.0', 'r2.exp': 1}, {'a.name': 'bootstrap 5.0', 'r2.exp': 1.0}]
[{'a.name': 'bootstrap 5.0', 'r2.exp': 0.5}, {'a.name': 'bootstrap 5.0', 'r2.exp': 0.5}, {'a.name': 'bootstrap 5.0', 'r2.exp': 0.75}, {'a.name': 'bootstrap 5.0', 'r2.exp': 2.5}, {'a.name': 'bootstrap 5.0', 'r2.exp': None}, {'a.name': 'bootstrap 5.0', 'r2.exp': 1.0}, {'a.name': 'bootstrap 5.0', 'r2.exp': 1}, {'a.name': 'bootstrap 5.0', 'r2.exp': 1}, {'a.name': 'bootstrap 5.0', 'r2.exp': 1}, {'a.name': 'bootstrap 5.0', 'r2.exp': 0.5}]
[{'a.name': 'angular', 'r2.exp': 1.0}]
[{'a.name': 'angular', 'r2.exp': 0.5}]
[{'a.name': 'php laravel', 'r2.exp': 0.5}, {'a.name': 'php laravel', 'r2.exp': 0.5}, {'a.name': 'php laravel', 'r2.exp': 1}, {'a.name': 'php laravel', 'r2.exp': None}, {'a.name': 'php laravel', 'r2.exp': 0.5}, {'a.name': 'php laravel', 'r2.exp': 1.0}, {'a.name': 'php laravel', 'r2.exp': 1.5}, {'a.name': 'php laravel', 'r2.exp': None}, {'a.name': 'php lar

In [62]:
i = 0
for keep, changes in change_major.items():
    for change in changes:
        
        query = """
        MATCH (a: Major {name: $form.node_val1}), (b: Major {name: $form.node_val2})

            MATCH (other)-[r:MAJOR ]->(b)
            MERGE (other)-[r2:MAJOR ]->(a)
            SET r2 = r
            DELETE r
        
        DETACH delete b
        return r2.level
        """

            
        
        if keep != change:
            form = {

                "node_val1": keep,
                "node_val2": change,

            }
            result = neo4j_graph.query(query, params={"form": form})
            print(result)
            i+=1

[{'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'undergraduate'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'undergraduate'}, {'r2.level': 'undergraduate'}, {'r2.level': 'bachelor'}, {'r2.level': 'undergraduate'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'undergraduate'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'undergraduate'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}]
[{'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r2.level': 'certificate'}, {'r2.level': 'bachelor'}, {'r2.level': 'bachelor'}, {'r

In [63]:
i = 0
for keep, changes in change_skills.items():
    for change in changes:
        
        query = """
        MATCH (a: Skill {name: $form.node_val1}), (b: Skill {name: $form.node_val2})

            MATCH (other)-[r:SKILL ]->(b)
            MERGE (other)-[r2:SKILL ]->(a)
            SET r2 = r
            DELETE r
        
        DETACH delete b
        return a
        """

            
        
        if keep != change:
            form = {

                "node_val1": keep,
                "node_val2": change,

            }
            result = neo4j_graph.query(query, params={"form": form})
            print(result)
            i+=1

[{'a': {'name': 'content writing'}}, {'a': {'name': 'content writing'}}, {'a': {'name': 'content writing'}}, {'a': {'name': 'content writing'}}]
[{'a': {'name': 'content writing'}}]
[{'a': {'name': 'data analysis'}}, {'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}, {'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}]
[{'a': {'name': 'data analysis'}}, {'a': {'name': 'data analysis'}}]
[{'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a': {'name': 'teamwork skills'}}, {'a'

In [64]:
i = 0
for keep, changes in role_change.items():
    for change in changes:
        
        query = """
        MATCH (a: Role {name: $form.node_val1}), (b: Role {name: $form.node_val2})
        With a,b
            OPTIONAL MATCH (other)-[r:ROLE]->(b)
            FOREACH (r in CASE WHEN other IS NULL THEN [] ELSE [r] END |
                MERGE (other)-[r2:ROLE]->(a)
                SET r2 = r
                DELETE r
            )
            
        
        With a,b
            OPTIONAL MATCH (other2)-[r3:SUITABLE]->(b)
            FOREACH (r3 in CASE WHEN other2 IS NULL THEN [] ELSE [r3] END |
                MERGE (other2)-[r4:SUITABLE]->(a)
                SET r4 = r3
                DELETE r3
            )
        
        DETACH delete b
        return a
        """


        if keep != change:
            form = {

                "node_val1": keep,
                "node_val2": change,

            }
            result = neo4j_graph.query(query, params={"form": form})
            print(result)
            i+=1

[{'a': {'name': 'operations manager'}}]
[{'a': {'name': 'operations manager'}}]
[{'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backend developer '}}, {'a': {'name': 'backen

In [65]:
['national economics university',
  'national economic university',
  'national economics university (neu)']

['national economics university',
 'national economic university',
 'national economics university (neu)']

In [66]:
query = """
MATCH (n:Application )-[r:STUDY]-> (a:Institution  {name: "national economic university"})
return id(n)
"""
neo4j_graph.query(query)

[{'id(n)': 1067},
 {'id(n)': 972},
 {'id(n)': 1670},
 {'id(n)': 1220},
 {'id(n)': 748},
 {'id(n)': 1425},
 {'id(n)': 730},
 {'id(n)': 1525},
 {'id(n)': 1786},
 {'id(n)': 679},
 {'id(n)': 1739},
 {'id(n)': 1388},
 {'id(n)': 1319},
 {'id(n)': 637},
 {'id(n)': 1936},
 {'id(n)': 1437},
 {'id(n)': 1542},
 {'id(n)': 919},
 {'id(n)': 1104},
 {'id(n)': 1640},
 {'id(n)': 1939},
 {'id(n)': 269},
 {'id(n)': 996},
 {'id(n)': 282}]

In [67]:
query = """
MATCH (n:Application)-[r:PROGRAMMING]-> (a:ProgrammingLanguage {name: "spring boot"})
return id(n), r.exp
"""
neo4j_graph.query(query)

[]

In [68]:
query = """
MATCH  (n:Application)-[r:PROGRAMMING]->(a:ProgrammingLanguage {name: "java spring boot"})
return id(n), r.exp

"""
neo4j_graph.query(query)

[{'id(n)': 1753, 'r.exp': 0.5},
 {'id(n)': 549, 'r.exp': 0.5},
 {'id(n)': 114, 'r.exp': 1},
 {'id(n)': 1200, 'r.exp': 2},
 {'id(n)': 1956, 'r.exp': 0.5},
 {'id(n)': 1675, 'r.exp': 2},
 {'id(n)': 1846, 'r.exp': 0.75},
 {'id(n)': 1837, 'r.exp': 0.5},
 {'id(n)': 1017, 'r.exp': 0.5},
 {'id(n)': 1398, 'r.exp': 0.5},
 {'id(n)': 946, 'r.exp': 1},
 {'id(n)': 570, 'r.exp': 1.5}]

In [100]:
certifications = neo4j_graph.query(
    """
    MATCH (p:Certification)
    RETURN p.name as name
    """
)

certifications = [p["name"] for p in certifications]

awards = neo4j_graph.query(
    """
    MATCH (p:Award)
    RETURN p.name as name
    """
)

awards = [p["name"] for p in awards]

In [101]:
awards

['First prize for scientific research at the city level with the topic "Building a business model for a youth-oriented enterprise"',
 'Problem Solving Skills (PACE Institute of Management)',
 'Management for Middle Managers',
 '3Ps - Deployment techniques & salary system construction (Personal course)',
 'HUST EXCELLENT STUDENT 20221',
 'FPT Entrepreneurial Hackathon 2022 - Most Promising Group',
 'President of VSB - Vietsoc Brighton (Vietnamese Student Association in Brighton, UK)',
 'Leader of the Media Team at Sonder x Masterdating',
 'DataCamp | SQL Server Developer',
 'DataCamp | SQL Server for Database Administrators',
 'Microsoft | Microsoft Azure Data Engineer Associate (DP-203)',
 'DeepLearning.AI | Natural Language Processing in TensorFlow',
 'CFA Institute | Data Science for Investment Professionals',
 'HackerRank SQL, Python (Basic) Certificate',
 'HackerRank Problem Solving (Intermediate) Certificate',
 'Coursera Google Data Analytics Certificate',
 'Coursera IBM Data Anal

In [102]:
db_cache_certifications = ChromaDB(data_path = f'{directory}/certifications.db', model=model)
db_cache_awards = ChromaDB(data_path = f'{directory}/awards.db', model=model)

In [103]:
neo4j_graph.refresh_schema()
print(neo4j_graph.schema)

Node properties:
Application {name: STRING, phone: STRING, email: STRING, summary: STRING, work_summary: STRING, education_summary: STRING, project_summary: STRING, file: STRING, birth: INTEGER, address: STRING}
Language {name: STRING}
Institution {name: STRING}
Academic {name: STRING}
Major {name: STRING}
Role {name: STRING}
Award {name: STRING}
Company {name: STRING}
Skill {name: STRING}
ProgrammingLanguage {name: STRING}
Certification {name: STRING}
City {name: STRING}
Relationship properties:
ACADEMIC {GPA: FLOAT, is_GPA: BOOLEAN}
MAJOR {level: STRING}
WORK {duration: FLOAT}
ROLE {exp: FLOAT}
PROGRAMMING {exp: FLOAT}
The relationships:
(:Application)-[:SKILL]->(:Skill)
(:Application)-[:LIVE_IN]->(:City)
(:Application)-[:SPEAK]->(:Language)
(:Application)-[:STUDY]->(:Institution)
(:Application)-[:ACADEMIC]->(:Academic)
(:Application)-[:MAJOR]->(:Major)
(:Application)-[:SUITABLE]->(:Role)
(:Application)-[:WORK]->(:Company)
(:Application)-[:ROLE]->(:Role)
(:Application)-[:PROGRAMMING]

In [104]:
# db_cache_certifications.add_texts(certifications)
# db_cache_awards.add_texts(awards)

In [105]:
awards_score, awards_similar = analyze_similarity(awards, db_cache_awards, 0.9)

In [106]:
awards_similar

{}

In [107]:
certifications_score, certifications_similar = analyze_similarity(certifications, db_cache_certifications, 0.9)

In [108]:
certifications_similar

{}