## Evaluation of Knowledge Graph for RAG

In [1]:
from database.GraphDB import KnowledgeGraphDB
from retrieval.retrieval_hub import RetrievalHub
from query_and_ranking.query import Query
from query_and_ranking.rerank import Reranker
from agentic import Agent
from tqdm import tqdm

2024-06-15 01:39:40.704289: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-15 01:39:40.704322: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-15 01:39:40.705231: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import os 
from dotenv import load_dotenv
# Populate the process environment from a local .env file (if present), then
# read the three connection settings in one pass. A variable that is not set
# comes back as None.
load_dotenv()

neo4j_uri, neo4j_user, neo4j_password = (
    os.getenv(key) for key in ('NEO4J_URI', 'NEO4J_USER', 'NEO4J_PASSWORD')
)

In [3]:
# Build the pipeline pieces in dependency order: the graph handle first, then
# the components that are constructed from it.
kg = KnowledgeGraphDB(
    uri=neo4j_uri,
    user=neo4j_user,
    password=neo4j_password,
)
retrieval_hub = RetrievalHub(kg)
reranker = Reranker()
cv_query = Query(kg, retrieval_hub)

In [4]:
# Assemble the agent under evaluation from the components built above.
# NOTE(review): 'aws' / 'gemini' presumably select the LLM backends used for
# routing and job-description extraction -- confirm against Agent.
chat_agent = Agent(
    kg,
    retrieval_hub,
    reranker,
    cv_query,
    routing='aws',
    jd_extraction='gemini',
)

In [5]:
import json
# Load the benchmark questions. The context manager closes the file handle
# deterministically (the previous open(...).read() left it to the garbage
# collector), and the explicit encoding keeps the read platform-independent.
with open('data/benchmark/qa_cv.json', encoding='utf-8') as questions_file:
    questions = json.load(questions_file)

In [6]:
# Run every benchmark prompt through the agent and store the predicted CV
# files next to the gold answer for later scoring.
answers = []
i = 0  # ends up as the number of questions processed (stays 0 if there are none)

for i, question in enumerate(tqdm(questions), start=1):
    prompt = question['prompt']
    answer = {
        'prompt': prompt,
        'answer': question['answer'],
    }
    try:
        res = chat_agent.get_cv_from_jd(prompt)
        ans = res['files']
        # Drop the dataset directory prefix from each returned path before
        # storing it as a prediction.
        answer['predict'] = [a.replace('data/raw/data/clean4.0/', '') for a in ans]
    except Exception as exc:
        # Catch Exception rather than everything: a bare `except:` would also
        # swallow KeyboardInterrupt/SystemExit and make the run impossible to
        # stop. A failed question is still recorded with no predictions, and
        # the cause is printed instead of being discarded.
        print(f'Error: {exc!r}')
        answer['predict'] = []
    answers.append(answer)
    # Checkpoint after every question so partial results survive a crash.
    with open('data/benchmark/qa_cv_kg.json', 'w') as f:
        json.dump(answers, f)


  0%|          | 0/39 [00:00<?, ?it/s]

Extracting Job Description
End LLM
New Query:  {'cityList': ['ha noi'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  4
Reduce CV  61  ->  4  CV
New Query:  {'cityList': ['ha noi'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Still more -> Reduce CV
Risk:  True
Num CV ok:  4
Rerank
Reranking
[1593, 222, 2004]


  3%|▎         | 1/39 [00:06<03:53,  6.14s/it]

Time: 6.134795188903809
Extracting Job Description
End LLM
New Query:  {'cityList': ['ho chi minh'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['marketing manager', 'marketing officer', 'marketing assistant', 'marketing analyst'], 'suitableList': ['marketing manager', 'marketing officer', 'digital marketing manager', 'e-commerce specialist', 'marketing assistant', 'marketing analyst', 'marketing executive', 'digital marketing specialist', 'media specialist', 'sales coordinator', 'digital marketing ', 'marketing staff', 'sales associate']}
Extended Num Application:  4


  5%|▌         | 2/39 [00:11<03:40,  5.95s/it]

Not enough application
New Query:  {'cityList': ['ho chi minh'], 'suitableList': ['marketing manager', 'marketing officer', 'marketing team leader', 'digital marketing manager', 'e-commerce specialist', 'marketing assistant', 'marketing analyst', 'marketing executive', 'digital marketing specialist', 'media specialist', 'sales coordinator', 'digital marketing ', 'research associate', 'marketing staff', 'sales associate']}
[841, 618, 1560, 1629, 1958, 468]
More -> Reduce CV
Num CV ok:  4
Rerank
Reranking
[1958, 618, 841]
Time: 5.827220439910889
Extracting Job Description


  8%|▊         | 3/39 [00:16<03:09,  5.27s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['marketing manager', 'marketing assistant', 'marketing analyst', 'marketing executive'], 'role_exp': 0.0, 'suitableList': ['marketing manager', 'marketing officer', 'marketing assistant', 'marketing analyst', 'marketing executive', 'marketing staff']}
Extended Num Application:  10
Num CV ok:  7
Rerank
[1139, 1513, 1749, 468, 786, 8, 972]
Time: 4.462765455245972
Extracting Job Description
End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['odoo developer'], 'role_exp': 2.5, 'skillList': ['api development', 'frontend development', 'ar development', 'game development', 'software development', 'tool development'], 'suitableList': ['net developer', 'java developer', 'php developer', ' web developer', 'software engineer', 'android developer', 'python developer', 'it developer', 'software developer', 'develop

 10%|█         | 4/39 [00:22<03:09,  5.42s/it]

Not enough application
New Query:  {'roleList': ['unity developer', 'java developer', 'python developer', 'odoo developer', 'developer'], 'role_exp': 2.25, 'skillList': ['chatbot development', 'api development', 'frontend development', 'ar development', 'creative concept development', 'game development', 'software development', 'tool development'], 'suitableList': ['net developer', 'java developer', 'php developer', ' web developer', 'software engineer', 'android developer', 'python developer', 'it developer', 'software developer', 'developer']}
[]
Less -> Sematic Search
Using Sematic Search
Num CV ok:  2
Rerank
[1092, 776]
Time: 5.631318092346191
Extracting Job Description
End LLM
New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  7
Reduce CV  24  ->  7  CV
New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bach

 13%|█▎        | 5/39 [00:27<03:06,  5.49s/it]

[495, 463, 1274, 406, 1690]
Time: 5.632444620132446
Extracting Job Description
End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['data science / artificial intelligence', 'data scientist', 'data analyst', 'research scientist'], 'role_exp': 0.5, 'suitableList': ['nlp engineer', 'ai engineer', 'data analysis', 'financial market data analyst', 'research engineer', 'analyst', 'statistician', 'data science / artificial intelligence', 'machine learning engineer', 'data scientist', 'data analyst', 'research analyst']}
Extended Num Application:  14
Reduce CV  19  ->  14  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['data scientist'], 'role_exp': 0.5, 'suitableList': ['ai engineer', 'data analysis', 'financial market data analyst', 'analyst', 'statistician', 'machine learning engineer', 'data scientist', 'data analyst']}
Risk:  True
Not enough application
Red

 15%|█▌        | 6/39 [00:32<02:56,  5.34s/it]

[1690, 606, 1428, 280, 270, 1456, 1604, 1291, 1165, 638, 1847]
Time: 5.038708209991455
Extracting Job Description


 18%|█▊        | 7/39 [00:37<02:43,  5.12s/it]

End LLM
New Query:  {'institutionList': ['hanoi university of science and technology', 'hanoi university of industry'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  7
Reduce CV  11  ->  7  CV
New Query:  {'institutionList': ['hanoi university of industry'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Risk:  True
Num CV ok:  5
Rerank
[1802, 1896, 2004, 651, 658]
Time: 4.655791282653809
Extracting Job Description


 21%|██        | 8/39 [00:41<02:28,  4.78s/it]

End LLM
New Query:  {}
Extended Num Application:  4
Error
Extracting Job Description
End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['accountant', 'finance', 'auditor', 'account management']}
Extended Num Application:  3
Reduce CV  8  ->  3  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['accountant', 'auditor']}
Still more -> Reduce CV
Risk:  True
Num CV ok:  3
Rerank
Reranking


 23%|██▎       | 9/39 [00:46<02:24,  4.82s/it]

[138, 1857]
Time: 4.9114089012146
Extracting Job Description


 26%|██▌       | 10/39 [00:50<02:16,  4.72s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['editor', 'developer', 'trainee', 'research assistant'], 'role_exp': 0.0}
Extended Num Application:  1
Reduce CV  27  ->  1  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['editor', 'trainee'], 'role_exp': 0.0}
Still more -> Reduce CV
Risk:  True
Num CV ok:  1
Rerank
[345]
Time: 4.5090012550354
Extracting Job Description
End LLM
New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['analyst', 'data analyst', 'financial market data analyst', 'data analysis'], 'suitableList': ['data analysis', 'financial market data analyst', 'analyst', 'data scientist', 'data analyst', 'research analyst']}
Extended Num Application:  12
Not enough application
New Query:  {'institutionList': ['university of 

 28%|██▊       | 11/39 [00:56<02:22,  5.10s/it]

New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['data science / artificial intelligence', 'data scientist', 'data analyst', 'research scientist'], 'suitableList': ['financial market data analyst', 'research scientist', 'data science / artificial intelligence', 'machine learning engineer', 'data scientist', 'data analyst']}
Extended Num Application:  12
Not enough application
New Query:  {'institutionList': ['university of economics', 'national economics university (neu)'], 'suitableList': ['data analysis', 'financial market data analyst', 'research scientist', 'data science / artificial intelligence', 'machine learning engineer', 'data scientist', 'data analyst', 'scientist']}
[1214, 1690, 463, 1941, 2013, 495, 561, 732, 606, 280, 1291]
Num CV ok:  11
Rerank
Reranking
[1214, 124, 1291, 1690, 1941, 732, 2013, 561, 495, 463, 280, 606]
Time: 5.9673004150390625
Extract

 31%|███       | 12/39 [01:03<02:30,  5.59s/it]

Time: 6.685256481170654
Extracting Job Description


 33%|███▎      | 13/39 [01:07<02:16,  5.24s/it]

End LLM
New Query:  {'roleList': ['marketing manager', 'marketing officer', 'content marketing manager', 'digital marketing manager'], 'suitableList': ['marketing manager', 'marketing officer', 'brand manager', 'digital marketing manager', 'marketing executive', 'content marketing manager']}
Extended Num Application:  4
Num CV ok:  4
Rerank
Reranking
[972, 468, 786]
Time: 4.445738315582275
Extracting Job Description
End LLM
New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['machine learning engineer', 'dataops engineer', 'data analyst', 'data scientist']}
Extended Num Application:  4
Reduce CV  5  ->  4  CV
New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['machine learning engineer', 'dataops engineer', 'data analyst', 'data scientist']}
Still mor

 36%|███▌      | 14/39 [01:13<02:10,  5.24s/it]

[1690, 561, 1291]
Time: 5.229144096374512
Extracting Job Description
End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['back-end developer', 'developer', 'front end developer', 'full stack developer'], 'role_exp': 3.0, 'suitableList': ['net developer', 'nlp engineer', 'ai engineer', 'front end developer', 'full stack developer', 'embedded systems engineer', 'software engineer', 'it developer', 'back-end developer', 'software developer', 'developer']}
Extended Num Application:  3
Reduce CV  5  ->  3  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['back-end developer', 'front end developer'], 'role_exp': 3.0, 'suitableList': ['front end developer', 'full stack developer', 'developer', 'back-end developer', 'software developer', 'software engineer']}
Risk:  True
Num CV ok:  3
Rerank
Reranking
[1579, 104]


 38%|███▊      | 15/39 [01:18<02:05,  5.24s/it]

Time: 5.231488466262817
Extracting Job Description


 41%|████      | 16/39 [01:25<02:10,  5.66s/it]

End LLM
New Query:  {'cityList': ['ho chi minh'], 'institutionList': ['ho chi minh city industry and trade college', 'ho chi minh city university of industry and trade'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  6
Not enough application
New Query:  {'cityList': ['ho chi minh'], 'institutionList': ['ho chi minh city industry and trade college', 'ho chi minh city university of industry and trade']}
[178, 191]
Less -> Sematic Search
Using Sematic Search
Num CV ok:  4
Rerank
[1530, 178, 1841, 191]
Time: 6.642096281051636
Extracting Job Description


 44%|████▎     | 17/39 [01:30<02:04,  5.68s/it]

End LLM
New Query:  {'institutionList': ['ho chi minh city open university'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  7
Num CV ok:  6
Rerank
Reranking
[548, 1240, 1669, 1604, 1193]
Time: 5.718532085418701
Extracting Job Description
End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['data analysis', 'financial market data analyst', 'finance analyst', 'business analyst', 'analyst', 'sales analyst', 'data analyst'], 'role_exp': 0.5, 'suitableList': ['data analysis', 'financial market data analyst', 'finance analyst', 'business analyst', 'analyst', 'marketing analyst', 'sales analyst', 'data scientist', 'data analyst', 'research analyst']}
Extended Num Application:  4
Reduce CV  18  ->  4  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['business analyst', 'data analyst', 'financial mark

 46%|████▌     | 18/39 [01:35<01:52,  5.34s/it]

[548, 1428, 135]
Time: 4.551875591278076
Extracting Job Description
End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['embedded systems engineer', 'developer', 'embedded developer', 'it developer'], 'suitableList': ['ai engineer', 'fullstack engineer', 'embedded systems engineer', 'it developer', 'software developer', 'software engineer', 'embedded developer', 'supplier quality engineer']}
Extended Num Application:  3
Reduce CV  12  ->  3  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['embedded developer'], 'suitableList': ['ai engineer', 'fullstack engineer', 'embedded systems engineer', 'software developer', 'software engineer', 'embedded developer', 'supplier quality engineer']}
Risk:  True
Not enough application
Reduce Risk -> Get more CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': 

 49%|████▊     | 19/39 [01:40<01:45,  5.26s/it]

[642, 2073]
Time: 5.090407133102417
Extracting Job Description


 51%|█████▏    | 20/39 [01:45<01:37,  5.14s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['it support'], 'suitableList': ['customer service staff', 'change management specialist', 'e-commerce specialist', 'communication specialist', 'digital marketing specialist', 'office assistant', 'research assistance', 'it developer', 'receptionist', 'customer service manager', 'it support']}
Extended Num Application:  3
Num CV ok:  2
Rerank
[1530, 178]
Time: 4.8510096073150635
Extracting Job Description
End LLM
New Query:  {'cityList': ['ha noi'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['mobile app developer', 'android developer', ' web developer', 'developer'], 'suitableList': ['flutter developer', 'unity developer', 'java developer', 'mobile app developer', ' web developer', 'android developer', 'software developer', 'developer']}
Extended Num Application:  4
Reduce CV  13  ->  4  CV
New Query:  {'cityList': [

 54%|█████▍    | 21/39 [01:50<01:32,  5.14s/it]

[651, 222, 2004]
Time: 5.1399126052856445
Extracting Job Description
End LLM
New Query:  {'cityList': ['ha noi'], 'institutionList': ['hanoi university of science and technology', 'hanoi university of business and technology'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  8
Num CV ok:  7
Rerank
Reranking


 56%|█████▋    | 22/39 [01:55<01:29,  5.26s/it]

[345, 222, 2083, 19, 1733, 1104]
Time: 5.553864002227783
Extracting Job Description
End LLM
New Query:  {'cityList': ['ho chi minh'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['back-end developer', 'developer', 'front end developer', 'full stack developer'], 'role_exp': 0.25, 'suitableList': ['net developer', 'front end developer', 'full stack developer', 'it developer', 'node.js developer', 'back-end developer', 'developer', 'embedded developer']}
Extended Num Application:  7
Reduce CV  12  ->  7  CV


 59%|█████▉    | 23/39 [02:01<01:26,  5.43s/it]

New Query:  {'cityList': ['ho chi minh'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['back-end developer', 'developer', 'front end developer', 'full stack developer'], 'role_exp': 0.25, 'suitableList': ['front end developer', 'full stack developer', 'it developer', 'node.js developer', 'back-end developer', 'developer']}
Still more -> Reduce CV
Risk:  True
Num CV ok:  7
Rerank
Reranking
[37, 1715, 595, 439, 751]
Time: 5.829308748245239
Extracting Job Description


 62%|██████▏   | 24/39 [02:06<01:18,  5.24s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['back-end developer', 'developer', 'front end developer', 'full stack developer'], 'role_exp': 5.0}
Extended Num Application:  6
Not enough application
New Query:  {'roleList': ['front end developer', 'full stack developer', 'it developer', 'back-end developer', 'developer'], 'role_exp': 4.5}
[502, 708, 23]
Less -> Sematic Search
Using Sematic Search
Num CV ok:  4
Rerank
[1953, 23, 502, 708]
Time: 4.794861555099487
Extracting Job Description


 64%|██████▍   | 25/39 [02:11<01:12,  5.18s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'skillList': ['tool development', 'frontend development', 'automated program development', 'software development', 'devops', 'jenkins'], 'suitableList': ['nlp engineer', 'website administrator', 'ai engineer', 'hr administrator', 'software engineer', 'systems administrator', 'it developer', 'devops', 'network administrator', 'back-end developer', 'machine learning engineer', 'dataops engineer', 'developer', 'admin executive']}
Extended Num Application:  4
Reduce CV  15  ->  4  CV
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'skillList': ['devops'], 'suitableList': ['nlp engineer', 'ai engineer', 'software engineer', 'systems administrator', 'it developer', 'devops', 'network administrator', 'machine learning engineer', 'dataops engineer', 'developer']}
Risk:  True
Num CV ok:  3
Rerank
[1488, 502, 850]
Time: 5.016902446746826
Extr

 67%|██████▋   | 26/39 [02:16<01:06,  5.14s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['php developer', ' web developer', 'developer', 'java developer'], 'role_exp': 2.5, 'suitableList': ['net developer', 'java developer', 'php developer', ' web developer', 'android developer', 'developer']}
Extended Num Application:  4
Num CV ok:  3
Rerank
[1579, 2023, 587]
Time: 5.071263551712036
Extracting Job Description
End LLM
New Query:  {'cityList': ['ha noi'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['strategic planning', 'production planner', 'training ', 'financial planner'], 'role_exp': 1.5, 'suitableList': ['financial planner', 'project coordinator', 'event coordinator', 'strategic planning', 'production planner', 'insurance advisor']}
Extended Num Application:  6
Not enough application
New Query:  {'cityList': ['ha noi'], 'roleList': ['financial planner', 'strategic planning', 'production planner', 't

 69%|██████▉   | 27/39 [02:26<01:17,  6.44s/it]

New Query:  {'cityList': ['ha noi'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['content marketing', 'marketing', 'brand marketing', 'marketing communications'], 'role_exp': 1.5, 'suitableList': ['marketing manager', 'marketing officer', 'marketing analyst', 'marketing assistant', 'digital marketing specialist', 'marketing executive']}
Extended Num Application:  6
Not enough application
New Query:  {'cityList': ['ha noi'], 'roleList': ['content marketing', 'marketing', 'brand marketing', 'marketing executive', 'marketing communications'], 'role_exp': 1.35, 'suitableList': ['marketing manager', 'marketing officer', 'marketing', 'marketing analyst', 'marketing assistant', 'digital marketing specialist', 'marketing executive', 'marketing staff']}
[1371, 1513, 1749, 2109, 424]
Num CV ok:  5
New Query:  {'cityList': ['ha noi'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['content cre



New Query:  {'cityList': ['ho chi minh'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['content marketing', 'marketing', 'brand marketing', 'marketing communications'], 'role_exp': 1.5, 'skillList': ['email marketing', 'marketing', 'content marketing', 'social media marketing', 'digital marketing', 'marketing strategy'], 'suitableList': ['marketing manager', 'marketing officer', 'content writer', 'e-commerce specialist', 'digital marketing manager', 'content marketing', 'marketing assistant', 'marketing analyst', 'marketing executive', 'digital marketing specialist', 'media specialist', 'social media marketing', 'communications manager', 'social media manager', 'digital marketing ', 'content marketing manager', 'marketing staff', 'sales associate']}
Extended Num Application:  4
Not enough application
New Query:  {'cityList': ['ho chi minh'], 'roleList': ['content marketing', 'marketing', 'brand marketing', 'marketing executive', 'mar

 72%|███████▏  | 28/39 [02:34<01:17,  7.08s/it]

New Query:  {'cityList': ['ho chi minh'], 'roleList': ['content creator', 'content marketing', 'content writer', 'content marketing manager'], 'role_exp': 1.35, 'skillList': ['content writing', 'value creation', 'content marketing', 'creative concept development', 'game development', 'content management', 'content creation', 'information synthesis'], 'suitableList': ['content marketing', 'communication specialist', 'marketing analyst', 'digital marketing specialist', 'marketing executive', 'social media marketing', 'marketing staff', 'research associate', 'sales associate', 'digital marketing manager', 'media specialist', 'qa manager', 'marketing officer', 'brand manager', 'marketing assistant', 'communications manager', 'social media manager', 'digital marketing ', 'marketing manager', 'content writer', 'e-commerce specialist', 'content marketing manager']}
[841, 1629]
Less -> Sematic Search
Using Sematic Search
Num CV ok:  3
Rerank
[1513, 1749, 841]
Time: 8.567307472229004
Extracting

 74%|███████▍  | 29/39 [02:40<01:05,  6.55s/it]

End LLM
New Query:  {'institutionList': ['national economics university (neu)'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  30
Not enough application
New Query:  {'institutionList': ['university of economics', 'national economics university (neu)']}
[1214, 1274, 1690, 424, 463, 8, 1941, 2013, 495, 324, 227, 1783, 310, 561, 732, 406, 606, 280, 738, 124, 1761, 1993, 2109, 1722, 1291]
Num CV ok:  25
Rerank
[1214, 124, 1274, 1291, 1690, 1722, 1761, 1783, 1941, 1993, 2013, 2109, 227, 280, 310, 324, 406, 424, 463, 495, 561, 606, 732, 738, 8]
Time: 5.298303127288818
Extracting Job Description


 77%|███████▋  | 30/39 [02:44<00:53,  5.96s/it]

End LLM
New Query:  {'institutionList': ['university of economics', 'university of economics and law'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  4
Num CV ok:  4
Rerank
Reranking
[692, 443, 1901]
Time: 4.603946924209595
Extracting Job Description


 79%|███████▉  | 31/39 [02:49<00:45,  5.68s/it]

End LLM
New Query:  {'institutionList': ['ho chi minh city open university', 'hcmc open university'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  11
Not enough application
New Query:  {'institutionList': ['international university - ho chi minh city national university', 'ho chi minh city open university', 'hcmc open university']}
[456, 618, 533, 548, 1669, 1604, 1240, 1193]
Num CV ok:  8
Rerank
[1193, 1240, 1604, 1669, 456, 533, 548, 618]
Time: 5.01920223236084
Extracting Job Description


 82%|████████▏ | 32/39 [02:55<00:40,  5.76s/it]

End LLM
New Query:  {'institutionList': ['ho chi minh city university of technology and education', 'ho chi minh city university of technology'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  4
Reduce CV  6  ->  4  CV
New Query:  {'institutionList': ['ho chi minh city university of technology and education', 'ho chi minh city university of technology'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Still more -> Reduce CV
Risk:  True
Num CV ok:  4
Rerank
Reranking
[2027, 2103, 262]
Time: 5.948086500167847
Extracting Job Description


 85%|████████▍ | 33/39 [03:00<00:33,  5.55s/it]

End LLM
New Query:  {'institutionList': ['university of economics', 'university of economics and law'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  3
Reduce CV  4  ->  3  CV
New Query:  {'institutionList': ['university of economics and law'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Risk:  True
Num CV ok:  3
Rerank
Reranking
[692, 443]
Time: 5.0704591274261475
Extracting Job Description


 87%|████████▋ | 34/39 [03:06<00:28,  5.61s/it]

End LLM
New Query:  {'institutionList': ['ho chi minh city university of foreign languages and information technology', 'hcmc open university'], 'majorList': ['information technology and digital transformation in economics', 'information systems', 'information security', 'it engineer', 'computer science', 'information and communication technology', 'information technology - software application', 'electronics and telecommunication'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  4
Not enough application
New Query:  {'institutionList': ['ho chi minh city university of foreign languages and information technology', 'hcmc open university']}
[456, 555]
Less -> Sematic Search
Using Sematic Search
Num CV ok:  3
Rerank
[1488, 456, 555]
Time: 5.732226848602295
Extracting Job Description


 90%|████████▉ | 35/39 [03:10<00:20,  5.14s/it]

End LLM
New Query:  {'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma'], 'roleList': ['business analyst', 'sales analyst', 'analyst', 'finance analyst']}
Extended Num Application:  6
Num CV ok:  6
Rerank
Reranking
[138, 443, 1847, 1193]
Time: 4.048253059387207
Extracting Job Description


 92%|█████████▏| 36/39 [03:14<00:14,  4.77s/it]

End LLM
New Query:  {'roleList': ['consultant', 'investment consultant', 'legal consultant', 'financial consultant']}
Extended Num Application:  6
Not enough application
New Query:  {'roleList': ['consultant', 'legal consultant', 'investment consultant', 'implementation consultant', 'financial consultant']}
[669, 548, 1193, 456, 1783]
Num CV ok:  5
Rerank
Reranking
[1193, 1783, 456, 548, 669]
Time: 3.91168212890625
Extracting Job Description


 95%|█████████▍| 37/39 [03:18<00:09,  4.73s/it]

End LLM
New Query:  {'institutionList': ['ho chi minh city university of technology and food industry', 'ho chi minh city university of industry and trade'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  3
Num CV ok:  3
Rerank
Reranking
[1066, 1841]
Time: 4.619842767715454
Extracting Job Description


 97%|█████████▋| 38/39 [03:25<00:05,  5.33s/it]

End LLM
New Query:  {'institutionList': ['ho chi minh city university of science', 'university of economics', 'ho chi minh city university of technology', 'university of economics and finance'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  11
Num CV ok:  8
Rerank
[1165, 1214, 1253, 1428, 1530, 2103, 262, 587]
Time: 6.735535144805908
Extracting Job Description
End LLM
New Query:  {'institutionList': ['vnu-hcm university of science', 'hanoi university of business and technology', 'posts and telecommunications institute of technology', 'university of technology information - vnu', 'hanoi university of science and technology'], 'eduList': ['phd', 'bachelor', 'master', 'certificate', 'undergraduate', 'diploma']}
Extended Num Application:  12
Num CV ok:  10
Rerank
Reranking


100%|██████████| 39/39 [03:31<00:00,  5.42s/it]

[1410, 345, 222, 2083, 1104, 19, 1733, 1488, 751]
Time: 5.844655275344849





In [7]:
# Accumulate per-question recall and precision as running sums: overall, and
# split into "long" and "short" questions by the number of gold answers.
# The sums are turned into averages in the next cell.
LONG_ANSWER_THRESHOLD = 7  # more gold answers than this => "long" question
PRECISION_EPS = 1e-5       # keeps precision defined when nothing was predicted

recall = 0
precision = 0
long_recall = 0
long_precision = 0
len_long = 0
short_recall = 0
short_precision = 0
len_short = 0

for answer in answers:
    predicted = answer['predict']
    gold = set(answer['answer'])
    num_ans = len(answer['answer'])
    # Count each gold item at most once, so a prediction list containing
    # duplicates cannot push recall above 1.
    correct = len(gold.intersection(predicted))
    # A question with no gold answers contributes 0 recall instead of raising
    # ZeroDivisionError.
    item_recall = correct / num_ans if num_ans else 0.0
    item_precision = correct / (len(predicted) + PRECISION_EPS)

    recall += item_recall
    precision += item_precision
    if num_ans > LONG_ANSWER_THRESHOLD:
        long_recall += item_recall
        long_precision += item_precision
        len_long += 1
    else:
        short_recall += item_recall
        short_precision += item_precision
        len_short += 1

In [8]:
# Turn the running sums into per-question averages and derive F1 from them.
#
# NOTE: the divisions are in place, so this cell is not idempotent. Re-run the
# accumulation cell before executing this one again, otherwise the averages
# get divided a second time.
#
# max(..., 1) guards against ZeroDivisionError when a bucket (or the whole
# benchmark) is empty; the matching sums are 0 in that case, so the average
# stays 0.
num_questions = max(len(answers), 1)
num_long = max(len_long, 1)
num_short = max(len_short, 1)

recall /= num_questions
precision /= num_questions
long_recall /= num_long
long_precision /= num_long
short_recall /= num_short
short_precision /= num_short

# The small epsilon keeps F1 defined when recall and precision are both 0.
f1 = 2 * recall * precision / (recall + precision + 1e-5)
short_f1 = 2 * short_recall * short_precision / (short_recall + short_precision + 1e-5)
long_f1 = 2 * long_recall * long_precision / (long_recall + long_precision+1e-5)

In [9]:
# Print the three metric groups (overall, short, long), each followed by a
# divider line.
report_sections = (
    (
        f"Recall: {recall}",
        f"Precision: {precision}",
        f"F1: {f1}",
    ),
    (
        f"Short Recall: {short_recall}",
        f"Short Precision: {short_precision}",
        f"Short F1: {short_f1}",
    ),
    (
        f"Long Recall: {long_recall}",
        f"Long Precision: {long_precision}",
        f"Long F1: {long_f1}",
    ),
)
divider = "====================================="

for section in report_sections:
    for line in section:
        print(line)
    print(divider)

Recall: 0.4493628593628594
Precision: 0.45235304354246697
F1: 0.4508479936814607
Short Recall: 0.4182795698924731
Short Precision: 0.43010605197705226
Short F1: 0.4241053814791576
Long Recall: 0.5698106060606061
Long Precision: 0.5385601358584485
Long F1: 0.5537398220800127


## Haiku

Recall: 0.4582659932659932 \
Precision: 0.45441863442709507 \
F1: 0.45632920485280953

=====================================

Short Recall: 0.4209677419354839 \
Short Precision: 0.42419216192259934 \
Short F1: 0.4225688012224169

=====================================

Long Recall: 0.6027967171717171 \
Long Precision: 0.5715462153820161 \
Long F1: 0.5867506654834951

=====================================

## Llama 3 8b

Recall: 0.4816278166278167 \
Precision: 0.4387250300532934 \
F1: 0.4591714650075473

=====================================

Short Recall: 0.4360215053763441 \
Short Precision: 0.39011169349691877 \
Short F1: 0.41178596663490963

=====================================

Long Recall: 0.6583522727272727 \
Long Precision: 0.6271017092092451 \
Long F1: 0.6423421290530603 

=====================================

## Phi 3 Small

Recall: 0.4114801864801865 \
Precision: 0.42942769302887573 \
F1: 0.4202574151721883

=====================================

Short Recall: 0.410752688172043 \
Short Precision: 0.42526743934168454 \
Short F1: 0.4178790647679619

=====================================

Long Recall: 0.4142992424242424 \
Long Precision: 0.4455486760667415 \
Long F1: 0.4293511171741584

=====================================

## Llama 3 70b