# Config

In [1]:
print('Hello world')

Hello world


In [2]:
import pandas as pd
import numpy as np
import spacy
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import re


SPACY_MODEL = 'en_core_web_sm'
SBERT_MODEL = 'all-mpnet-base-v2'


In [3]:
job_offers = {
    'poor': {
        'text': """JOB OFFER: Certified Commercial Plumbing Technician

We are seeking a reliable and physically capable Certified Commercial Plumbing Technician to join our field service team.
Required Technical Skills:
- Current state certification in commercial plumbing installation and repair.
- Expertise in using and maintaining standard plumbing tools (e.g., pipe cutters, soldering torches).
- Proven experience with various piping materials (e.g., copper, PVC, PEX).
- Ability to read and interpret construction blueprints for pipe routing and material specifications.

Soft Skills & Team Requirements:
- A strong sense of **physical endurance** and comfort working in confined spaces.
- Excellent **time management** and ability to meet strict project deadlines.
- **Customer service focus** for interacting with clients on-site.""",
        'score': '0-10'
    },
    'medium': {
        'text': """JOB OFFER: Junior Front-End Web Developer

We are looking for a creative Junior Front-End Web Developer to assist in building and maintaining our client-facing applications.
Required Technical Skills:
- Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**.
- Familiarity with a modern front-end framework (e.g., React or Vue.js).
- Basic experience querying data via APIs and managing simple back-end data using **SQL**.
- Knowledge of UI/UX principles and responsive design best practices.

Soft Skills & Team Requirements:
- A strong sense of **design aesthetics** and visual appeal.
- Proven ability to **collaborate effectively** with design and back-end teams.
- **Desire for knowledge** of emerging web standards.""",
        'score': '40-60'
    },
    'perfect': {
        'text': """JOB OFFER: Data Science Intern / Junior Data Analyst

We are seeking an enthusiastic and technically proficient graduate to join our Data Science team for a junior role or internship.
Required Technical Skills:
- **Expert level coding** proficiency in **Python** for data manipulation and analysis.
- Deep knowledge of relational databases, with proven experience managing data using **Structured Query Language (SQL)**.
- Hands-on experience with specific database environments, preferably **Postgres**.
- Successful application of **statistical methods** for generating business insights.

Soft Skills & Team Requirements:
- Proven ability to **work together effectively** in cross-functional teams.
- Highly **articulate** and able to clearly explain complex technical results to business stakeholders.
- Driven by a **desire for knowledge** and continuous learning within the Data Science domain.""",
        'score': '90-100'
    }
}

In [4]:
cv_candidate = """
CANDIDATE PROFILE: Data Science Graduate

Summary: Enthusiastic graduate with a passion for transforming complex data into actionable insights and analysing).
Technical Expertise:
- **Expert level coding** in Python, used for ETL and complex calculations.
- Deep knowledge of relational databases, managing data using **Structured Query Language (SQL)**.
- Hands-on experience with **Postgres**.
- Successfully applied **statistical methods** in university projects.

Personal and Team Skills:
- Proven ability to **work together effectively** in cross-functional teams.
- Highly **articulate** and able to clearly explain technical results to non-technical stakeholders.
- Driven by a **desire for knowledge** and continuous improvement.
"""

# Transform

In [59]:
from spacy import Language
from spacy.tokens import Doc
from typing import List, Tuple, Dict
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix

## Transform for TF-IDF

In [60]:
def set_up_obj():
    try:
        nlp_ = spacy.load(SPACY_MODEL)
    except OSError:
        raise Exception(f'Model {SPACY_MODEL} is not installed, use python -m spacy download')

    tfidf_vectorizer = TfidfVectorizer(
        stop_words='english',
        # ngram_range=(1, 2),
        # min_df=2,
    )
    return nlp_, tfidf_vectorizer


In [61]:
def get_lemmas_from_doc(doc: Doc) -> List[str]:
    return [token.lemma_.lower() for token in doc if token.is_alpha]

In [62]:
def lemmatize_and_clean_texts(texts: List[str], nlp: Language) -> List[str]:
    processed_texts = []

    for doc in nlp.pipe(texts):
        lemmas_list = get_lemmas_from_doc(doc)
        processed_texts.append(' '.join(lemmas_list))

    return processed_texts


In [63]:
def vectorize_lemmas(processed_texts: List[str], vectorizer: TfidfVectorizer) -> csr_matrix:
    return vectorizer.fit_transform(processed_texts)

## Transform for SBERT

In [64]:
def set_up_obj_sbert():
    try:
        nlp = spacy.load(SPACY_MODEL)
    except OSError:
        raise Exception(f'Model {SPACY_MODEL} is not installed, use python -m spacy download')
    return nlp

In [65]:
def clean_texts_for_sbert(job_offer: str, cv: str, nlp: Language,
                          window_size: int = 3, overlap: int = 1) -> Dict[str, List[str]]:
    all_chunks = {'job_offer': [], 'cv': []}

    for key, text in zip(('job_offer', 'cv'), (job_offer, cv)):
        
        if not isinstance(text, str):
            continue

        flat_text = re.sub(r'\s+', ' ', text).strip()
        doc = nlp(flat_text)
        sentences = [sent.text.strip() for sent in doc.sents]

        if len(sentences) <= window_size:
            all_chunks[key].append(flat_text)
            continue

        step = window_size - overlap
        if step < 1: step = 1

        for i in range(0, len(sentences), step):
            group = sentences[i:i + window_size]
            chunk_text = ' '.join(group)
            all_chunks[key].append(chunk_text)

    return all_chunks

In [66]:
sbert_text_clean_poor = clean_texts_for_sbert(
    job_offers['poor']['text'], cv_candidate, set_up_obj_sbert())
sbert_text_clean_medium = clean_texts_for_sbert(
    job_offers['medium']['text'], cv_candidate, set_up_obj_sbert())
sbert_text_clean_perfect = clean_texts_for_sbert(
    job_offers['perfect']['text'], cv_candidate, set_up_obj_sbert())

In [67]:
sbert_text_clean_poor['job_offer'][0]

'JOB OFFER: Certified Commercial Plumbing Technician We are seeking a reliable and physically capable Certified Commercial Plumbing Technician to join our field service team. Required Technical Skills: - Current state certification in commercial plumbing installation and repair.'

In [68]:
job_offers['poor']['text'][:51]

'JOB OFFER: Certified Commercial Plumbing Technician'

# Calculations

## TF-IDF

In [69]:
from sklearn.metrics.pairwise import cosine_similarity


def tf_idf_pipeline(job_offer: str, cv: str) -> Tuple[csr_matrix, TfidfVectorizer]:
    nlp, tfidf_vectorizer = set_up_obj()
    clean_texts = lemmatize_and_clean_texts([job_offer, cv], nlp)
    return vectorize_lemmas(clean_texts, tfidf_vectorizer), tfidf_vectorizer

In [70]:
matrix_poor, tfidf_vec_poor = tf_idf_pipeline(job_offers['poor']['text'], cv_candidate)
matrix_medium, tfidf_vec_medium = tf_idf_pipeline(job_offers['medium']['text'], cv_candidate)
matrix_perfect, tfidf_vec_perfect = tf_idf_pipeline(job_offers['perfect']['text'], cv_candidate)

In [71]:
similarity_poor = cosine_similarity(
    matrix_poor[0:1],
    matrix_poor[1:2]
)
print(f'Similarity TF-IDF poor (Lemmatized, no N-grams): {similarity_poor[0][0] * 100:.2f}%')
print('Score expected:', job_offers['poor']['score'])

Similarity TF-IDF poor (Lemmatized, no N-grams): 10.26%
Score expected: 0-10


In [72]:
similarity_medium = cosine_similarity(
    matrix_medium[0:1],
    matrix_medium[1:2]
)
print(f'Similarity TF-IDF medium (Lemmatized, no N-grams): {similarity_medium[0][0] * 100:.2f}%')
print('Score expected:', job_offers['medium']['score'])

Similarity TF-IDF medium (Lemmatized, no N-grams): 14.84%
Score expected: 40-60


In [73]:
similarity_perfect = cosine_similarity(
    matrix_perfect[0:1],
    matrix_perfect[1:2]
)
print(f'Similarity TF-IDF perfect (Lemmatized, no N-grams): {similarity_perfect[0][0] * 100:.2f}%')
print('Score expected:', job_offers['perfect']['score'])

Similarity TF-IDF perfect (Lemmatized, no N-grams): 56.40%
Score expected: 90-100


In [74]:
def explain_tf_idf(tfidf_vectorizer: TfidfVectorizer, matrix: csr_matrix):
    feature_names = tfidf_vectorizer.get_feature_names_out()
    dense = matrix.todense()

    denselist = dense.tolist()
    df = pd.DataFrame(denselist, columns=feature_names, index=['job_offer', 'cv_candidate'])

    common_words = df.loc['job_offer'] * df.loc['cv_candidate']
    common_words = common_words[common_words > 0].sort_values(ascending=False)

    return common_words



In [75]:
common_words_poor = explain_tf_idf(tfidf_vec_poor, matrix_poor)
print('\nCommon words TF-IDF poor (Lemmatized, no N-grams):')
print(common_words_poor)


Common words TF-IDF poor (Lemmatized, no N-grams):
team          0.022796
technical     0.017097
skills        0.011398
ability       0.011398
use           0.011398
experience    0.005699
prove         0.005699
project       0.005699
expertise     0.005699
work          0.005699
dtype: float64


In [76]:
common_words_medium = explain_tf_idf(tfidf_vec_medium, matrix_medium)
print('\nCommon words TF-IDF medium (Lemmatized, no N-grams):')
print(common_words_medium)


Common words TF-IDF medium (Lemmatized, no N-grams):
datum          0.022836
team           0.022836
knowledge      0.022836
technical      0.017127
skills         0.011418
use            0.011418
effectively    0.005709
desire         0.005709
ability        0.005709
experience     0.005709
manage         0.005709
query          0.005709
sql            0.005709
dtype: float64


In [77]:
common_words_perfect = explain_tf_idf(tfidf_vec_perfect, matrix_perfect)
print('\nCommon words TF-IDF perfect (Lemmatized, no N-grams):')
print(common_words_perfect)


Common words TF-IDF perfect (Lemmatized, no N-grams):
technical       0.046353
team            0.046353
knowledge       0.030902
datum           0.030902
data            0.030902
science         0.023177
graduate        0.015451
experience      0.015451
prove           0.015451
skills          0.015451
database        0.015451
complex         0.015451
use             0.015451
desire          0.007726
deep            0.007726
continuous      0.007726
cross           0.007726
able            0.007726
articulate      0.007726
clearly         0.007726
cod             0.007726
ability         0.007726
hands           0.007726
functional      0.007726
effectively     0.007726
enthusiastic    0.007726
expert          0.007726
explain         0.007726
drive           0.007726
method          0.007726
manage          0.007726
level           0.007726
language        0.007726
insight         0.007726
highly          0.007726
query           0.007726
postgre         0.007726
relational      0.00

## SBERT

In [78]:
def get_match_status(score: float) -> str:
    """Pomocnicza funkcja do etykietowania"""
    if score > 0.75: return "≈öwietne dopasowanie ‚úÖ"
    if score > 0.55: return "Dobre dopasowanie ‚ö†Ô∏è"
    if score > 0.35: return "S≈Çabe dopasowanie üî∏"
    return "Brak dopasowania ‚ùå"

In [79]:
def calculate_precise_match(chunks_data: Dict[str, List[str]]) -> Dict[str, any]:
    model = SentenceTransformer(SBERT_MODEL)
    job_offer_chunks = chunks_data['job_offer']
    cv_chunks = chunks_data['cv']

    job_offer_embeddings = model.encode(job_offer_chunks)
    cv_embeddings = model.encode(cv_chunks)

    similarity_scores = cosine_similarity(job_offer_embeddings, cv_embeddings)

    explanation_details = []
    for i, job_offer_chunk in enumerate(job_offer_chunks):
        row_scores = similarity_scores[i]
        max_score = np.max(row_scores)
        best_match_index = np.argmax(row_scores)
        matching_cv_text = cv_chunks[best_match_index]
        explanation_details.append({
            'requirement': job_offer_chunk,
            'best_match_in_cv': matching_cv_text,
            'score': float(max_score),
            'status': get_match_status(max_score)
        })

    overall_similarity = float(np.mean([item['score'] for item in explanation_details]))

    return {
        "overall_score": round(overall_similarity, 4),
        "breakdown": explanation_details,
    }

In [80]:
def sbert_pipeline(job_offer: str, cv: str) -> any:
    chunks_data = clean_texts_for_sbert(job_offer, cv, set_up_obj_sbert())
    report = calculate_precise_match(chunks_data)
    return report

In [81]:
sbert_score_poor = sbert_pipeline(job_offers['poor']['text'], cv_candidate)
print('\n=== POOR MATCH REPORT ===')
print(f"Overall Score: {sbert_score_poor['overall_score']:.4f}")
print(f"Expected Score Range: {job_offers['poor']['score']}")
print("\nBreakdown:")
for i, item in enumerate(sbert_score_poor['breakdown'], 1):
    print(f"\n{i}. Requirement: {item['requirement'][:80]}...")
    print(f"   Best Match: {item['best_match_in_cv'][:80]}...")
    print(f"   Score: {item['score']:.4f}")
    print(f"   Status: {item['status']}")


=== POOR MATCH REPORT ===
Overall Score: 0.4664
Expected Score Range: 0-10

Breakdown:

1. Requirement: JOB OFFER: Certified Commercial Plumbing Technician We are seeking a reliable an...
   Best Match: Personal and Team Skills: - Proven ability to **work together effectively** in c...
   Score: 0.3538
   Status: S≈Çabe dopasowanie üî∏

2. Requirement: Required Technical Skills: - Current state certification in commercial plumbing ...
   Best Match: Personal and Team Skills: - Proven ability to **work together effectively** in c...
   Score: 0.4200
   Status: S≈Çabe dopasowanie üî∏

3. Requirement: - Proven experience with various piping materials (e.g., copper, PVC, PEX). - Ab...
   Best Match: Personal and Team Skills: - Proven ability to **work together effectively** in c...
   Score: 0.5432
   Status: S≈Çabe dopasowanie üî∏

4. Requirement: Soft Skills & Team Requirements: - A strong sense of **physical endurance** and ...
   Best Match: Personal and Team Skills: - Proven abili

In [82]:
sbert_score_medium = sbert_pipeline(job_offers['medium']['text'], cv_candidate)
print('\n=== MEDIUM MATCH REPORT ===')
print(f"Overall Score: {sbert_score_medium['overall_score']:.4f}")
print(f"Expected Score Range: {job_offers['medium']['score']}")
print("\nBreakdown:")
for i, item in enumerate(sbert_score_medium['breakdown'], 1):
    print(f"\n{i}. Requirement: {item['requirement'][:80]}...")
    print(f"   Best Match: {item['best_match_in_cv'][:80]}...")
    print(f"   Score: {item['score']:.4f}")
    print(f"   Status: {item['status']}")


=== MEDIUM MATCH REPORT ===
Overall Score: 0.5534
Expected Score Range: 40-60

Breakdown:

1. Requirement: JOB OFFER: Junior Front-End Web Developer We are looking for a creative Junior F...
   Best Match: Personal and Team Skills: - Proven ability to **work together effectively** in c...
   Score: 0.3848
   Status: S≈Çabe dopasowanie üî∏

2. Requirement: Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3...
   Best Match: Technical Expertise: - **Expert level coding** in Python, used for ETL and compl...
   Score: 0.5495
   Status: S≈Çabe dopasowanie üî∏

3. Requirement: - Knowledge of UI/UX principles and responsive design best practices. Soft Skill...
   Best Match: Personal and Team Skills: - Proven ability to **work together effectively** in c...
   Score: 0.6146
   Status: Dobre dopasowanie ‚ö†Ô∏è

4. Requirement: - Proven ability to **collaborate effectively** with design and back-end teams. ...
   Best Match: Personal and Team Skills: - Proven a

In [83]:
sbert_score_medium

{'overall_score': 0.5534,
 'breakdown': [{'requirement': 'JOB OFFER: Junior Front-End Web Developer We are looking for a creative Junior Front-End Web Developer to assist in building and maintaining our client-facing applications. Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**. - Familiarity with a modern front-end framework (e.g., React or Vue.js).',
   'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
   'score': 0.38478192687034607,
   'status': 'S≈Çabe dopasowanie üî∏'},
  {'requirement': 'Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**. - Familiarity with a modern front-end framework (e.g., React or Vue.js). - Basic experience querying dat

In [84]:
# perfect
sbert_score_perfect = sbert_pipeline(job_offers['perfect']['text'], cv_candidate)
print('\n=== PERFECT MATCH REPORT ===')
print(f"Overall Score: {sbert_score_perfect['overall_score']:.4f}")
print(f"Expected Score Range: {job_offers['perfect']['score']}")
print("\nBreakdown:")
for i, item in enumerate(sbert_score_perfect['breakdown'], 1):
    print(f"\n{i}. Requirement: {item['requirement'][:80]}...")
    print(f"   Best Match: {item['best_match_in_cv'][:80]}...")
    print(f"   Score: {item['score']:.4f}")
    print(f"   Status: {item['status']}")


=== PERFECT MATCH REPORT ===
Overall Score: 0.7034
Expected Score Range: 90-100

Breakdown:

1. Requirement: JOB OFFER: Data Science Intern / Junior Data Analyst We are seeking an enthusias...
   Best Match: CANDIDATE PROFILE: Data Science Graduate Summary: Enthusiastic graduate with a p...
   Score: 0.5551
   Status: Dobre dopasowanie ‚ö†Ô∏è

2. Requirement: Required Technical Skills: - **Expert level coding** proficiency in **Python** f...
   Best Match: Technical Expertise: - **Expert level coding** in Python, used for ETL and compl...
   Score: 0.7745
   Status: ≈öwietne dopasowanie ‚úÖ

3. Requirement: - Successful application of **statistical methods** for generating business insi...
   Best Match: Technical Expertise: - **Expert level coding** in Python, used for ETL and compl...
   Score: 0.7731
   Status: ≈öwietne dopasowanie ‚úÖ

4. Requirement: - Highly **articulate** and able to clearly explain complex technical results to...
   Best Match: Personal and Team Skills: - Prov

# Join and final test

In [2]:
def calculate_final_score(job_offer: str, cv: str, sbert_model: SentenceTransformer,
                          nlp: Language, tfidf_vectorizer: TfidfVectorizer,
                          alpha = 0.7) -> Dict[str, any]:
    # Get scores and explaination tf-idf
    tfidf_clean = lemmatize_and_clean_texts([job_offer, cv], nlp)
    tfidf_matrix = tfidf_vectorizer.fit_transform(tfidf_clean)
    tfidf_similarity = cosine_similarity(
        tfidf_matrix[0:1],
        tfidf_matrix[1:2]
    )[0][0]
    explaination_tfidf = explain_tf_idf(tfidf_vectorizer, tfidf_matrix)

    # Get scores and explaination sbert
    sbert_chunks = clean_texts_for_sbert(job_offer, cv, nlp)
    sbert_report = calculate_precise_match(sbert_chunks)

    score_final = alpha * sbert_report['overall_score'] + (1 - alpha) * tfidf_similarity
    return {
        "final_score": round(score_final, 4),
        "sbert_report": sbert_report,
        "tfidf_similarity": round(tfidf_similarity, 4),
        "explaination_tfidf": explaination_tfidf
    }

NameError: name 'SentenceTransformer' is not defined

In [None]:
final_result_poor = calculate_final_score(
    job_offers['poor']['text'], cv_candidate,
    SentenceTransformer(SBERT_MODEL), spacy.load(SPACY_MODEL),
    TfidfVectorizer(stop_words='english')
)
final_result_poor

{'final_score': np.float64(0.3573),
 'sbert_report': {'overall_score': 0.4664,
  'breakdown': [{'requirement': 'JOB OFFER: Certified Commercial Plumbing Technician We are seeking a reliable and physically capable Certified Commercial Plumbing Technician to join our field service team. Required Technical Skills: - Current state certification in commercial plumbing installation and repair.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.35376501083374023,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': 'Required Technical Skills: - Current state certification in commercial plumbing installation and repair. - Expertise in using and maintaining standard plumbing tools (e.g., pipe cutters, soldering torches). - Prov

In [None]:
final_score_medium = calculate_final_score(
    job_offers['medium']['text'], cv_candidate,
    SentenceTransformer(SBERT_MODEL), spacy.load(SPACY_MODEL),
    TfidfVectorizer(stop_words='english')
)
final_score_medium

{'final_score': np.float64(0.4319),
 'sbert_report': {'overall_score': 0.5534,
  'breakdown': [{'requirement': 'JOB OFFER: Junior Front-End Web Developer We are looking for a creative Junior Front-End Web Developer to assist in building and maintaining our client-facing applications. Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**. - Familiarity with a modern front-end framework (e.g., React or Vue.js).',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.38478192687034607,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': 'Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**. - Familiarity with a modern front-end framewor

In [None]:
final_result_perfect = calculate_final_score(
    job_offers['perfect']['text'], cv_candidate,
    SentenceTransformer(SBERT_MODEL), spacy.load(SPACY_MODEL),
    TfidfVectorizer(stop_words='english')
)
final_result_perfect

{'final_score': np.float64(0.6616),
 'sbert_report': {'overall_score': 0.7034,
  'breakdown': [{'requirement': 'JOB OFFER: Data Science Intern / Junior Data Analyst We are seeking an enthusiastic and technically proficient graduate to join our Data Science team for a junior role or internship. Required Technical Skills: - **Expert level coding** proficiency in **Python** for data manipulation and analysis.',
    'best_match_in_cv': 'CANDIDATE PROFILE: Data Science Graduate Summary: Enthusiastic graduate with a passion for transforming complex data into actionable insights and analysing). Technical Expertise: - **Expert level coding** in Python, used for ETL and complex calculations.',
    'score': 0.5551204085350037,
    'status': 'Dobre dopasowanie ‚ö†Ô∏è'},
   {'requirement': 'Required Technical Skills: - **Expert level coding** proficiency in **Python** for data manipulation and analysis. - Deep knowledge of relational databases, with proven experience managing data using **Structur

# Explainability

In [1]:
def explain_tf_idf(tfidf_vectorizer: TfidfVectorizer, matrix: csr_matrix):
    feature_names = tfidf_vectorizer.get_feature_names_out()
    dense = matrix.todense()

    denselist = dense.tolist()
    df = pd.DataFrame(denselist, columns=feature_names, index=['job_offer', 'cv_candidate'])

    common_words = df.loc['job_offer'] * df.loc['cv_candidate']
    common_words = common_words[common_words > 0].sort_values(ascending=False)

    return common_words



NameError: name 'TfidfVectorizer' is not defined

In [None]:
common_words_poor = explain_tf_idf(tfidf_vec_poor, matrix_poor)
print('\nCommon words TF-IDF poor (Lemmatized, no N-grams):')
print(common_words_poor)


Common words TF-IDF poor (Lemmatized, no N-grams):
team          0.022796
technical     0.017097
skills        0.011398
ability       0.011398
use           0.011398
experience    0.005699
prove         0.005699
project       0.005699
expertise     0.005699
work          0.005699
dtype: float64


In [None]:
common_words_medium = explain_tf_idf(tfidf_vec_medium, matrix_medium)
print('\nCommon words TF-IDF medium (Lemmatized, no N-grams):')
print(common_words_medium)


Common words TF-IDF medium (Lemmatized, no N-grams):
datum          0.022836
team           0.022836
knowledge      0.022836
technical      0.017127
skills         0.011418
use            0.011418
effectively    0.005709
desire         0.005709
ability        0.005709
experience     0.005709
manage         0.005709
query          0.005709
sql            0.005709
dtype: float64


In [None]:
common_words_perfect = explain_tf_idf(tfidf_vec_perfect, matrix_perfect)
print('\nCommon words TF-IDF perfect (Lemmatized, no N-grams):')
print(common_words_perfect)


Common words TF-IDF perfect (Lemmatized, no N-grams):
technical       0.046353
team            0.046353
knowledge       0.030902
datum           0.030902
data            0.030902
science         0.023177
graduate        0.015451
experience      0.015451
prove           0.015451
skills          0.015451
database        0.015451
complex         0.015451
use             0.015451
desire          0.007726
deep            0.007726
continuous      0.007726
cross           0.007726
able            0.007726
articulate      0.007726
clearly         0.007726
cod             0.007726
ability         0.007726
hands           0.007726
functional      0.007726
effectively     0.007726
enthusiastic    0.007726
expert          0.007726
explain         0.007726
drive           0.007726
method          0.007726
manage          0.007726
level           0.007726
language        0.007726
insight         0.007726
highly          0.007726
query           0.007726
postgre         0.007726
relational      0.00

# Final object

In [1]:
# --- IGNORE ---
	

# Another approach

## Text parser

In [5]:
import sys
sys.path.append('..')  # Adjust path to reach the src module from notebook

from src.parsers import CVParser

cv_parser = CVParser()
cv_full = """AI & Data Engineering Specialist
Summary
AI-focused Data Engineer with professional experience in Python, SQL, and cloud ecosystems. Expert in transforming complex datasets into actionable insights, developing machine learning models, and architecting Generative AI solutions (including LangChain agents with RAG). Proven track record in building interactive dashboards and streamlining enterprise data workflows through automation and modern web technologies.

Core Skills
Programming & ML: Python, Scikit-learn, LangChain, TensorFlow, TypeScript

Data Engineering: Pandas, NumPy, SQL, Azure Databricks, ETL Pipeline Development

Visualization & Web: Streamlit, Matplotlib, Django REST, Flask, React, Power BI

Cloud & DevOps: Microsoft Azure, Docker, Git, CI/CD, Azure DevOps, GitHub Actions

Languages: Polish (Native), English (C1/Advanced), German (Intermediate/B1+)

Professional Experience
Data Engineer | Global Professional Services Firm Feb 2024 ‚Äì Feb 2025

Architected ETL processes for large-scale datasets on Azure using Python, SQL, and PySpark.

Implemented predictive models (Random Forest, Logistic Regression) to enhance analytical accuracy and business decision-making.

Developed AI agents using LangChain (RAG, custom tool integration) to automate internal workflows and optimize LLM-driven document querying.

Designed interactive Streamlit dashboards to communicate complex data findings to non-technical stakeholders.

Data Annotation Specialist | Software Engineering Services Dec 2022 ‚Äì Nov 2023

Managed high-precision 2D/3D image and video annotation for AI/ML model training sets.

Optimized annotation protocols to ensure dataset quality and model reliability.

Authored technical reports regarding data accuracy and compliance standards.

Education
B.Sc. in Computer Science | Major: Cloud Application Development (In Progress)

Master of Arts | Graduated with Distinction (5.0/5.0)

Certifications
Azure Data Engineer Associate (DP-203) ‚Äì Microsoft

Azure Data Fundamentals ‚Äì Microsoft

Machine Learning Specialization ‚Äì DeepLearning.AI & Stanford

Technical Projects
Recommendation Engine: Developed a machine-learning game recommender utilizing TF-IDF and Nearest Neighbors via Scikit-learn and Streamlit.

AI EdTech Platform: Built an AI-powered language learning application featuring spaced repetition and generative AI storytelling (LangChain, OpenAI, Django REST, React).

Adversarial AI Systems: Created a Checkers AI employing minimax alpha-beta pruning and Deep Q-Learning (TensorFlow, Flask).

GDPR CONSENT
I hereby give consent for my personal data to be processed for the purpose of conducting
recruitment for the position for which I am applying and future recruitment processes."""
parsed_cv = cv_parser.parse(cv_full)
parsed_cv

{'skills': 'Programming & ML: Python, Scikit-learn, LangChain, TensorFlow, TypeScript\nData Engineering: Pandas, NumPy, SQL, Azure Databricks, ETL Pipeline Development\nVisualization & Web: Streamlit, Matplotlib, Django REST, Flask, React, Power BI\nCloud & DevOps: Microsoft Azure, Docker, Git, CI/CD, Azure DevOps, GitHub Actions\nLanguages: Polish (Native), English (C1/Advanced), German (Intermediate/B1+)',
 'experience': 'Data Engineer | Global Professional Services Firm Feb 2024 ‚Äì Feb 2025\nArchitected ETL processes for large-scale datasets on Azure using Python, SQL, and PySpark.\nImplemented predictive models (Random Forest, Logistic Regression) to enhance analytical accuracy and business decision-making.\nDeveloped AI agents using LangChain (RAG, custom tool integration) to automate internal workflows and optimize LLM-driven document querying.\nDesigned interactive Streamlit dashboards to communicate complex data findings to non-technical stakeholders.\nData Annotation Speciali

In [6]:
parsed_cv = cv_parser.parse(cv_candidate)
parsed_cv

{'skills': '- **Expert level coding** in Python, used for ETL and complex calculations.\n- Deep knowledge of relational databases, managing data using **Structured Query Language (SQL)**.\n- Hands-on experience with **Postgres**.\n- Successfully applied **statistical methods** in university projects.\n- Proven ability to **work together effectively** in cross-functional teams.\n- Highly **articulate** and able to clearly explain technical results to non-technical stakeholders.\n- Driven by a **desire for knowledge** and continuous improvement.',
 'summary': 'Summary: Enthusiastic graduate with a passion for transforming complex data into actionable insights and analysing).'}

In [7]:
cv_parser.parse("""Experience working with Python and AWS in production.
Built scalable systems.

""")

{'summary': 'Experience working with Python and AWS in production.\nBuilt scalable systems.'}

In [8]:
job_offer_real_1 = """üöÄ Join Us in Revolutionizing Construction Technology as a Mid AI Engineer (CV)!

Are you a developer passionate about machine learning, computer vision, and solving real-world problems with code? Are you eager to grow, learn from experienced engineers, and contribute to impactful AI solutions? If that sounds like you, we‚Äôd love to meet you! üëã

About the Company:
We are a European-based startup on a mission to transform construction sites around the world using AI üåç. Our platform is trusted by industry leaders and we already work on the world's largest construction sites, helping build solar farms, pipelines, and many other projects! We combine a flat structure with a collaborative, high-growth culture, creating a space where you can drive real impact üí•.

What You‚Äôll Do:
üß† Work on Computer Vision and Deep Learning models used to analyze geospatial and construction site data.
üì¶ Collaborate on model training, evaluation, and deployment pipelines.
üõ† Write clean, production-ready code in a modern ML development workflow.
üõ∞Ô∏è Turn drone and satellite data into actionable insights for our clients.
ü§ù Work closely with other engineers to design and deliver real-world AI solutions.

What We‚Äôre Looking For:
üî• Curiosity & Drive: You‚Äôre excited to learn, experiment, and grow in the field of machine learning.
üß† Analytical Thinking: You enjoy debugging, understanding data, and solving technical challenges.
üéØ Focus on Impact: You care about building things that actually get used and make a difference.

Requirements:
3+ years of professional experience as an AI Engineer or in a similar position.
Solid Python programming skills.
Hands-on experience with PyTorch and Deep Learning, preferably in Computer Vision projects.
Good English communication skills (written and spoken).
Willing to work at least 3 days a week in our office.

Nice to Have:
‚ú® Experience with Docker, MLflow, or similar tools for experiment tracking and reproducibility.
üó∫ Exposure to geospatial data (e.g., drone imagery, orthophotos, satellite data).
üß™ Machine Learning projects portfolio.

What Do We Offer:
üìà Competitive Compensation: 18k - 23k PLN net/month - B2B/mandate contract, plus an employee stock option plan.
üè• Health & Wellness: Private healthcare, a Multi-sport card, and corporate retreat events.
‚è∞ Flexibility: Remote working days and flexible hours for work-life balance.

Ready to Join?
If you‚Äôre excited to make a difference in a high-impact role, apply today! üåü Send your resume and any portfolio links through the form. Let‚Äôs build the future of construction technology together!

Tech stack:
English: C1
Machine Learning: regular
Git: regular
Computer Vision: regular
PyTorch: regular
Deep Learning: regular
Python: regular
Docker: nice to have
MLflow: nice to have"""

job_offer_real_2 = """AI Transformation Consultant
AI/ML
Central Europe / Remote
Professional Services & Consulting Firm
Full-time
B2B
Senior
Hybrid

Job description:
We are a consulting‚Äìtechnology company specializing in process automation and transformations based on Agentic AI and RAG systems. Organizations around the world ‚Äì from the USA, through Europe, to the Middle East ‚Äì rely on our expertise to move from AI experimentation to real implementations that deliver measurable business results.

Our innovative approach to applying artificial intelligence in business has been recognized by global auditing and business media firms. We collaborate with global brands such as major tech hardware manufacturers, automotive leaders, and leading self-publishing organizations.

We combine strategic consulting with AI engineering, delivering solutions that automate decision-making, accelerate operations, and boost organizational efficiency. We are now expanding our consulting team and looking for an AI Transformation Consultant ‚Äì someone who can combine a technological perspective with a deep understanding of people and the processes behind change.

Why join us?
- You‚Äôll grow in the Agentic AI niche, co-creating projects that shape the future of automation.
- You‚Äôll work with clients from all over the world ‚Äì from Silicon Valley to the Middle East.
- You‚Äôll collaborate with top-tier AI engineers with academic backgrounds, including Ph.D. holders.
- You‚Äôll have real influence ‚Äì we value initiative and strong reasoning over rigid procedures.

In this role you will:
- Participate in early conversations with clients to uncover real needs and transformation directions.
- Lead advisory and strategic discussions, co-creating the vision for AI-based solutions.
- Identify areas with the highest automation potential and propose actionable directions.
- Co-run transformation projects together with a Project Manager and AI engineering team.
- Support clients throughout the entire transformation journey ‚Äì from diagnosis to evaluation.
- Translate complex technological concepts into the language of business value.
- Work in a flexible collaboration model, with availability for client calls across different time zones.

We‚Äôll be excited to talk to you if:
- You are able to conduct conversations with decision-makers and understand the broader business context.
- You think strategically and analyze needs with real impact.
- You view AI trends critically and can separate facts from the noise.
- You communicate fluently in English (C1+) ‚Äì daily work with international clients is natural for you.
- You thrive in an environment that requires initiative, independence, and ownership.
- You are interested in Agentic AI and want to deepen your expertise.
- You understand technical concepts and have worked with Enterprise-grade solutions.
- (Nice to have) You have a programming background that helps you collaborate with AI engineers.

Joining our team means:
- B2B cooperation with flexibility and real project influence.
- Mentoring from founders ‚Äì practitioners combining advisory and engineering experience.
- A variety of projects ‚Äì from business process automation to strategic initiatives.
- Collaboration with global brands shaping the direction of AI development.

Recruitment process:
1. Short intro (15 min) - quick conversation to confirm mutual expectations.
2. Culture Fit Interview (45‚Äì60 min) - meeting about the role, work specifics, and company culture.
3. On-site/Hybrid meeting (60 min) - opportunity to meet the team.
4. Meeting with the AI Tech Lead (30 min) - technical discussion on current projects.
5. Offer.

Join now and see what it‚Äôs like to work with the best.

Tech stack:
English: C2
AI Strategy: senior"""

In [9]:
from src.parsers import JobOfferParser

job_offer_parser = JobOfferParser()
job_offer_parser.parse(job_offer_real_1)

{'requirements': '3+ years of professional experience as an AI Engineer or in a similar position.\nSolid Python programming skills.\nHands-on experience with PyTorch and Deep Learning, preferably in Computer Vision projects.\nGood English communication skills (written and spoken).\nWilling to work at least 3 days a week in our office.\n‚ú® Experience with Docker, MLflow, or similar tools for experiment tracking and reproducibility.\nüó∫ Exposure to geospatial data (e.g., drone imagery, orthophotos, satellite data).\nüß™ Machine Learning projects portfolio.\nWhat Do We Offer:\nüìà Competitive Compensation: 18k - 23k PLN net/month - B2B/mandate contract, plus an employee stock option plan.\nüè• Health & Wellness: Private healthcare, a Multi-sport card, and corporate retreat events.\n‚è∞ Flexibility: Remote working days and flexible hours for work-life balance.\nReady to Join?\nIf you‚Äôre excited to make a difference in a high-impact role, apply today! üåü Send your resume and any p

In [10]:
job_offer_parser.parse(job_offer_real_2)

{'requirements': 'English: C2\nAI Strategy: senior',
 'responsibilities': '- Participate in early conversations with clients to uncover real needs and transformation directions.\n- Lead advisory and strategic discussions, co-creating the vision for AI-based solutions.\n- Identify areas with the highest automation potential and propose actionable directions.\n- Co-run transformation projects together with a Project Manager and AI engineering team.\n- Support clients throughout the entire transformation journey ‚Äì from diagnosis to evaluation.\n- Translate complex technological concepts into the language of business value.\n- Work in a flexible collaboration model, with availability for client calls across different time zones.\nWe‚Äôll be excited to talk to you if:\n- You are able to conduct conversations with decision-makers and understand the broader business context.\n- You think strategically and analyze needs with real impact.\n- You view AI trends critically and can separate fact

In [11]:
job_offer_parser.parse(job_offers['perfect']['text'])

{'requirements': '- **Expert level coding** proficiency in **Python** for data manipulation and analysis.\n- Deep knowledge of relational databases, with proven experience managing data using **Structured Query Language (SQL)**.\n- Hands-on experience with specific database environments, preferably **Postgres**.\n- Successful application of **statistical methods** for generating business insights.\n- Proven ability to **work together effectively** in cross-functional teams.\n- Highly **articulate** and able to clearly explain complex technical results to business stakeholders.\n- Driven by a **desire for knowledge** and continuous learning within the Data Science domain.',
 'uncategorized': 'JOB OFFER: Data Science Intern / Junior Data Analyst\nWe are seeking an enthusiastic and technically proficient graduate to join our Data Science team for a junior role or internship.'}

## Entity Ruler

### Code Snippet

In [12]:
import spacy
from sentence_transformers import SentenceTransformer, util
from typing import Set, List, Dict, Tuple
# from src.text_parser import CVParser, JobOfferParser

# 1. Configuration & Models
SECTION_WEIGHTS = {
    'experience': 1.0,
    'projects': 0.8,
    'summary': 0.8,
    'skills': 0.5,
    'education': 0.6,
    'uncategorized': 0.5
}

nlp = spacy.load("en_core_web_sm", disable=["ner"])
sbert = SentenceTransformer("all-MiniLM-L6-v2")

def setup_skill_ruler(nlp_model):
    if "entity_ruler" not in nlp_model.pipe_names:
        ruler = nlp_model.add_pipe("entity_ruler", before="parser")
    else:
        ruler = nlp_model.get_pipe("entity_ruler")

    skills = ["Python", "SQL", "NoSQL", "Postgres", "ETL", "Data Engineering",
              "Pandas", "NumPy", "Matplotlib", "Seaborn", "Scikit-learn",
              "TensorFlow", "PyTorch", "Keras", "LangChain", "RAG", "LLM",
              "Django", "Flask", "FastAPI", "React", "TypeScript", "JavaScript",
              "AWS", "Azure", "GCP", "Docker", "Kubernetes", "Terraform", "Git",
              "Agile", "Scrum", "Communication", "Leadership"]

    patterns = [{"label": "SKILL", "pattern": [{"LOWER": s.lower()}]} for s in skills]
    ruler.add_patterns(patterns)
    return nlp_model

def extract_skills(text: str, nlp_model) -> Set[str]:
    if not text or not text.strip(): return set()
    doc = nlp_model(text)
    return {ent.text.lower() for ent in doc.ents if ent.label_ == "SKILL"}

def perform_gap_analysis(cv_text: str, job_text: str, nlp_model):
    cv_skills = extract_skills(cv_text, nlp_model)
    job_skills = extract_skills(job_text, nlp_model)

    common = sorted(list(cv_skills.intersection(job_skills)))
    missing = sorted(list(job_skills.difference(cv_skills)))
    score = len(common) / len(job_skills) if job_skills else 0.0

    return score, common, missing

def calculate_semantic_score(job_text: str, cv_text: str, nlp_model, sbert_model):
    job_doc = nlp_model(job_text)
    job_sents = [s.text for s in job_doc.sents if len(s.text.split()) > 3]

    cv_doc = nlp_model(cv_text)
    cv_sents = [s.text for s in cv_doc.sents if len(s.text.split()) > 2]

    if not job_sents or not cv_sents:
        return 0.0

    job_emb = sbert_model.encode(job_sents, convert_to_tensor=True)
    cv_emb = sbert_model.encode(cv_sents, convert_to_tensor=True)

    cosine_scores = util.cos_sim(job_emb, cv_emb)
    max_scores = cosine_scores.max(dim=1).values
    return float(max_scores.mean())

def calculate_weighted_semantic_score(job_text: str, cv_sections: Dict[str, str], nlp_model, sbert_model):
    job_doc = nlp_model(job_text)
    job_sentences = [s.text for s in job_doc.sents if len(s.text.split()) > 3]
    if not job_sentences: return 0.0

    job_embeddings = sbert_model.encode(job_sentences, convert_to_tensor=True)

    cv_embeddings_map = {}
    for section, text in cv_sections.items():
        sec_doc = nlp_model(text)
        sents = [s.text for s in sec_doc.sents if len(s.text.split()) > 2]
        if sents:
            cv_embeddings_map[section] = sbert_model.encode(sents, convert_to_tensor=True)

    if not cv_embeddings_map: return 0.0

    total_weighted_similarity = 0.0
    for j_emb in job_embeddings:
        best_score_for_sentence = 0.0
        for section, cv_embs in cv_embeddings_map.items():
            similarities = util.cos_sim(j_emb, cv_embs)[0]
            max_sim = float(similarities.max())
            weight = SECTION_WEIGHTS.get(section, 0.5)
            weighted_sim = max_sim * weight

            if weighted_sim > best_score_for_sentence:
                best_score_for_sentence = weighted_sim
        total_weighted_similarity += best_score_for_sentence

    return total_weighted_similarity / len(job_sentences)

def analyze_action_verbs(text: str, nlp_model) -> float:
    if not text or not text.strip(): return 0.5
    doc = nlp_model(text)
    strong_verbs = {"lead", "manage", "create", "develop", "design", "implement", "optimize", "build", "achieve", "solve"}
    verb_count = 0
    action_verb_count = 0
    for token in doc:
        if token.pos_ == "VERB":
            verb_count += 1
            if token.lemma_.lower() in strong_verbs or (token.dep_ == "ROOT" and token.lemma_ not in ["be", "have"]):
                action_verb_count += 1
    return action_verb_count / verb_count if verb_count > 0 else 0.0

# Initialize Ruler
nlp = setup_skill_ruler(nlp)

In [13]:
# Inputs
cv_content = cv_candidate
job_description_good = job_offers['perfect']['text']
job_description_medium = job_offers['medium']['text']
job_description_weak = job_offers['poor']['text']

job_descriptions = {
    'good': job_description_good,
    'medium': job_description_medium,
    'weak': job_description_weak
}

for level, job_description in job_descriptions.items():
    print(f"\n=== Analysis for {level.upper()} JOB DESCRIPTION ===")
    # 1. Gap Analysis
    keyword_score, matched, missing = perform_gap_analysis(cv_content, job_description, nlp)

    # 2. Semantic Analysis
    semantic_score = calculate_semantic_score(job_description, cv_content, nlp, sbert)

    # 3. Results
    print(f"--- Gap Analysis ---")
    print(f"Score: {keyword_score:.2%}")
    print(f"Matched: {matched}")
    print(f"Missing: {missing}")
    print(f"\n--- Semantic Analysis ---")
    print(f"Semantic Match: {semantic_score:.4f}")


=== Analysis for GOOD JOB DESCRIPTION ===
--- Gap Analysis ---
Score: 100.00%
Matched: ['postgres', 'python', 'sql']
Missing: []

--- Semantic Analysis ---
Semantic Match: 0.7336

=== Analysis for MEDIUM JOB DESCRIPTION ===
--- Gap Analysis ---
Score: 33.33%
Matched: ['sql']
Missing: ['javascript', 'react']

--- Semantic Analysis ---
Semantic Match: 0.4786

=== Analysis for WEAK JOB DESCRIPTION ===
--- Gap Analysis ---
Score: 0.00%
Matched: []
Missing: []

--- Semantic Analysis ---
Semantic Match: 0.3923


In [14]:
# 1. Initialize Parsers
cv_parser = CVParser()
job_parser = JobOfferParser()

for level, job_description in job_descriptions.items():
    print(f"\n=== HYBRID MATCHING for {level.upper()} JOB DESCRIPTION ===")
    # 2. Raw Texts
    cv_raw = cv_candidate
    job_raw = job_description

    # 3. Parsing
    cv_sections = cv_parser.parse(cv_raw)
    job_sections = job_parser.parse(job_raw)

    # Prepare texts
    cv_full_text = " ".join(cv_sections.values())
    job_signal_text = " ".join([v for k, v in job_sections.items() if k != 'about'])

    # 4. Hybrid Matching
    # A. Keywords (NER)
    cv_skills = extract_skills(cv_full_text, nlp)
    job_skills = extract_skills(job_signal_text, nlp)
    keyword_score = len(cv_skills.intersection(job_skills)) / len(job_skills) if job_skills else 0.0

    # B. Weighted Semantics
    semantic_score = calculate_weighted_semantic_score(job_signal_text, cv_sections, nlp, sbert)

    # C. Action Verbs
    narrative = cv_sections.get('experience', '') + " " + cv_sections.get('projects', '')
    action_score = analyze_action_verbs(narrative, nlp)

    # 5. Final Aggregation (Alpha = 0.5)
    alpha = 0.5
    base_score = (alpha * semantic_score) + ((1.0 - alpha) * keyword_score)
    final_score = base_score * 0.95 + (action_score * 0.05)

    print(f"Final Weighted Match: {final_score:.2%}")
    print(f"Keywords Score: {keyword_score:.2%}")
    print(f"Semantic Score (Weighted): {semantic_score:.2%}")
    print(f"Matched Skills: {cv_skills.intersection(job_skills)}")


=== HYBRID MATCHING for GOOD JOB DESCRIPTION ===
Final Weighted Match: 70.39%
Keywords Score: 100.00%
Semantic Score (Weighted): 42.93%
Matched Skills: {'sql', 'python', 'postgres'}

=== HYBRID MATCHING for MEDIUM JOB DESCRIPTION ===
Final Weighted Match: 29.90%
Keywords Score: 33.33%
Semantic Score (Weighted): 24.36%
Matched Skills: {'sql'}

=== HYBRID MATCHING for WEAK JOB DESCRIPTION ===
Final Weighted Match: 10.92%
Keywords Score: 0.00%
Semantic Score (Weighted): 17.72%
Matched Skills: set()


## Test and compare

In [15]:
cv_candidate

'\nCANDIDATE PROFILE: Data Science Graduate\n\nSummary: Enthusiastic graduate with a passion for transforming complex data into actionable insights and analysing).\nTechnical Expertise:\n- **Expert level coding** in Python, used for ETL and complex calculations.\n- Deep knowledge of relational databases, managing data using **Structured Query Language (SQL)**.\n- Hands-on experience with **Postgres**.\n- Successfully applied **statistical methods** in university projects.\n\nPersonal and Team Skills:\n- Proven ability to **work together effectively** in cross-functional teams.\n- Highly **articulate** and able to clearly explain technical results to non-technical stakeholders.\n- Driven by a **desire for knowledge** and continuous improvement.\n'

### Poor

In [16]:
job_offers['poor']['text']

'JOB OFFER: Certified Commercial Plumbing Technician\n\nWe are seeking a reliable and physically capable Certified Commercial Plumbing Technician to join our field service team.\nRequired Technical Skills:\n- Current state certification in commercial plumbing installation and repair.\n- Expertise in using and maintaining standard plumbing tools (e.g., pipe cutters, soldering torches).\n- Proven experience with various piping materials (e.g., copper, PVC, PEX).\n- Ability to read and interpret construction blueprints for pipe routing and material specifications.\n\nSoft Skills & Team Requirements:\n- A strong sense of **physical endurance** and comfort working in confined spaces.\n- Excellent **time management** and ability to meet strict project deadlines.\n- **Customer service focus** for interacting with clients on-site.'

```
{'final_score': np.float64(0.3573),
 'sbert_report': {'overall_score': 0.4664,
  'breakdown': [{'requirement': 'JOB OFFER: Certified Commercial Plumbing Technician We are seeking a reliable and physically capable Certified Commercial Plumbing Technician to join our field service team. Required Technical Skills: - Current state certification in commercial plumbing installation and repair.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.35376501083374023,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': 'Required Technical Skills: - Current state certification in commercial plumbing installation and repair. - Expertise in using and maintaining standard plumbing tools (e.g., pipe cutters, soldering torches). - Proven experience with various piping materials (e.g., copper, PVC, PEX).',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.4199908971786499,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': '- Proven experience with various piping materials (e.g., copper, PVC, PEX). - Ability to read and interpret construction blueprints for pipe routing and material specifications. Soft Skills & Team Requirements: - A strong sense of **physical endurance** and comfort working in confined spaces.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.5432001352310181,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': 'Soft Skills & Team Requirements: - A strong sense of **physical endurance** and comfort working in confined spaces. - Excellent **time management** and ability to meet strict project deadlines. - **Customer service focus** for interacting with clients on-site.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.6156630516052246,
    'status': 'Dobre dopasowanie ‚ö†Ô∏è'},
   {'requirement': '- **Customer service focus** for interacting with clients on-site.',
    'best_match_in_cv': '- Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.3994218111038208,
    'status': 'S≈Çabe dopasowanie üî∏'}]},
 'tfidf_similarity': np.float64(0.1026),
 'explaination_tfidf': team          0.022796
 technical     0.017097
 skills        0.011398
 ability       0.011398
 use           0.011398
 experience    0.005699
 prove         0.005699
 project       0.005699
 expertise     0.005699
 work          0.005699
 dtype: float64}

### Medium


In [17]:
job_offers['medium']['text']

'JOB OFFER: Junior Front-End Web Developer\n\nWe are looking for a creative Junior Front-End Web Developer to assist in building and maintaining our client-facing applications.\nRequired Technical Skills:\n- Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**.\n- Familiarity with a modern front-end framework (e.g., React or Vue.js).\n- Basic experience querying data via APIs and managing simple back-end data using **SQL**.\n- Knowledge of UI/UX principles and responsive design best practices.\n\nSoft Skills & Team Requirements:\n- A strong sense of **design aesthetics** and visual appeal.\n- Proven ability to **collaborate effectively** with design and back-end teams.\n- **Desire for knowledge** of emerging web standards.'

```
{'final_score': np.float64(0.4319),
 'sbert_report': {'overall_score': 0.5534,
  'breakdown': [{'requirement': 'JOB OFFER: Junior Front-End Web Developer We are looking for a creative Junior Front-End Web Developer to assist in building and maintaining our client-facing applications. Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**. - Familiarity with a modern front-end framework (e.g., React or Vue.js).',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.38478192687034607,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': 'Required Technical Skills: - Proficiency in core web technologies: **HTML5, CSS3, and JavaScript**. - Familiarity with a modern front-end framework (e.g., React or Vue.js). - Basic experience querying data via APIs and managing simple back-end data using **SQL**. - Knowledge of UI/UX principles and responsive design best practices.',
    'best_match_in_cv': 'Technical Expertise: - **Expert level coding** in Python, used for ETL and complex calculations. - Deep knowledge of relational databases, managing data using **Structured Query Language (SQL)**. - Hands-on experience with **Postgres**. - Successfully applied **statistical methods** in university projects. Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams.',
    'score': 0.5494816899299622,
    'status': 'S≈Çabe dopasowanie üî∏'},
   {'requirement': '- Knowledge of UI/UX principles and responsive design best practices. Soft Skills & Team Requirements: - A strong sense of **design aesthetics** and visual appeal. - Proven ability to **collaborate effectively** with design and back-end teams.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.6145654916763306,
    'status': 'Dobre dopasowanie ‚ö†Ô∏è'},
   {'requirement': '- Proven ability to **collaborate effectively** with design and back-end teams. - **Desire for knowledge** of emerging web standards.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.6645981073379517,
    'status': 'Dobre dopasowanie ‚ö†Ô∏è'}]},
 'tfidf_similarity': np.float64(0.1484),
 'explaination_tfidf': datum          0.022836
 team           0.022836
 knowledge      0.022836
 technical      0.017127
 skills         0.011418
 use            0.011418
 effectively    0.005709
 desire         0.005709
 ability        0.005709
 experience     0.005709
 manage         0.005709
 query          0.005709
 sql            0.005709
 dtype: float64}

### Perfect

```
{'final_score': np.float64(0.6616),
 'sbert_report': {'overall_score': 0.7034,
  'breakdown': [{'requirement': 'JOB OFFER: Data Science Intern / Junior Data Analyst We are seeking an enthusiastic and technically proficient graduate to join our Data Science team for a junior role or internship. Required Technical Skills: - **Expert level coding** proficiency in **Python** for data manipulation and analysis.',
    'best_match_in_cv': 'CANDIDATE PROFILE: Data Science Graduate Summary: Enthusiastic graduate with a passion for transforming complex data into actionable insights and analysing). Technical Expertise: - **Expert level coding** in Python, used for ETL and complex calculations.',
    'score': 0.5551204085350037,
    'status': 'Dobre dopasowanie ‚ö†Ô∏è'},
   {'requirement': 'Required Technical Skills: - **Expert level coding** proficiency in **Python** for data manipulation and analysis. - Deep knowledge of relational databases, with proven experience managing data using **Structured Query Language (SQL)**. - Hands-on experience with specific database environments, preferably **Postgres**. - Successful application of **statistical methods** for generating business insights.',
    'best_match_in_cv': 'Technical Expertise: - **Expert level coding** in Python, used for ETL and complex calculations. - Deep knowledge of relational databases, managing data using **Structured Query Language (SQL)**. - Hands-on experience with **Postgres**. - Successfully applied **statistical methods** in university projects. Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams.',
    'score': 0.7745201587677002,
    'status': '≈öwietne dopasowanie ‚úÖ'},
   {'requirement': '- Successful application of **statistical methods** for generating business insights. Soft Skills & Team Requirements: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain complex technical results to business stakeholders.',
    'best_match_in_cv': 'Technical Expertise: - **Expert level coding** in Python, used for ETL and complex calculations. - Deep knowledge of relational databases, managing data using **Structured Query Language (SQL)**. - Hands-on experience with **Postgres**. - Successfully applied **statistical methods** in university projects. Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams.',
    'score': 0.7730634212493896,
    'status': '≈öwietne dopasowanie ‚úÖ'},
   {'requirement': '- Highly **articulate** and able to clearly explain complex technical results to business stakeholders. - Driven by a **desire for knowledge** and continuous learning within the Data Science domain.',
    'best_match_in_cv': 'Personal and Team Skills: - Proven ability to **work together effectively** in cross-functional teams. - Highly **articulate** and able to clearly explain technical results to non-technical stakeholders. - Driven by a **desire for knowledge** and continuous improvement.',
    'score': 0.7108926177024841,
    'status': 'Dobre dopasowanie ‚ö†Ô∏è'}]},
 'tfidf_similarity': np.float64(0.564),
 'explaination_tfidf': technical       0.046353
 team            0.046353
 knowledge       0.030902
 datum           0.030902
 data            0.030902
 science         0.023177
 graduate        0.015451
 experience      0.015451
 prove           0.015451
 skills          0.015451
 database        0.015451
 complex         0.015451
 use             0.015451
 desire          0.007726
 deep            0.007726
 continuous      0.007726
 cross           0.007726
 able            0.007726
 articulate      0.007726
 clearly         0.007726
 cod             0.007726
 ability         0.007726
 hands           0.007726
 functional      0.007726
 effectively     0.007726
 enthusiastic    0.007726
 expert          0.007726
 explain         0.007726
 drive           0.007726
 method          0.007726
 manage          0.007726
 level           0.007726
 language        0.007726
 insight         0.007726
 highly          0.007726
 query           0.007726
 postgre         0.007726
 relational      0.007726
 result          0.007726
 python          0.007726
 sql             0.007726
 structured      0.007726
 statistical     0.007726
 stakeholder     0.007726
 work            0.007726
 dtype: float64}