In [None]:
import os
import mlflow
import traceback
from sklearn.metrics.pairwise import cosine_similarity
from collections_mongo import collection_applicants, collection_vagas
from collection_qdrant import qdrant
from tf_idf_cache import cache_applicants, cache_vagas



# with open("app\\vectorizer.pkl", "rb") as f:
#     vectorizer_new = pickle.load(f)


# model = SentenceTransformer("distiluse-base-multilingual-cased-v1")

mlflow.set_tracking_uri("databricks")
mlflow.set_registry_uri("databricks-uc")
mlflow.set_experiment(os.getenv("MLFLOW_EXPERIMENT_PATH", "/Users/contact.adams.souza@gmail.com/Matching_Experiment"))


vectorizer_new = mlflow.sklearn.load_model("models:/workspace.default.tfidfvectorizer@champion")
model = mlflow.pyfunc.load_model("models:/workspace.default.sentencetransformermodel@champion")

print(f"ENV MLFLOW_TRACKING_URI: {os.getenv('MLFLOW_TRACKING_URI')}")
print(f"Current MLflow tracking URI: {mlflow.get_tracking_uri()}")
print(f"Current MLflow registry URI: {mlflow.get_registry_uri()}")

def vagas_match(job_id:str, model:object=model, vectorizer:object = vectorizer_new, alpha:float=0.3, top_n:int=5, version:str="1.0"):
    """
    Analisa o corpus das vagas e associa aos top-n candidatos com mais adequação
    """
    try:
        # coletando dados do MongoDB
        job_doc = collection_vagas.find_one({"_id":str(job_id)}, {
                                            "_id": 1,
                                            "informacoes_basicas.cliente":1,
                                            "informacoes_basicas.titulo_vaga":1 ,
                                            "perfil_vaga.pais":1,
                                            "perfil_vaga.estado":1,
                                            "perfil_vaga.cidade":1,
                                            "perfil_vaga.nivel profissional":1,
                                            "perfil_vaga.nivel_academico" :1,
                                            "perfil_vaga.nivel_ingles":1,
                                            "perfil_vaga.nivel_espanhol":1,
                                            "perfil_vaga.areas_atuacao":1,
                                            "perfil_vaga.principais_atividades":1
                                            })

        if not job_doc:
            return []

        id = job_doc.get('_id', "")
        cliente = job_doc.get('informacoes_basicas', {}).get('cliente',"")
        titulo_vaga = job_doc.get('informacoes_basicas', {}).get('titulo_vaga', "")
        solicitante_cliente = job_doc.get('informacoes_basicas', {}).get("solicitante_cliente",""),
        pais = job_doc.get('perfil_vaga', {}).get("pais","")
        estado = job_doc.get('perfil_vaga', {}).get("estado","")
        cidade = job_doc.get('perfil_vaga', {}).get("cidade","")
        n_prof = job_doc.get('perfil_vaga', {}).get("nivel profissional","")
        n_acad = job_doc.get('perfil_vaga', {}).get("nivel_academico","")
        n_en = job_doc.get('perfil_vaga', {}).get("nivel_ingles","")
        n_es = job_doc.get('perfil_vaga', {}).get("nivel_espanhol","")
        area = job_doc.get('perfil_vaga', {}).get("areas_atuacao","")
        atividades = job_doc.get('perfil_vaga', {}).get("principais_atividades","")

        # corpus para embedding
        job_text = f"{id} {pais} {estado} {cidade} {n_prof} {n_acad} {n_en} {n_es} {area} {atividades}"

        # vectorização para tf-idf + consine similarity
        vectorized_job = vectorizer.transform([job_text])
        similar_job = cosine_similarity(cache_applicants["vectorized"], vectorized_job)

        # dicionario de similaridade tf-idf
        tfidf_scores = {
            str(cache_applicants["id_applicants"][i]): similar_job[i] for i in range(len(similar_job))
        }

        # vectorização do corpus para sentence transformer + Qdrant
        qdrant_vector_job = model.predict([job_text])[0].tolist()
        results = qdrant.query_points(collection_name="applicants", query=qdrant_vector_job, limit=100)

        # normalizando os scores
        tfidf_max = max(tfidf_scores.values()) if tfidf_scores else 1
        tfidf_scores = {k: v / tfidf_max for k, v in tfidf_scores.items()}

        qdrant_scores = {str(item.payload["_id"]):item.score for item in results.points}

        qdrant_max = max(qdrant_scores.values()) if qdrant_scores else 1
        qdrant_scores = {k: v / qdrant_max for k, v in qdrant_scores.items()}

        #combinando os scores
        combined_scores = []
        for _id in set(tfidf_scores) & set(qdrant_scores):
            combined = alpha * tfidf_scores[_id] + (1- alpha) * qdrant_scores[_id]
            combined_scores.append((_id,combined.item()))

        # sorting and display os scores combinados
        combined_scores.sort(key=lambda x: x[1], reverse=True)
        top_matches = combined_scores[:top_n]

        # Log parameters
        if mlflow.active_run() is None:
            with mlflow.start_run(run_name="Vagas Match - SUCCESS") as run:
                mlflow.log_param("job_id", job_id)
                mlflow.log_param("job_id", job_id)
                mlflow.log_param("alpha", alpha)
                mlflow.log_param("top_n", top_n)
                mlflow.log_param("version", version)

                # Log metrics
                mlflow.log_metric("tfidf_max", tfidf_max.item() if hasattr(tfidf_max, 'item') else tfidf_max)
                mlflow.log_metric("qdrant_max", qdrant_max)
                if combined_scores:
                    mlflow.log_metric("combined_max", max(combined_scores, key=lambda x: x[1])[1])

                #log models
                #mlflow.sklearn.log_model(sk_model=vectorizer_new, name="TFIDFVectorizer")
                #mlflow.sentence_transformers.log_model(model=model, name="SentenceTransformerModel")

                # Set tags
                mlflow.set_tag("status", "success")

        # payload do Qdrant
        qdrant_map = {
            str(p.payload["_id"]): p.payload for p in results.points
        }

        # resposta
        vaga_info = {
            "_id": id,
            "titulo_vaga": titulo_vaga,
            "cliente": cliente,
            "solicitante_cliente": solicitante_cliente,
        }

        top_applicants = []
        for _id, score in top_matches:
            payload = qdrant_map.get(_id, {})
            top_applicants.append({
                "_id" : _id,
                "nome" : payload.get("nome", ""),
                "email" : payload.get("email", ""),
                "telefone":payload.get("telefone",""),
                "score" : round(score, 3)
            })

        output = [{"vaga":vaga_info, "top_applicants":top_applicants}]
        return output

    except Exception as e:
        with mlflow.start_run(run_name="Vagas Match - FAILED") as run:
            mlflow.log_param("job_id", job_id)
            mlflow.log_param("alpha", alpha)
            mlflow.log_param("top_n", top_n)
            mlflow.log_param("version", version)
            mlflow.set_tag("status", "failed")
            mlflow.set_tag("error_message", str(e))
            mlflow.set_tag("error_traceback", traceback.format_exc())
        raise e

def applicants_match(applicant_id:str, model:object=model, vectorizer:object=vectorizer_new, alpha:float=0.3, top_n:int=5, version:str="1.0"):
    """
    Analisa o corpus dos candidatos e os associa às top-n vagas com mais adequação
    """
    try:
        # coletando dados do MongoDB
        applicants_doc = collection_applicants.find_one({"_id":str(applicant_id)}, {
                                                        "_id": 1,
                                                        "infos_basicas.nome":1,
                                                        "infos_basicas.email":1,
                                                        "infos_basicas.telefone":1,
                                                        "informacoes_profissionais.certificacoes":1,
                                                        "formacao_e_idiomas.nivel_ingles": 1,
                                                        "formacao_e_idiomas.nivel_espanhol":1,
                                                        "cv_pt":1
                                                        })

        if not applicants_doc:
            return []

        id = applicants_doc.get('_id', "")
        nome = applicants_doc.get('infos_basicas', {}).get("nome","")
        email = applicants_doc.get('infos_basicas', {}).get("email","")
        telefone = applicants_doc.get('infos_basicas', {}).get("telefone", "")
        certification = applicants_doc.get('informacoes_profissionais', {}).get("certificacoes","")
        education_en = applicants_doc.get('formacao_e_idiomas', {}).get("nivel_ingles","")
        education_es = applicants_doc.get('formacao_e_idiomas', {}).get("nivel_espanhol","")
        cv = applicants_doc.get('cv_pt', "")

        # corpus para embedding
        applicant_text = f"{certification} {education_en} {education_es} {cv}"

        # vectorização para tf-idf + consine similarity
        vectorized_applicant = vectorizer.transform([applicant_text])
        similar_applicant = cosine_similarity(cache_vagas["vectorized"], vectorized_applicant)

        # dicionario de similaridade tf-idf
        tfidf_scores = {
            str(cache_vagas["id_vagas"][i]):similar_applicant[i] for i in range(len(similar_applicant))
        }

        # vectorização do corpus para sentence transformer + Qdrant
        qdrant_vector_applicant = model.predict([applicant_text])[0].tolist()
        results = qdrant.query_points(collection_name="vagas", query=qdrant_vector_applicant, limit=100)

        # normalizando os scores
        tfidf_max = max(tfidf_scores.values()) if tfidf_scores else 1
        tfidf_scores = {k: v / tfidf_max for k, v in tfidf_scores.items()}

        qdrant_scores = {str(item.payload["_id"]):item.score for item in results.points}

        qdrant_max = max(qdrant_scores.values()) if qdrant_scores else 1
        qdrant_scores = {k: v / qdrant_max for k, v in qdrant_scores.items()}

        #combinando os scores
        combined_scores = []
        for _id in set(tfidf_scores) & set(qdrant_scores):
            combined = alpha * tfidf_scores[_id] + (1- alpha) * qdrant_scores[_id]
            combined_scores.append((_id,combined.item()))

        # sorting and display os scores combinados
        combined_scores.sort(key=lambda x: x[1], reverse=True)
        top_matches = combined_scores[:top_n]

        # tracking com MLFlow
        if mlflow.active_run() is None:
            with mlflow.start_run(run_name="Applicants Match - SUCCESS") as run:
            # Log parameters
                mlflow.log_param("applicant_id", applicant_id)
                mlflow.log_param("alpha", alpha)
                mlflow.log_param("top_n", top_n)
                mlflow.log_param("version", version)
                #mlflow.log_param("model_name_qdrant",model)
                #mlflow.log_param("model_name_tfidf",vectorizer)


                # Log metrics
                mlflow.log_metric("tfidf_max", tfidf_max.item() if hasattr(tfidf_max, 'item') else tfidf_max)
                mlflow.log_metric("qdrant_max", qdrant_max)
                if combined_scores:
                    mlflow.log_metric("combined_max", max(combined_scores, key=lambda x: x[1])[1])
                
                #log models
                #mlflow.sklearn.log_model(sk_model=vectorizer_new, name="TFIDFVectorizer")
                #mlflow.sentence_transformers.log_model(model=model, name="SentenceTransformerModel")

                # Set tags
                mlflow.set_tag("status", "success")

        # payload do Qdrant
        qdrant_map = {
            str(p.payload["_id"]): p.payload for p in results.points
        }

        # resposta
        applicant_info = {
                "_id" : id,
                "nome" : nome,
                "email" : email,
                "telefone":telefone,
        }

        top_vagas = []
        for _id, score in top_matches:
            payload = qdrant_map.get(_id, {})
            top_vagas.append({
                 "_id": _id,
                "titulo_vaga": payload.get("titulo_vaga", ""),
                "cliente": payload.get("cliente", ""),
                "score": round(score, 3)
            })

        output = [{"applicant":applicant_info, "top_vagas":top_vagas}]
        return output

    except Exception as e:
        with mlflow.start_run(run_name="Applicants Match - FAILED") as run:
            mlflow.log_param("applicant_id", applicant_id)
            mlflow.log_param("alpha", alpha)
            mlflow.log_param("top_n", top_n)
            mlflow.log_param("version", version)
            mlflow.set_tag("status", "failed")
            mlflow.set_tag("error_message", str(e))
            mlflow.set_tag("error_traceback", traceback.format_exc())
        raise e

print(f"MLflow tracking URI: {mlflow.get_tracking_uri()}")
print(f"MLflow registry URI: {mlflow.get_registry_uri()}")

# Add this debug line to your matching_functions.py:


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 5/5 [00:06<00:00,  1.30s/it]
Downloading artifacts: 100%|██████████| 5/5 [00:05<00:00,  1.11s/it]
Downloading artifacts:  28%|██▊       | 5/18 [00:01<00:04,  2.61it/s]

[A
Downloading artifacts:  89%|████████▉ | 16/18 [00:24<00:06,  3.27s/it]
[A
[A
[A
Downloading C:\Users\adams\AppData\Local\Temp\tmp7ntyj8_y\artifacts/distiluse-base-multilingual-cased-v1/model.safetensors:  78%|███████▊  | 400M/514M [02:13<00:37, 3.15MiB/s]

[A
[A
[A
Downloading C:\Users\adams\AppData\Local\Temp\tmp7ntyj8_y\python_model.pkl:  97%|█████████▋| 500M/517M [02:17<00:04, 3.81MiB/s]
Downloading artifacts: 100%|██████████| 18/18 [03:36<00:00, 12.05s/it]


ENV MLFLOW_TRACKING_URI: databricks
Current MLflow tracking URI: databricks
Current MLflow registry URI: databricks-uc
MLflow tracking URI: databricks
MLflow registry URI: databricks-uc


In [2]:
import os
import mlflow
from dotenv import load_dotenv

load_dotenv()
print("track uri")
mlflow.set_tracking_uri("databricks")
print("registry uri")
mlflow.set_registry_uri("databricks-uc")
print("tokens")
os.environ["DATABRICKS_TOKEN"] = os.getenv("DATABRICKS_TOKEN")
#mlflow.set_experiment(os.getenv("MLFLOW_EXPERIMENT_PATH"))
print("experiment")
mlflow.set_experiment(os.getenv("MLFLOW_EXPERIMENT_PATH", "/Users/contact.adams.souza@gmail.com/Matching_Experiment"))

track uri
registry uri
tokens
experiment


<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/857129996570473', creation_time=1753270783714, experiment_id='857129996570473', last_update_time=1753297708867, lifecycle_stage='active', name='/Users/contact.adams.souza@gmail.com/Matching_Experiment', tags={'mlflow.databricks.filesystem.experiment_permissions_check': 'test',
 'mlflow.experiment.sourceName': '/Users/contact.adams.souza@gmail.com/Matching_Experiment',
 'mlflow.experimentKind': 'custom_model_development',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'contact.adams.souza@gmail.com',
 'mlflow.ownerId': '70444113622605'}>

In [5]:
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import mlflow
import time
from sklearn.metrics.pairwise import cosine_similarity
from app.collections_mongo import collection_applicants, collection_vagas
from app.collection_qdrant import qdrant
from app.tf_idf_cache import cache_applicants, cache_vagas

from dotenv import load_dotenv


# with open("app\\vectorizer.pkl", "rb") as f:
#     vectorizer_new = pickle.load(f)

# model = SentenceTransformer("distiluse-base-multilingual-cased-v1")

# === Load environment variables from .env ===
load_dotenv()
mlflow.set_tracking_uri("databricks")
mlflow.set_registry_uri("databricks-uc")
os.environ["DATABRICKS_TOKEN"] = os.getenv("DATABRICKS_TOKEN")
mlflow.set_experiment(os.getenv("MLFLOW_EXPERIMENT_PATH", "/Users/contact.adams.souza@gmail.com/Matching_Experiment"))

# Add a small delay to ensure authentication is fully established
time.sleep(5)

vectorizer_new = mlflow.sklearn.load_model("models:/workspace.default.tfidfvectorizer@champion")
model = mlflow.pyfunc.load_model("models:/workspace.default.sentencetransformermodel@champion")


def vagas_match(job_id:str, model:object=model, vectorizer:object = vectorizer_new, alpha:float=0.3, top_n:int=5, version:str="1.0"):
    """
    Analisa o corpus das vagas e associa aos top-n candidatos com mais adequação
    """
    try:
        # coletando dados do MongoDB
        job_doc = collection_vagas.find_one({"_id":str(job_id)}, {
                                            "_id": 1,
                                            "informacoes_basicas.cliente":1,
                                            "informacoes_basicas.titulo_vaga":1 ,
                                            "perfil_vaga.pais":1,
                                            "perfil_vaga.estado":1,
                                            "perfil_vaga.cidade":1,
                                            "perfil_vaga.nivel profissional":1,
                                            "perfil_vaga.nivel_academico" :1,
                                            "perfil_vaga.nivel_ingles":1,
                                            "perfil_vaga.nivel_espanhol":1,
                                            "perfil_vaga.areas_atuacao":1,
                                            "perfil_vaga.principais_atividades":1
                                            })

        if not job_doc:
            return []

        id = job_doc.get('_id', "")
        cliente = job_doc.get('informacoes_basicas', {}).get('cliente',"")
        titulo_vaga = job_doc.get('informacoes_basicas', {}).get('titulo_vaga', "")
        solicitante_cliente = job_doc.get('informacoes_basicas', {}).get("solicitante_cliente",""),
        pais = job_doc.get('perfil_vaga', {}).get("pais","")
        estado = job_doc.get('perfil_vaga', {}).get("estado","")
        cidade = job_doc.get('perfil_vaga', {}).get("cidade","")
        n_prof = job_doc.get('perfil_vaga', {}).get("nivel profissional","")
        n_acad = job_doc.get('perfil_vaga', {}).get("nivel_academico","")
        n_en = job_doc.get('perfil_vaga', {}).get("nivel_ingles","")
        n_es = job_doc.get('perfil_vaga', {}).get("nivel_espanhol","")
        area = job_doc.get('perfil_vaga', {}).get("areas_atuacao","")
        atividades = job_doc.get('perfil_vaga', {}).get("principais_atividades","")

        # corpus para embedding
        job_text = f"{id} {pais} {estado} {cidade} {n_prof} {n_acad} {n_en} {n_es} {area} {atividades}"

        # vectorização para tf-idf + consine similarity
        vectorized_job = vectorizer.transform([job_text])
        similar_job = cosine_similarity(cache_applicants["vectorized"], vectorized_job)

        # dicionario de similaridade tf-idf
        tfidf_scores = {
            str(cache_applicants["id_applicants"][i]): similar_job[i] for i in range(len(similar_job))
        }

        # vectorização do corpus para sentence transformer + Qdrant
        qdrant_vector_job = model.predict([job_text])[0].tolist()
        results = qdrant.query_points(collection_name="applicants", query=qdrant_vector_job, limit=100)

        # normalizando os scores
        tfidf_max = max(tfidf_scores.values()) if tfidf_scores else 1
        tfidf_scores = {k: v / tfidf_max for k, v in tfidf_scores.items()}

        qdrant_scores = {str(item.payload["_id"]):item.score for item in results.points}

        qdrant_max = max(qdrant_scores.values()) if qdrant_scores else 1
        qdrant_scores = {k: v / qdrant_max for k, v in qdrant_scores.items()}

        #combinando os scores
        combined_scores = []
        for _id in set(tfidf_scores) & set(qdrant_scores):
            combined = alpha * tfidf_scores[_id] + (1- alpha) * qdrant_scores[_id]
            combined_scores.append((_id,combined.item()))

        # sorting and display os scores combinados
        combined_scores.sort(key=lambda x: x[1], reverse=True)
        top_matches = combined_scores[:top_n]

        # Log parameters
        if mlflow.active_run() is None:
            with mlflow.start_run(run_name="Vagas Match - SUCCESS") as run:
                mlflow.log_param("job_id", job_id)
                mlflow.log_param("job_id", job_id)
                mlflow.log_param("alpha", alpha)
                mlflow.log_param("top_n", top_n)
                mlflow.log_param("version", version)

                # Log metrics
                mlflow.log_metric("tfidf_max", tfidf_max.item() if hasattr(tfidf_max, 'item') else tfidf_max)
                mlflow.log_metric("qdrant_max", qdrant_max)
                if combined_scores:
                    mlflow.log_metric("combined_max", max(combined_scores, key=lambda x: x[1])[1])

                #log models
                #mlflow.sklearn.log_model(sk_model=vectorizer_new, name="TFIDFVectorizer")
                #mlflow.sentence_transformers.log_model(model=model, name="SentenceTransformerModel")

                # Set tags
                mlflow.set_tag("status", "success")

        # payload do Qdrant
        qdrant_map = {
            str(p.payload["_id"]): p.payload for p in results.points
        }

        # resposta
        vaga_info = {
            "_id": id,
            "titulo_vaga": titulo_vaga,
            "cliente": cliente,
            "solicitante_cliente": solicitante_cliente,
        }

        top_applicants = []
        for _id, score in top_matches:
            payload = qdrant_map.get(_id, {})
            top_applicants.append({
                "_id" : _id,
                "nome" : payload.get("nome", ""),
                "email" : payload.get("email", ""),
                "telefone":payload.get("telefone",""),
                "score" : round(score, 3)
            })

        output = [{"vaga":vaga_info, "top_applicants":top_applicants}]
        return output

    except Exception as e:
        with mlflow.start_run(run_name="Vagas Match - FAILED") as run:
            mlflow.log_param("job_id", job_id)
            mlflow.log_param("alpha", alpha)
            mlflow.log_param("top_n", top_n)
            mlflow.log_param("version", version)
            mlflow.set_tag("status", "failed")
            mlflow.set_tag("error_message", str(e))
            mlflow.set_tag("error_traceback", traceback.format_exc())
        raise e

def applicants_match(applicant_id:str, model:object=model, vectorizer:object=vectorizer_new, alpha:float=0.3, top_n:int=5, version:str="1.0"):
    """
    Analisa o corpus dos candidatos e os associa às top-n vagas com mais adequação
    """
    try:
        # coletando dados do MongoDB
        applicants_doc = collection_applicants.find_one({"_id":str(applicant_id)}, {
                                                        "_id": 1,
                                                        "infos_basicas.nome":1,
                                                        "infos_basicas.email":1,
                                                        "infos_basicas.telefone":1,
                                                        "informacoes_profissionais.certificacoes":1,
                                                        "formacao_e_idiomas.nivel_ingles": 1,
                                                        "formacao_e_idiomas.nivel_espanhol":1,
                                                        "cv_pt":1
                                                        })

        if not applicants_doc:
            return []

        id = applicants_doc.get('_id', "")
        nome = applicants_doc.get('infos_basicas', {}).get("nome","")
        email = applicants_doc.get('infos_basicas', {}).get("email","")
        telefone = applicants_doc.get('infos_basicas', {}).get("telefone", "")
        certification = applicants_doc.get('informacoes_profissionais', {}).get("certificacoes","")
        education_en = applicants_doc.get('formacao_e_idiomas', {}).get("nivel_ingles","")
        education_es = applicants_doc.get('formacao_e_idiomas', {}).get("nivel_espanhol","")
        cv = applicants_doc.get('cv_pt', "")

        # corpus para embedding
        applicant_text = f"{certification} {education_en} {education_es} {cv}"

        # vectorização para tf-idf + consine similarity
        vectorized_applicant = vectorizer.transform([applicant_text])
        similar_applicant = cosine_similarity(cache_vagas["vectorized"], vectorized_applicant)

        # dicionario de similaridade tf-idf
        tfidf_scores = {
            str(cache_vagas["id_vagas"][i]):similar_applicant[i] for i in range(len(similar_applicant))
        }

        # vectorização do corpus para sentence transformer + Qdrant
        qdrant_vector_applicant = model.predict([applicant_text])[0].tolist()
        results = qdrant.query_points(collection_name="vagas", query=qdrant_vector_applicant, limit=100)

        # normalizando os scores
        tfidf_max = max(tfidf_scores.values()) if tfidf_scores else 1
        tfidf_scores = {k: v / tfidf_max for k, v in tfidf_scores.items()}

        qdrant_scores = {str(item.payload["_id"]):item.score for item in results.points}

        qdrant_max = max(qdrant_scores.values()) if qdrant_scores else 1
        qdrant_scores = {k: v / qdrant_max for k, v in qdrant_scores.items()}

        #combinando os scores
        combined_scores = []
        for _id in set(tfidf_scores) & set(qdrant_scores):
            combined = alpha * tfidf_scores[_id] + (1- alpha) * qdrant_scores[_id]
            combined_scores.append((_id,combined.item()))

        # sorting and display os scores combinados
        combined_scores.sort(key=lambda x: x[1], reverse=True)
        top_matches = combined_scores[:top_n]

        # tracking com MLFlow
        if mlflow.active_run() is None:
            with mlflow.start_run(run_name="Applicants Match - SUCCESS") as run:
            # Log parameters
                mlflow.log_param("applicant_id", applicant_id)
                mlflow.log_param("alpha", alpha)
                mlflow.log_param("top_n", top_n)
                mlflow.log_param("version", version)
                #mlflow.log_param("model_name_qdrant",model)
                #mlflow.log_param("model_name_tfidf",vectorizer)


                # Log metrics
                mlflow.log_metric("tfidf_max", tfidf_max.item() if hasattr(tfidf_max, 'item') else tfidf_max)
                mlflow.log_metric("qdrant_max", qdrant_max)
                if combined_scores:
                    mlflow.log_metric("combined_max", max(combined_scores, key=lambda x: x[1])[1])
                
                #log models
                #mlflow.sklearn.log_model(sk_model=vectorizer_new, name="TFIDFVectorizer")
                #mlflow.sentence_transformers.log_model(model=model, name="SentenceTransformerModel")

                # Set tags
                mlflow.set_tag("status", "success")

        # payload do Qdrant
        qdrant_map = {
            str(p.payload["_id"]): p.payload for p in results.points
        }

        # resposta
        applicant_info = {
                "_id" : id,
                "nome" : nome,
                "email" : email,
                "telefone":telefone,
        }

        top_vagas = []
        for _id, score in top_matches:
            payload = qdrant_map.get(_id, {})
            top_vagas.append({
                 "_id": _id,
                "titulo_vaga": payload.get("titulo_vaga", ""),
                "cliente": payload.get("cliente", ""),
                "score": round(score, 3)
            })

        output = [{"applicant":applicant_info, "top_vagas":top_vagas}]
        return output

    except Exception as e:
        with mlflow.start_run(run_name="Applicants Match - FAILED") as run:
            mlflow.log_param("applicant_id", applicant_id)
            mlflow.log_param("alpha", alpha)
            mlflow.log_param("top_n", top_n)
            mlflow.log_param("version", version)
            mlflow.set_tag("status", "failed")
            mlflow.set_tag("error_message", str(e))
            mlflow.set_tag("error_traceback", traceback.format_exc())
        raise e


NameError: name '__file__' is not defined