In [3]:
import pandas as pd 
import numpy as np

In [None]:
from backend.utils.encode_latlon import GeoFourierEncoder
import torch
import os

In [None]:
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# `__file__` only exists when this code runs as a module/script; inside a
# notebook kernel it is undefined, so fall back to the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
# Location of the serialized model, resolved relative to BASE_DIR.
MODEL_PATH = os.path.join(BASE_DIR, "model", "nn.pt")


In [11]:
# Encoder hyper-parameters; kept as module-level names so later cells can reuse them.
D, scales_km, seed = 8, (1200, 400, 150, 50), 0

# Build the encoder, then overwrite its `B` attribute with the array stored on
# disk. The path is relative to the kernel's working directory.
enc = GeoFourierEncoder(D=D, scales_km=scales_km, seed=seed)
enc.B = np.load("./backend/utils/geo_B.npy")

In [12]:
# Encode one sample (lat, lon) pair with the encoder built above and display
# the resulting feature row as the cell output.
lat, lon = 4.641518, -74.062047
user_feat = enc.transform([[lat, lon]])
user_feat

array([[ 0.21255764, -0.47415477,  0.38863727,  0.27267995,  0.4525696 ,
         0.15867338, -0.31458077,  0.419101  ]], dtype=float32)

In [None]:
from backend.utils.chunker import chunker
from sqlalchemy import Column, BigInteger, Text, Numeric, Boolean, Float
from sqlalchemy.dialects.postgresql import ARRAY
from transformers import AutoTokenizer
import os 

class VacancyDB(Base):
    """ORM mapping for rows of the ``vacancies`` table.

    Columns: a big-integer primary key, required ``title`` and ``description``
    text, an optional ``salary`` (NUMERIC(12, 2)), text arrays for ``skills``
    and ``sectors``, float ``lat``/``lon`` coordinates, a ``remote`` flag that
    defaults to False, and a float-array ``embedding``.
    """
    # NOTE(review): `Base` is not defined or imported in the visible cells —
    # presumably the SQLAlchemy declarative base; confirm it is in scope.
    __tablename__ = "vacancies"

    id = Column(BigInteger, primary_key=True, index=True)
    title = Column(Text, nullable=False)
    description = Column(Text, nullable=False)
    salary = Column(Numeric(12, 2))
    skills = Column(ARRAY(Text))
    sectors = Column(ARRAY(Text))
    lat = Column(Float)
    lon = Column(Float)
    remote = Column(Boolean, default=False)
    embedding = Column(ARRAY(Float))  # embedding of the vacancy
class CandidateDB(Base):
    """ORM mapping for rows of the ``candidates`` table.

    Columns: a big-integer primary key, required ``title`` and ``experiences``
    text, an optional ``salary`` (NUMERIC(12, 2)), text arrays for ``skills``
    and ``sectors``, float ``lat``/``lon`` coordinates, a ``remote`` flag that
    defaults to False, and a float-array ``embedding``.
    """
    # NOTE(review): `Base` is not defined or imported in the visible cells —
    # presumably the SQLAlchemy declarative base; confirm it is in scope.
    __tablename__ = "candidates"

    id = Column(BigInteger, primary_key=True, index=True)
    title = Column(Text, nullable=False)         # candidate's role
    experiences = Column(Text, nullable=False)   # experience / CV text
    salary = Column(Numeric(12, 2))
    skills = Column(ARRAY(Text))
    sectors = Column(ARRAY(Text))
    lat = Column(Float)
    lon = Column(Float)
    remote = Column(Boolean, default=False)
    embedding = Column(ARRAY(Float))  # embedding of the candidate

In [None]:
def build_fourier(object:  VacancyDB | CandidateDB):
    """Encode the ``lat``/``lon`` of a vacancy or candidate with GeoFourierEncoder.

    A fresh encoder (D=8, scales_km=(1200, 400, 150, 50), seed=0) is created on
    every call and its ``B`` attribute is replaced with the array stored in
    ``./backend/utils/geo_B.npy`` (path relative to the working directory).

    Args:
        object: anything exposing ``lat`` and ``lon`` attributes.

    Returns:
        Whatever ``GeoFourierEncoder.transform`` returns for the single
        ``[lat, lon]`` pair.
    """
    # Read the coordinates first so a missing attribute fails before any file I/O.
    coords = [[object.lat, object.lon]]

    encoder = GeoFourierEncoder(D=8, scales_km=(1200, 400, 150, 50), seed=0)
    encoder.B = np.load("./backend/utils/geo_B.npy")
    return encoder.transform(coords)


def coalesce_list(val):
    """Normalize ``val`` into a list of strings, dropping missing entries.

    An entry counts as missing when it is an empty string or a scalar that
    ``pd.isna`` reports as missing (None, NaN, pd.NA, NaT).

    Args:
        val: a list-like (list, tuple, ndarray, Series) or a single scalar.

    Returns:
        ``[str(x), ...]`` for the non-missing items of a list-like; ``[]`` for
        a missing scalar; ``[str(val)]`` for any other value.
    """
    def _is_missing(item):
        if isinstance(item, str):
            return item == ""
        # NaN never compares equal to itself, so membership/equality tests
        # cannot detect it; pd.isna can. Restrict to scalars because pd.isna
        # returns an array for list-likes.
        return pd.api.types.is_scalar(item) and bool(pd.isna(item))

    # A 0-d array is a scalar in disguise; unwrap it so it is not iterated.
    if isinstance(val, np.ndarray) and val.ndim == 0:
        val = val.item()

    if isinstance(val, (list, tuple, np.ndarray, pd.Series)):
        return [str(x) for x in val if not _is_missing(x)]
    if _is_missing(val):
        return []
    return [str(val)]

def format_section(label, values):
    """Render ``values`` as ``"<label>: v1, v2, ..."``.

    ``values`` is first normalized through ``coalesce_list``; when nothing
    remains, an empty string is returned instead of a bare label.
    """
    items = coalesce_list(values)
    if not items:
        return ""
    return f"{label}: " + ", ".join(items)

def build_text_candidate(candidate:CandidateDB):
    """Add a ``full_text`` column to ``candidate`` and return the same object.

    ``full_text`` starts as ``candidate_description``; the formatted
    ``experience_descriptions``, ``skill_names`` and ``sector_names`` sections
    are appended in that order, followed by ``" salario: <candidate_salary>"``.
    The input is modified in place.

    NOTE(review): the annotation says ``CandidateDB`` but the body relies on
    column indexing and ``.apply`` — presumably a pandas DataFrame; confirm
    against the caller.
    """
    section_labels = {
        'experience_descriptions': " experiencia",
        'skill_names': " habilidades",
        'sector_names': " sectores",
    }

    candidate["full_text"] = candidate["candidate_description"]
    for column, label in section_labels.items():
        rendered = candidate[column].apply(lambda cell: format_section(label, cell))
        candidate["full_text"] = candidate["full_text"] + rendered

    salary_suffix = candidate["candidate_salary"].apply(lambda amount: " salario: " + str(amount))
    candidate["full_text"] = candidate["full_text"] + salary_suffix
    return candidate

def build_vacant_text(vacancy:VacancyDB):
    """Build the text fed to the model for each vacancy.

    The formatted ``skill_names`` and ``sector_names`` sections and
    ``" salario: <min_salary>"`` are appended to the existing ``full_text``
    column. Work happens on a copy, so the caller's object is not modified.

    NOTE(review): the annotation says ``VacancyDB`` but the body relies on
    column indexing, ``.fillna`` and ``.apply`` — presumably a pandas
    DataFrame; confirm against the caller.
    NOTE(review): a ``full_text`` column must already exist on the input;
    nothing here initializes it — confirm which column should seed it.

    Returns:
        The resulting ``full_text`` column.
    """
    columnas = {'skill_names': " habilidades", 'sector_names': " sectores"}

    # The original body referenced an unbound local `vacant_text`; bind it to
    # a copy of the argument.
    vacant_text = vacancy.copy()

    for col, label in columnas.items():
        # Missing cells become "" so the len() check below is always valid.
        vacant_text[col] = vacant_text[col].fillna("")
        col_ = vacant_text[col].apply(lambda x: format_section(label, x) if len(x) > 0 else "")
        vacant_text["full_text"] += col_

    salary = vacant_text["min_salary"].apply(lambda x: " salario: " + str(x))
    vacant_text["full_text"] = vacant_text["full_text"] + salary

    return vacant_text["full_text"]

def create_fourier_vacants(features,remote:int):
    """Append the remote flag as one extra trailing column of ``features``.

    The original call ``np.concat(features, remote)`` passed ``remote`` as the
    ``axis`` argument, so the flag never reached the output (and ``np.concat``
    only exists in NumPy >= 2.0).

    Args:
        features: array-like of shape (n, d), or (d,) which is treated as one row.
        remote: truthy for remote vacancies; ``None`` is treated as not remote.

    Returns:
        ndarray of shape (n, d + 1) with the same dtype as ``features``; the
        last column is 1 when ``remote`` is truthy, else 0.
    """
    feats = np.atleast_2d(np.asarray(features))
    flag = np.full((feats.shape[0], 1), 1 if remote else 0, dtype=feats.dtype)
    return np.concatenate([feats, flag], axis=1)
def load_model() -> "SiameseTwoTower":
    """Return the model stored at MODEL_PATH, loading it on first use.

    The loaded object is cached in the module-level ``_model`` so later calls
    reuse it. The return annotation is a string so the definition does not
    fail when ``SiameseTwoTower`` is not imported in this scope.

    Raises:
        RuntimeError: if MODEL_PATH does not exist.
    """
    global _model
    # `_model` may never have been initialized (no visible cell defines it),
    # so look it up defensively instead of reading the bare name.
    if globals().get("_model") is None:
        if not os.path.exists(MODEL_PATH):
            raise RuntimeError(f"Modelo nn.pt no encontrado en {MODEL_PATH}")
        # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which rejects fully pickled modules — confirm against the pinned version.
        m = torch.load(MODEL_PATH, map_location=DEVICE)
        m.eval()
        m.to(DEVICE)
        _model = m
    return _model

def compute_affinity(candidate:CandidateDB, vacancy:VacancyDB):
    """Score how well ``candidate`` matches ``vacancy`` as a float.

    Steps: build both texts, tokenize/chunk them with ``chunker``, encode both
    locations with ``build_fourier`` (the vacancy features also go through
    ``create_fourier_vacants`` with ``vacancy.remote``), run the cached model
    and squash the scaled dot product of the two embeddings through a sigmoid.
    Only the first element of the result is returned.

    NOTE(review): ``build_text_candidate`` returns the whole object while
    ``build_vacant_text`` returns only the ``full_text`` column — confirm
    ``chunker`` expects that asymmetry.

    Raises:
        RuntimeError: if the ``model_name`` environment variable is not set,
            or (from ``load_model``) if the model file is missing.
    """
    job_text = build_vacant_text(vacancy)
    cand_text = build_text_candidate(candidate)
    texts = {"job": job_text, "cand": cand_text}

    # Fail with a clear message instead of handing None to from_pretrained.
    model_name = os.getenv("model_name")
    if not model_name:
        raise RuntimeError("Variable de entorno 'model_name' no definida")
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

    chunks = chunker(texts, tokenizer)
    vac_fou = build_fourier(vacancy)
    cand_fou = build_fourier(candidate)
    vac_fou = create_fourier_vacants(vac_fou, vacancy.remote)

    model = load_model()

    # Token tensors get an extra axis at position 1 before being moved to DEVICE.
    batch = {
        key: chunks[key].unsqueeze(1).to(DEVICE)
        for key in (
            "job_input_ids",
            "job_attention_mask",
            "cand_input_ids",
            "cand_attention_mask",
        )
    }
    # The Fourier features are NumPy arrays, which have no `.to`; convert them
    # to float32 tensors before moving them to DEVICE.
    batch["vac_loc_fourier"] = torch.as_tensor(vac_fou, dtype=torch.float32).to(DEVICE)
    batch["cand_loc_fourier"] = torch.as_tensor(cand_fou, dtype=torch.float32).to(DEVICE)

    with torch.no_grad():
        z_job, z_cand, logit_scale = model(batch)
        logits = (z_job * z_cand).sum(dim=-1) * logit_scale
        prob = torch.sigmoid(logits)[0].item()

    return float(prob)

     
     

