# 1. Imports and environment setup.

Purpose: lightweight environment initialization without embedding business logic.

In [None]:
import os
import pandas as pd
import numpy as np
from tfm.config import RAW_DIR, INTERIM_DIR, PROCESSED_DIR

# Planilla

In [None]:
# Load the raw employee roster ("planilla") from the raw-data directory.
planilla_path = os.path.join(RAW_DIR, "planilla.csv")
planilla = pd.read_csv(planilla_path)
# planilla.head()

In [None]:
# Switch the roster to English column names, then parse the start date
# (read with the explicit format YYYY-MM-DD) into a datetime column.
roster_column_names = {
    'numero_empleado': 'id',
    'titulo': 'job_title',
    'departamento': 'department',
    'fecha_ingreso': 'start_date',
    'modalidad_trabajo': 'location',
    'salario_bruto_anual_eur': 'salary',
}
planilla = planilla.rename(columns=roster_column_names)
planilla['start_date'] = pd.to_datetime(planilla['start_date'], format='%Y-%m-%d')

In [None]:
#planilla.head()

In [None]:
#planilla.info()

In [None]:
#planilla.describe()

In [None]:
# Lookup table of the distinct job titles found in the roster.
# Each title receives a sequential id starting at 1; 'id' is the first column.
distinct_job_titles = planilla['job_title'].unique()
job_title_df = pd.DataFrame({
    'id': range(1, len(distinct_job_titles) + 1),
    'job_title': distinct_job_titles,
})
job_title_df.head(20)

In [None]:
# Lookup table of the distinct work modalities ("location") found in the roster.
# Each value receives a sequential id starting at 1; 'id' is the first column.
distinct_locations = planilla['location'].unique()
location_df = pd.DataFrame({
    'id': range(1, len(distinct_locations) + 1),
    'location': distinct_locations,
})
location_df.head(20)

In [None]:
# Lookup table of the distinct departments found in the roster.
# Each department receives a sequential id starting at 1; 'id' is the first column.
distinct_departments = planilla['department'].unique()
department_df = pd.DataFrame({
    'id': range(1, len(distinct_departments) + 1),
    'department': distinct_departments,
})
department_df.head(20)

In [None]:
# Dictionary from job-title text to its id in job_title_df.
job_map = dict(zip(job_title_df['job_title'], job_title_df['id']))
job_map

In [None]:
# Dictionary from location text to its id in location_df.
location_map = dict(zip(location_df['location'], location_df['id']))
location_map

In [None]:
# Dictionary from department text to its id in department_df.
department_map = dict(zip(department_df['department'], department_df['id']))
department_map

In [None]:
# Replace the three text columns of the roster with their lookup ids.
# Values missing from a mapping become NaN (behaviour of Series.map).
planilla = planilla.assign(
    job_title=planilla['job_title'].map(job_map),
    location=planilla['location'].map(location_map),
    department=planilla['department'].map(department_map),
)

In [None]:
#planilla.head(15)

In [None]:
# Write the lookup tables and the encoded roster to the interim directory,
# one CSV per frame, without the DataFrame index.
interim_roster_outputs = {
    "job_titles.csv": job_title_df,
    "locations.csv": location_df,
    "departments.csv": department_df,
    "employees.csv": planilla,
}
for csv_name, frame in interim_roster_outputs.items():
    frame.to_csv(os.path.join(INTERIM_DIR, csv_name), index=False)
# Parquet export to PROCESSED_DIR is currently disabled:
#job_title_df.to_parquet(os.path.join(PROCESSED_DIR,"job_titles.parquet"), index=False)
#location_df.to_parquet(os.path.join(PROCESSED_DIR,"location.parquet"), index=False)
#department_df.to_parquet(os.path.join(PROCESSED_DIR,"departments.parquet"), index=False)
#planilla.to_parquet(os.path.join(PROCESSED_DIR,"employees.parquet"), index=False)

# CBI

In [None]:
# Load the raw CBI questionnaire answers from the raw-data directory.
cbi_path = os.path.join(RAW_DIR, "cbi_respuestas.csv")
cbi = pd.read_csv(cbi_path)
# cbi.head()

In [None]:
#cbi.info()

In [None]:
# Shorten the questionnaire headers in a single pass:
# numero_empleado -> emp_id and cbi_item_1..cbi_item_19 -> cbi1..cbi19.
cbi_column_names = {'numero_empleado': 'emp_id'}
cbi_column_names.update({f"cbi_item_{i}": f"cbi{i}" for i in range(1, 20)})
cbi = cbi.rename(columns=cbi_column_names)

In [None]:
#cbi.info()

In [None]:
# Numeric code assigned to each questionnaire answer label.
# NOTE(review): the codes do not follow frequency order ('Siempre' = 4 sits
# between 'Raramente' = 3 and 'Nunca' = 5), so they look like categorical
# identifiers rather than an ordinal score -- confirm before doing arithmetic
# on the encoded items.
respuestas = dict(
    (
        ('A menudo', 1),    # often
        ('A veces', 2),     # sometimes
        ('Raramente', 3),   # rarely
        ('Siempre', 4),     # always
        ('Nunca', 5),       # never
    )
)

In [None]:
# Encode every item column between "cbi1" and "cbi19" (label slice, inclusive)
# with the answer codes. Labels absent from `respuestas` become <NA>, which is
# why the nullable "Int64" dtype is used.
cols = cbi.loc[:, "cbi1":"cbi19"].columns
cbi = cbi.assign(
    **{item: cbi[item].map(respuestas).astype("Int64") for item in cols}
)

In [None]:
# Remove the answer timestamp column from the questionnaire table.
cbi = cbi.drop('timestamp_respuesta', axis='columns')
# cbi.head()


In [None]:
# Write the encoded questionnaire to the interim directory (no index column).
cbi_results_path = os.path.join(INTERIM_DIR, "cbi_results.csv")
cbi.to_csv(cbi_results_path, index=False)
# Parquet export to PROCESSED_DIR is currently disabled:
#cbi.to_parquet(os.path.join(PROCESSED_DIR,"cbi_results.parquet"), index=False)

# Operativo

In [None]:
# Load the raw operational records from the raw-data directory.
operativo_path = os.path.join(RAW_DIR, "datos_operativos.csv")
operativo = pd.read_csv(operativo_path)
# operativo.head()

In [None]:
#operativo.info()

In [None]:
# Split "periodo" on "-" once: piece 0 is kept in "periodo" (later renamed to
# year) and piece 1 goes to the new "mes" column (later renamed to month).
# NOTE(review): presumably the raw value looks like "YYYY-MM" -- confirm.
periodo_parts = operativo["periodo"].str.split("-")
operativo["mes"] = periodo_parts.str[1]
operativo["periodo"] = periodo_parts.str[0]

# Move the month column from the end of the frame to position 2. The column is
# addressed by name instead of by a hard-coded position, so an extra or missing
# column in the raw CSV can no longer cause a different column to be moved.
operativo.insert(2, "mes", operativo.pop("mes"))

# Convert both pieces to nullable integers; text that is not numeric becomes <NA>.
operativo["periodo"] = pd.to_numeric(operativo['periodo'], errors='coerce').astype('Int64')
operativo["mes"] = pd.to_numeric(operativo['mes'], errors='coerce').astype('Int64')

# English column names for the interim dataset.
operativo = operativo.rename(columns={'numero_empleado': 'emp_id',
                                      'periodo': 'year',
                                      'mes': 'month',
                                      'horas_laborales_normales': 'hours_normal',
                                      'horas_extras': 'hours_extra',
                                      'ausencias_justificadas': 'absence',
                                      'ausencias_no_justificadas': 'absence_unapprove',
                                      'llegadas_tardias': 'lateness',
                                      'evaluacion_desempeno': 'rating'})

#operativo.head()

In [None]:
# Numeric code for each performance-rating label.
rating = {
    'Outstanding': 1,
    'Exceeds': 2,
    'Meets': 3,
    'Needs Improvement': 4,
}
# Same information as a two-column lookup table: label text and its code.
rating_df = pd.DataFrame({
    "description": list(rating.keys()),
    "id": list(rating.values()),
})

In [None]:
# Replace the rating labels with their numeric codes; labels missing from
# `rating` become NaN (behaviour of Series.map).
operativo = operativo.assign(rating=operativo['rating'].map(rating))

In [None]:
#rating_df.head()

In [None]:
# Write the rating lookup and the operational history to the interim
# directory, one CSV per frame, without the DataFrame index.
interim_operation_outputs = {
    "rating.csv": rating_df,
    "history_operation.csv": operativo,
}
for csv_name, frame in interim_operation_outputs.items():
    frame.to_csv(os.path.join(INTERIM_DIR, csv_name), index=False)
# Parquet export to PROCESSED_DIR is currently disabled:
#rating_df.to_parquet(os.path.join(PROCESSED_DIR,"rating.parquet"), index=False)
#operativo.to_parquet(os.path.join(PROCESSED_DIR,"history_operation.parquet"), index=False)

# Psicosociales

In [None]:
# Load the raw psychosocial-factor records from the raw-data directory.
sociales_path = os.path.join(RAW_DIR, "factores_psicosociales.csv")
sociales = pd.read_csv(sociales_path)
# sociales.head()

In [None]:
# Shorten the long recognition column name before the main renaming step.
sociales = sociales.rename(
    columns={"reconocimientos_desempeno_merito": "reconocimiento"}
)

In [None]:
# Split "periodo" on "-" once: piece 0 is kept in "periodo" (later renamed to
# year) and piece 1 goes to the new "mes" column (later renamed to month).
# NOTE(review): presumably the raw value looks like "YYYY-MM" -- confirm.
periodo_parts = sociales["periodo"].str.split("-")
sociales["mes"] = periodo_parts.str[1]
sociales["periodo"] = periodo_parts.str[0]

# Move the month column from the end of the frame to position 2. The column is
# addressed by name instead of by a hard-coded position, so an extra or missing
# column in the raw CSV can no longer cause a different column to be moved.
sociales.insert(2, "mes", sociales.pop("mes"))

# Convert both pieces to nullable integers; text that is not numeric becomes <NA>.
sociales["periodo"] = pd.to_numeric(sociales['periodo'], errors='coerce').astype('Int64')
sociales["mes"] = pd.to_numeric(sociales['mes'], errors='coerce').astype('Int64')

# English column names for the interim dataset.
sociales = sociales.rename(columns={'numero_empleado': 'emp_id',
                                    'periodo': 'year',
                                    'mes': 'month',
                                    'feedback_clientes': 'feedback_clients',
                                    'reconocimiento': 'recognition',
                                    'apoyo_supervisor': 'help_manager',
                                    'apoyo_equipo': 'help_team'})

#sociales.head()

In [None]:
#sociales.info()

In [None]:
# Write the psychosocial history to the interim directory (no index column).
history_performance_path = os.path.join(INTERIM_DIR, "history_performance.csv")
sociales.to_csv(history_performance_path, index=False)
# Parquet export to PROCESSED_DIR is currently disabled:
#sociales.to_parquet(os.path.join(PROCESSED_DIR,"history_performance.parquet"), index=False)