Importar las librerías



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, LSTM, Dense, Concatenate
from tensorflow.keras.models import Model
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

Cargar los archivos para la entrada de datos

In [None]:
# Read the three semicolon-separated training CSVs (workers, requirements and
# allocation, going by the file names) into DataFrames, in that order.
workers_df, tasks_df, reparticion_df = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        '/content/sample_data/trabajadores_si.csv',
        '/content/sample_data/requerimientos_si.csv',
        '/content/sample_data/reparticion_si.csv',
    )
)

Extraer los datos de los trabajadores y los requisitos

In [None]:
# Feature matrices: positional columns 1..23 of each frame as NumPy arrays
# (column 0 is skipped -- presumably an ID column; TODO confirm).
worker_skills = workers_df.iloc[:, 1:24].values
task_requirements = tasks_df.iloc[:, 1:24].values

# Move 'Trabajador_ID' into the index so it is excluded from the target values.
# Guarded so that re-running this cell does not raise KeyError once the column
# has already been moved into the index.
if 'Trabajador_ID' in reparticion_df.columns:
    reparticion_df = reparticion_df.set_index('Trabajador_ID')
reparticion = reparticion_df.values

Reshape de los datos

In [None]:
# Append a trailing axis of size 1 to each array: (rows, cols) -> (rows, cols, 1).
# reshape (rather than adding a new axis) keeps the cell safe to re-run: an
# array that is already (rows, cols, 1) comes out with the same shape.
worker_skills = worker_skills.reshape(*worker_skills.shape[:2], 1)
task_requirements = task_requirements.reshape(*task_requirements.shape[:2], 1)
reparticion = reparticion.reshape(*reparticion.shape[:2], 1)

Crear modelo

In [None]:
# Two inputs, one per data source. Each sample is a sequence whose length is
# the number of feature columns, with a single value per step.
input_worker = keras.Input(name='input_worker', shape=(worker_skills.shape[1], 1))
input_task = keras.Input(name='input_task', shape=(task_requirements.shape[1], 1))

# Independent 64-unit LSTM encoders; each returns only its final output
# (return_sequences defaults to False).
lstm_layer_worker = LSTM(units=64)(input_worker)
lstm_layer_task = LSTM(units=64)(input_task)

Concatenar las salidas

In [None]:
# Join the two LSTM encodings along the last (feature) axis.
concatenated = Concatenate(axis=-1)([lstm_layer_worker, lstm_layer_task])

# Sigmoid output layer with one unit per column of the target array.
output_layer = Dense(
    units=reparticion.shape[1],
    activation='sigmoid',
    name='output',
)(concatenated)

# Two-input, single-output functional model.
model = Model(
    inputs=[input_worker, input_task],
    outputs=output_layer,
)

Compilar el modelo

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# sigmoid output layer) and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

Pruebas de dimensiones

In [None]:
# Sanity check: show the shape of each array that is fed to the model.
print("Dimensiones de worker_skills:", np.shape(worker_skills))
print("Dimensiones de task_requirements:", np.shape(task_requirements))
print("Dimensiones de reparticion después del reshape:", np.shape(reparticion))

# Show the entry at position (0, 0) of each array, one per line, in the order
# reparticion, worker_skills, task_requirements.
print(
    reparticion[0, 0],
    worker_skills[0, 0],
    task_requirements[0, 0],
    sep="\n",
)

Entrenar el modelo con las reparticiones

In [None]:
# Train for 10 epochs in mini-batches of 32 samples.
# NOTE(review): both inputs and the target must have the same number of rows --
# confirm the three CSVs are row-aligned.
model.fit(
    x=[worker_skills, task_requirements],
    y=reparticion,
    batch_size=32,
    epochs=10,
)

Ahora se tomarán las medidas del modelo

Cargar datos de prueba

In [None]:
# Load the additional test data (semicolon-separated CSVs).
# NOTE(review): the workers file is the same one used for training
# ('trabajadores_si.csv'); only the requirements and labels come from the
# '_t' files -- confirm this is intended.
workers_test_additional_df = pd.read_csv('/content/sample_data/trabajadores_si.csv', sep=';')
tasks_test_additional_df = pd.read_csv('/content/sample_data/requerimientos_si_t.csv', sep=';')
reparticion_test_additional_df = pd.read_csv('/content/sample_data/reparticion_si_t.csv', sep=';')

# Feature matrices: positional columns 1..23 of each frame, same slice as the
# training data.
worker_skills_test_additional = workers_test_additional_df.iloc[:, 1:24].values
task_requirements_test_additional = tasks_test_additional_df.iloc[:, 1:24].values

# Append the trailing axis of size 1 so the arrays match the model inputs:
# (rows, cols) -> (rows, cols, 1).
worker_skills_test_additional = worker_skills_test_additional.reshape((worker_skills_test_additional.shape[0], worker_skills_test_additional.shape[1], 1))
task_requirements_test_additional = task_requirements_test_additional.reshape((task_requirements_test_additional.shape[0], task_requirements_test_additional.shape[1], 1))

# Test labels: the 'Reparticion' column as a (rows, 1) column vector.
reparticion_test_additional = reparticion_test_additional_df['Reparticion'].values.reshape((reparticion_test_additional_df.shape[0], 1))

Predicciones y ajuste a formato binario

In [None]:
# Run the model on the test inputs; the sigmoid output gives scores in [0, 1].
predictions = model.predict(
    x=[worker_skills_test_additional, task_requirements_test_additional]
)

# Threshold the scores at 0.5 to obtain hard 0/1 labels (strictly greater than
# 0.5 maps to 1).
binary_predictions = np.greater(predictions, 0.5).astype(int)

Accuracy, ROC y Matriz de Confusión

In [None]:
# Evaluate the model against the test labels loaded above.
# The labels live in `reparticion_test_additional`; the name `reparticion_test`
# is never defined in this notebook and raised NameError on a fresh kernel.

# Accuracy of the thresholded (0/1) predictions.
accuracy = accuracy_score(reparticion_test_additional, binary_predictions)
print(f'Accuracy: {accuracy}')

# Area under the ROC curve, computed from the raw (un-thresholded) scores.
roc_auc = roc_auc_score(reparticion_test_additional, predictions)
print(f'AUC-ROC: {roc_auc}')

# Confusion matrix of the thresholded predictions.
conf_matrix = confusion_matrix(reparticion_test_additional, binary_predictions)
print('Confusion Matrix:')
print(conf_matrix)

Ahora se mostrará qué trabajadores son aptos para el proyecto usando el modelo entrenado

In [None]:
# Load the workers and requirements to score (semicolon-separated CSVs).
# NOTE(review): these paths are relative, unlike the '/content/sample_data/...'
# paths used elsewhere in the notebook -- confirm the files are in the working
# directory.
workers_to_predict_df = pd.read_csv('workers_to_predict.csv', sep=';')
requirements_to_predict_df = pd.read_csv('requirements_to_predict.csv', sep=';')

# Feature matrices: positional columns 1..23, same slice as the training data.
worker_skills_to_predict = workers_to_predict_df.iloc[:, 1:24].values
requirements_to_predict = requirements_to_predict_df.iloc[:, 1:24].values

# Append the trailing axis of size 1 so the arrays match the model inputs:
# (rows, cols) -> (rows, cols, 1).
worker_skills_to_predict = worker_skills_to_predict.reshape((worker_skills_to_predict.shape[0], worker_skills_to_predict.shape[1], 1))
requirements_to_predict = requirements_to_predict.reshape((requirements_to_predict.shape[0], requirements_to_predict.shape[1], 1))

# Score every (worker, requirement) row pair.
# Note: this rebinds `predictions`, replacing the test-set predictions computed
# earlier in the notebook.
predictions = model.predict([worker_skills_to_predict, requirements_to_predict])

# Keep the workers whose score exceeds the confidence threshold (example: 0.5).
threshold = 0.5
# The model output is 2-D (rows, outputs), but selecting DataFrame rows needs a
# 1-D boolean mask with one entry per row. Collapse the output axis explicitly
# instead of relying on how pandas treats a 2-D boolean array.
# With a single output column this is the same as flattening; with several
# outputs a worker is kept if ANY output exceeds the threshold -- TODO confirm
# that is the intended rule.
selected_mask = (predictions > threshold).any(axis=1)
selected_workers = workers_to_predict_df[selected_mask]

# Show the selected workers.
print("Trabajadores seleccionados:")
print(selected_workers)