In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.prediction_algorithms import KNNBasic
from surprise import accuracy

# Load the input tables from the CSV files (jobs.csv is tab-separated).
jobs_df = pd.read_csv('jobs.csv', sep='\t')
ratings_df = pd.read_csv('ratings_section.csv')
sections_df = pd.read_csv('section.csv')

# Build every (section id, job id) pair and expose it as a two-column frame.
all_combinations = pd.MultiIndex.from_product(
    [sections_df['id'], jobs_df['Jobid']],
    names=['sectionId', 'Jobid'],
)
all_combinations_df = pd.DataFrame(index=all_combinations).reset_index()

# Enrich each pair, in this order, with the ratings (joined on the section
# id), the section attributes and the job attributes; finally keep only the
# pairs that ended up with a rating.
merged_df = (
    all_combinations_df
    .merge(ratings_df, on='sectionId', how='left')
    .merge(sections_df, left_on='sectionId', right_on='id', how='left')
    .merge(jobs_df, left_on='Jobid', right_on='Jobid', how='left')
    .dropna(subset=['rating'])
)

# Prepare the data for Surprise. load_from_df reads the three columns as
# (user id, item id, rating), so the section name plays the role of the user
# and the job description the role of the item.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    merged_df[['sectionname', 'Description', 'rating']],
    reader,
)
# 80/20 split; no random_state is given, so the split differs between runs.
trainset, testset = train_test_split(data, test_size=0.2)

# Train the collaborative-filtering model (KNNBasic).
knn_model = KNNBasic()
knn_model.fit(trainset)

# Train the second model (SVD), which this script calls the "content" model.
# NOTE(review): Surprise's SVD is a matrix-factorization algorithm fitted on
# the same ratings, not a content-based model; the name is kept because later
# code refers to content_model.
content_model = SVD()
content_model.fit(trainset)

# Score how often a section name is mentioned in a job description, weighted
# by a rating.
def calculate_similarity(row):
    """Return the rating-weighted count of section-name mentions.

    Args:
        row: Mapping (a dict or a pandas row) providing the keys
            ``'sectionname'``, ``'Description'`` and ``'rating'``.

    Returns:
        The number of case-insensitive, non-overlapping occurrences of the
        section name inside the description, multiplied by ``row['rating']``.
        Returns ``0`` when either text is missing (``None`` or NaN) or when
        the section name is an empty string.
    """
    sectionname = row['sectionname']
    description = row['Description']

    # Treat None and NaN the same way for both fields: a NaN section name
    # (e.g. produced by a left merge) has no .lower() and would raise.
    if pd.isnull(sectionname) or pd.isnull(description):
        return 0

    # str.count('') returns len(text) + 1, which would score an empty
    # section name as a match at every position.
    if not sectionname:
        return 0

    occurrences = description.lower().count(sectionname.lower())
    return occurrences * row['rating']


# Produce hybrid predictions by combining both trained models.
predictions = []
max_rating = merged_df['rating'].max()
min_rating = merged_df['rating'].min()
rating_range = max_rating - min_rating

# First rating found for each section name, computed once instead of
# re-scanning merged_df with a boolean mask for every test row.
first_rating_by_section = (
    merged_df.drop_duplicates(subset='sectionname')
    .set_index('sectionname')['rating']
    .to_dict()
)

for test_section, test_description, test_rating in testset:
    # Estimated rating of each model for this (section, description) pair.
    knn_pred = knn_model.predict(test_section, test_description, test_rating).est
    content_pred = content_model.predict(test_section, test_description, test_rating).est

    # Text-match score weighted by each model's estimate.
    similarity_pred_content = calculate_similarity({'sectionname': test_section, 'Description': test_description, 'rating': content_pred})
    similarity_pred_knn = calculate_similarity({'sectionname': test_section, 'Description': test_description, 'rating': knn_pred})

    # Average both scores, cap the result at 1.0 and keep one decimal.
    similarity_hybrid_pred = min((similarity_pred_content + similarity_pred_knn) / 2, 1.0)
    similarity_hybrid_pred = round(similarity_hybrid_pred, 1)

    # Scale the score by the section's rating, min-max normalized to [0, 1].
    section_rating = first_rating_by_section[test_section]
    if rating_range > 0:
        normalized_rating = (section_rating - min_rating) / rating_range
    else:
        # Every rating is identical: there is nothing to normalize against,
        # so leave the score unscaled rather than dividing by zero.
        normalized_rating = 1.0

    similarity_hybrid_pred *= normalized_rating

    predictions.append((test_section, test_description, test_rating, similarity_hybrid_pred))


df_predictions = pd.DataFrame(predictions, columns=['sectionname', 'Description', 'rating','similarity_pred'])

# Attach the job attributes to each prediction through the description text.
# NOTE(review): merged_df holds one row per (section, job) pair, so this join
# on 'Description' looks many-to-many; the duplicates are only collapsed by
# the drop_duplicates on 'Jobname' below. Confirm this is intended.
recommendations = merged_df[['Jobid', 'Jobname', 'URL', 'Location', 'Date', 'Company','Description']].merge(df_predictions, on='Description')
# Best score first, then keep only the top-scoring row for each job name.
recommendations = recommendations.sort_values('similarity_pred', ascending=False)[['Jobname', 'URL', 'Location','Date', 'Company', 'similarity_pred']]
recommendations = recommendations.drop_duplicates(subset=['Jobname'])
print(recommendations)


Computing the msd similarity matrix...
Done computing similarity matrix.
                                                Jobname  \
0                          FullStack Developer (Remote)   
1741                      Especialista de Base de Datos   
2537  React.js/TypeScript Developer - Remote - Latin...   
346   Programador / Desarrollador Fullstack Jr. / TI...   
337   Programador / Desarrollador Fullstack Junior -...   
768       Analista Desarrollador de Backend- San Isidro   
1505                         Desarrollador Java Backend   
2716  PHP + Laravel + Vue.js/React.js Developer - Re...   
1127                                  Developer Backend   
2841                  FullStack Developer Netcore React   
2169                     Desarrollador Front End (Lima)   
1533            Analista Programador .Net Core/ Angular   
1169                                Fullstack developer   
3418                                      iOS Developer   
2580  Full Stack React.js Developer - Remo