In [1]:
import random 
import numpy as np 
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from utils.transformation import *

VAR_SEED = 42
VAR_TESTSET_SIZE = 0.20
VAR_DIR_DATA_CLEAN = '../data/cleaning'

random.seed(VAR_SEED)
np.random.seed(VAR_SEED)

catalogo = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/catalogo.csv", sep=",", encoding="latin1")
mf_dataset = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/mf_dataset.csv", sep=",", encoding="latin1")

In [2]:
catalogo = calculate_ratio_of_interactions(mf_dataset, catalogo)

# Parámetros para el cálculo del puntaje
features = {"hito": (1, 4), "skill": (0, 15), "knowledge": (1, 15), "interaction_ratio": (0.0, 1.0)}
weights = {"hito": 0.3, "skill": 0.25, "knowledge": 0.25, "interaction_ratio": 0.2}

# Calcular el puntaje en el DataFrame
catalogo = calculate_score_dataset(catalogo, features, weights, new_column='formula')

scale_complexity_norm = MinMaxScaler()
catalogo['complexity_norm'] = scale_complexity_norm.fit_transform(catalogo[['complexity']])

scale_complexity12_norm = MinMaxScaler()
catalogo['complexity12_norm'] = scale_complexity12_norm.fit_transform(catalogo[['complexity12']])

df_catalogo = catalogo[['id_ejercicio', 'nombre', 'hito', 'skill', 'knowledge', 'complexity', 'complexity12', 'formula', 'complexity_norm', 'complexity12_norm']]


In [3]:
df_ratings = factorization_to_ratings(df_catalogo, mf_dataset, user_col='id_estudiante')
df_ratings.head(10)

Unnamed: 0,id_estudiante,id_ejercicio,nombre,hito,skill,knowledge,complexity,complexity12,formula,complexity_norm,complexity12_norm
0,0,0,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,279,0.281251,0.111111,0.022774
1,0,1,Calculadora GeomÃ©trica,2,1,1,17,529,0.188468,0.080808,0.281573
2,0,3,NÃºmeros Primos,1,0,3,3,259,0.177229,0.010101,0.00207
3,0,4,Nota Final,1,0,1,1,257,0.191384,0.0,0.0
4,0,6,Suma de los divisores de un nÃºmero,2,1,3,19,531,0.224705,0.090909,0.283644
5,0,10,Descomponer un nÃºmero,1,0,3,3,259,0.192111,0.010101,0.00207
6,0,17,Suma de los N primeros nÃºmeros naturales,1,0,2,2,258,0.210285,0.005051,0.001035
7,0,18,Juego Adivina mi nÃºmero,1,0,3,3,259,0.137281,0.010101,0.00207
8,0,22,Signo del ZodÃ­aco,1,1,1,17,273,0.186379,0.080808,0.016563
9,0,23,Conversor de Decimal a Binario,1,0,2,2,258,0.171904,0.005051,0.001035


In [4]:
# ESCALAS DE RANKING 
escala_01 = Reader(rating_scale=(0, 1))
escala_02 = Reader(rating_scale=(1, 255))
escala_03 = Reader(rating_scale=(257, 1279))

# CARGA DE DATOS
data_formula = Dataset.load_from_df(df_ratings[["id_estudiante", "id_ejercicio", "formula"]], escala_01)
data_complexity = Dataset.load_from_df(df_ratings[["id_estudiante", "id_ejercicio", "complexity"]], escala_02)
data_complexity12 = Dataset.load_from_df(df_ratings[["id_estudiante", "id_ejercicio", "complexity12"]], escala_03)

# MODELOS
benchmark_formula = evaluate_algorithms(data_formula)
benchmark_complexity = evaluate_algorithms(data_complexity)
benchmark_complexity12 = evaluate_algorithms(data_complexity12)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...


In [5]:
benchmark_formula

Unnamed: 0_level_0,test_rmse,fit_time,test_time,Benchmark Score
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KNNBasic,0.0003,0.059912,0.52802,3329.592781
KNNBaseline,0.000888,0.073279,0.622905,1125.818595
BaselineOnly,0.004507,0.006427,0.007498,221.896348
SVDpp,0.014767,0.144684,0.073302,67.718609
KNNWithMeans,0.016288,0.066406,0.553905,61.395262
KNNWithZScore,0.022494,0.088786,0.556033,44.455442
SVD,0.043166,0.077508,0.017368,23.166295
SlopeOne,0.046607,0.009262,0.027373,21.455796
NMF,0.061214,0.10166,0.011894,16.336101
NormalPredictor,0.063652,0.004642,0.008542,15.710406


In [6]:
benchmark_complexity

Unnamed: 0_level_0,test_rmse,fit_time,test_time,Benchmark Score
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SlopeOne,0.329358,0.009477,0.026904,3.036211
KNNBasic,0.397606,0.057369,0.521493,2.515055
KNNBaseline,0.510691,0.063798,0.593154,1.958131
SVDpp,1.280093,0.154747,0.068623,0.781193
SVD,2.134187,0.07483,0.013593,0.468562
BaselineOnly,2.589034,0.006908,0.007604,0.386244
CoClustering,4.234634,0.108843,0.011728,0.236148
KNNWithMeans,4.35331,0.065992,0.582897,0.22971
KNNWithZScore,6.197959,0.089499,0.579491,0.161343
NMF,8.185855,0.103851,0.012528,0.122162


In [7]:
benchmark_complexity12

Unnamed: 0_level_0,test_rmse,fit_time,test_time,Benchmark Score
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KNNBasic,2.730082,0.05893,0.523279,0.366289
SlopeOne,3.182421,0.010283,0.027197,0.314226
KNNBaseline,3.255119,0.062875,0.625481,0.307208
BaselineOnly,15.158602,0.007134,0.007366,0.065969
CoClustering,39.480485,0.128134,0.012438,0.025329
KNNWithMeans,40.769536,0.064886,0.553046,0.024528
KNNWithZScore,58.791083,0.093711,0.579623,0.017009
NMF,111.283343,0.106581,0.012194,0.008986
NormalPredictor,126.754524,0.004723,0.00966,0.007889
SVDpp,979.494335,0.160011,0.070281,0.001021


In [8]:
train_formula, test_formula = train_test_split(data_formula, test_size=VAR_TESTSET_SIZE, random_state=VAR_SEED)
train_complexity, test_complexity = train_test_split(data_complexity, test_size=VAR_TESTSET_SIZE, random_state=VAR_SEED)
train_complexity12, test_complexity12 = train_test_split(data_complexity12, test_size=VAR_TESTSET_SIZE, random_state=VAR_SEED)

In [9]:
from surprise import BaselineOnly, KNNBasic, KNNWithMeans, SVD, NMF, NormalPredictor, KNNBaseline, KNNWithZScore, SVDpp, CoClustering, SlopeOne

model1 = create_and_evaluate_model(train_formula, test_formula, CoClustering())
model2 = create_and_evaluate_model(train_complexity, test_complexity, NormalPredictor())
model3 = create_and_evaluate_model(train_complexity12, test_complexity12, SVD())

RMSE: 0.2000
MSE: 0.0400
MAE:  0.1951

RMSE: 14.6731
MSE: 215.2994
MAE:  11.0461

RMSE: 980.6927
MSE: 961758.1842
MAE:  975.6870



In [10]:
from utils.transformation import *

In [11]:
# 0 | 1305
estudiante_1 = 1305
print(f'Ejercicios realizados por el usuario [ {estudiante_1} ]')
df_catalogo[df_catalogo['id_ejercicio'].isin(df_ratings[df_ratings['id_estudiante'] == estudiante_1]['id_ejercicio'].values)].sort_values(by=["complexity"], ascending=True)

Ejercicios realizados por el usuario [ 1305 ]


Unnamed: 0,id_ejercicio,nombre,hito,skill,knowledge,complexity,complexity12,formula,complexity_norm,complexity12_norm
25,25,Ordenar tres nÃºmeros,1,0,1,1,257,0.190601,0.0,0.0
23,23,Conversor de Decimal a Binario,1,0,2,2,258,0.171904,0.005051,0.001035


In [12]:
recomendaciones_estudiante_1 = get_top_n_recommendations(model1, df_ratings, feature_users='id_estudiante', feature_items='id_ejercicio', user_id=estudiante_1)
print(f'Estudiante: {estudiante_1}')
df_catalogo[df_catalogo['id_ejercicio'].isin(recomendaciones_estudiante_1)]

Estudiante: 1305


Unnamed: 0,id_ejercicio,nombre,hito,skill,knowledge,complexity,complexity12,formula,complexity_norm,complexity12_norm
0,0,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,279,0.281251,0.111111,0.022774
1,1,Calculadora GeomÃ©trica,2,1,1,17,529,0.188468,0.080808,0.281573
2,2,Subsecuencias de ADN,2,3,7,55,567,0.261581,0.272727,0.320911
3,3,NÃºmeros Primos,1,0,3,3,259,0.177229,0.010101,0.00207
4,4,Nota Final,1,0,1,1,257,0.191384,0.0,0.0
6,6,Suma de los divisores de un nÃºmero,2,1,3,19,531,0.224705,0.090909,0.283644
7,7,Resolver un sistema de ecuaciones,1,0,1,1,257,0.050914,0.0,0.0
8,8,Distancia Levenshtein,2,3,7,55,567,0.262626,0.272727,0.320911
9,9,Validar Secuencias de ADN,2,2,7,39,551,0.251442,0.191919,0.304348
10,10,Descomponer un nÃºmero,1,0,3,3,259,0.192111,0.010101,0.00207


In [13]:
recomendaciones_estudiante_1 = get_top_n_recommendations(model2, df_ratings, feature_users='id_estudiante', feature_items='id_ejercicio', user_id=estudiante_1)
print(f'Estudiante: {estudiante_1}')
df_catalogo[df_catalogo['id_ejercicio'].isin(recomendaciones_estudiante_1)]

Estudiante: 1305


Unnamed: 0,id_ejercicio,nombre,hito,skill,knowledge,complexity,complexity12,formula,complexity_norm,complexity12_norm
0,0,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,279,0.281251,0.111111,0.022774
1,1,Calculadora GeomÃ©trica,2,1,1,17,529,0.188468,0.080808,0.281573
3,3,NÃºmeros Primos,1,0,3,3,259,0.177229,0.010101,0.00207
4,4,Nota Final,1,0,1,1,257,0.191384,0.0,0.0
6,6,Suma de los divisores de un nÃºmero,2,1,3,19,531,0.224705,0.090909,0.283644
13,13,Cajero AutomÃ¡tico Nivel 2,1,1,3,19,275,0.113216,0.090909,0.018634
21,21,El antipoema,2,2,7,39,551,0.250137,0.191919,0.304348
26,26,Contestador AutomÃ¡tico,1,0,3,3,259,0.212738,0.010101,0.00207
31,31,Carro de Compras,2,2,7,39,551,0.241521,0.191919,0.304348
37,37,Jerigonzo,2,2,7,39,551,0.288518,0.191919,0.304348


In [14]:
recomendaciones_estudiante_1 = get_top_n_recommendations(model3, df_ratings, feature_users='id_estudiante', feature_items='id_ejercicio', user_id=estudiante_1)
print(f'Estudiante: {estudiante_1}')
df_catalogo[df_catalogo['id_ejercicio'].isin(recomendaciones_estudiante_1)]

Estudiante: 1305


Unnamed: 0,id_ejercicio,nombre,hito,skill,knowledge,complexity,complexity12,formula,complexity_norm,complexity12_norm
0,0,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,279,0.281251,0.111111,0.022774
1,1,Calculadora GeomÃ©trica,2,1,1,17,529,0.188468,0.080808,0.281573
2,2,Subsecuencias de ADN,2,3,7,55,567,0.261581,0.272727,0.320911
3,3,NÃºmeros Primos,1,0,3,3,259,0.177229,0.010101,0.00207
4,4,Nota Final,1,0,1,1,257,0.191384,0.0,0.0
6,6,Suma de los divisores de un nÃºmero,2,1,3,19,531,0.224705,0.090909,0.283644
7,7,Resolver un sistema de ecuaciones,1,0,1,1,257,0.050914,0.0,0.0
8,8,Distancia Levenshtein,2,3,7,55,567,0.262626,0.272727,0.320911
9,9,Validar Secuencias de ADN,2,2,7,39,551,0.251442,0.191919,0.304348
10,10,Descomponer un nÃºmero,1,0,3,3,259,0.192111,0.010101,0.00207
