# LIBRERIAS

In [1]:
import random 
import numpy as np 
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from surprise import Dataset, Reader, accuracy, BaselineOnly, SVD
from surprise.model_selection import cross_validate, train_test_split

# VARIABLES
MY_SEED = 42
TEST_SIZE = 0.20
DIR_DATA_LIMPIA = '../datos/data-limpia'

# SEMILLA
random.seed(MY_SEED)
np.random.seed(MY_SEED)

# IMPORTAR DATA
catalogo = pd.read_csv(f"{DIR_DATA_LIMPIA}/catalogo.csv", sep=",", encoding="latin1")
mf_dataset = pd.read_csv(f"{DIR_DATA_LIMPIA}/mf_dataset.csv", sep=",", encoding="latin1")

# DATOS

In [2]:
catalogo = catalogo[['oid', 'nombre', 'hito', 'skill', 'knowledge', 'complexity']]
df_catalogo = catalogo.copy()
catalogo.head()

Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
0,0973dae0e1b74ab8baa8d94339ee3ae6,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23
1,16f619db31204ded9418136c4587ddd8,Calculadora GeomÃ©trica,2,1,1,17
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
3,171b5e86d4fb47268f2692587fbec073,NÃºmeros Primos,1,0,3,3
4,2437df93d3f44a87b00834072aeb1ab0,Nota Final,1,0,1,1


In [3]:
ratio_de_ejercicios = mf_dataset.sum().values.tolist()[1:]
resultados = [ n / len(mf_dataset) for n in ratio_de_ejercicios ]
max_valor = max(resultados)
indice_valor = resultados.index(max_valor)

print(f'Cantidad de registros de alumnos: {len(mf_dataset)}')
print(f'Cantidad de veces hechos (Ejercicios): {ratio_de_ejercicios}')
print(f'Porcentaje de realizacion del ejercicio: {resultados}')
print()
print(f'Indice del ejercicio: {indice_valor}\nCantidad del Ratio: {ratio_de_ejercicios[indice_valor]}\nRatio Max %: {max_valor}\n')

catalogo['ratio'] = resultados
catalogo.head()

Cantidad de registros de alumnos: 766
Cantidad de veces hechos (Ejercicios): [603, 275, 17, 542, 733, 0, 277, 195, 21, 42, 599, 38, 69, 233, 0, 0, 0, 737, 389, 0, 0, 37, 650, 590, 0, 730, 678, 0, 168, 653, 0, 4, 395, 11, 0, 347, 0, 184, 87, 0, 0, 0, 472, 0, 314, 0, 0, 0, 4, 0, 0, 0, 0]
Porcentaje de realizacion del ejercicio: [0.7872062663185379, 0.35900783289817234, 0.022193211488250653, 0.7075718015665796, 0.9569190600522193, 0.0, 0.36161879895561355, 0.2545691906005222, 0.02741514360313316, 0.05483028720626632, 0.7819843342036553, 0.04960835509138381, 0.09007832898172324, 0.304177545691906, 0.0, 0.0, 0.0, 0.9621409921671018, 0.5078328981723238, 0.0, 0.0, 0.048302872062663184, 0.8485639686684073, 0.7702349869451697, 0.0, 0.9530026109660574, 0.8851174934725848, 0.0, 0.2193211488250653, 0.8524804177545692, 0.0, 0.005221932114882507, 0.5156657963446475, 0.014360313315926894, 0.0, 0.45300261096605743, 0.0, 0.2402088772845953, 0.11357702349869452, 0.0, 0.0, 0.0, 0.6161879895561357, 0.0, 0

Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity,ratio
0,0973dae0e1b74ab8baa8d94339ee3ae6,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,0.787206
1,16f619db31204ded9418136c4587ddd8,Calculadora GeomÃ©trica,2,1,1,17,0.359008
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55,0.022193
3,171b5e86d4fb47268f2692587fbec073,NÃºmeros Primos,1,0,3,3,0.707572
4,2437df93d3f44a87b00834072aeb1ab0,Nota Final,1,0,1,1,0.956919


In [4]:
# Asignar pesos a cada componente
w_hito = 0.4
w_skill = 0.25
w_knowledge = 0.25
w_popularidad = 0.10

catalogo['complexity_combined'] = (
    w_hito * (catalogo['hito'] / 4) +
    w_skill * (catalogo['skill'] / 15) +
    w_knowledge * (catalogo['knowledge'] / 15) +
    w_popularidad * catalogo['ratio']
)

In [5]:

# Normalizar del 0 al 1 los valores de la columna complexity
n_comp_8bits = MinMaxScaler()
catalogo['complexity_norm'] = n_comp_8bits.fit_transform(catalogo[['complexity']])

catalogo[['oid', 'complexity_norm', 'complexity_combined']].head()

Unnamed: 0,oid,complexity_norm,complexity_combined
0,0973dae0e1b74ab8baa8d94339ee3ae6,0.111111,0.312054
1,16f619db31204ded9418136c4587ddd8,0.080808,0.269234
2,17022c9ceac94ec5b2e7bc934c7b2d6f,0.272727,0.368886
3,171b5e86d4fb47268f2692587fbec073,0.010101,0.220757
4,2437df93d3f44a87b00834072aeb1ab0,0.0,0.212359


In [6]:
def factorization_to_ratings(df_exercises: pd.DataFrame, df_matrix: pd.DataFrame)-> pd.DataFrame:
    rows = []
    for _, row_matrix in df_matrix.iterrows():
        user_id = row_matrix['rut']
        for exercise in df_matrix.columns[1:]:
            if row_matrix[exercise] == 1:
                row_exercise = df_exercises.iloc[int(exercise.lstrip('e'))]
                new_row = {'rut': user_id}
                new_row.update(row_exercise.to_dict())
                rows.append(new_row)
    df = pd.DataFrame(rows)      
    return df

In [7]:
catalogo = catalogo[['oid', 'complexity_norm', 'complexity_combined']]

exercises_users = factorization_to_ratings(catalogo, mf_dataset)
exercises_users.head(10)

Unnamed: 0,rut,oid,complexity_norm,complexity_combined
0,0,0973dae0e1b74ab8baa8d94339ee3ae6,0.111111,0.312054
1,0,16f619db31204ded9418136c4587ddd8,0.080808,0.269234
2,0,171b5e86d4fb47268f2692587fbec073,0.010101,0.220757
3,0,2437df93d3f44a87b00834072aeb1ab0,0.0,0.212359
4,0,29f15ef8dc32426f945f64e28c910a57,0.090909,0.302829
5,0,46850a246d48484b8f104f8aab5679b6,0.010101,0.228198
6,0,718578451f3f4eca87437cadfe98d688,0.005051,0.229547
7,0,729d37da8f2d46f3af2d891df04949ef,0.010101,0.200783
8,0,7f60644b0a1b484681ae5c8e36166c58,0.080808,0.21819
9,0,80c61dae74fa4915bf272ab17dfa62ff,0.005051,0.210357


In [8]:
# ESCALAS DE RANKING
reader = Reader(rating_scale=(0, 1))

In [9]:
# CARGA DE DATOS
data_complexity_norm = Dataset.load_from_df(exercises_users[["rut", "oid", "complexity_norm"]], reader)
data_complexity_combined = Dataset.load_from_df(exercises_users[["rut", "oid", "complexity_combined"]], reader)

In [10]:
# DIVISION DE DATOS PARA ENTRENAR
train_complexity_norm, test_complexity_norm = train_test_split(data_complexity_norm, test_size=TEST_SIZE, random_state=MY_SEED)
train_complexity_combined, test_complexity_combined = train_test_split(data_complexity_combined, test_size=TEST_SIZE, random_state=MY_SEED)

# MODELOS

In [11]:
# SVD 

## COMPLEXITY NORM
model_svd_complexity_norm = SVD()
model_svd_complexity_norm.fit(train_complexity_norm)
predictions_svd_complexity_norm = model_svd_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_svd_complexity_norm)
accuracy.mse(predictions_svd_complexity_norm)
accuracy.mae(predictions_svd_complexity_norm)
print()

results_svd_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_svd_complexity_norm = pd.DataFrame(results_svd_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_svd_complexity_combined = SVD()
model_svd_complexity_combined.fit(train_complexity_combined)
predictions_svd_complexity_combined = model_svd_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_svd_complexity_combined)
accuracy.mse(predictions_svd_complexity_combined)
accuracy.mae(predictions_svd_complexity_combined)
print()

results_svd_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_svd_complexity_combined = pd.DataFrame(results_svd_complexity_combined)


RMSE: 0.0341
MSE: 0.0012
MAE:  0.0216

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0101  0.0147  0.0085  0.0112  0.0139  0.0117  0.0023  
MAE (testset)     0.0028  0.0034  0.0028  0.0032  0.0031  0.0031  0.0002  
Fit time          0.01    0.02    0.02    0.01    0.02    0.01    0.01    
Test time         0.01    0.01    0.01    0.00    0.01    0.01    0.00    


RMSE: 0.0376
MSE: 0.0014
MAE:  0.0264

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0072  0.0077  

In [12]:
from surprise import KNNBasic, SVDpp, KNNWithMeans, CoClustering, NMF, SlopeOne, BaselineOnly, accuracy
from surprise.model_selection import cross_validate

In [13]:
# KNNBasic Model
## COMPLEXITY NORM
model_knn_basic_complexity_norm = KNNBasic()
model_knn_basic_complexity_norm.fit(train_complexity_norm)
predictions_knn_basic_complexity_norm = model_knn_basic_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_knn_basic_complexity_norm)
accuracy.mse(predictions_knn_basic_complexity_norm)
accuracy.mae(predictions_knn_basic_complexity_norm)
print()

results_knn_basic_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_knn_basic_complexity_norm = pd.DataFrame(results_knn_basic_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_knn_basic_complexity_combined = KNNBasic()
model_knn_basic_complexity_combined.fit(train_complexity_combined)
predictions_knn_basic_complexity_combined = model_knn_basic_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_knn_basic_complexity_combined)
accuracy.mse(predictions_knn_basic_complexity_combined)
accuracy.mae(predictions_knn_basic_complexity_combined)
print()

results_knn_basic_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_knn_basic_complexity_combined = pd.DataFrame(results_knn_basic_complexity_combined)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.0000
MSE: 0.0000
MAE:  0.0000

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0126  0.0115  0.0135  0.0096  0.0119  0.0118  0.0013  
MAE (testset)     0.0032  0.0029  0.0032  0.0030  0.0032  0.0031  0.0001  
Fit time          0.01    0.02    0.01    0.01    0.02    0.02    0.01    
Test time         0.01    0.01    0.01    0.01    0.00    0.01    0.00    


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.0000
MSE: 0.0000
MAE:  0.0000

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of al

In [14]:
# SVDpp Model
## COMPLEXITY NORM
model_svdpp_complexity_norm = SVDpp()
model_svdpp_complexity_norm.fit(train_complexity_norm)
predictions_svdpp_complexity_norm = model_svdpp_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_svdpp_complexity_norm)
accuracy.mse(predictions_svdpp_complexity_norm)
accuracy.mae(predictions_svdpp_complexity_norm)
print()

results_svdpp_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_svdpp_complexity_norm = pd.DataFrame(results_svdpp_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_svdpp_complexity_combined = SVDpp()
model_svdpp_complexity_combined.fit(train_complexity_combined)
predictions_svdpp_complexity_combined = model_svdpp_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_svdpp_complexity_combined)
accuracy.mse(predictions_svdpp_complexity_combined)
accuracy.mae(predictions_svdpp_complexity_combined)
print()

results_svdpp_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_svdpp_complexity_combined = pd.DataFrame(results_svdpp_complexity_combined)

RMSE: 0.0151
MSE: 0.0002
MAE:  0.0081

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0085  0.0135  0.0104  0.0158  0.0102  0.0117  0.0026  
MAE (testset)     0.0028  0.0033  0.0030  0.0037  0.0028  0.0031  0.0004  
Fit time          0.01    0.02    0.02    0.01    0.03    0.02    0.01    
Test time         0.01    0.00    0.01    0.00    0.02    0.01    0.01    


RMSE: 0.0128
MSE: 0.0002
MAE:  0.0086

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0062  0.0068  

In [15]:
# KNNWithMeans Model
## COMPLEXITY NORM
model_knn_means_complexity_norm = KNNWithMeans()
model_knn_means_complexity_norm.fit(train_complexity_norm)
predictions_knn_means_complexity_norm = model_knn_means_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_knn_means_complexity_norm)
accuracy.mse(predictions_knn_means_complexity_norm)
accuracy.mae(predictions_knn_means_complexity_norm)
print()

results_knn_means_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_knn_means_complexity_norm = pd.DataFrame(results_knn_means_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_knn_means_complexity_combined = KNNWithMeans()
model_knn_means_complexity_combined.fit(train_complexity_combined)
predictions_knn_means_complexity_combined = model_knn_means_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_knn_means_complexity_combined)
accuracy.mse(predictions_knn_means_complexity_combined)
accuracy.mae(predictions_knn_means_complexity_combined)
print()

results_knn_means_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_knn_means_complexity_combined = pd.DataFrame(results_knn_means_complexity_combined)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.0222
MSE: 0.0005
MAE:  0.0151

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0112  0.0099  0.0119  0.0133  0.0143  0.0121  0.0015  
MAE (testset)     0.0029  0.0030  0.0030  0.0033  0.0034  0.0031  0.0002  
Fit time          0.02    0.01    0.01    0.02    0.01    0.01    0.00    
Test time         0.00    0.01    0.00    0.01    0.00    0.01    0.00    


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.0197
MSE: 0.0004
MAE:  0.0152

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of al

In [16]:
# CoClustering Model
## COMPLEXITY NORM
model_coclustering_complexity_norm = CoClustering()
model_coclustering_complexity_norm.fit(train_complexity_norm)
predictions_coclustering_complexity_norm = model_coclustering_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_coclustering_complexity_norm)
accuracy.mse(predictions_coclustering_complexity_norm)
accuracy.mae(predictions_coclustering_complexity_norm)
print()

results_coclustering_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_coclustering_complexity_norm = pd.DataFrame(results_coclustering_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_coclustering_complexity_combined = CoClustering()
model_coclustering_complexity_combined.fit(train_complexity_combined)
predictions_coclustering_complexity_combined = model_coclustering_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_coclustering_complexity_combined)
accuracy.mse(predictions_coclustering_complexity_combined)
accuracy.mae(predictions_coclustering_complexity_combined)
print()

results_coclustering_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_coclustering_complexity_combined = pd.DataFrame(results_coclustering_complexity_combined)

RMSE: 0.0701
MSE: 0.0049
MAE:  0.0418

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0095  0.0118  0.0136  0.0097  0.0147  0.0119  0.0020  
MAE (testset)     0.0031  0.0031  0.0034  0.0025  0.0035  0.0031  0.0004  
Fit time          0.01    0.01    0.02    0.01    0.01    0.01    0.00    
Test time         0.01    0.00    0.01    0.01    0.01    0.01    0.00    


RMSE: 0.2440
MSE: 0.0595
MAE:  0.2394

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0062  0.0067  

In [17]:
# NMF Model
## COMPLEXITY NORM
model_nmf_complexity_norm = NMF()
model_nmf_complexity_norm.fit(train_complexity_norm)
predictions_nmf_complexity_norm = model_nmf_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_nmf_complexity_norm)
accuracy.mse(predictions_nmf_complexity_norm)
accuracy.mae(predictions_nmf_complexity_norm)
print()

results_nmf_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_nmf_complexity_norm = pd.DataFrame(results_nmf_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_nmf_complexity_combined = NMF()
model_nmf_complexity_combined.fit(train_complexity_combined)
predictions_nmf_complexity_combined = model_nmf_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_nmf_complexity_combined)
accuracy.mse(predictions_nmf_complexity_combined)
accuracy.mae(predictions_nmf_complexity_combined)
print()

results_nmf_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_nmf_complexity_combined = pd.DataFrame(results_nmf_complexity_combined)

RMSE: 0.0542
MSE: 0.0029
MAE:  0.0337

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0091  0.0094  0.0152  0.0126  0.0134  0.0119  0.0023  
MAE (testset)     0.0028  0.0030  0.0035  0.0029  0.0034  0.0031  0.0003  
Fit time          0.02    0.01    0.01    0.01    0.01    0.01    0.00    
Test time         0.01    0.00    0.01    0.01    0.00    0.01    0.00    


RMSE: 0.0614
MSE: 0.0038
MAE:  0.0597

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0081  0.0053  

In [18]:
# SlopeOne Model
## COMPLEXITY NORM
model_slopeone_complexity_norm = SlopeOne()
model_slopeone_complexity_norm.fit(train_complexity_norm)
predictions_slopeone_complexity_norm = model_slopeone_complexity_norm.test(test_complexity_norm)

accuracy.rmse(predictions_slopeone_complexity_norm)
accuracy.mse(predictions_slopeone_complexity_norm)
accuracy.mae(predictions_slopeone_complexity_norm)
print()

results_slopeone_complexity_norm = cross_validate(BaselineOnly(), data_complexity_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_slopeone_complexity_norm = pd.DataFrame(results_slopeone_complexity_norm)

print()
print()

## COMPLEXITY COMBINED
model_slopeone_complexity_combined = SlopeOne()
model_slopeone_complexity_combined.fit(train_complexity_combined)
predictions_slopeone_complexity_combined = model_slopeone_complexity_combined.test(test_complexity_combined)

accuracy.rmse(predictions_slopeone_complexity_combined)
accuracy.mse(predictions_slopeone_complexity_combined)
accuracy.mae(predictions_slopeone_complexity_combined)
print()

results_slopeone_complexity_combined = cross_validate(BaselineOnly(), data_complexity_combined, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_slopeone_complexity_combined = pd.DataFrame(results_slopeone_complexity_combined)

RMSE: 0.0572
MSE: 0.0033
MAE:  0.0458

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0089  0.0124  0.0135  0.0116  0.0126  0.0118  0.0016  
MAE (testset)     0.0028  0.0034  0.0033  0.0029  0.0031  0.0031  0.0002  
Fit time          0.01    0.02    0.02    0.01    0.02    0.02    0.00    
Test time         0.00    0.01    0.01    0.01    0.00    0.01    0.00    


RMSE: 0.0476
MSE: 0.0023
MAE:  0.0350

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0071  0.0070  

# PREDICCIONES

In [19]:
def suprise_get_top_n_recommendations(df, model, feature_users: str = 'rut', feature_items: str = 'oid', user_id: int = 0, n_recommenders: int = 10):

    items_interacted = df[df[feature_users] == user_id][feature_items].unique()
    all_items = df[feature_items].unique()

    items_pairs = [(user_id, ejer_oid, 0) for ejer_oid in list(set(all_items) - set(items_interacted))]

    predictions = model.test(items_pairs)

    top_n_recommendations = sorted(predictions, key = lambda x: x.est, reverse=True)[:n_recommenders]
    user_recommendation = [str(pred.iid) for pred in top_n_recommendations]
    
    return user_recommendation

## ESTUDIANTES

In [57]:
estudiante = 1305

In [58]:
# EJERCICIOS REALIZADOS POR EL ESTUDIANTE ALEATORIO
print(f'Ejercicios realizados por el usuario [ {estudiante} ]')
df_catalogo[df_catalogo['oid'].isin(exercises_users[exercises_users['rut'] == estudiante]['oid'].values)] # .sort_values(by=["complexity_12bits"], ascending=True)

Ejercicios realizados por el usuario [ 1305 ]


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
23,80c61dae74fa4915bf272ab17dfa62ff,Conversor de Decimal a Binario,1,0,2,2
25,89f44e7f5842479fb283e43c52ce067b,Ordenar tres nÃºmeros,1,0,1,1


### ESTUDIANTE

In [59]:
recommendations_norm_nmf = suprise_get_top_n_recommendations(df=exercises_users, model=model_nmf_complexity_norm, user_id=estudiante)
recommendations_norm_svd = suprise_get_top_n_recommendations(df=exercises_users, model=model_svd_complexity_norm, user_id=estudiante)
recommendations_norm_svdpp = suprise_get_top_n_recommendations(df=exercises_users, model=model_svdpp_complexity_norm, user_id=estudiante)
recommendations_norm_slopeone = suprise_get_top_n_recommendations(df=exercises_users, model=model_slopeone_complexity_norm, user_id=estudiante)
recommendations_norm_knn_basic = suprise_get_top_n_recommendations(df=exercises_users, model=model_knn_basic_complexity_norm, user_id=estudiante)
recommendations_norm_knn_means = suprise_get_top_n_recommendations(df=exercises_users, model=model_knn_means_complexity_norm, user_id=estudiante)
recommendations_norm_coclustering = suprise_get_top_n_recommendations(df=exercises_users, model=model_coclustering_complexity_norm, user_id=estudiante)


In [60]:
recommendations_combined_nmf = suprise_get_top_n_recommendations(df=exercises_users, model=model_nmf_complexity_combined, user_id=estudiante)
recommendations_combined_svd = suprise_get_top_n_recommendations(df=exercises_users, model=model_svd_complexity_combined, user_id=estudiante)
recommendations_combined_svdpp = suprise_get_top_n_recommendations(df=exercises_users, model=model_svdpp_complexity_combined, user_id=estudiante)
recommendations_combined_slopeone = suprise_get_top_n_recommendations(df=exercises_users, model=model_slopeone_complexity_combined, user_id=estudiante)
recommendations_combined_knn_basic = suprise_get_top_n_recommendations(df=exercises_users, model=model_knn_basic_complexity_combined, user_id=estudiante)
recommendations_combined_knn_means = suprise_get_top_n_recommendations(df=exercises_users, model=model_knn_means_complexity_combined, user_id=estudiante)
recommendations_combined_coclustering = suprise_get_top_n_recommendations(df=exercises_users, model=model_coclustering_complexity_combined, user_id=estudiante)

#### NMF

In [61]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_nmf)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
31,abc052e584734d0f8121d5e5ca659f82,Carro de Compras,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


In [62]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_nmf)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


#### SVD

In [63]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_svd)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
22,7f60644b0a1b484681ae5c8e36166c58,Signo del ZodÃ­aco,1,1,1,17
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


In [64]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_svd)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
0,0973dae0e1b74ab8baa8d94339ee3ae6,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
29,a3963220090f4e50a266ce53d33b9841,AprobaciÃ³n de CrÃ©ditos,1,0,3,3
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
44,db7987d040dc469a9c247d54dd72939a,NÃºmeros Perfectos,2,1,3,19


#### SVDPP

In [65]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_svdpp)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


In [66]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_svdpp)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
31,abc052e584734d0f8121d5e5ca659f82,Carro de Compras,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


#### SLOPEONE

In [67]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_slopeone)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
10,46850a246d48484b8f104f8aab5679b6,Descomponer un nÃºmero,1,0,3,3
22,7f60644b0a1b484681ae5c8e36166c58,Signo del ZodÃ­aco,1,1,1,17
29,a3963220090f4e50a266ce53d33b9841,AprobaciÃ³n de CrÃ©ditos,1,0,3,3
32,ac7382763e484d37908da54c076f7577,Cajero AutomÃ¡tico Nivel 1,1,0,3,3
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
35,baf2f8e0167a4e089d2cec16582c9ae9,Factores Primos,1,1,3,19
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
42,d8395f43e4a1454d90346ac5a1ba561a,NÃºmeros Amigos,1,0,3,3
44,db7987d040dc469a9c247d54dd72939a,NÃºmeros Perfectos,2,1,3,19


In [68]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_slopeone)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
10,46850a246d48484b8f104f8aab5679b6,Descomponer un nÃºmero,1,0,3,3
22,7f60644b0a1b484681ae5c8e36166c58,Signo del ZodÃ­aco,1,1,1,17
29,a3963220090f4e50a266ce53d33b9841,AprobaciÃ³n de CrÃ©ditos,1,0,3,3
32,ac7382763e484d37908da54c076f7577,Cajero AutomÃ¡tico Nivel 1,1,0,3,3
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
35,baf2f8e0167a4e089d2cec16582c9ae9,Factores Primos,1,1,3,19
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
42,d8395f43e4a1454d90346ac5a1ba561a,NÃºmeros Amigos,1,0,3,3
44,db7987d040dc469a9c247d54dd72939a,NÃºmeros Perfectos,2,1,3,19


#### KNN BASIC

In [69]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_knn_basic)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


In [70]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_knn_basic)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
11,4d51d13b9a2848a2803f7d1143fde6d4,Adivina la palabra,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


#### KNN MEANS

In [71]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_knn_means)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


In [72]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_knn_means)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
9,3d7d9e64ac0846caadc08dccf2538f55,Validar Secuencias de ADN,2,2,7,39
12,4d5ed23727c04a8790f2e4ab3f09767e,FunciÃ³n buscarTodas,2,2,7,39
21,7da6cedd04c44a15b5e421440253acff,El antipoema,2,2,7,39
28,9e02c76901da440ba1b70f0a75d8ae89,Encriptador ROT13,2,2,7,39
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
37,c0a2cd8059d44afb822a031066678092,Jerigonzo,2,2,7,39
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
48,e72db1cb2e9f400990cfa3b464d0391a,Sopa de Letras,2,5,7,87


#### CO CLUSTERING

In [73]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_norm_coclustering)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
10,46850a246d48484b8f104f8aab5679b6,Descomponer un nÃºmero,1,0,3,3
22,7f60644b0a1b484681ae5c8e36166c58,Signo del ZodÃ­aco,1,1,1,17
29,a3963220090f4e50a266ce53d33b9841,AprobaciÃ³n de CrÃ©ditos,1,0,3,3
32,ac7382763e484d37908da54c076f7577,Cajero AutomÃ¡tico Nivel 1,1,0,3,3
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
35,baf2f8e0167a4e089d2cec16582c9ae9,Factores Primos,1,1,3,19
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
42,d8395f43e4a1454d90346ac5a1ba561a,NÃºmeros Amigos,1,0,3,3
44,db7987d040dc469a9c247d54dd72939a,NÃºmeros Perfectos,2,1,3,19


In [74]:
print(f'Estudiante: {estudiante}')
df_catalogo[df_catalogo['oid'].isin(recommendations_combined_coclustering)]

Estudiante: 1305


Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity
8,37e4f4a1e8174e9496d21b00d67fc8f1,Distancia Levenshtein,2,3,7,55
10,46850a246d48484b8f104f8aab5679b6,Descomponer un nÃºmero,1,0,3,3
22,7f60644b0a1b484681ae5c8e36166c58,Signo del ZodÃ­aco,1,1,1,17
29,a3963220090f4e50a266ce53d33b9841,AprobaciÃ³n de CrÃ©ditos,1,0,3,3
32,ac7382763e484d37908da54c076f7577,Cajero AutomÃ¡tico Nivel 1,1,0,3,3
33,b16016fbdbf04317871d334cc4da6eae,Alineamiento de Secuencias,2,4,7,71
35,baf2f8e0167a4e089d2cec16582c9ae9,Factores Primos,1,1,3,19
38,c0d9a3b2f7be46dab1f792c7fbd365b9,Decodificador,2,2,7,39
42,d8395f43e4a1454d90346ac5a1ba561a,NÃºmeros Amigos,1,0,3,3
44,db7987d040dc469a9c247d54dd72939a,NÃºmeros Perfectos,2,1,3,19
