# LIBRERIAS

In [1]:
import random 
import numpy as np 
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from surprise import Dataset, Reader, accuracy, BaselineOnly, SVD
from surprise.model_selection import cross_validate, train_test_split

# VARIABLES
MY_SEED = 42
TEST_SIZE = 0.20
DIR_DATA_LIMPIA = '../datos/data-limpia'

# SEMILLA
random.seed(MY_SEED)
np.random.seed(MY_SEED)

# IMPORTAR DATA
catalogo = pd.read_csv(f"{DIR_DATA_LIMPIA}/catalogo.csv", sep=",", encoding="latin1")
mf_dataset = pd.read_csv(f"{DIR_DATA_LIMPIA}/mf_dataset.csv", sep=",", encoding="latin1")

# DATOS

In [2]:
catalogo = catalogo[['oid', 'nombre', 'hito', 'skill', 'knowledge', 'complexity_8bits', 'complexity_12bits']]
catalogo.head()

Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity_8bits,complexity_12bits
0,0973dae0e1b74ab8baa8d94339ee3ae6,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,279
1,16f619db31204ded9418136c4587ddd8,Calculadora GeomÃ©trica,2,1,1,17,529
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55,567
3,171b5e86d4fb47268f2692587fbec073,NÃºmeros Primos,1,0,3,3,259
4,2437df93d3f44a87b00834072aeb1ab0,Nota Final,1,0,1,1,257


In [3]:
ratio_de_ejercicios = mf_dataset.sum().values.tolist()[1:]
resultados = [ n / len(mf_dataset) for n in ratio_de_ejercicios ]
max_valor = max(resultados)
indice_valor = resultados.index(max_valor)

print(f'Cantidad de registros de alumnos: {len(mf_dataset)}')
print(f'Cantidad de veces hechos (Ejercicios): {ratio_de_ejercicios}')
print(f'Porcentaje de realizacion del ejercicio: {resultados}')
print()
print(f'Indice del ejercicio: {indice_valor}\nCantidad del Ratio: {ratio_de_ejercicios[indice_valor]}\nRatio Max %: {max_valor}\n')

catalogo['ratio'] = resultados
catalogo.head()

Cantidad de registros de alumnos: 766
Cantidad de veces hechos (Ejercicios): [603, 275, 17, 542, 733, 0, 277, 195, 21, 42, 599, 38, 69, 233, 0, 0, 0, 737, 389, 0, 0, 37, 650, 590, 0, 730, 678, 0, 168, 653, 0, 4, 395, 11, 0, 347, 0, 184, 87, 0, 0, 0, 472, 0, 314, 0, 0, 0, 4, 0, 0, 0, 0]
Porcentaje de realizacion del ejercicio: [0.7872062663185379, 0.35900783289817234, 0.022193211488250653, 0.7075718015665796, 0.9569190600522193, 0.0, 0.36161879895561355, 0.2545691906005222, 0.02741514360313316, 0.05483028720626632, 0.7819843342036553, 0.04960835509138381, 0.09007832898172324, 0.304177545691906, 0.0, 0.0, 0.0, 0.9621409921671018, 0.5078328981723238, 0.0, 0.0, 0.048302872062663184, 0.8485639686684073, 0.7702349869451697, 0.0, 0.9530026109660574, 0.8851174934725848, 0.0, 0.2193211488250653, 0.8524804177545692, 0.0, 0.005221932114882507, 0.5156657963446475, 0.014360313315926894, 0.0, 0.45300261096605743, 0.0, 0.2402088772845953, 0.11357702349869452, 0.0, 0.0, 0.0, 0.6161879895561357, 0.0, 0

Unnamed: 0,oid,nombre,hito,skill,knowledge,complexity_8bits,complexity_12bits,ratio
0,0973dae0e1b74ab8baa8d94339ee3ae6,CÃ¡lculo del dÃ­gito verificador del rut,1,1,7,23,279,0.787206
1,16f619db31204ded9418136c4587ddd8,Calculadora GeomÃ©trica,2,1,1,17,529,0.359008
2,17022c9ceac94ec5b2e7bc934c7b2d6f,Subsecuencias de ADN,2,3,7,55,567,0.022193
3,171b5e86d4fb47268f2692587fbec073,NÃºmeros Primos,1,0,3,3,259,0.707572
4,2437df93d3f44a87b00834072aeb1ab0,Nota Final,1,0,1,1,257,0.956919


In [4]:
# Asignar pesos a cada componente
w_hito = 0.4
w_skill = 0.25
w_knowledge = 0.25
w_popularidad = 0.10

catalogo['combined_score_1'] = (
    w_hito * (catalogo['hito'] / 4) +
    (w_skill+w_knowledge) * (catalogo['complexity_8bits'] / 255) + 
    w_popularidad * catalogo['ratio']
)

catalogo['combined_score_2'] = (
    (w_hito+w_skill+w_knowledge) * (catalogo['complexity_12bits'] / 2303) + 
    w_popularidad * catalogo['ratio']
)

catalogo['combined_score_3'] = (
    w_hito * (catalogo['hito'] / 4) +
    w_skill * (catalogo['skill'] / 15) +
    w_knowledge * (catalogo['knowledge'] / 15) +
    w_popularidad * catalogo['ratio']
)

catalogo = catalogo[['oid', 'complexity_8bits', 'complexity_12bits', 'combined_score_1', 'combined_score_2', 'combined_score_3']]
catalogo.head()

Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3
0,0973dae0e1b74ab8baa8d94339ee3ae6,23,279,0.223819,0.187752,0.312054
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234
2,17022c9ceac94ec5b2e7bc934c7b2d6f,55,567,0.310062,0.2238,0.368886
3,171b5e86d4fb47268f2692587fbec073,3,259,0.17664,0.171973,0.220757
4,2437df93d3f44a87b00834072aeb1ab0,1,257,0.197653,0.196126,0.212359


In [5]:
n_comp_8bits = MinMaxScaler()
catalogo['comp_8bits_norm'] = n_comp_8bits.fit_transform(catalogo[['complexity_8bits']])

n_comp_12bits = MinMaxScaler()
catalogo['comp_12bits_norm'] = n_comp_12bits.fit_transform(catalogo[['complexity_12bits']])

In [6]:
def factorization_to_ratings(df_exercises: pd.DataFrame, df_matrix: pd.DataFrame)-> pd.DataFrame:
    rows = []
    for _, row_matrix in df_matrix.iterrows():
        user_id = row_matrix['rut']
        for exercise in df_matrix.columns[1:]:
            if row_matrix[exercise] == 1:
                row_exercise = df_exercises.iloc[int(exercise.lstrip('e'))]
                new_row = {'rut': user_id}
                new_row.update(row_exercise.to_dict())
                rows.append(new_row)
    df = pd.DataFrame(rows)      
    return df

In [7]:
exercises_users = factorization_to_ratings(catalogo, mf_dataset)
exercises_users.head(10)

Unnamed: 0,rut,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
0,0,0973dae0e1b74ab8baa8d94339ee3ae6,23,279,0.223819,0.187752,0.312054,0.111111,0.011055
1,0,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
2,0,171b5e86d4fb47268f2692587fbec073,3,259,0.17664,0.171973,0.220757,0.010101,0.001005
3,0,2437df93d3f44a87b00834072aeb1ab0,1,257,0.197653,0.196126,0.212359,0.0,0.0
4,0,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
5,0,46850a246d48484b8f104f8aab5679b6,3,259,0.184081,0.179414,0.228198,0.010101,0.001005
6,0,718578451f3f4eca87437cadfe98d688,2,258,0.200136,0.197039,0.229547,0.005051,0.000503
7,0,729d37da8f2d46f3af2d891df04949ef,3,259,0.156666,0.151999,0.200783,0.010101,0.001005
8,0,7f60644b0a1b484681ae5c8e36166c58,17,273,0.21819,0.191543,0.21819,0.080808,0.00804
9,0,80c61dae74fa4915bf272ab17dfa62ff,2,258,0.180945,0.177849,0.210357,0.005051,0.000503


In [8]:
# ESCALAS DE RANKING
reader_norm = Reader(rating_scale=(0, 1))
reader_8bits = Reader(rating_scale=(1, 255))
reader_12bits = Reader(rating_scale=(257, 2247))

In [9]:
# CARGA DE DATOS
data_combined_score_1 = Dataset.load_from_df(exercises_users[["rut", "oid", "combined_score_1"]], reader_norm)
data_combined_score_2 = Dataset.load_from_df(exercises_users[["rut", "oid", "combined_score_2"]], reader_norm)
data_combined_score_3 = Dataset.load_from_df(exercises_users[["rut", "oid", "combined_score_3"]], reader_norm)

data_comp_8bits_norm = Dataset.load_from_df(exercises_users[["rut", "oid", "comp_8bits_norm"]], reader_norm)
data_comp_12bits_norm = Dataset.load_from_df(exercises_users[["rut", "oid", "comp_12bits_norm"]], reader_norm)

data_complexity_8bits = Dataset.load_from_df(exercises_users[["rut", "oid", "complexity_8bits"]], reader_8bits)
data_complexity_12bits = Dataset.load_from_df(exercises_users[["rut", "oid", "complexity_12bits"]], reader_12bits)


In [10]:
# DIVISION DE DATOS PARA ENTRENAR
train_combined_score_1, test_combined_score_1 = train_test_split(data_combined_score_1, test_size=TEST_SIZE, random_state=MY_SEED)
train_combined_score_2, test_combined_score_2 = train_test_split(data_combined_score_2, test_size=TEST_SIZE, random_state=MY_SEED)
train_combined_score_3, test_combined_score_3 = train_test_split(data_combined_score_3, test_size=TEST_SIZE, random_state=MY_SEED)

train_comp_8bits_norm, test_comp_8bits_norm = train_test_split(data_comp_8bits_norm, test_size=TEST_SIZE, random_state=MY_SEED)
train_comp_12bits_norm, test_comp_12bits_norm = train_test_split(data_comp_12bits_norm, test_size=TEST_SIZE, random_state=MY_SEED)

train_complexity_8bits, test_complexity_8bits = train_test_split(data_complexity_8bits, test_size=TEST_SIZE, random_state=MY_SEED)
train_complexity_12bits, test_complexity_12bits = train_test_split(data_complexity_12bits, test_size=TEST_SIZE, random_state=MY_SEED)


# MODELOS

In [11]:
# modelo 1
model_combined_score_1 = SVD()
model_combined_score_1.fit(train_combined_score_1)
predictions_combined_score_1 = model_combined_score_1.test(test_combined_score_1)
accuracy.rmse(predictions_combined_score_1)
accuracy.mse(predictions_combined_score_1)
accuracy.mae(predictions_combined_score_1)
print()

results_data_combined_score_1 = cross_validate(BaselineOnly(), data_combined_score_1, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_combined_score_1 = pd.DataFrame(results_data_combined_score_1)
results_data_combined_score_1

RMSE: 0.0373
MSE: 0.0014
MAE:  0.0270

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0050  0.0068  0.0044  0.0055  0.0065  0.0056  0.0009  
MAE (testset)     0.0018  0.0021  0.0019  0.0020  0.0020  0.0019  0.0001  
Fit time          0.01    0.01    0.01    0.01    0.01    0.01    0.00    
Test time         0.00    0.01    0.00    0.01    0.00    0.00    0.00    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,0.004983,0.001838,0.006955,0.004249
1,0.006849,0.002073,0.010479,0.00525
2,0.00442,0.001855,0.008141,0.004144
3,0.0055,0.001995,0.011285,0.006042
4,0.006451,0.001984,0.007668,0.004303


In [12]:
# modelo 2
model_combined_score_2 = SVD()
model_combined_score_2.fit(train_combined_score_2)
predictions_combined_score_2 = model_combined_score_2.test(test_combined_score_2)
accuracy.rmse(predictions_combined_score_2)
accuracy.mse(predictions_combined_score_2)
accuracy.mae(predictions_combined_score_2)
print()

results_data_combined_score_2 = cross_validate(BaselineOnly(), data_combined_score_2, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_combined_score_2 = pd.DataFrame(results_data_combined_score_2)
results_data_combined_score_2

RMSE: 0.0373
MSE: 0.0014
MAE:  0.0264

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0023  0.0024  0.0022  0.0020  0.0022  0.0022  0.0001  
MAE (testset)     0.0011  0.0011  0.0011  0.0011  0.0011  0.0011  0.0000  
Fit time          0.01    0.01    0.01    0.01    0.01    0.01    0.00    
Test time         0.01    0.00    0.00    0.00    0.01    0.00    0.00    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,0.002287,0.001105,0.007567,0.005181
1,0.002427,0.001142,0.010112,0.00417
2,0.002173,0.001104,0.007754,0.004635
3,0.002003,0.00107,0.009342,0.004614
4,0.002167,0.001125,0.014903,0.006171


In [13]:
# modelo 3
model_combined_score_3 = SVD()
model_combined_score_3.fit(train_combined_score_3)
predictions_combined_score_3 = model_combined_score_3.test(test_combined_score_3)
accuracy.rmse(predictions_combined_score_3)
accuracy.mse(predictions_combined_score_3)
accuracy.mae(predictions_combined_score_3)
print()

results_data_combined_score_3 = cross_validate(BaselineOnly(), data_combined_score_3, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_combined_score_3 = pd.DataFrame(results_data_combined_score_3)
results_data_combined_score_3

RMSE: 0.0381
MSE: 0.0014
MAE:  0.0271

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0060  0.0058  0.0063  0.0086  0.0080  0.0070  0.0011  
MAE (testset)     0.0022  0.0022  0.0023  0.0025  0.0026  0.0024  0.0002  
Fit time          0.01    0.01    0.01    0.01    0.01    0.01    0.00    
Test time         0.00    0.01    0.00    0.01    0.00    0.01    0.00    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,0.006031,0.002227,0.009665,0.004444
1,0.005834,0.002212,0.010475,0.006423
2,0.0063,0.002348,0.010101,0.00465
3,0.008635,0.002515,0.009347,0.006489
4,0.007965,0.00261,0.010047,0.004659


In [14]:
# modelo 4
model_comp_8bits_norm = SVD()
model_comp_8bits_norm.fit(train_comp_8bits_norm)
predictions_comp_8bits_norm = model_comp_8bits_norm.test(test_comp_8bits_norm)
accuracy.rmse(predictions_comp_8bits_norm)
accuracy.mse(predictions_comp_8bits_norm)
accuracy.mae(predictions_comp_8bits_norm)
print()

results_data_comp_8bits_norm = cross_validate(BaselineOnly(), data_comp_8bits_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_comp_8bits_norm = pd.DataFrame(results_data_comp_8bits_norm)
results_data_comp_8bits_norm

RMSE: 0.0353
MSE: 0.0012
MAE:  0.0224

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0102  0.0183  0.0083  0.0120  0.0100  0.0117  0.0035  
MAE (testset)     0.0031  0.0035  0.0027  0.0032  0.0031  0.0031  0.0003  
Fit time          0.01    0.02    0.01    0.01    0.01    0.01    0.01    
Test time         0.06    0.01    0.00    0.00    0.00    0.02    0.02    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,0.010154,0.003101,0.010436,0.062838
1,0.01834,0.003529,0.023356,0.005603
2,0.00825,0.002676,0.008696,0.004514
3,0.012026,0.003188,0.010635,0.004896
4,0.009967,0.003087,0.008935,0.004983


In [15]:
# modelo 5
model_comp_12bits_norm = SVD()
model_comp_12bits_norm.fit(train_comp_12bits_norm)
predictions_comp_12bits_norm = model_comp_12bits_norm.test(test_comp_12bits_norm)
accuracy.rmse(predictions_comp_12bits_norm)
accuracy.mse(predictions_comp_12bits_norm)
accuracy.mae(predictions_comp_12bits_norm)
print()

results_data_comp_12bits_norm = cross_validate(BaselineOnly(), data_comp_12bits_norm, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_comp_12bits_norm = pd.DataFrame(results_data_comp_12bits_norm)
results_data_comp_12bits_norm

RMSE: 0.0312
MSE: 0.0010
MAE:  0.0182

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0060  0.0069  0.0081  0.0079  0.0060  0.0070  0.0009  
MAE (testset)     0.0023  0.0023  0.0025  0.0024  0.0023  0.0024  0.0001  
Fit time          0.03    0.02    0.01    0.01    0.01    0.02    0.01    
Test time         0.02    0.01    0.01    0.01    0.00    0.01    0.01    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,0.006037,0.002271,0.028513,0.01863
1,0.006867,0.002289,0.023353,0.005301
2,0.008096,0.002546,0.01204,0.005269
3,0.00785,0.002448,0.00981,0.008159
4,0.006012,0.002292,0.013004,0.004644


In [16]:
# modelo 6
model_complexity_8bits = SVD()
model_complexity_8bits.fit(train_complexity_8bits)
predictions_complexity_8bits = model_complexity_8bits.test(test_complexity_8bits)
accuracy.rmse(predictions_complexity_8bits)
accuracy.mse(predictions_complexity_8bits)
accuracy.mae(predictions_complexity_8bits)
print()

results_data_complexity_8bits = cross_validate(BaselineOnly(), data_complexity_8bits, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_complexity_8bits = pd.DataFrame(results_data_complexity_8bits)
results_data_complexity_8bits

RMSE: 1.8515
MSE: 3.4279
MAE:  0.4647

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.7689  2.0519  2.9053  2.5341  2.2229  2.2966  0.3926  
MAE (testset)     0.5700  0.5394  0.6900  0.6485  0.6103  0.6117  0.0538  
Fit time          0.01    0.01    0.01    0.01    0.01    0.01    0.00    
Test time         0.00    0.01    0.00    0.00    0.00    0.00    0.00    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,1.768856,0.570038,0.009207,0.004257
1,2.051919,0.539392,0.008953,0.005855
2,2.90531,0.690006,0.010238,0.003974
3,2.534105,0.648548,0.007705,0.004625
4,2.222907,0.610347,0.013293,0.004116


In [17]:
# modelo 7
model_complexity_12bits = SVD()
model_complexity_12bits.fit(train_complexity_12bits)
predictions_complexity_12bits = model_complexity_12bits.test(test_complexity_12bits)
accuracy.rmse(predictions_complexity_12bits)
accuracy.mse(predictions_complexity_12bits)
accuracy.mae(predictions_complexity_12bits)
print()

results_data_complexity_12bits = cross_validate(BaselineOnly(), data_complexity_12bits, measures=["RMSE", "MAE"], cv=5, verbose=True)
results_data_complexity_12bits = pd.DataFrame(results_data_complexity_12bits)
results_data_complexity_12bits

RMSE: 1946.2046
MSE: 3787712.1654
MAE:  1943.6870

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    15.8274 13.2423 13.3756 12.4974 14.6362 13.9158 1.1773  
MAE (testset)     5.1369  4.3720  4.4895  4.5783  5.0504  4.7254  0.3089  
Fit time          0.02    0.02    0.01    0.01    0.01    0.01    0.00    
Test time         0.00    0.01    0.01    0.00    0.01    0.01    0.00    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,15.827432,5.136908,0.015232,0.004592
1,13.242338,4.371981,0.021567,0.010911
2,13.375637,4.489511,0.009593,0.006673
3,12.497365,4.578305,0.011557,0.004002
4,14.636169,5.050414,0.012005,0.006783


# PREDICCIONES

In [18]:
def suprise_get_top_n_recommendations(df, model, feature_users: str = 'rut', feature_items: str = 'oid', user_id: int = 100, n_recommenders: int = 10):

    items_interacted = df[df[feature_users] == user_id][feature_items].unique()
    all_items = df[feature_items].unique()

    items_pairs = [(user_id, ejer_oid, 0) for ejer_oid in list(set(all_items) - set(items_interacted))]

    predictions = model.test(items_pairs)

    top_n_recommendations = sorted(predictions, key = lambda x: x.est, reverse=True)[:n_recommenders]
    user_recommendation = [str(pred.iid) for pred in top_n_recommendations]
    
    return user_recommendation

#### ESTUDIANTES

In [19]:
estudiante_1 = 1305
estudiante_2 = 985

In [20]:
# EJERCICIOS REALIZADOS POR EL ESTUDIANTE ALEATORIO
print(f'Ejercicios realizados por el usuario [ {estudiante_1} ]')
catalogo[catalogo['oid'].isin(exercises_users[exercises_users['rut'] == estudiante_1]['oid'].values)].sort_values(by=["complexity_12bits"], ascending=True)

Ejercicios realizados por el usuario [ 1305 ]


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
25,89f44e7f5842479fb283e43c52ce067b,1,257,0.197261,0.195734,0.211967,0.0,0.0
23,80c61dae74fa4915bf272ab17dfa62ff,2,258,0.180945,0.177849,0.210357,0.005051,0.000503


In [21]:
# EJERCICIOS REALIZADOS POR EL ESTUDIANTE ALEATORIO
print(f'Ejercicios realizados por el usuario [ {estudiante_2} ]')
catalogo[catalogo['oid'].isin(exercises_users[exercises_users['rut'] == estudiante_2]['oid'].values)].sort_values(by=["complexity_12bits"], ascending=True)

Ejercicios realizados por el usuario [ 985 ]


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
4,2437df93d3f44a87b00834072aeb1ab0,1,257,0.197653,0.196126,0.212359,0.0,0.0
7,31ea1c1b12174428b5a67a6576627de9,1,257,0.127418,0.125891,0.142124,0.0,0.0
25,89f44e7f5842479fb283e43c52ce067b,1,257,0.197261,0.195734,0.211967,0.0,0.0
17,718578451f3f4eca87437cadfe98d688,2,258,0.200136,0.197039,0.229547,0.005051,0.000503
23,80c61dae74fa4915bf272ab17dfa62ff,2,258,0.180945,0.177849,0.210357,0.005051,0.000503
3,171b5e86d4fb47268f2692587fbec073,3,259,0.17664,0.171973,0.220757,0.010101,0.001005
10,46850a246d48484b8f104f8aab5679b6,3,259,0.184081,0.179414,0.228198,0.010101,0.001005
18,729d37da8f2d46f3af2d891df04949ef,3,259,0.156666,0.151999,0.200783,0.010101,0.001005
26,8f24397e36034cccb71e9d578975c33d,3,259,0.194394,0.189728,0.238512,0.010101,0.001005
29,a3963220090f4e50a266ce53d33b9841,3,259,0.19113,0.186464,0.235248,0.010101,0.001005


### ESTUDIANTE 1

In [22]:
# Recommendations 2
recommendations_combined_score_1 = suprise_get_top_n_recommendations(df=exercises_users, model=model_combined_score_1, user_id=estudiante_1)
recommendations_combined_score_2 = suprise_get_top_n_recommendations(df=exercises_users, model=model_combined_score_2, user_id=estudiante_1)
recommendations_combined_score_3 = suprise_get_top_n_recommendations(df=exercises_users, model=model_combined_score_3, user_id=estudiante_1)

recommendations_comp_8bits_norm = suprise_get_top_n_recommendations(df=exercises_users, model=model_comp_8bits_norm, user_id=estudiante_1)
recommendations_comp_12bits_norm = suprise_get_top_n_recommendations(df=exercises_users, model=model_comp_12bits_norm, user_id=estudiante_1)

recommendations_complexity_8bits = suprise_get_top_n_recommendations(df=exercises_users, model=model_complexity_8bits, user_id=estudiante_1)
recommendations_complexity_12bits = suprise_get_top_n_recommendations(df=exercises_users, model=model_complexity_12bits, user_id=estudiante_1)


In [23]:
# COMBINED SCORE 1
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_combined_score_1)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
8,37e4f4a1e8174e9496d21b00d67fc8f1,55,567,0.310585,0.224322,0.369408,0.272727,0.155779
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
11,4d51d13b9a2848a2803f7d1143fde6d4,39,551,0.281431,0.220289,0.354961,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
22,7f60644b0a1b484681ae5c8e36166c58,17,273,0.21819,0.191543,0.21819,0.080808,0.00804
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
33,b16016fbdbf04317871d334cc4da6eae,71,583,0.340652,0.229269,0.384769,0.353535,0.163819
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
48,e72db1cb2e9f400990cfa3b464d0391a,87,599,0.37111,0.234608,0.400522,0.434343,0.171859


In [24]:
# COMBINED SCORE 2
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_combined_score_2)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
17,718578451f3f4eca87437cadfe98d688,2,258,0.200136,0.197039,0.229547,0.005051,0.000503
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
22,7f60644b0a1b484681ae5c8e36166c58,17,273,0.21819,0.191543,0.21819,0.080808,0.00804
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
29,a3963220090f4e50a266ce53d33b9841,3,259,0.19113,0.186464,0.235248,0.010101,0.001005
35,baf2f8e0167a4e089d2cec16582c9ae9,19,275,0.182555,0.152769,0.211967,0.090909,0.009045
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688


In [25]:
# COMBINED SCORE 3
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_combined_score_3)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
0,0973dae0e1b74ab8baa8d94339ee3ae6,23,279,0.223819,0.187752,0.312054,0.111111,0.011055
8,37e4f4a1e8174e9496d21b00d67fc8f1,55,567,0.310585,0.224322,0.369408,0.272727,0.155779
11,4d51d13b9a2848a2803f7d1143fde6d4,39,551,0.281431,0.220289,0.354961,0.191919,0.147739
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
31,abc052e584734d0f8121d5e5ca659f82,39,551,0.276993,0.21585,0.350522,0.191919,0.147739
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688
48,e72db1cb2e9f400990cfa3b464d0391a,87,599,0.37111,0.234608,0.400522,0.434343,0.171859


In [26]:
# COMPLEXITY 8 BITS NORM
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_comp_8bits_norm)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
0,0973dae0e1b74ab8baa8d94339ee3ae6,23,279,0.223819,0.187752,0.312054,0.111111,0.011055
2,17022c9ceac94ec5b2e7bc934c7b2d6f,55,567,0.310062,0.2238,0.368886,0.272727,0.155779
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
11,4d51d13b9a2848a2803f7d1143fde6d4,39,551,0.281431,0.220289,0.354961,0.191919,0.147739
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
35,baf2f8e0167a4e089d2cec16582c9ae9,19,275,0.182555,0.152769,0.211967,0.090909,0.009045
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
38,c0d9a3b2f7be46dab1f792c7fbd365b9,39,551,0.287828,0.226686,0.361358,0.191919,0.147739
48,e72db1cb2e9f400990cfa3b464d0391a,87,599,0.37111,0.234608,0.400522,0.434343,0.171859


In [27]:
# COMPLEXITY 12 BITS NORM
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_comp_12bits_norm)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
2,17022c9ceac94ec5b2e7bc934c7b2d6f,55,567,0.310062,0.2238,0.368886,0.272727,0.155779
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
31,abc052e584734d0f8121d5e5ca659f82,39,551,0.276993,0.21585,0.350522,0.191919,0.147739
33,b16016fbdbf04317871d334cc4da6eae,71,583,0.340652,0.229269,0.384769,0.353535,0.163819
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
38,c0d9a3b2f7be46dab1f792c7fbd365b9,39,551,0.287828,0.226686,0.361358,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688
48,e72db1cb2e9f400990cfa3b464d0391a,87,599,0.37111,0.234608,0.400522,0.434343,0.171859


In [28]:
# COMPLEXITY 8 BITS
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_complexity_8bits)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
0,0973dae0e1b74ab8baa8d94339ee3ae6,23,279,0.223819,0.187752,0.312054,0.111111,0.011055
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
13,52620b0c858a4c59bc324b65278d28bd,19,275,0.167673,0.137886,0.197084,0.090909,0.009045
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
35,baf2f8e0167a4e089d2cec16582c9ae9,19,275,0.182555,0.152769,0.211967,0.090909,0.009045
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
38,c0d9a3b2f7be46dab1f792c7fbd365b9,39,551,0.287828,0.226686,0.361358,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688


In [29]:
# COMPLEXITY 12 BITS
print(f'Estudiante: {estudiante_1}')
catalogo[catalogo['oid'].isin(recommendations_complexity_12bits)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 1305


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
0,0973dae0e1b74ab8baa8d94339ee3ae6,23,279,0.223819,0.187752,0.312054,0.111111,0.011055
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
3,171b5e86d4fb47268f2692587fbec073,3,259,0.17664,0.171973,0.220757,0.010101,0.001005
18,729d37da8f2d46f3af2d891df04949ef,3,259,0.156666,0.151999,0.200783,0.010101,0.001005
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
26,8f24397e36034cccb71e9d578975c33d,3,259,0.194394,0.189728,0.238512,0.010101,0.001005
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
32,ac7382763e484d37908da54c076f7577,3,259,0.157449,0.152782,0.201567,0.010101,0.001005
35,baf2f8e0167a4e089d2cec16582c9ae9,19,275,0.182555,0.152769,0.211967,0.090909,0.009045
42,d8395f43e4a1454d90346ac5a1ba561a,3,259,0.167501,0.162835,0.211619,0.010101,0.001005


### ESTUDIANTE 2

In [30]:
# Recommendations 2
recommendations_combined_score_1 = suprise_get_top_n_recommendations(df=exercises_users, model=model_combined_score_1, user_id=estudiante_2)
recommendations_combined_score_2 = suprise_get_top_n_recommendations(df=exercises_users, model=model_combined_score_2, user_id=estudiante_2)
recommendations_combined_score_3 = suprise_get_top_n_recommendations(df=exercises_users, model=model_combined_score_3, user_id=estudiante_2)

recommendations_comp_8bits_norm = suprise_get_top_n_recommendations(df=exercises_users, model=model_comp_8bits_norm, user_id=estudiante_2)
recommendations_comp_12bits_norm = suprise_get_top_n_recommendations(df=exercises_users, model=model_comp_12bits_norm, user_id=estudiante_2)

recommendations_complexity_8bits = suprise_get_top_n_recommendations(df=exercises_users, model=model_complexity_8bits, user_id=estudiante_2)
recommendations_complexity_12bits = suprise_get_top_n_recommendations(df=exercises_users, model=model_complexity_12bits, user_id=estudiante_2)

In [31]:
# COMBINED SCORE 1
print(f'Estudiante: {estudiante_2}')
catalogo[catalogo['oid'].isin(recommendations_combined_score_1)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 985


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
2,17022c9ceac94ec5b2e7bc934c7b2d6f,55,567,0.310062,0.2238,0.368886,0.272727,0.155779
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
8,37e4f4a1e8174e9496d21b00d67fc8f1,55,567,0.310585,0.224322,0.369408,0.272727,0.155779
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
38,c0d9a3b2f7be46dab1f792c7fbd365b9,39,551,0.287828,0.226686,0.361358,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688


In [32]:
# COMBINED SCORE 2
print(f'Estudiante: {estudiante_2}')
catalogo[catalogo['oid'].isin(recommendations_combined_score_2)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 985


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
31,abc052e584734d0f8121d5e5ca659f82,39,551,0.276993,0.21585,0.350522,0.191919,0.147739
33,b16016fbdbf04317871d334cc4da6eae,71,583,0.340652,0.229269,0.384769,0.353535,0.163819
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688
48,e72db1cb2e9f400990cfa3b464d0391a,87,599,0.37111,0.234608,0.400522,0.434343,0.171859


In [33]:
# COMBINED SCORE 3
print(f'Estudiante: {estudiante_2}')
catalogo[catalogo['oid'].isin(recommendations_combined_score_3)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 985


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
2,17022c9ceac94ec5b2e7bc934c7b2d6f,55,567,0.310062,0.2238,0.368886,0.272727,0.155779
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
8,37e4f4a1e8174e9496d21b00d67fc8f1,55,567,0.310585,0.224322,0.369408,0.272727,0.155779
9,3d7d9e64ac0846caadc08dccf2538f55,39,551,0.281954,0.220811,0.355483,0.191919,0.147739
11,4d51d13b9a2848a2803f7d1143fde6d4,39,551,0.281431,0.220289,0.354961,0.191919,0.147739
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
31,abc052e584734d0f8121d5e5ca659f82,39,551,0.276993,0.21585,0.350522,0.191919,0.147739
33,b16016fbdbf04317871d334cc4da6eae,71,583,0.340652,0.229269,0.384769,0.353535,0.163819
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739


In [34]:
# COMPLEXITY 8 BITS NORM
print(f'Estudiante: {estudiante_2}')
catalogo[catalogo['oid'].isin(recommendations_comp_8bits_norm)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 985


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
2,17022c9ceac94ec5b2e7bc934c7b2d6f,55,567,0.310062,0.2238,0.368886,0.272727,0.155779
6,29f15ef8dc32426f945f64e28c910a57,19,531,0.273417,0.243674,0.302829,0.090909,0.137688
8,37e4f4a1e8174e9496d21b00d67fc8f1,55,567,0.310585,0.224322,0.369408,0.272727,0.155779
11,4d51d13b9a2848a2803f7d1143fde6d4,39,551,0.281431,0.220289,0.354961,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
31,abc052e584734d0f8121d5e5ca659f82,39,551,0.276993,0.21585,0.350522,0.191919,0.147739
33,b16016fbdbf04317871d334cc4da6eae,71,583,0.340652,0.229269,0.384769,0.353535,0.163819
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
38,c0d9a3b2f7be46dab1f792c7fbd365b9,39,551,0.287828,0.226686,0.361358,0.191919,0.147739


In [35]:
# COMPLEXITY 12 BITS
print(f'Estudiante: {estudiante_2}')
catalogo[catalogo['oid'].isin(recommendations_complexity_12bits)] # .sort_values(by=["complexity"], ascending=True)

Estudiante: 985


Unnamed: 0,oid,complexity_8bits,complexity_12bits,combined_score_1,combined_score_2,combined_score_3,comp_8bits_norm,comp_12bits_norm
1,16f619db31204ded9418136c4587ddd8,17,529,0.269234,0.242631,0.269234,0.080808,0.136683
8,37e4f4a1e8174e9496d21b00d67fc8f1,55,567,0.310585,0.224322,0.369408,0.272727,0.155779
11,4d51d13b9a2848a2803f7d1143fde6d4,39,551,0.281431,0.220289,0.354961,0.191919,0.147739
12,4d5ed23727c04a8790f2e4ab3f09767e,39,551,0.285478,0.224336,0.359008,0.191919,0.147739
21,7da6cedd04c44a15b5e421440253acff,39,551,0.281301,0.220158,0.35483,0.191919,0.147739
28,9e02c76901da440ba1b70f0a75d8ae89,39,551,0.298403,0.23726,0.371932,0.191919,0.147739
33,b16016fbdbf04317871d334cc4da6eae,71,583,0.340652,0.229269,0.384769,0.353535,0.163819
37,c0a2cd8059d44afb822a031066678092,39,551,0.300491,0.239349,0.374021,0.191919,0.147739
38,c0d9a3b2f7be46dab1f792c7fbd365b9,39,551,0.287828,0.226686,0.361358,0.191919,0.147739
44,db7987d040dc469a9c247d54dd72939a,19,531,0.278247,0.248504,0.307659,0.090909,0.137688


# PRUEBAS EN RECHAZADOS