In [24]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [None]:
# After loading the dataframe:
subgroups = pd.read_csv("subgroup_model_results.csv")

# Assign rank: every subgroup has two consecutive rows, global_baseline gets "N/A"
subgroups['subgroup_rank'] = ((subgroups.index // 2) + 1).astype("object")
subgroups.loc[subgroups['model_type'] == 'global_baseline', 'subgroup_rank'] = "N/A"

# Round columns
subgroups['cookD'] = subgroups['cookD'].round(2)
subgroups['r2'] = subgroups['r2'].round(3)
subgroups['mae'] = subgroups['mae'].round(3)
subgroups['mse'] = subgroups['mse'].round(3)
subgroups['mean_residual'] = subgroups['mean_residual'].round(3)

# Specify desired column order
main_cols = [
    'subgroup_rank', 'model_type', 'description', 'cookD', 'n_train', 'n_test',
    'r2', 'mae', 'mse', 'mean_residual'
]
rest_cols = [c for c in subgroups.columns if c not in main_cols]
new_order = main_cols + rest_cols

# Reorder columns
subgroups = subgroups[new_order]

In [47]:
# Check global model
subgroups.tail(1)

Unnamed: 0,subgroup_rank,model_type,description,cookD,n_train,n_test,r2,mae,mse,mean_residual,intercept,coef__total_attended_labsessions,pval__total_attended_labsessions,coef__active_minutes,pval__active_minutes,coef__nr_distinct_files_viewed,pval__nr_distinct_files_viewed,coef__total_course_activities,pval__total_course_activities,coef__distinct_days,pval__distinct_days,coef__nr_files_viewed,pval__nr_files_viewed,coef__nr_practice_exams_viewed,pval__nr_practice_exams_viewed
30,16,global,,,559,374,0.226249,0.930291,1.359054,0.063027,2.774317,0.064525,7.6e-05,4e-05,0.040861,0.096937,4.528641e-12,0.016668,6.406497e-10,-0.02897,0.007907,-0.079991,4.967337e-12,0.072741,2e-06


In [48]:
# Check subgroup models
subgroups.head(-1)

Unnamed: 0,subgroup_rank,model_type,description,cookD,n_train,n_test,r2,mae,mse,mean_residual,intercept,coef__total_attended_labsessions,pval__total_attended_labsessions,coef__active_minutes,pval__active_minutes,coef__nr_distinct_files_viewed,pval__nr_distinct_files_viewed,coef__total_course_activities,pval__total_course_activities,coef__distinct_days,pval__distinct_days,coef__nr_files_viewed,pval__nr_files_viewed,coef__nr_practice_exams_viewed,pval__nr_practice_exams_viewed
0,1,subgroup_global_baseline,ECTS=='15' ∧ course_repeater==np.False_ ∧ sex=...,19.51085,161,122,-0.35276,0.987781,1.376756,0.431498,2.774317,0.064525,7.6e-05,3.969333e-05,0.040861,0.096937,4.528641e-12,0.016668,6.406497e-10,-0.02897,0.007907,-0.079991,4.967337e-12,0.072741,2e-06
1,1,subgroup_model,ECTS=='15' ∧ course_repeater==np.False_ ∧ sex=...,19.51085,161,122,-0.00474,0.845617,1.022562,-0.046203,4.779713,0.059493,0.011402,2.123436e-05,0.37408,0.074019,0.002725588,0.006153,0.1884854,-0.00205,0.900882,-0.036407,0.06297257,0.056328,0.008924
2,2,subgroup_global_baseline,ECTS=='15' ∧ course_repeater==np.False_,19.391462,222,157,-0.3462,0.935694,1.287818,0.436479,2.774317,0.064525,7.6e-05,3.969333e-05,0.040861,0.096937,4.528641e-12,0.016668,6.406497e-10,-0.02897,0.007907,-0.079991,4.967337e-12,0.072741,2e-06
3,2,subgroup_model,ECTS=='15' ∧ course_repeater==np.False_,19.39146,222,157,-0.006417,0.808524,0.962771,-0.028196,4.952012,0.055126,0.004479,2.180098e-05,0.291611,0.075963,0.0002015234,0.006769,0.07577193,-0.013839,0.323329,-0.038075,0.01825012,0.060359,0.000925
4,3,subgroup_global_baseline,ECTS=='15',17.821683,224,158,-0.352201,0.938447,1.291557,0.425042,2.774317,0.064525,7.6e-05,3.969333e-05,0.040861,0.096937,4.528641e-12,0.016668,6.406497e-10,-0.02897,0.007907,-0.079991,4.967337e-12,0.072741,2e-06
5,3,subgroup_model,ECTS=='15',17.82168,224,158,-0.011619,0.810427,0.96625,-0.025525,4.820753,0.053338,0.00638,2.329117e-05,0.264509,0.080467,8.242501e-05,0.00634,0.09896983,-0.010717,0.445345,-0.036967,0.02311647,0.059345,0.00123
6,4,subgroup_global_baseline,ECTS=='15' ∧ sex=='SEX_M',17.764911,163,123,-0.360281,0.990894,1.380836,0.416847,2.774317,0.064525,7.6e-05,3.969333e-05,0.040861,0.096937,4.528641e-12,0.016668,6.406497e-10,-0.02897,0.007907,-0.079991,4.967337e-12,0.072741,2e-06
7,4,subgroup_model,ECTS=='15' ∧ sex=='SEX_M',17.76491,163,123,-0.009644,0.848712,1.024901,-0.042736,4.644857,0.057681,0.015048,2.330406e-05,0.334634,0.080338,0.001099184,0.005375,0.2540198,0.001517,0.926772,-0.034106,0.0842112,0.055407,0.010796
8,5,subgroup_global_baseline,course_repeater==np.False_ ∧ croho=='B Compute...,12.472851,167,106,-0.263678,1.061566,1.619106,0.435562,2.774317,0.064525,7.6e-05,3.969333e-05,0.040861,0.096937,4.528641e-12,0.016668,6.406497e-10,-0.02897,0.007907,-0.079991,4.967337e-12,0.072741,2e-06
9,5,subgroup_model,course_repeater==np.False_ ∧ croho=='B Compute...,12.47285,167,106,-0.025997,0.951084,1.314573,0.068068,3.428655,0.09656,0.001031,-5.186074e-06,0.838207,0.100617,3.511573e-05,0.009385,0.03917193,-0.002748,0.872128,-0.05322,0.005692475,0.095627,3.4e-05
