# Collaborative Filtering

This notebook executes the experiments for Collaborative Filtering, which is only performed with reference classes.

The experiments are separated into 3 versions.

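Note: The code for the different versions and parts differs only in the class-to-reference-class dictionary and the conf dictionary used. Before running version 2 or version 3, uncomment the corresponding restriction in the data-loading cell below, then re-run that cell and the cell that splits the dictionary into four parts.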

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import datetime


import sys
import os
sys.path.append(os.path.abspath('../../sources'))

import config
import utils
import training_general
import training_with_rc
from data_preparation import determine_reference_classes

In [None]:
# read data
df = utils.read_data_file("final_data_main_approach.csv")
# keep an untouched copy; every experiment cell below restarts from df_orig
df_orig = df.copy()
print(df.shape)

# get dictionary with reference classes (presumably keyed by class id; the
# experiment loops below iterate over its items as `cid, cid_dict`)
class_to_reference_class = determine_reference_classes.get_reference_classes(df)

# Version 2 only: uncomment to restrict the dictionary with
# "ref_classes_restricted.csv", then re-run the cell that splits the
# dictionary into parts so the parts reflect the restriction.
#class_to_reference_class = determine_reference_classes.restrict_c2rc_dict(
#    class_to_reference_class, "ref_classes_restricted.csv"
#)

# Version 3 only: uncomment to restrict the dictionary with
# "ref_classes_restricted_random.csv", then re-run the cell that splits the
# dictionary into parts so the parts reflect the restriction.
#class_to_reference_class = determine_reference_classes.restrict_c2rc_dict(
#    class_to_reference_class, "ref_classes_restricted_random.csv"
#)

In [None]:
# Split class_to_reference_class into 4 consecutive parts (in the dictionary's
# iteration order) so the experiments can be run and saved part by part.
cut1 = 350
cut2 = 600
cut3 = 1050

# materialize the items once instead of rebuilding the list for every part
c2rc_items = list(class_to_reference_class.items())
c2rc_part1 = dict(c2rc_items[:cut1])
c2rc_part2 = dict(c2rc_items[cut1:cut2])
c2rc_part3 = dict(c2rc_items[cut2:cut3])
c2rc_part4 = dict(c2rc_items[cut3:])

# sizes of the four parts; a part is empty if the dictionary has fewer entries
# than the preceding cut
print(len(c2rc_part1), len(c2rc_part2), len(c2rc_part3), len(c2rc_part4))

### Version 1: User-based Approaches without Significance Weighting --> all reference classes

In [None]:
def get_conf_version1(filename_suffix: str) -> dict:
    """Build the conf dictionary for version 1.

    Version 1 uses kNN models (``config.CFModelType.KNN``) with manhattan
    similarity and no significance weighting, for every combination of the
    prediction functions "resnick"/"weightavg" and k in {3, 5}.

    Args:
        filename_suffix: Stored under ``conf["saving_file"]["filename_suffix"]``;
            the cells below pass "part1" ... "part4".

    Returns:
        A freshly built conf dictionary (new lists and dicts on every call).
    """
    class_metrics = [
        config.ClassMetrics.ACC,
        config.ClassMetrics.F1,
        config.ClassMetrics.PREC,
        config.ClassMetrics.REC,
    ]
    info_cols = [
        config.InfoCols.NUM_UT_PROBS,
        config.InfoCols.NUM_IU_PROBS,
        config.InfoCols.MEAN_UT_PERF,
        config.InfoCols.MEAN_IU_PERF,
        config.InfoCols.NUM_STUD_RC,
        config.InfoCols.MAX_NUM_IU_PROBS_RC,
        config.InfoCols.MEAN_IU_PERF_RC,
        config.InfoCols.MEAN_UT_PERF_RC,
    ]
    # one model per (prediction function, k) pair, ordered by prediction
    # function first and k second
    models = [
        {
            "model_type": config.CFModelType.KNN,
            "sim": "manhattan",
            "pred": pred,
            "k": k,
        }
        for pred in ("resnick", "weightavg")
        for k in (3, 5)
    ]
    saving_file = {
        "folder": "collaborative_filtering",
        "filename": "version1",
        "filename_suffix": filename_suffix,
    }

    return {
        "lim": [0.3, 0.5, 0.7, config.LimType.DYNAMIC],
        "eval_groups": ["info_cols", "reg_metrics", "class_metrics"],
        "reg_metrics": [config.RegMetrics.MAE, config.RegMetrics.MSE],
        "class_metrics": class_metrics,
        "info_cols": info_cols,
        "method": config.RecMethod.CF,
        "with_ref_class": True,
        "models": models,
        "saving_file": saving_file,
    }


# passed to training_general.check_conf in the experiment cells below
save_file = True

##### Part 1

In [None]:
# Version 1, part 1: run the predictions for all classes in c2rc_part1.
c2rc = c2rc_part1.copy()
df = df_orig.copy()

conf = get_conf_version1(filename_suffix="part1")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:10].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df1 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-1 predictions and save the results.
pred_df = pred_df1.copy()
# Rebuild the part-1 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version1(filename_suffix="part1")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 2

In [None]:
# Version 1, part 2: run the predictions for all classes in c2rc_part2.
c2rc = c2rc_part2.copy()
df = df_orig.copy()

conf = get_conf_version1(filename_suffix="part2")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df2 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-2 predictions and save the results.
pred_df = pred_df2.copy()
# Rebuild the part-2 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version1(filename_suffix="part2")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 3

In [None]:
# Version 1, part 3: run the predictions for all classes in c2rc_part3.
c2rc = c2rc_part3.copy()
df = df_orig.copy()

conf = get_conf_version1(filename_suffix="part3")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df3 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-3 predictions and save the results.
pred_df = pred_df3.copy()
# Rebuild the part-3 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version1(filename_suffix="part3")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 4

In [None]:
# Version 1, part 4: run the predictions for all classes in c2rc_part4.
c2rc = c2rc_part4.copy()
df = df_orig.copy()

conf = get_conf_version1(filename_suffix="part4")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df4 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-4 predictions and save the results.
pred_df = pred_df4.copy()
# Rebuild the part-4 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version1(filename_suffix="part4")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

### Version 2: User-based Approaches with Significance Weighting --> restricted reference classes

In [None]:
def get_conf_version2(filename_suffix: str) -> dict:
    """Build the conf dictionary for version 2.

    Version 2 uses kNN models (``config.CFModelType.KNN``) with manhattan
    similarity and significance weighting, for every combination of the
    prediction functions "resnick"/"weightavg", k in {3, 5} and T in {10, 20}.

    Args:
        filename_suffix: Stored under ``conf["saving_file"]["filename_suffix"]``;
            the cells below pass "part1" ... "part4".

    Returns:
        A freshly built conf dictionary (new lists and dicts on every call).
    """
    class_metrics = [
        config.ClassMetrics.ACC,
        config.ClassMetrics.F1,
        config.ClassMetrics.PREC,
        config.ClassMetrics.REC,
    ]
    info_cols = [
        config.InfoCols.NUM_UT_PROBS,
        config.InfoCols.NUM_IU_PROBS,
        config.InfoCols.MEAN_UT_PERF,
        config.InfoCols.MEAN_IU_PERF,
        config.InfoCols.NUM_STUD_RC,
        config.InfoCols.MAX_NUM_IU_PROBS_RC,
        config.InfoCols.MEAN_IU_PERF_RC,
        config.InfoCols.MEAN_UT_PERF_RC,
    ]
    # one model per (prediction function, k, T) triple; T varies fastest,
    # then k, then the prediction function
    models = [
        {
            "model_type": config.CFModelType.KNN,
            "sim": "manhattan",
            "weight": "significance",
            "T": threshold,
            "pred": pred,
            "k": k,
        }
        for pred in ("resnick", "weightavg")
        for k in (3, 5)
        for threshold in (10, 20)
    ]
    saving_file = {
        "folder": "collaborative_filtering",
        "filename": "version2",
        "filename_suffix": filename_suffix,
    }

    return {
        # single limit only; version 1 uses
        # [0.3, 0.5, 0.7, config.LimType.DYNAMIC] instead
        "lim": [0.5],
        "eval_groups": ["info_cols", "reg_metrics", "class_metrics"],
        "reg_metrics": [config.RegMetrics.MAE, config.RegMetrics.MSE],
        "class_metrics": class_metrics,
        "info_cols": info_cols,
        "method": config.RecMethod.CF,
        "with_ref_class": True,
        "models": models,
        "saving_file": saving_file,
    }


# passed to training_general.check_conf in the experiment cells below
save_file = True

##### Part 1

In [None]:
# Version 2, part 1: run the predictions for all classes in c2rc_part1.
c2rc = c2rc_part1.copy()
df = df_orig.copy()

conf = get_conf_version2(filename_suffix="part1")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:5].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df1 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-1 predictions and save the results.
pred_df = pred_df1.copy()
# Rebuild the part-1 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version2(filename_suffix="part1")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 2

In [None]:
# Version 2, part 2: run the predictions for all classes in c2rc_part2.
c2rc = c2rc_part2.copy()
df = df_orig.copy()

conf = get_conf_version2(filename_suffix="part2")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df2 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-2 predictions and save the results.
pred_df = pred_df2.copy()
# Rebuild the part-2 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version2(filename_suffix="part2")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 3

In [None]:
# Version 2, part 3: run the predictions for all classes in c2rc_part3.
c2rc = c2rc_part3.copy()
df = df_orig.copy()

conf = get_conf_version2(filename_suffix="part3")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df3 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-3 predictions and save the results.
pred_df = pred_df3.copy()
# Rebuild the part-3 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version2(filename_suffix="part3")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 4

In [None]:
# Version 2, part 4: run the predictions for all classes in c2rc_part4.
c2rc = c2rc_part4.copy()
df = df_orig.copy()

conf = get_conf_version2(filename_suffix="part4")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df4 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# Evaluate the part-4 predictions and save the results.
pred_df = pred_df4.copy()
# Rebuild the part-4 conf (as the version 3 cells do) so this cell does not
# pick up a conf left over from a different part when cells run out of order.
conf = get_conf_version2(filename_suffix="part4")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

### Version 3: Item-based Approaches --> restricted random reference classes

In [None]:
def get_conf_version3(filename_suffix: str) -> dict:
    """Build the conf dictionary for version 3.

    Version 3 uses item-based kNN models (``config.CFModelType.KNN_ITEM``) with
    manhattan similarity and k = 5: first the two prediction functions
    "weightavg"/"resnick" without weighting, then the same two with
    significance weighting (T = 10).

    Args:
        filename_suffix: Stored under ``conf["saving_file"]["filename_suffix"]``;
            the cells below pass "part1" ... "part4".

    Returns:
        A freshly built conf dictionary (new lists and dicts on every call).
    """
    class_metrics = [
        config.ClassMetrics.ACC,
        config.ClassMetrics.F1,
        config.ClassMetrics.PREC,
        config.ClassMetrics.REC,
    ]
    info_cols = [
        config.InfoCols.NUM_UT_PROBS,
        config.InfoCols.NUM_IU_PROBS,
        config.InfoCols.MEAN_UT_PERF,
        config.InfoCols.MEAN_IU_PERF,
        config.InfoCols.NUM_STUD_RC,
        config.InfoCols.MAX_NUM_IU_PROBS_RC,
        config.InfoCols.MEAN_IU_PERF_RC,
        config.InfoCols.MEAN_UT_PERF_RC,
    ]
    # optional weighting entries, placed between "sim" and "pred" in each model
    no_weighting = {}
    significance_weighting = {"weight": "significance", "T": 10}
    models = [
        {
            "model_type": config.CFModelType.KNN_ITEM,
            "sim": "manhattan",
            **weighting,
            "pred": pred,
            "k": 5,
        }
        for weighting in (no_weighting, significance_weighting)
        for pred in ("weightavg", "resnick")
    ]
    saving_file = {
        "folder": "collaborative_filtering",
        "filename": "version3",
        "filename_suffix": filename_suffix,
    }

    return {
        # single limit only; version 1 uses
        # [0.3, 0.5, 0.7, config.LimType.DYNAMIC] instead
        "lim": [0.5],
        "eval_groups": ["info_cols", "reg_metrics", "class_metrics"],
        "reg_metrics": [config.RegMetrics.MAE, config.RegMetrics.MSE],
        "class_metrics": class_metrics,
        "info_cols": info_cols,
        "method": config.RecMethod.CF,
        "with_ref_class": True,
        "models": models,
        "saving_file": saving_file,
    }


# passed to training_general.check_conf in the experiment cells below
save_file = True

##### Part 1

In [None]:
# Version 3, part 1 setup: select the classes, reset the data and build the conf.
c2rc = c2rc_part1.copy()
# restart from the unmodified data, since df is overwritten further down
df = df_orig.copy()

conf = get_conf_version3(filename_suffix="part1")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# read by the prediction cell below
with_rc = conf["with_ref_class"]

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

In [None]:
# Version 3, part 1: run the predictions for all classes in c2rc (uses conf,
# with_rc, df, ass_seq and stud_per_class from the setup cell above).

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
if not with_rc:
    raise NotImplementedError

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:4].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df1 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# start from the part-1 predictions stored by the previous cell
pred_df = pred_df1.copy()
# rebuild the part-1 conf so this cell does not rely on whichever conf was
# assigned last
conf = get_conf_version3(filename_suffix="part1")

# evaluate predictions and save
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 2

In [None]:
# Version 3, part 2: run the predictions for all classes in c2rc_part2.
c2rc = c2rc_part2.copy()
df = df_orig.copy()

conf = get_conf_version3(filename_suffix="part2")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df2 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# evaluate predictions and save
# start from the part-2 predictions stored by the previous cell
pred_df = pred_df2.copy()
# rebuild the part-2 conf so this cell does not rely on whichever conf was
# assigned last
conf = get_conf_version3(filename_suffix="part2")
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 3

In [None]:
# Version 3, part 3: run the predictions for all classes in c2rc_part3.
c2rc = c2rc_part3.copy()
df = df_orig.copy()

conf = get_conf_version3(filename_suffix="part3")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df3 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# evaluate predictions and save
# start from the part-3 predictions stored by the previous cell
pred_df = pred_df3.copy()
# rebuild the part-3 conf so this cell does not rely on whichever conf was
# assigned last
conf = get_conf_version3(filename_suffix="part3")
training_general.evaluate_predictions_and_save(pred_df, conf)

##### Part 4

In [None]:
# Version 3, part 4: run the predictions for all classes in c2rc_part4.
c2rc = c2rc_part4.copy()
df = df_orig.copy()

conf = get_conf_version3(filename_suffix="part4")

# check validity of conf dictionary
training_general.check_conf(conf, save_file=save_file)

# Only the reference-class variant is implemented; fail once, up front,
# instead of re-checking the flag for every class inside the loop.
with_rc = conf["with_ref_class"]
if not with_rc:
    raise NotImplementedError

# create dataframes
df, ass_seq, stud_per_class = training_general.create_dataframes(df)

# create empty prediction dataframe for this part
index = training_with_rc.get_idx_pred_df(c2rc)
pred_df = training_general.initialize_pred_df(index=index, conf=conf)

# To try the loop on a few classes only, iterate over list(c2rc.items())[:3].
for count, (cid, cid_dict) in enumerate(c2rc.items(), start=1):
    # Store the results for cid in its rows of pred_df. reindex() puts the
    # returned rows in pred_df's row order because to_numpy() assigns by
    # position (column order is assumed to match pred_df -- TODO confirm).
    pred_df.loc[cid] = (
        training_with_rc.perform_predictions_for_cid(
            conf, cid, cid_dict, df, ass_seq, stud_per_class
        )
        .reindex(pred_df.loc[cid].index)
        .to_numpy()
    )

    # progress report every 10 classes
    if count % 10 == 0:
        d = datetime.datetime.now()
        print(f"{count} classes completed, last cid: {cid}, time: {d}")

In [None]:
# Drop every row whose y_true entry is NaN (dropna(subset=["y_true"]) looks at
# that column only, not at whether the whole row is NaN).
pred_df = pred_df.dropna(subset=["y_true"])
# part-specific copy; the evaluation cell below starts from it
pred_df4 = pred_df.copy()
print(len(pred_df))  # number of remaining rows

# save the predictions together with their index (save_idx=True); the target
# is presumably derived from conf["saving_file"] -- verify in utils
utils.save_predictions(pred_df, conf, save_idx=True)

In [None]:
# evaluate predictions and save
# start from the part-4 predictions stored by the previous cell
pred_df = pred_df4.copy()
# rebuild the part-4 conf so this cell does not rely on whichever conf was
# assigned last
conf = get_conf_version3(filename_suffix="part4")
training_general.evaluate_predictions_and_save(pred_df, conf)