In [None]:
from globals import *
from cpfunctions import *

In [None]:
# Load the dataset from the configured path.
df = pd.read_csv(FILE_PATH)

# Replace the class labels with integer codes. The fitted encoder is kept in
# `le` because it is used further down to map codes back to the original labels.
le = LabelEncoder()
encoded_labels = le.fit_transform(df["label"])
df["label"] = encoded_labels

In [None]:
# Leave-one-user-out evaluation of the conformal classifiers.
#
# For every iteration and every user, the models are fitted and calibrated on
# the data of all *other* users and evaluated on a stratified sample of the
# held-out user's data. One result row is stored per (test sample, model).
unique_users = np.unique(df["userid"])

# Column-wise accumulators for the results table.
it = []
userid = []
method = []
groundTruth = []
prediction = []
predictionSet = []
scores = []  # prediction scores as produced by the underlying model.
pvalues = []

for iteration_idx in range(ITERATIONS):

    curr_it = 1 + iteration_idx  # 1-based iteration number.

    print(f"Iteration {curr_it}")

    # A different (but reproducible) seed per iteration.
    random_seed = 100 + curr_it

    for u in unique_users:

        # Boolean mask selecting the held-out (target) user; computed once and
        # reused for both partitions.
        is_target = df["userid"] == u
        target = df[is_target]
        allother = df[~is_target]

        # Features and class for the target user.
        y_target = target["label"]
        X_target = target.drop(["label", "userid"], axis=1)

        # Features and class for all other users.
        y_other = allother["label"]
        X_other = allother.drop(["label", "userid"], axis=1)

        # Split the other users' data into train and calibration sets.
        X_train, X_calib, y_train, y_calib = train_test_split(
            X_other, y_other,
            train_size=PCT_TRAIN,
            stratify=y_other,
            random_state=random_seed)

        # Use a stratified sample of the target user's data for testing.
        # The remainder of the split (X_dummy, y_dummy) is discarded so the
        # experiment is comparable with the target user's calibrated model.
        X_test, X_dummy, y_test, y_dummy = train_test_split(
            X_target, y_target,
            train_size=PCT_TARGET_TEST,
            stratify=y_target,
            random_state=random_seed)

        # Normalize data; the scaler is fitted on the training set only.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_calib = scaler.transform(X_calib)
        X_test = scaler.transform(X_test)

        # Fit the models. Each entry is used below as (name, fitted estimator).
        classifiers = fit_models(X_train, y_train, random_seed)

        # The ground truth does not depend on the model, so decode it once.
        truth_labels = le.inverse_transform(y_test)

        # Calibration labels as a plain array, used to index probability columns.
        calib_labels = np.asarray(y_calib)

        # Build the conformal models.
        for model in classifiers:
            model_name, estimator = model[0], model[1]

            cp = MapieClassifier(estimator=estimator,
                                 cv="prefit",
                                 method="score",
                                 random_state=random_seed)

            cp.fit(X_calib, y_calib)

            y_pred, y_set = cp.predict(X_test, alpha=ALPHA)

            # Drop only the trailing alpha axis. A bare np.squeeze would also
            # collapse the sample (or class) axis whenever it has length 1,
            # e.g. when the target user contributes a single test sample.
            y_set = np.squeeze(y_set, axis=2)

            #### Append results ####
            n = len(y_pred)

            # Iteration
            it.extend([curr_it] * n)

            # User id (kept as it appears in the data, whatever its dtype).
            userid.extend([u] * n)

            # Method name
            method.extend([model_name] * n)

            # Ground truth
            groundTruth.extend(truth_labels)

            # Prediction
            prediction.extend(le.inverse_transform(y_pred))

            # Prediction set: class names selected by each sample's mask.
            predictionSet.extend(["|".join(le.classes_[mask]) for mask in y_set])

            # Predicted scores of the classes included in each prediction set.
            pred_scores = estimator.predict_proba(X_test)
            scores.extend(["|".join(row[mask].astype(str))
                           for row, mask in zip(pred_scores, y_set)])

            # Compute p-values from the nonconformity scores (1 - probability).
            # NOTE(review): indexing probability columns by the encoded label
            # assumes column j corresponds to encoded class j, i.e. that every
            # class is present in the training split -- confirm.
            cal_probs = estimator.predict_proba(X_calib)
            prob_true_class = cal_probs[np.arange(len(calib_labels)), calib_labels]
            calib_scores = 1 - prob_true_class
            test_scores = 1 - pred_scores
            arr_pvalues = compute_pvalues(calib_scores, test_scores)
            # Indexed access on purpose: a row-count mismatch in the helper's
            # output still raises instead of silently misaligning columns.
            pvalues.extend(["|".join(arr_pvalues[row_idx, :].astype(str))
                            for row_idx in range(n)])

# Store results in data frame.
d = {'it': it,
     'userid': userid,
     'method': method,
     'groundTruth': groundTruth,
     'prediction': prediction,
     'predictionSet': predictionSet,
     'scores': scores,
     'pvalues': pvalues}

results = pd.DataFrame(d)

save_df(results, DATASET_PATH, "ui", "results.csv")

print("Done!")