In [None]:
from globals import *
from cpfunctions import *

In [None]:
# Load the dataset from the configured CSV path.
df = pd.read_csv(FILE_PATH)

# Swap the "class" column for integer codes. The encoder is kept in `le`
# so the codes can be mapped back to the original labels later on.
le = LabelEncoder()
df["class"] = le.fit_transform(df["class"])

# Target vector: the (now integer-encoded) class column.
y = df["class"]

# Feature matrix: every column except the label.
X = df.drop(columns=["class"])

# Feature names, in column order.
colnames = X.columns.tolist()

# Hand the encoder's class ordering to the project helper that stores it.
save_classes_order(pd.DataFrame(le.classes_), DATASET_PATH)

In [None]:
# Parallel result accumulators: each is an independent list, and position k
# across all of them describes the same row of the final results table.
it, method = [], []
groundTruth, prediction, predictionSet = [], [], []
scores, pvalues = [], []  # scores: prediction scores as produced by the underlying model.

# Repeat the whole split / fit / calibrate / predict experiment ITERATIONS
# times, appending one row per test sample per model to the result lists.
for i in range(ITERATIONS):

    curr_it = 1 + i  # current iteration (1-based).

    print("Iteration " + str(curr_it))

    # Derive the seed from the iteration number so that every repetition
    # gets its own split while the whole run stays reproducible.
    random_seed = 100 + curr_it

    # Split into train, calibration, and test sets: first carve out the
    # training part, then divide what is left into calibration and test.
    X_train, X_rest, y_train, y_rest = train_test_split(X, y,
                                                        train_size = PCT_TRAIN,
                                                        stratify = y,
                                                        random_state = random_seed)

    X_calib, X_test, y_calib, y_test = train_test_split(X_rest, y_rest,
                                                        train_size = PCT_CALIBRATION,
                                                        stratify = y_rest,
                                                        random_state = random_seed)

    # Fit the models. Each entry is indexed below as [0] = model type name
    # and [1] = estimator (presumably already fitted, since it is wrapped
    # with cv="prefit" -- confirm against fit_models_2v).
    classifiers = fit_models_2v(X_train, y_train, random_seed)

    # Build the conformal models.
    for model in classifiers:

        modeltype = model[0]
        estimator = model[1]

        # Choose the feature columns for this model: single-view models use
        # only the columns whose name contains "<view>_", while the
        # aggregated and mvcs models use every column.
        if modeltype in ['v1','v2','v3']:
            selectedcols = [x for x in colnames if modeltype + '_' in x]
        elif modeltype in ("aggregated", "mvcs"):
            selectedcols = colnames
        else:
            # Without this branch an unknown type would either hit a
            # NameError or silently reuse the previous model's columns.
            raise ValueError("Unknown model type: " + repr(modeltype))

        # Slice once and reuse for calibration, prediction and p-values.
        X_calib_sel = X_calib.loc[:, selectedcols]
        X_test_sel = X_test.loc[:, selectedcols]

        cp = MapieClassifier(estimator=estimator,
                               cv="prefit",
                               method="score",
                               random_state=random_seed,
                               n_jobs=NUMCORES)

        cp.fit(X_calib_sel, y_calib)

        y_pred, y_set = cp.predict(X_test_sel, alpha=ALPHA)

        # Drop only the trailing alpha axis. A bare np.squeeze would also
        # collapse the sample axis when there is a single test sample.
        # NOTE(review): assumes ALPHA is a single value, so that axis has
        # length 1; np.squeeze raises ValueError otherwise.
        y_set = np.squeeze(y_set, axis=-1)

        #### Append results ####
        n = len(y_pred)

        # Iteration
        it.extend([curr_it] * n)

        # Method name
        method.extend([modeltype] * n)

        # Ground truth
        groundTruth.extend(le.inverse_transform(y_test))

        # Prediction
        prediction.extend(le.inverse_transform(y_pred))

        # Prediction set: labels selected by each row's boolean mask,
        # joined with "|".
        predictionSet.extend(["|".join(le.classes_[y_set[k]]) for k in range(n)])

        # Predicted scores: the model's probabilities for the classes that
        # made it into the prediction set, joined with "|".
        pred_scores = estimator.predict_proba(X_test_sel)
        scores.extend(["|".join(pred_scores[k, y_set[k]].astype(str)) for k in range(n)])

        # Compute p-values. The nonconformity score is 1 - probability:
        # of the true class for calibration samples, and of every candidate
        # class for test samples.
        # NOTE(review): indexing by y_calib assumes predict_proba column j
        # corresponds to encoded label j -- confirm for each estimator.
        cal_probs = estimator.predict_proba(X_calib_sel)
        prob_true_class = cal_probs[np.arange(len(X_calib_sel)), np.asarray(y_calib)]
        calib_scores = 1 - prob_true_class
        test_scores = 1 - pred_scores
        # compute_pvalues is a project helper; its result is indexed as a
        # 2-D array with one row per test sample.
        arr_pvalues = compute_pvalues(calib_scores, test_scores)
        pvalues.extend(["|".join(arr_pvalues[k,:].astype(str)) for k in range(n)])

# Assemble the accumulated lists into one table, one column per list.
d = dict(
    it=it,
    method=method,
    groundTruth=groundTruth,
    prediction=prediction,
    predictionSet=predictionSet,
    scores=scores,
    pvalues=pvalues,
)

results = pd.DataFrame(data=d)

# Pass the table to the project's save helper.
save_df(results, DATASET_PATH, "1", "results.csv")

print("Done!")