# In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from statistics import mean
import copy


# Standardization
def standardization(data, feature_columns=None):
    """Return a copy of *data* with its feature columns z-score standardized.

    Each feature column is transformed to ``(x - mean) / std`` using the
    column's own mean and pandas' default (sample, ``ddof=1``) standard
    deviation. All other columns are returned untouched, and the input
    frame is never modified.

    Args:
        data: DataFrame containing at least the feature columns.
        feature_columns: Names of the columns to standardize. Defaults to
            ``['feat1', 'feat2', 'feat3', 'feat4']``.

    Returns:
        A new DataFrame with the same columns and index as *data*.
    """
    if feature_columns is None:
        feature_columns = ['feat1', 'feat2', 'feat3', 'feat4']
    else:
        feature_columns = list(feature_columns)

    dfs = data.copy()
    features = dfs[feature_columns]

    # A constant column has std == 0 and a single-row column has std == NaN;
    # dividing by either would fill the column with NaN/inf. Using 1 as the
    # divisor instead leaves such columns at 0 (their deviation from the mean).
    std = features.std()
    std = std.where(std > 0, 1.0)

    dfs[feature_columns] = (features - features.mean()) / std
    return dfs


# Calculate c-index
def c_index(true_labels, predictions):
    """Return the concordance index of *predictions* against *true_labels*.

    Every unordered pair of samples whose true labels differ is a
    "comparable" pair. Such a pair scores 1 when the predictions are ordered
    the same way as the true labels, 0.5 when the two predictions are equal,
    and 0 otherwise. The result is the total score divided by the number of
    comparable pairs.

    Args:
        true_labels: Iterable of ground-truth values.
        predictions: Iterable of predicted values, aligned by position with
            *true_labels*.

    Returns:
        The concordance index as a float, or 0.0 when there are no
        comparable pairs (empty input or all true labels equal).
    """
    # Work on plain lists so that positional indexing is well defined for any
    # iterable (e.g. a pandas Series whose index does not start at 0).
    truths = list(true_labels)
    preds = list(predictions)

    comparable = 0
    score = 0
    count = len(truths)
    for i in range(count):
        t_i = truths[i]
        p_i = preds[i]
        # Start at i + 1 so each unordered pair is visited exactly once.
        for j in range(i + 1, count):
            t_j = truths[j]
            p_j = preds[j]
            if t_i == t_j:
                # Equal true labels carry no ordering information.
                continue
            comparable += 1
            if (p_i < p_j and t_i < t_j) or (p_i > p_j and t_i > t_j):
                score += 1
            elif p_i == p_j:
                score += 0.5

    if comparable == 0:
        return 0.0
    return score / comparable


def main(csv_path='paindata.csv', n_subjects=31, n_neighbors=37):
    """Run leave-one-subject-out KNN classification and report c-indexes.

    The CSV is loaded, each subject's feature columns are standardized
    separately, and then for every subject in turn a KNN classifier is
    trained on all other subjects and evaluated on the held-out one. The
    c-index of each fold is printed, followed by the average, minimum and
    maximum over all folds.

    Args:
        csv_path: Path of the CSV file to read.
        n_subjects: Number of subjects; subject ids ``1..n_subjects`` are
            looked up in the ``subject`` column.
        n_neighbors: Number of neighbours used by the KNN classifier.
    """
    feature_columns = ['feat1', 'feat2', 'feat3', 'feat4']

    # Load the data; header=0 together with `names` replaces the file's own
    # header row with the column names given here.
    data = pd.read_csv(csv_path,
                       names=['subject', 'test', 'label', 'label_time', 'feat1', 'feat2', 'feat3', 'feat4'],
                       header=0)

    # Standardize each subject separately and keep features and labels as
    # plain arrays, one entry per subject. Using ndarrays for both fit() and
    # predict() avoids scikit-learn's feature-name mismatch warning that is
    # raised when a model fitted on an ndarray predicts on a DataFrame.
    features = []
    labels = []
    for subject in range(1, n_subjects + 1):
        d = standardization(data[data['subject'] == subject])
        features.append(d[feature_columns].to_numpy())
        labels.append(d['label'].to_numpy())

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights='uniform', metric="euclidean")

    # C-index of every fold
    all_c = []

    # Leave-subject-out cross-validation
    for i in range(n_subjects):
        # Train on every subject except the i-th one
        train_data = np.concatenate(features[:i] + features[i + 1:], axis=0)
        train_labels = np.concatenate(labels[:i] + labels[i + 1:], axis=0)
        knn.fit(train_data, train_labels)

        # Predict for the subject that was left out
        predictions = knn.predict(features[i])
        c = c_index(labels[i].tolist(), predictions)
        all_c.append(c)
        print("C-index for subject", i+1, ":", c)

    # Summary statistics over all folds
    print("\nAverage c-index:", mean(all_c))
    print("Minimum c-index:", min(all_c))
    print("Maximum c-index:", max(all_c))
    
    
# Run the analysis only when executed as a script (or notebook cell), not
# when this module is imported.
if __name__ == "__main__":
    main()

