# Ordinal Classification

In this project, the IUCN classes that form our response variable take the form of an ordinal system ranging from "LC" -- least concern -- to "CR" -- critically endangered. To move beyond binary classification, we can attempt to predict the specific class that each group belongs to. However, multiclass approaches will (by default) not account for the pseudo-numeric differences between our different classes.

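One approach is to recast the learning objective as a series of binary classification tasks [by decomposing the ordinal classes into sequential threshold problems](https://www.cs.waikato.ac.nz/~eibe/pubs/ordinal_tech_report.pdf). This approach is conceptually similar to a one-vs-rest classifier: each binary model estimates the probability that an observation lies above a given class, and the individual class probabilities are recovered from those estimates: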

* $P(y_i = LC) = 1 - P(y_i > LC)$
* $P(y_i = NT) = P(y_i > LC) - P(y_i > NT)$
* $P(y_i = VU) = P(y_i > NT) - P(y_i > VU)$
* $P(y_i = EN) = P(y_i > VU) - P(y_i > EN)$
* $P(y_i = CR) = P(y_i > EN)$

Each observation is then assigned the class with the highest estimated probability.

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [2]:
# Read both feature tables; the first CSV column is used as the row index
features_imp = pd.read_csv("../data/features_imputed.csv", index_col=0)
features_std = pd.read_csv("../data/features_std.csv", index_col=0)

# The response is the "Red List status" column of the standardized table
y_full = features_std.loc[:, "Red List status"]

In [3]:
# Defining new response variables 
def return_y_ordinal(y_dat):
    """Encode class labels as cumulative binary threshold indicators.

    The classes are treated as ordered LC < NT < VU < EN < CR. Each output
    column answers "is the label this class or a more severe one?", so
    "LC" maps to (0, 0, 0, 0), "VU" to (1, 1, 0, 0) and "CR" to (1, 1, 1, 1).

    Parameters
    ----------
    y_dat : pandas.Series (or anything accepted by ``pd.get_dummies``)
        Class labels. Classes missing from ``y_dat`` are allowed and simply
        produce all-zero indicator columns.

    Returns
    -------
    pandas.DataFrame
        Integer 0/1 frame with columns ["NT", "VU", "EN", "CR"] and the same
        index as ``y_dat``.
    """
    thresholds = ["NT", "VU", "EN", "CR"]  # ordered classes above the "LC" baseline

    # reindex (instead of drop + column selection) so that a class that does not
    # occur in y_dat yields a zero column rather than a KeyError; the int cast
    # keeps the output 0/1 regardless of the dummy dtype pandas produces.
    one_hot = (
        pd.get_dummies(y_dat)
        .reindex(columns=thresholds, fill_value=0)
        .astype(int)
    )

    # Reverse cumulative sum across columns: a column becomes 1 whenever the
    # label is that class or any class to its right (i.e. more severe).
    resp_ordinal = one_hot.iloc[:, ::-1].cumsum(axis=1).iloc[:, ::-1]

    return resp_ordinal

_Example of response structure:_

An example visualization of this new response variable is presented below.

In [4]:
# Keep one row per distinct encoding and label it with the class it came from
new_y = return_y_ordinal(y_full)
response_example = new_y.drop_duplicates()
response_example.index = y_full[response_example.index]
response_example

Unnamed: 0_level_0,NT,VU,EN,CR
Red List status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LC,0,0,0,0
VU,1,1,0,0
EN,1,1,1,0
CR,1,1,1,1
NT,1,0,0,0


## Defining Ordinal Classifier

In [5]:
def OrdinalClassifier(class_obj, x_train, y_train, x_test, y_test):
    """Fit one binary model per ordinal threshold and collect its probabilities.

    For each of the columns "NT", "VU", "EN" and "CR" of ``y_train`` (in that
    order) ``class_obj`` is refit on ``x_train`` and its ``predict_proba``
    output is evaluated on both ``x_train`` and ``x_test``.

    Parameters
    ----------
    class_obj : estimator exposing ``fit`` and ``predict_proba``
        Refit in place for every threshold; after the call it holds the fit
        for the "CR" column.
    x_train, x_test : feature tables passed straight to the estimator.
    y_train : frame with binary columns "NT", "VU", "EN", "CR".
    y_test : accepted for call compatibility; not used.

    Returns
    -------
    (train_probs, test_probs) : tuple of pandas.DataFrame
        One row per sample and one column (0..3) per threshold, holding
        column 1 of ``predict_proba``. Both frames have a default index.
    """
    train_columns = []
    test_columns = []

    # Work upwards from the lowest ordinal threshold
    for threshold in ("NT", "VU", "EN", "CR"):
        class_obj.fit(x_train, y_train[threshold])
        # Column 1 of predict_proba is taken as the positive-class probability
        train_columns.append(class_obj.predict_proba(x_train)[:, 1])
        test_columns.append(class_obj.predict_proba(x_test)[:, 1])

    # Stack the per-threshold vectors as columns: shape (n_samples, 4)
    train_probs = pd.DataFrame(np.vstack(train_columns).T)
    test_probs = pd.DataFrame(np.vstack(test_columns).T)

    return train_probs, test_probs

__Evaluating Ordinal Classifier__

In [None]:
# 5-fold stratified CV of the ordinal SVC over a small grid of C values
skf = StratifiedKFold(n_splits=5)

classlist = ["LC", "NT", "VU", "EN", "CR"]
mean_accuracy = []

# Columns standardized inside each fold
# (presumably the continuous features -- TODO confirm against the feature table)
numeric_cols = slice(57, 60)

# skf.split yields row *positions*, so features and labels must be row-aligned
assert features_imp.index.equals(y_full.index), \
    "features_imp and y_full must share the same row index"

for c_val in [1,2,3,4,5]:

    accuracy = []

    for train_index, test_index in skf.split(features_imp, y_full):

        # Setting features: positional selection (.iloc) matches the indices
        # returned by the splitter; .copy() keeps the scaling below from ever
        # touching features_imp
        x_train = features_imp.iloc[train_index].copy()
        x_test = features_imp.iloc[test_index].copy()

        # Setting labels (cumulative threshold encoding)
        y_train = return_y_ordinal(y_full.iloc[train_index])
        y_test = return_y_ordinal(y_full.iloc[test_index])

        # Standardizing numeric variables with statistics from the training fold only
        stdscl = StandardScaler()
        x_train.iloc[:, numeric_cols] = stdscl.fit_transform(x_train.iloc[:, numeric_cols])
        x_test.iloc[:, numeric_cols] = stdscl.transform(x_test.iloc[:, numeric_cols])

        # Running ordinal classification; random_state fixes the internal
        # shuffling used for the probability estimates so reruns are reproducible
        classifier = SVC(kernel = "rbf", probability = True, C = c_val, random_state = 0)
        tp, te = OrdinalClassifier(classifier, x_train, y_train, x_test, y_test)

        # te columns are P(y > LC), P(y > NT), P(y > VU), P(y > EN);
        # convert them to per-class probabilities without modifying te
        cum_probs = te.values
        class_probs = np.column_stack([
            1 - cum_probs[:, 0],                    # P(y = LC)
            cum_probs[:, :-1] - cum_probs[:, 1:],   # P(y = NT), P(y = VU), P(y = EN)
            cum_probs[:, -1],                       # P(y = CR)
        ])

        # Selecting max probability in each row as estimated class (position in classlist)
        pred_classes = class_probs.argmax(axis=1)

        accuracy.append(accuracy_score(y_full.iloc[test_index],
                                       [classlist[pred] for pred in pred_classes]))

    print("CV run completed - C = %s" % str(c_val))

    mean_accuracy.append(np.mean(accuracy))

In [None]:
# Mean cross-validated accuracy for each C value tried above
fig, ax = plt.subplots()
ax.plot([1,2,3,4,5], mean_accuracy, marker="o")
ax.set_xticks([1,2,3,4,5])
ax.set(xlabel="SVC regularization parameter C",
       ylabel="Mean cross-validated accuracy",
       title="Ordinal SVC: accuracy vs. C");

In [None]:
# Refit on the full dataset and predict the same rows (in-sample confusion matrix).
# .copy() keeps the scaling below from overwriting features_imp, which other cells reuse.
x_train = features_imp.copy()
y_train = y_test = return_y_ordinal(y_full)

stdscl = StandardScaler()
x_train.iloc[:,57:60] = stdscl.fit_transform(x_train.iloc[:,57:60])
x_test = x_train  # evaluation uses the standardized training rows

# random_state fixes the internal shuffling used for the probability estimates
classifier = SVC(kernel = "rbf", probability = True, C = 1, random_state = 0)
tp, te = OrdinalClassifier(classifier, x_train, y_train, x_test, y_test)

# te columns are P(y > LC), P(y > NT), P(y > VU), P(y > EN);
# convert them to per-class probabilities without modifying te
cum_probs = te.values
class_probs = np.column_stack([
    1 - cum_probs[:, 0],                    # P(y = LC)
    cum_probs[:, :-1] - cum_probs[:, 1:],   # P(y = NT), P(y = VU), P(y = EN)
    cum_probs[:, -1],                       # P(y = CR)
])
pred_classes = class_probs.argmax(axis=1)

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(y_full, [classlist[pred] for pred in pred_classes], labels = classlist)
cm = pd.DataFrame(cm, index = classlist, columns = classlist)

# Each row is divided by its total, so cells show the share of a true class
fig, ax = plt.subplots()
sns.heatmap(cm.div(cm.sum(axis=1), axis=0), annot = True, ax = ax)
ax.set(xlabel="Predicted class", ylabel="True class",
       title="Ordinal SVC (C = 1): row-normalized in-sample confusion matrix");

## Ordinal classification with variable weights

In [None]:
# 5-fold stratified CV of the class-weighted ordinal SVC over a small grid of C values
skf = StratifiedKFold(n_splits=5)

classlist = ["LC", "NT", "VU", "EN", "CR"]
mean_accuracy = []

# Columns standardized inside each fold
# (presumably the continuous features -- TODO confirm against the feature table)
numeric_cols = slice(57, 60)

# skf.split yields row *positions*, so features and labels must be row-aligned
assert features_imp.index.equals(y_full.index), \
    "features_imp and y_full must share the same row index"

for c_val in [1,2,3,4,5]:

    accuracy = []

    for train_index, test_index in skf.split(features_imp, y_full):

        # Setting features: positional selection (.iloc) matches the indices
        # returned by the splitter; .copy() keeps the scaling below from ever
        # touching features_imp
        x_train = features_imp.iloc[train_index].copy()
        x_test = features_imp.iloc[test_index].copy()

        # Setting labels (cumulative threshold encoding)
        y_train = return_y_ordinal(y_full.iloc[train_index])
        y_test = return_y_ordinal(y_full.iloc[test_index])

        # Standardizing numeric variables with statistics from the training fold only
        stdscl = StandardScaler()
        x_train.iloc[:, numeric_cols] = stdscl.fit_transform(x_train.iloc[:, numeric_cols])
        x_test.iloc[:, numeric_cols] = stdscl.transform(x_test.iloc[:, numeric_cols])

        # Running ordinal classification with balanced class weights; random_state
        # fixes the internal shuffling used for the probability estimates
        classifier = SVC(kernel = "rbf", probability = True, C = c_val,
                         class_weight = 'balanced', random_state = 0)
        tp, te = OrdinalClassifier(classifier, x_train, y_train, x_test, y_test)

        # te columns are P(y > LC), P(y > NT), P(y > VU), P(y > EN);
        # convert them to per-class probabilities without modifying te
        cum_probs = te.values
        class_probs = np.column_stack([
            1 - cum_probs[:, 0],                    # P(y = LC)
            cum_probs[:, :-1] - cum_probs[:, 1:],   # P(y = NT), P(y = VU), P(y = EN)
            cum_probs[:, -1],                       # P(y = CR)
        ])

        # Selecting max probability in each row as estimated class (position in classlist)
        pred_classes = class_probs.argmax(axis=1)

        accuracy.append(accuracy_score(y_full.iloc[test_index],
                                       [classlist[pred] for pred in pred_classes]))

    print("CV run completed - C = %s" % str(c_val))

    mean_accuracy.append(np.mean(accuracy))

In [None]:
# Mean cross-validated accuracy for each C value tried above
fig, ax = plt.subplots()
ax.plot([1,2,3,4,5], mean_accuracy, marker="o")
ax.set_xticks([1,2,3,4,5])
ax.set(xlabel="SVC regularization parameter C",
       ylabel="Mean cross-validated accuracy",
       title="Class-weighted ordinal SVC: accuracy vs. C");

In [None]:
# Refit on the full dataset and predict the same rows (in-sample confusion matrix).
# .copy() keeps the scaling below from overwriting features_imp, which other cells reuse.
x_train = features_imp.copy()
y_train = y_test = return_y_ordinal(y_full)

stdscl = StandardScaler()
x_train.iloc[:,57:60] = stdscl.fit_transform(x_train.iloc[:,57:60])
x_test = x_train  # evaluation uses the standardized training rows

# random_state fixes the internal shuffling used for the probability estimates
classifier = SVC(kernel = "rbf", probability = True, C = 1,
                 class_weight = 'balanced', random_state = 0)
tp, te = OrdinalClassifier(classifier, x_train, y_train, x_test, y_test)

# te columns are P(y > LC), P(y > NT), P(y > VU), P(y > EN);
# convert them to per-class probabilities without modifying te
cum_probs = te.values
class_probs = np.column_stack([
    1 - cum_probs[:, 0],                    # P(y = LC)
    cum_probs[:, :-1] - cum_probs[:, 1:],   # P(y = NT), P(y = VU), P(y = EN)
    cum_probs[:, -1],                       # P(y = CR)
])
pred_classes = class_probs.argmax(axis=1)

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(y_full, [classlist[pred] for pred in pred_classes], labels = classlist)
cm = pd.DataFrame(cm, index = classlist, columns = classlist)

# Each row is divided by its total, so cells show the share of a true class
fig, ax = plt.subplots()
sns.heatmap(cm.div(cm.sum(axis=1), axis=0), annot = True, ax = ax)
ax.set(xlabel="Predicted class", ylabel="True class",
       title="Class-weighted ordinal SVC (C = 1): row-normalized in-sample confusion matrix");

## Multi-Class Classification for Comparison

In [None]:
# 5-fold stratified CV of a plain multi-class SVC, as a baseline for the ordinal models
skf = StratifiedKFold(n_splits=5)

classlist = ["LC", "NT", "VU", "EN", "CR"]
mean_accuracy = []

# Columns standardized inside each fold
# (presumably the continuous features -- TODO confirm against the feature table)
numeric_cols = slice(57, 60)

# skf.split yields row *positions*, so features and labels must be row-aligned
assert features_imp.index.equals(y_full.index), \
    "features_imp and y_full must share the same row index"

for c_val in [1,2,3,4,5]:

    accuracy = []

    for train_index, test_index in skf.split(features_imp, y_full):

        # Setting features: positional selection (.iloc) matches the indices
        # returned by the splitter; .copy() keeps the scaling below from ever
        # touching features_imp
        x_train = features_imp.iloc[train_index].copy()
        x_test = features_imp.iloc[test_index].copy()

        # Setting labels (original class labels, no ordinal encoding)
        y_train = y_full.iloc[train_index]
        y_test = y_full.iloc[test_index]

        # Standardizing numeric variables with statistics from the training fold only
        stdscl = StandardScaler()
        x_train.iloc[:, numeric_cols] = stdscl.fit_transform(x_train.iloc[:, numeric_cols])
        x_test.iloc[:, numeric_cols] = stdscl.transform(x_test.iloc[:, numeric_cols])

        # Running multi-class classification. Probability estimates are not
        # requested: only predict() is used below, and enabling them makes
        # every fit run an additional internal cross-validation.
        classifier = SVC(kernel = "rbf", C = c_val,
                         class_weight = 'balanced', decision_function_shape = "ovr")
        classifier.fit(x_train, y_train)

        # Predicted class label for each test row
        pred_classes = classifier.predict(x_test)

        accuracy.append(accuracy_score(y_test, pred_classes))

    print("CV run completed - C = %s" % str(c_val))

    mean_accuracy.append(np.mean(accuracy))

In [None]:
# Mean cross-validated accuracy for each C value tried above
fig, ax = plt.subplots()
ax.plot([1,2,3,4,5], mean_accuracy, marker="o")
ax.set_xticks([1,2,3,4,5])
ax.set(xlabel="SVC regularization parameter C",
       ylabel="Mean cross-validated accuracy",
       title="Multi-class SVC: accuracy vs. C");

In [None]:
# Refit on the full dataset and predict the same rows (in-sample confusion matrix).
# .copy() keeps the scaling below from overwriting features_imp, which other cells reuse.
x_train = features_imp.copy()
y_train = y_test = y_full

stdscl = StandardScaler()
x_train.iloc[:,57:60] = stdscl.fit_transform(x_train.iloc[:,57:60])
x_test = x_train  # evaluation uses the standardized training rows

# Probability estimates are not requested because only predict() is used
classifier = SVC(kernel = "rbf", C = 1,
                 class_weight = 'balanced', decision_function_shape = "ovr")

classifier.fit(x_train, y_train)

pred_classes = classifier.predict(x_test)

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(y_full, pred_classes, labels = classlist)
cm = pd.DataFrame(cm, index = classlist, columns = classlist)

# Each row is divided by its total, so cells show the share of a true class
fig, ax = plt.subplots()
sns.heatmap(cm.div(cm.sum(axis=1), axis=0), annot = True, ax = ax)
ax.set(xlabel="Predicted class", ylabel="True class",
       title="Multi-class SVC (C = 1): row-normalized in-sample confusion matrix");