In [1]:
from scipy.io import arff
import pandas as pd
import numpy as np

from ctbn import CTBN
from sklearn.multioutput import ClassifierChain
from sklearn.linear_model import LogisticRegression

In [2]:
def get_data_numpy(data_arff):
    df = pd.DataFrame(train_data[0])
    features, labels = df.columns[:294], df.columns[294:]
    X, Y = df[features].to_numpy(), np.uint8(df[labels].to_numpy())
    return X, Y

In [3]:
def calc_ema(y_test, y_pred):
    tot_samples = y_test.shape[0]
    num_exact_equal_rows = np.sum(np.all(y_test == y_pred, axis=1))
    return (num_exact_equal_rows/tot_samples) * 100

def calc_cll_loss(log_probs):
    return -1 * np.sum(log_probs)

In [4]:
train_data = arff.loadarff("data/scene/scene-train.arff")
X_train, Y_train = get_data_numpy(train_data)

test_data = arff.loadarff("data/scene/scene-test.arff")
X_test, Y_test = get_data_numpy(test_data)

In [5]:
model = CTBN()
model.fit(X_train, Y_train)

In [6]:
predictions = np.zeros_like(Y_test)
log_probs = []
prediction = [None] * Y_test.shape[1]
for i, sample in enumerate(X_test):
    max_prob, pred = model.predict(sample)
    for node, value in pred.items():
        prediction[int(node)-1] = value 
    predictions[i] = prediction
    log_probs.append(max_prob)
print(f"The exact match accuracy of the CTBN model is {calc_ema(Y_test, predictions)}")

The exact match accuracy of the CTBN model is 77.20891824938067


In [7]:
base_lr = LogisticRegression(solver='lbfgs', random_state=0, max_iter=1000)
chain = ClassifierChain(base_lr, order='random', random_state=0)
chain.fit(X_train, Y_train)
chain_predictions = chain.predict(X_test)
print(f"The exact match accuracy of the classifier chain model is {calc_ema(Y_test, chain_predictions)}")

The exact match accuracy of the classifier chain model is 78.77786952931461
