In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch

from helpers.data_generation.file_management import read_hdf5
from helpers.data_generation.error_generation_ki2 import Residual, CombineDataset
from helpers.model.helpers_model import NeuralNet

import warnings
warnings.filterwarnings('ignore')

In [2]:
ratio = 0.75
percent = np.array([0.005, 0.015, 0.005])
size = 600

batch_size = 50

res = Residual()
res.build(size, ratio = ratio, per_error = percent)

In [3]:
ratio = 0.75
percent = np.array([0.005, 0.015, 0.005])
size = 600

batch_size = 50

str_ID =  "S"+str(size)+"R"+str(int(ratio*100))
[final_array, metadata] = read_hdf5(str_ID)
metadata ['ID'] = np.arange(0,final_array.shape[0])
data_set = CombineDataset(metadata,'ID','class',final_array)

data_train, data_test = train_test_split(data_set,train_size=0.85,random_state=42)
loader_test = DataLoader(data_test, batch_size = batch_size, num_workers = 0, drop_last=True)

In [4]:
net_name = ['AlexNet', 'ResNet18', 'SqueezeNet']

k = 0

final_pred = np.array([])
for model_i in net_name:
    net = NeuralNet(model_i, 'SGD/momentum')
    net.load_checkpoint()

    with torch.no_grad():
        predictions = []; targets = []
        for data in loader_test:
            images, _, labels = data
            m = nn.
            outputs = net.net(images)

            predictions.extend(outputs.cpu().numpy())
            targets.extend(labels.cpu().numpy())
    if k == 0:
        final_pred = np.round(np.asarray(predictions))
        final_targets = np.asarray(targets)
        k = 1
    else: 
        final_pred = np.append(final_pred, np.round(np.asarray(predictions)), axis = 1)
        
X_train, X_test, y_train, y_test = train_test_split(final_pred, final_targets, test_size=0.5, random_state=42)

## Ensemble Machine learning

### 1. Binary relevance 

In [5]:
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.svm import SVC

# initialize Binary Relevance multi-label classifier
# with an SVM classifier
# SVM in scikit only supports the X matrix in sparse representation

classifier = BinaryRelevance(
    classifier = SVC(),
    require_dense = [False, True]
)

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)
net.calculate_metrics(predictions.toarray(),y_test)

{'hamming': 0.3,
 'precision': 0.72,
 'recall': 0.92,
 'f1': 0.6933333333333332,
 'exactmatch': 0.48,
 'accuracy': 0.7,
 'auc': 0.7222985347985349}

### 2. RAkELd: random label space partitioning with Label Powerset

In [6]:
from sklearn.naive_bayes import GaussianNB
from skmultilearn.ensemble import RakelD

classifier = RakelD(
    base_classifier=GaussianNB(),
    base_classifier_require_dense=[True, True],
    labelset_size=2
)

classifier.fit(X_train, y_train)
prediction = classifier.predict(X_test)
net.calculate_metrics(prediction.toarray(),y_test)

{'hamming': 0.26,
 'precision': 0.78,
 'recall': 0.86,
 'f1': 0.6666666666666665,
 'exactmatch': 0.56,
 'accuracy': 0.74,
 'auc': 0.6846001221001221}

### 3. Label Power Set

In [7]:
from skmultilearn.problem_transform import LabelPowerset
from sklearn.ensemble import RandomForestClassifier

# initialize LabelPowerset multi-label classifier with a RandomForest
classifier = LabelPowerset(
    classifier = RandomForestClassifier(n_estimators=100),
    require_dense = [False, True]
)

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)
net.calculate_metrics(predictions.toarray(),y_test)

{'hamming': 0.38,
 'precision': 0.68,
 'recall': 0.92,
 'f1': 0.6399999999999999,
 'exactmatch': 0.48,
 'accuracy': 0.62,
 'auc': 0.651862026862027}

### 4. Multi Label Nearest Neighbors

In [8]:
from skmultilearn.adapt import MLkNN
from sklearn.model_selection import GridSearchCV

parameters = {'k': range(1,3), 's': [0.5, 0.7, 1.0]}
score = 'roc_auc'

clf = GridSearchCV(MLkNN(), parameters, scoring=score)
clf.fit(X_train, y_train)

print (clf.best_params_, clf.best_score_)
classifier = MLkNN(k=1, s=0.5)

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)
net.calculate_metrics(predictions.toarray(),y_test)

{'k': 1, 's': 0.5} nan


{'hamming': 0.3,
 'precision': 0.94,
 'recall': 0.64,
 'f1': 0.5866666666666667,
 'exactmatch': 0.56,
 'accuracy': 0.7,
 'auc': 0.636523199023199}

### 5. Majority Voting Classifier

In [9]:
from skmultilearn.ensemble import MajorityVotingClassifier
from skmultilearn.cluster import FixedLabelSpaceClusterer
from skmultilearn.problem_transform import ClassifierChain
from sklearn.naive_bayes import GaussianNB

classifier = MajorityVotingClassifier(
    clusterer = FixedLabelSpaceClusterer(clusters = [[1,1], [0, 0], [1,0], [0,1]]),
    classifier = ClassifierChain(classifier=GaussianNB())
)
classifier.fit(X_train,y_train)
predictions = classifier.predict(X_test)
net.calculate_metrics(predictions.toarray(),y_test)

{'hamming': 0.32,
 'precision': 0.54,
 'recall': 0.92,
 'f1': 0.4933333333333333,
 'exactmatch': 0.36,
 'accuracy': 0.68,
 'auc': 0.6317155067155067}