In [150]:
import numpy as np
import numpy.random as nprand
import matplotlib.pyplot as plt
import pandas as pd
import math as math

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif 
from sklearn.model_selection import cross_val_score

In [151]:
# Read the raw table with no header row, then transpose it so that each row
# of the file becomes a column of `df`.
df = pd.read_csv('data.csv', header=None, index_col=False).transpose()

In [152]:
# Slice the transposed table by column position, then coerce each piece to
# numeric values.
label_column = df.iloc[:, 1]
light_column = df.iloc[:, 2]
# NOTE(review): the slice 3:-1 leaves out the final column of `df` -- confirm
# that the last column is not a feature that should be kept.
feature_columns = df.iloc[:, 3:-1]

lesions = label_column.apply(pd.to_numeric)
light = light_column.apply(pd.to_numeric)
features = feature_columns.apply(pd.to_numeric)

# One name per class; later cells iterate class numbers 1..len(labelNames).
# Presumably listed in label order (1, 2, 3) -- verify against the data source.
labelNames = ['hyperplasic', 'serrated', 'adenoma']

In [153]:
from sklearn.model_selection import train_test_split
import copy
# Hold out 30% of the samples, stratified on the lesion label.
# random_state is fixed so that the split -- and every metric computed from
# it further down -- is the same on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    lesions,
    test_size=0.3,
    stratify=lesions,
    random_state=42,
)

class Problem:
    """Container for one sub-problem: its train/test data and fitted models."""

    def __init__(self, index, fTrain, yTrain, fTest, yTest):
        # Identifier of this sub-problem and the (initially empty) list that
        # collects the models trained for it.
        self.problemNumber, self.models = index, []

        # Training split.
        self.featuresTrain, self.targetsTrain = fTrain, yTrain

        # Test split.
        self.featuresTest, self.targetsTest = fTest, yTest

def _one_vs_rest_targets(targets, class_number):
    """Recode `targets` as -1 where it equals `class_number` and 1 elsewhere.

    The mask is computed from `targets` itself, so the result does not depend
    on pandas aligning a boolean mask taken from a different (longer) series.
    The returned Series keeps the index, name and dtype of `targets`.
    """
    recoded = np.where(targets == class_number, -1, 1)
    return pd.Series(
        recoded,
        index=targets.index,
        name=targets.name,
        dtype=targets.dtype,
    )


# Not used by the visible cells; kept so any later cell that reads them still works.
trainingTargets = []
testTargets = []

# One binary problem per class number 1..len(labelNames): the class itself is
# labelled -1 and every other class is labelled 1.
problems = []
for classNumber in range(1, len(labelNames) + 1):
    problems.append(
        Problem(
            classNumber,
            X_train,
            _one_vs_rest_targets(y_train, classNumber),
            X_test,
            _one_vs_rest_targets(y_test, classNumber),
        )
    )

In [154]:
import tensorflow as tf
class ManifoldMixupBinary:
    """Create synthetic samples by blending each row with a randomly chosen row.

    One mixing weight is drawn per call from Beta(alpha, alpha) and applied to
    the whole batch: ``weight * a + (1 - weight) * b`` where ``b`` is a
    shuffled copy of ``a``. The blending is applied directly to the
    ``features`` / ``targets`` arrays given to the constructor.

    Parameters
    ----------
    features : array
        Must provide ``.shape[0]`` and integer-array indexing (e.g. a NumPy array).
    targets : array
        Indexed with the same permutation as ``features``.
    mixup_alpha : float
        Both shape parameters of the Beta distribution.
    random_state : int or None, optional
        Seed for a private ``numpy.random.RandomState``. When ``None``
        (the default) NumPy's global generator is used, as before.
    """

    def __init__(self, features=None, targets=None, mixup_alpha=None, random_state=None):
        self.alpha = mixup_alpha
        self.features = features
        # Original (misspelled) attribute name; kept because existing code may read it.
        self.tragets = targets
        # The numpy.random module and RandomState both expose beta() and shuffle().
        self._rng = np.random if random_state is None else np.random.RandomState(random_state)

    @property
    def targets(self):
        """Correctly spelled alias of the ``tragets`` attribute."""
        return self.tragets

    @targets.setter
    def targets(self, value):
        self.tragets = value

    def mixup(self, lmbda, inputs_a, inputs_b):
        """Return the convex combination ``lmbda * inputs_a + (1 - lmbda) * inputs_b``."""
        return lmbda * inputs_a + (1 - lmbda) * inputs_b

    def synthesize(self):
        """Return one mixed batch ``(x_mixup, y_mixup)``.

        Raises
        ------
        ValueError
            If features, targets or mixup_alpha were not supplied.
        """
        if self.features is None or self.tragets is None or self.alpha is None:
            raise ValueError("features, targets and mixup_alpha must all be provided")
        # Draw order (beta first, then shuffle) is the same as in the original code,
        # so a seeded global generator yields the same samples as before.
        lmbda = self._rng.beta(self.alpha, self.alpha, 1)
        indices = np.arange(0, self.features.shape[0])
        self._rng.shuffle(indices)
        x_mixup = self.mixup(lmbda, self.features, self.features[indices])
        y_mixup = self.mixup(lmbda, self.tragets, self.tragets[indices])
        return x_mixup, y_mixup

    # Original (misspelled) method name, kept so existing callers keep working.
    syntesize = synthesize
# Expose the training arrays of the first problem as X / y.
first_problem = problems[0]
X = first_problem.featuresTrain.values
y = first_problem.targetsTrain.values



### Create base regressor models using manifold mixup

In [155]:
from sklearn.model_selection import KFold
#!pip install xgboost
from sklearn.metrics import classification_report
import xgboost as xgb

N_FOLDS = 3            # number of KFold splits; one model is kept per fold
N_MIXUP_ROUNDS = 200   # mixed copies of the training set generated per problem
MIXUP_ALPHA = 2        # Beta(alpha, alpha) parameter for the mixing weight
MIXUP_SEED = 42

# ManifoldMixupBinary draws from NumPy's global generator by default, so
# seeding it here makes the synthetic samples identical on every run.
np.random.seed(MIXUP_SEED)

# NOTE(review): every synthetic row is a blend of rows from the same training
# set, so a held-out fold is not independent of the folds used for fitting;
# the scores printed below are likely optimistic.
kf = KFold(n_splits=N_FOLDS)
for problem in problems:
    X_real = problem.featuresTrain.values
    y_real = problem.targetsTrain.values
    manifoldMixup = ManifoldMixupBinary(features=X_real, targets=y_real, mixup_alpha=MIXUP_ALPHA)

    # Synthetic rows first, real rows last (same ordering as before).
    synthetic = [manifoldMixup.syntesize() for _ in range(N_MIXUP_ROUNDS)]
    X = np.concatenate([x_mix for x_mix, _ in synthetic] + [X_real], axis=0)
    y = np.concatenate([y_mix for _, y_mix in synthetic] + [y_real], axis=0)

    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBRFRegressor(random_state=42).fit(X[train_index], y[train_index])
        problem.models.append(xgb_model)

        # Binarise by sign. np.where maps every value to exactly one of the two
        # labels; the previous two-step thresholding left exact zeros as a
        # third label. Zeros now count as -1.
        preds = np.where(xgb_model.predict(X[test_index]) > 0, 1.0, -1.0)
        ytest_target = np.where(y[test_index] > 0, 1.0, -1.0)

        print("problem {}".format(problem.problemNumber))
        print(classification_report(ytest_target, preds))

problem 1
              precision    recall  f1-score   support

        -1.0       0.93      0.84      0.88       493
         1.0       0.94      0.98      0.96      1309

    accuracy                           0.94      1802
   macro avg       0.94      0.91      0.92      1802
weighted avg       0.94      0.94      0.94      1802

problem 1
              precision    recall  f1-score   support

        -1.0       0.86      0.80      0.83       493
         1.0       0.92      0.95      0.94      1309

    accuracy                           0.91      1802
   macro avg       0.89      0.87      0.88      1802
weighted avg       0.91      0.91      0.91      1802

problem 1
              precision    recall  f1-score   support

        -1.0       0.94      0.86      0.90       493
         1.0       0.95      0.98      0.96      1309

    accuracy                           0.95      1802
   macro avg       0.94      0.92      0.93      1802
weighted avg       0.95      0.95      0.94 

In [156]:
def _stack_model_predictions(problem_list, feature_attr):
    """Predict with every model of every problem and stack the results column-wise.

    Parameters
    ----------
    problem_list : iterable of Problem
    feature_attr : str
        Name of the Problem attribute holding the inputs to predict on
        ('featuresTrain' or 'featuresTest').

    Returns
    -------
    numpy.ndarray
        One column per model, ordered by problem and then by model within the
        problem. Assuming each predict() call returns a 1-D array of equal
        length, the shape is (n_samples, n_models).
    """
    per_model = []
    for problem in problem_list:
        inputs = getattr(problem, feature_attr).values
        per_model.extend(model.predict(inputs) for model in problem.models)
    return np.array(per_model).transpose()


# Meta-features for the second-stage model. Working through a helper keeps
# the globals X, y and xgb_model from being overwritten as a side effect.
predsAllModelsTrain = _stack_model_predictions(problems, 'featuresTrain')
predsAllModelsTest = _stack_model_predictions(problems, 'featuresTest')

In [166]:
# Second stage: fit a regressor on the stacked base-model outputs, then round
# its continuous predictions to the nearest integer before scoring them
# against the original class labels.
xgb_model = xgb.XGBRFRegressor()
xgb_model.fit(predsAllModelsTrain, y_train)
preds = np.round(xgb_model.predict(predsAllModelsTest))
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           1       0.86      0.46      0.60        13
           2       0.29      0.56      0.38         9
           3       0.73      0.67      0.70        24

    accuracy                           0.59        46
   macro avg       0.63      0.56      0.56        46
weighted avg       0.68      0.59      0.61        46



In [159]:
# Display the current contents of `preds`.
# NOTE(review): the captured output holds unrounded values and this cell's
# execution count (159) is lower than that of the cell that rounds `preds`
# (166), so the output looks stale -- re-run the notebook in order to refresh it.
preds

array([3.0000024, 1.9899986, 1.9999986, 3.0000024, 1.4199991, 3.0000024,
       1.9999986, 3.0000024, 2.850002 , 1.9999986, 3.0000024, 3.0000024,
       0.9999995, 2.770002 , 1.1399994, 3.0000024, 2.850002 , 0.9999995,
       3.0000024, 1.9999986, 0.9999995, 2.2099998, 1.9999986, 2.9900024,
       1.9999986, 3.0000024, 1.9399986, 1.9799986, 0.9999995, 1.9999986,
       2.7100017, 2.1799996, 3.0000024, 3.0000024, 2.8600023, 2.069999 ,
       3.0000024, 0.9999995, 3.0000024, 0.9999995, 1.9999986, 2.9800024,
       3.0000024, 1.9999986, 1.9999986, 3.0000024], dtype=float32)

In [116]:
# Score every base model of every problem on the held-out test split.
for problem in problems:
    X = problem.featuresTest.values
    # Build the sign labels as a new array, once per problem. The previous
    # code assigned into `.values` on every inner iteration; that array may be
    # a view of the Series' data, so it could write through to problem.targetsTest.
    y = np.where(problem.targetsTest.values > 0, 1, -1)
    for xgb_model in problem.models:
        # np.where maps every score to exactly one of the two labels; the old
        # two-step thresholding left exact zeros as a third label. Zeros now
        # count as -1.
        preds = np.where(xgb_model.predict(X) > 0, 1, -1)
        print("problem {}".format(problem.problemNumber))
        print(classification_report(y, preds))


problem 1
              precision    recall  f1-score   support

          -1       0.76      1.00      0.87        13
           1       1.00      0.88      0.94        33

    accuracy                           0.91        46
   macro avg       0.88      0.94      0.90        46
weighted avg       0.93      0.91      0.92        46

problem 1
              precision    recall  f1-score   support

          -1       0.72      1.00      0.84        13
           1       1.00      0.85      0.92        33

    accuracy                           0.89        46
   macro avg       0.86      0.92      0.88        46
weighted avg       0.92      0.89      0.90        46

problem 1
              precision    recall  f1-score   support

          -1       0.86      0.92      0.89        13
           1       0.97      0.94      0.95        33

    accuracy                           0.93        46
   macro avg       0.91      0.93      0.92        46
weighted avg       0.94      0.93      0.94 