In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 


In [2]:

def acc(X, y):
    normalized_X= preprocessing.normalize(X)
    normalized_X=pd.DataFrame(normalized_X)
    X_train, X_test, y_train, y_test = train_test_split(normalized_X, y, test_size = 0.2, random_state = 0)
    clf=RandomForestClassifier(n_estimators=100, random_state = 0)
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_test)
    cm = metrics.confusion_matrix(y_test, y_pred)
    print(cm)
    s=metrics.classification_report(y_test,y_pred)
    print(s)
    return(metrics.accuracy_score(y_test,y_pred))

def fdr(data1, data2):
    mu1=data1.mean(0)  # columnwise mean of a matrix
    mu2=data2.mean(0)  # columnwise mean of a matrix
    sd1=data1.std(0)   # columnwise standard deviation of a matrix
    sd2=data2.std(0)   # columnwise standard deviation of a matrix
    fdr=list(((mu1-mu2)**2)/(sd1**2+sd2**2))
    ind=[i[0] for i in sorted(enumerate(fdr), key=lambda x:x[1])]
    return ind
    
def snr(data1, data2):
    mu1=data1.mean(0)  # columnwise mean of a matrix
    mu2=data2.mean(0)  # columnwise mean of a matrix
    sd1=data1.std(0)   # columnwise standard deviation of a matrix
    sd2=data2.std(0)   # columnwise standard deviation of a matrix
    snr = list((mu1-mu2)/(sd1+sd2))
    ind=[i[0] for i in sorted(enumerate(snr), key=lambda x:x[1])]
    return ind
    
def cc(df):
    corr_matrix = df.corr()
    # Select upper triangle of correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
    # Find features with correlation greater than 0.8
    to_drop = [column for column in upper.columns if any(upper[column] > 0.8)]
    # Drop features 
    df.drop(to_drop, axis=1, inplace=True)
    return df


In [3]:


def FDR_SNR_CC(data1, data2):
    ind_fdr = fdr(data1, data2)
    data1=pd.DataFrame(dataset.iloc[0:40,ind_fdr[0:200]].values)
    data2=pd.DataFrame(dataset.iloc[40:,ind_fdr[0:200]].values)
    ind_snr = snr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_snr[0:100]].values))
    return df

def FDR_CC_SNR(data1, data2):
    ind_fdr = fdr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_fdr[0:200]].values))
    data1=pd.DataFrame(df.iloc[0:40,:].values)
    data2=pd.DataFrame(df.iloc[40:,:].values)
    ind_snr = snr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_snr[0:100]].values))
    return df

def CC_SNR_FDR(dataset):
    df = cc(dataset)
    data1=pd.DataFrame(df.iloc[0:40,:].values)
    data2=pd.DataFrame(df.iloc[40:,:].values)
    ind_snr = snr(data1, data2)
    data1=pd.DataFrame(dataset.iloc[0:40,ind_snr[0:200]].values)
    data2=pd.DataFrame(dataset.iloc[40:,ind_snr[0:200]].values)
    ind_fdr = fdr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_fdr[0:100]].values))
    return df

def CC_FDR_SNR(dataset):
    df = cc(dataset)
    data1=pd.DataFrame(df.iloc[0:40,:].values)
    data2=pd.DataFrame(df.iloc[40:,:].values)
    ind_fdr = fdr(data1, data2)
    data1=pd.DataFrame(dataset.iloc[0:40,ind_fdr[0:200]].values)
    data2=pd.DataFrame(dataset.iloc[40:,ind_fdr[0:200]].values)
    ind_snr = snr(data1, data2)    
    df = cc(pd.DataFrame(dataset.iloc[:,ind_snr[0:100]].values))
    return df

def SNR_CC_FDR(data1, data2):
    ind_snr = snr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_snr[0:200]].values))
    data1=pd.DataFrame(df.iloc[0:40,:].values)
    data2=pd.DataFrame(df.iloc[40:,:].values)
    ind_fdr = fdr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_fdr[0:100]].values))
    return df

def SNR_FDR_CC(data1, data2):
    ind_snr = snr(data1, data2)
    data1=pd.DataFrame(dataset.iloc[0:40,ind_snr[0:200]].values)
    data2=pd.DataFrame(dataset.iloc[40:,ind_snr[0:200]].values)
    ind_fdr = fdr(data1, data2)
    df = cc(pd.DataFrame(dataset.iloc[:,ind_fdr[0:100]].values))
    return df

In [6]:

dataset=pd.read_csv('colon.csv', header=None)
data1=pd.DataFrame(dataset.iloc[0:40,:-1].values)
data2=pd.DataFrame(dataset.iloc[40:,:-1].values)


In [7]:

# FDR->SNR->CC
X = FDR_SNR_CC(data1, data2)
y = dataset.iloc[:,2000].values
print('Accuracy for FDR->SNR->CC is ',acc(X, y))

# FDR->CC->SNR
X = FDR_CC_SNR(data1, data2)
y = dataset.iloc[:,2000].values
print('Accuracy for FDR->CC->SNR is ',acc(X, y))

# CC->SNR->FDR
X = CC_SNR_FDR(pd.DataFrame(dataset.iloc[:,:-1].values))
y = dataset.iloc[:,2000].values
print('Accuracy for CC->SNR->FDR is ',acc(X, y))

# CC->FDR->SNR
X = CC_FDR_SNR(pd.DataFrame(dataset.iloc[:,:-1].values))
y = dataset.iloc[:,2000].values
print('Accuracy for CC->FDR->SNR is ',acc(X, y))

# SNR->CC->FDR
X = SNR_CC_FDR(data1, data2)
y = dataset.iloc[:,2000].values
print('Accuracy for SNR->CC->FDR is ',acc(X, y))

# SNR->FDR->CC
X = SNR_FDR_CC(data1, data2)
y = dataset.iloc[:,2000].values
print('Accuracy for SNR->FDR->CC is ',acc(X, y))

[[9 3]
 [0 1]]
              precision    recall  f1-score   support

           0       1.00      0.75      0.86        12
           1       0.25      1.00      0.40         1

    accuracy                           0.77        13
   macro avg       0.62      0.88      0.63        13
weighted avg       0.94      0.77      0.82        13

Accuracy for FDR->SNR->CC is  0.7692307692307693
[[11  1]
 [ 1  0]]
              precision    recall  f1-score   support

           0       0.92      0.92      0.92        12
           1       0.00      0.00      0.00         1

    accuracy                           0.85        13
   macro avg       0.46      0.46      0.46        13
weighted avg       0.85      0.85      0.85        13

Accuracy for FDR->CC->SNR is  0.8461538461538461
[[10  2]
 [ 0  1]]
              precision    recall  f1-score   support

           0       1.00      0.83      0.91        12
           1       0.33      1.00      0.50         1

    accuracy                   