In [1]:
import numpy as np
import pandas as pd
import os

import weka.core.jvm as jvm
from weka.core.dataset import Instances
from weka.classifiers import Classifier, Evaluation
from weka.filters import Filter
from weka.core import converters

from sklearn.model_selection import KFold
from sklearn import preprocessing

from scipy.stats import t
from math import sqrt
from statistics import stdev
import pickle

In [None]:
jvm.start(packages=True)

## Importing data

In [3]:
# Moving to project directory
os.chdir('..')

In [4]:
data_set = pd.read_pickle("data\processed\data_set.pkl")

In [5]:
# target features
y = data_set.iloc[:,-1]

# Encode categorical target values
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)
y = pd.DataFrame(y)

#dependent variables
X = data_set.iloc[:,4:-1] 
# X = data_set.iloc[:29,4:-1]   #test

# Group used in cross-validation accordind to original dataset
groups = data_set.iloc[:,:].datasetName
# groups = data_set.iloc[:29,:].datasetName  #test

#Attributes
attributes = data_set.iloc[:,:].attributeName

#All datasets that will be in test set each run
test_cols =list(groups.unique())

In [6]:
arff_files={}
# Set the path to the folder containing ARFF files
folder_path = "data\\raw"  # Replace with your folder path

# Iterate over the files in the folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isfile(file_path) and filename.endswith(".arff"):
        print("\nLoading file:", file_path)
        try:
            data = converters.load_any_file(file_path)
            
            # Perform further processing or analysis on the data
            # ...
            # Your code here
            
            # Example: Print the dataset summary
            print("Number of instances:", data.num_instances)
            print("Number of attributes:", data.num_attributes)
            print("----------------------")

            nome = filename.split("\\")[-1]
            nome = nome.replace(".arff", "")
            nome = nome.replace("-", "_")
            print(nome)
            exec('arff_files["{KEY}"] = data'.format(KEY = nome))
            
            
        except Exception as e:
            print("Error loading file:", file_path)
            print("Error message:", str(e))


Loading file: data\raw\abalone_classification.arff
Number of instances: 4177
Number of attributes: 9
----------------------
abalone_classification

Loading file: data\raw\accent-mfcc-data-1.arff
Number of instances: 329
Number of attributes: 13
----------------------
accent_mfcc_data_1

Loading file: data\raw\avila-tr.arff
Number of instances: 10430
Number of attributes: 11
----------------------
avila_tr

Loading file: data\raw\avila-ts.arff
Number of instances: 10437
Number of attributes: 11
----------------------
avila_ts

Loading file: data\raw\balance-scale.arff
Number of instances: 625
Number of attributes: 5
----------------------
balance_scale

Loading file: data\raw\breast-cancer.arff
Number of instances: 286
Number of attributes: 10
----------------------
breast_cancer

Loading file: data\raw\cmc.arff
Number of instances: 1473
Number of attributes: 10
----------------------
cmc

Loading file: data\raw\contact-lenses.arff
Number of instances: 24
Number of attributes: 5
------

In [7]:
# Set the path to the folder containing ARFF files
folder_path = "data\\interim"  # Replace with your folder path

# Iterate over the files in the folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isfile(file_path) and filename.endswith(".arff"):
        print("\nLoading file:", file_path)
        try:
            data = converters.load_any_file(file_path)
            
            # Perform further processing or analysis on the data
            # ...
            # Your code here
            
            # Example: Print the dataset summary
            print("Number of instances:", data.num_instances)
            print("Number of attributes:", data.num_attributes)
            print("----------------------")

            nome = filename.split("\\")[-1]
            nome = nome.replace(".arff", "")
            nome = nome.replace("-", "_")
            print(nome)
            exec('arff_files["{KEY}"] = data'.format(KEY = nome))
            
            
        except Exception as e:
            print("Error loading file:", file_path)
            print("Error message:", str(e))


Loading file: data\interim\credit_g_part1.arff
Number of instances: 1000
Number of attributes: 11
----------------------
credit_g_part1

Loading file: data\interim\credit_g_part2.arff
Number of instances: 1000
Number of attributes: 11
----------------------
credit_g_part2

Loading file: data\interim\hypothyroid_part1.arff
Number of instances: 3772
Number of attributes: 11
----------------------
hypothyroid_part1

Loading file: data\interim\hypothyroid_part2.arff
Number of instances: 3772
Number of attributes: 11
----------------------
hypothyroid_part2

Loading file: data\interim\hypothyroid_part3.arff
Number of instances: 3772
Number of attributes: 10
----------------------
hypothyroid_part3

Loading file: data\interim\ionosphere_part1.arff
Number of instances: 351
Number of attributes: 13
----------------------
ionosphere_part1

Loading file: data\interim\ionosphere_part2.arff
Number of instances: 351
Number of attributes: 12
----------------------
ionosphere_part2

Loading file: da

In [8]:
for i in groups.unique():
    exec('dataset_{KEY} = pd.read_pickle("data\processed\dataset_{KEY}.pkl")'.format(KEY = i))

In [9]:
variaveis_weka = data_set[data_set.wrapperRelevance=='yes'].iloc[:,1:3].groupby('datasetName')['attributeName'].apply(lambda x: x.tolist()).to_dict()
variaveis_weka_remove = data_set[data_set.wrapperRelevance=='no'].iloc[:,1:3].groupby('datasetName')['attributeName'].apply(lambda x: x.tolist()).to_dict()

In [10]:
variaveis_new_model_test_set = {}
variaveis_new_model_test_set_remove = {}
CV_best_model_validation_score = {}

accuracy_aux = 0


# Specify the folder where the dictionaries are stored
folder_path = 'models\\results'

# Iterate over each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.pkl'):
        file_path = os.path.join(folder_path, filename)

        # Load the dictionary from the file using pickle.load
        with open(file_path, 'rb') as file:
            loaded_dict = pickle.load(file)
        
        aux={}
        aux[loaded_dict['dataset']]=loaded_dict['variaveis_new_model_test_set']             
        variaveis_new_model_test_set.update(aux)
        
        aux={}
        aux[loaded_dict['dataset']]=loaded_dict['variaveis_new_model_test_set_remove']             
        variaveis_new_model_test_set_remove.update(aux)        

        aux={}
        aux[loaded_dict['dataset']]=loaded_dict['CV_best_model_validation_score']             
        CV_best_model_validation_score.update(aux)          
        
        #storing best model overall
        if accuracy_aux < loaded_dict['CV_best_model_validation_score']:
            best_model = loaded_dict['dataset']
            accuracy_aux = loaded_dict['CV_best_model_validation_score']

In [11]:
 # load the model from disk
# os.chdir('models')
filename = f'models\\{best_model}_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))
loaded_model

# save each the model to disk
filename = f'models\\best_model.sav'
pickle.dump(loaded_model, open(filename, 'wb'))

In [12]:
# Python implementation of the Nadeau and Bengio correction of dependent Student's t-test (https://gist.github.com/jensdebruijn/13e8eeda85eb8644ac2a4ac4c3b8e732)
# using the equation stated in https://www.cs.waikato.ac.nz/~eibe/pubs/bouckaert_and_frank.pdf
def corrected_dependent_ttest(data1, data2, n_training_samples, n_test_samples, alpha):
    """
    data1: accuracies WRAPPER model
    data2: accuracies NEW model
    
    """
    n = len(data1)
    differences = [(data1[i]-data2[i]) for i in range(n)]
    sd = stdev(differences)
    divisor = 1 / n * sum(differences)
    test_training_ratio = n_test_samples / n_training_samples
    denominator = sqrt(1 / n + test_training_ratio) * sd
    t_stat = divisor / denominator
    # degrees of freedom
    df = n - 1
    # calculate the critical value
    cv = t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
    # return everything
    return t_stat, df, cv, p

In [13]:
col31 = []
col32 = []
col33 = []

classifier_weka = Classifier(classname="weka.classifiers.functions.SimpleLogistic")

n_train={}
n_test={}
ten_tencv_corrected_t_test={}
scores_weka = {}
scores_new = {}
scores_complete = {}

for i in test_cols:
    print(i)
    

    col31.append(i)
    
    # real data values to ajust a logistic regression model     
    exec('X = dataset_{KEY}.iloc[:,:-1]'.format(KEY = i))


    exec('X_weka = X.loc[:, variaveis_weka["{KEY}"]]'.format(KEY = i))
    exec('X_new = X.loc[:, variaveis_new_model_test_set["{KEY}"]]'.format(KEY = i)) #ver
    
    exec('print("WEKA:",variaveis_weka["{KEY}"])'.format(KEY = i))
    exec('print("New Model:",variaveis_new_model_test_set["{KEY}"])'.format(KEY = i))

    # loading arff_files
    
    exec('dataset_arff = arff_files["{KEY}"]'.format(KEY = i))
    
    # Complete dataset    

    dataset0 = Instances.copy_instances(dataset_arff)  # Copy instances from dataset
    dataset0.class_is_last()   # set class attribute
   
    
    # WEKA subset
    try:
        exec('lista_weka = variaveis_weka_remove["{KEY}"]'.format(KEY = i))
        for p,remove in enumerate(lista_weka):
            if p==0:
                dataset1 = Instances.copy_instances(dataset_arff)  # Copy instances from dataset1

                dataset1.class_is_last()   # set class attribute
            remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", remove])
            remove.inputformat(dataset1)
            dataset1 = remove.filter(dataset1)
    
    except:
        dataset1 = Instances.copy_instances(dataset_arff)
        dataset1.class_is_last()
    
    wrapper_subset = [attr.name for attr in dataset1.attributes()]


    # Model subset
    try:
        exec('lista_weka = variaveis_new_model_test_set_remove["{KEY}"]'.format(KEY = i))
        for p,remove in enumerate(lista_weka):
            if p==0:
                dataset2 = Instances.copy_instances(dataset_arff)  # Copy instances from dataset1
                dataset2.class_is_last()   # set class attribute
            remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", remove])
            remove.inputformat(dataset2)
            dataset2 = remove.filter(dataset2)

    except:
        dataset2 = Instances.copy_instances(dataset_arff)  
        dataset2.class_is_last()  
    
    model_subset = [attr.name for attr in dataset2.attributes()]


    col32.append(len(variaveis_weka[f"{i}"])/(dataset_arff.num_attributes-1))
    col33.append(len(variaveis_new_model_test_set[f"{i}"])/(dataset_arff.num_attributes-1))

  
    
    num_runs = 10
    num_folds = 10
    kf = KFold(n_splits=num_folds, shuffle=True)
    score_weka_aux=[]
    score_new_aux=[]
    score_complete_aux=[]       
    
        
    for run in np.arange(num_runs):

        # Perform cross-validation on the datasets
        iteration=1
        for train_indices, test_indices in kf.split(dataset_arff):

            
            train_set0 = Instances.copy_instances(dataset0)  # Copy instances from dataset1
            test_set0 = Instances.copy_instances(dataset0)   # Copy instances from dataset1
            
            train_set1 = Instances.copy_instances(dataset1)  # Copy instances from dataset1
            test_set1 = Instances.copy_instances(dataset1)   # Copy instances from dataset1

            train_set2 = Instances.copy_instances(dataset2)  # Copy instances from dataset2
            test_set2 = Instances.copy_instances(dataset2)   # Copy instances from dataset2

            # Set instances indices for training and testing sets
            train_set0.delete()
            train_set1.delete()
            train_set2.delete()
            test_set0.delete()
            test_set1.delete()
            test_set2.delete()

            for index in train_indices:
                train_set0.add_instance(dataset0.get_instance(index))
                train_set1.add_instance(dataset1.get_instance(index))
                train_set2.add_instance(dataset2.get_instance(index))

            for index in test_indices:
                test_set0.add_instance(dataset0.get_instance(index))
                test_set1.add_instance(dataset1.get_instance(index))
                test_set2.add_instance(dataset2.get_instance(index))

            # Build and evaluate model on complete dataset
            classifier_weka.build_classifier(train_set0)    
            evaluation0 = Evaluation(train_set0)
            evaluation0.test_model(classifier_weka, test_set0)
            score_complete_aux.append(evaluation0.percent_correct)                

            # Build and evaluate model on Wrapper Subset
            classifier_weka.build_classifier(train_set1)    
            evaluation1 = Evaluation(train_set1)
            evaluation1.test_model(classifier_weka, test_set1)
            score_weka_aux.append(evaluation1.percent_correct)


            # Build and evaluate model on New Model Subset
            classifier_weka.build_classifier(train_set2)
            evaluation2 = Evaluation(train_set2)
            evaluation2.test_model(classifier_weka, test_set2)
            score_new_aux.append(evaluation2.percent_correct)
            iteration=iteration+1

    exec('n_train["{KEY}"] = len(train_indices)'.format(KEY = i))
    exec('n_test["{KEY}"] = len(test_indices)'.format(KEY = i))
    exec('scores_weka["{KEY}"] = score_weka_aux'.format(KEY = i))
    exec('scores_new["{KEY}"] = score_new_aux'.format(KEY = i))
    exec('scores_complete["{KEY}"] = score_complete_aux'.format(KEY = i))    
    score_weka_aux=[]
    score_new_aux=[]
    score_complete_aux =[]   

    exec('dif = (np.array(scores_new["{KEY}"]) - np.array(scores_weka["{KEY}"]))'.format(KEY = i))
    exec('dif2 = (np.array(scores_new["{KEY}"]) - np.array(scores_complete["{KEY}"]))'.format(KEY = i))    
    
    if wrapper_subset==model_subset:
        exec('ten_tencv_corrected_t_test["{KEY}"]= [np.nan,np.nan,np.nan,1]'.format(KEY = i))
        print("Model subset equals Wrapper subset")    
        print('------------------------------------------------------------------------------------------------')
    
    elif np.sum(dif)==0:
        exec('ten_tencv_corrected_t_test["{KEY}"]= [np.nan,np.nan,np.nan,1]'.format(KEY = i))
        print("Model subset differs Wrapper subset, but they had same predicitons")
        print('------------------------------------------------------------------------------------------------')
        
    else:
        exec('ten_tencv_corrected_t_test["{KEY}"]= corrected_dependent_ttest(scores_weka["{KEY}"],scores_new["{KEY}"],n_train["{KEY}"],n_test["{KEY}"],0.05)'.format(KEY = i))  
        exec('print("10x10CV corrected paired t test:",ten_tencv_corrected_t_test["{KEY}"])'.format(KEY = i))    
        print('------------------------------------------------------------------------------------------------')

iris
WEKA: ['petalwidth']
New Model: ['petallength']
10x10CV corrected paired t test: (0.35879704526413525, 99, 1.6603911559963895, 0.7205109646088039)
------------------------------------------------------------------------------------------------
labor_part1
WEKA: ['wage-increase-second-year', 'longterm-disability-assistance', 'duration']
New Model: ['wage-increase-second-year', 'statutory-holidays', 'longterm-disability-assistance', 'pension']
10x10CV corrected paired t test: (0.3411429770763928, 99, 1.6603911559963895, 0.7337189075221424)
------------------------------------------------------------------------------------------------
labor_part2
WEKA: ['wage-increase-first-year', 'contribution-to-health-plan', 'working-hours']
New Model: ['wage-increase-first-year', 'contribution-to-dental-plan']
10x10CV corrected paired t test: (1.8066367603555484, 99, 1.6603911559963895, 0.07385688532663393)
-----------------------------------------------------------------------------------------

In [14]:
table3 = pd.DataFrame(
 {'Dataset': col31,
  'Wrapper': col32,
  'New Model': col33
    }
)
# table3

In [15]:
col22=[]
col23=[]
col24=[]
for i in scores_weka:
    # exec('print("{KEY} \tWrapper:","%0.2f"% np.mean(scores_weka["{KEY}"]),"+-","%0.2f" %np.std(scores_weka["{KEY}"]),"\t New:","%0.2f" % np.mean(scores_new["{KEY}"]),"+-","%0.2f" % np.std(scores_new["{KEY}"]),"\t","%0.2f" %ten_tencv_corrected_t_test[i][3])'.format(KEY = i))
    exec('col22.append("%0.2f"% np.mean(scores_weka["{KEY}"])+"+-"+"%0.2f" %np.std(scores_weka["{KEY}"]))'.format(KEY = i))
    exec('col23.append("%0.2f"% np.mean(scores_new["{KEY}"])+"+-"+"%0.2f" %np.std(scores_new["{KEY}"]))'.format(KEY = i))
    exec('col24.append(ten_tencv_corrected_t_test[i][3]<0.05)'.format(KEY = i))    

In [16]:
table2 = pd.DataFrame(
 {'Dataset': col31,
  'Wrapper': col22,
  'New Model': col23,
  'Difference': col24,  
    }
)
# table2

In [17]:
ten_tencv_corrected_t_test2={}
for j in scores_new.keys():
    exec('dif2 = (np.array(scores_new["{KEY}"]) - np.array(scores_complete["{KEY}"]))'.format(KEY = j))
#     print(j)
    if np.sum(dif2)==0:
        exec('ten_tencv_corrected_t_test2["{KEY}"]= [np.nan,np.nan,np.nan,1]'.format(KEY = j))
        # print("same predicitons")
        # print('------------------------------------------------------------------------------------------------')        
    else:
        exec('ten_tencv_corrected_t_test2["{KEY}"]= corrected_dependent_ttest(scores_new["{KEY}"],scores_complete["{KEY}"],n_train["{KEY}"],n_test["{KEY}"],0.05)'.format(KEY = j))  
        # exec('print("10x10CV corrected paired t test:",ten_tencv_corrected_t_test2["{KEY}"])'.format(KEY = j))    
        # print('------------------------------------------------------------------------------------------------')    

In [18]:
col42=[]
col44=[]
for i in scores_weka:
    exec('col42.append("%0.2f"% np.mean(scores_complete["{KEY}"])+"+-"+"%0.2f" %np.std(scores_complete["{KEY}"]))'.format(KEY = i))
    exec('col44.append(ten_tencv_corrected_t_test2[i][3]<0.05)'.format(KEY = i))  

In [19]:
table4 = pd.DataFrame(
 {'Dataset': col31,
  'Complete': col42,
  'New Model': col23,
  'Difference': col44,  
    }
)
# table4

In [20]:
#save DataFrame to pickle file
# table1.to_pickle("reports\table1.pkl")
table2.to_pickle("reports\\table2.pkl")
table3.to_pickle("reports\\table3.pkl")
table4.to_pickle("reports\\table4.pkl")
#df= pd.read_pickle("my_data.pkl")

## Results

### Cross-Validation Mean Scores

In [21]:
# Convert the dictionary to a DataFrame
df = pd.DataFrame.from_dict(CV_best_model_validation_score, orient='index', columns=['Mean Score'])

# Display the DataFrame
df

Unnamed: 0,Mean Score
abalone_classification,0.675623
accent_mfcc_data_1,0.683285
avila_tr,0.664684
avila_ts,0.670369
balance_scale,0.665962
breast_cancer,0.670001
cmc,0.676897
contact_lenses,0.665652
credit_g_part1,0.67735
credit_g_part2,0.685323


In [22]:
print('Average validation score of best models:',np.mean(df['Mean Score']))

Average validation score of best models: 0.6738129629901118


### Best Model

In [23]:
best_model

'EEG_Eye_State'

In [24]:
 # load the model from disk
# os.chdir('models')
filename = 'models\\best_model.sav'
best_model = pickle.load(open(filename, 'rb'))
best_model

### New Model x Wrapper

% of selected features (compared to complete dataset)

In [25]:
table3

Unnamed: 0,Dataset,Wrapper,New Model
0,iris,0.25,0.25
1,labor_part1,0.375,0.5
2,labor_part2,0.375,0.25
3,glass,0.666667,0.888889
4,breast_cancer,0.333333,0.888889
5,contact_lenses,0.5,0.25
6,credit_g_part1,0.5,0.3
7,credit_g_part2,0.4,0.7
8,diabetes,0.875,0.25
9,hypothyroid_part1,0.6,0.9


In [26]:
print('Mean % of selected features by Wrapper :',np.mean(table3.Wrapper))

Mean % of selected features by Wrapper : 0.6083643722532612


In [27]:
print('Mean % of selected features by Proposed Model :',np.mean(table3['New Model']))

Mean % of selected features by Proposed Model : 0.6985942555386999


Mean Scores +- One Standard Deviation and Significant Statistical Difference

In [28]:
table2

Unnamed: 0,Dataset,Wrapper,New Model,Difference
0,iris,95.73+-4.76,94.87+-5.40,False
1,labor_part1,89.70+-13.06,88.20+-14.00,False
2,labor_part2,91.87+-11.55,82.20+-16.37,False
3,glass,64.83+-9.84,62.91+-10.66,False
4,breast_cancer,75.75+-7.60,75.15+-8.13,False
5,contact_lenses,83.67+-23.69,65.83+-32.09,True
6,credit_g_part1,74.29+-4.30,72.01+-4.23,True
7,credit_g_part2,72.46+-4.17,71.44+-4.36,False
8,diabetes,76.51+-4.62,76.24+-4.93,False
9,hypothyroid_part1,93.53+-1.15,93.51+-1.15,False


In [29]:
print('Corrected t test statistic (Wrapper x New Model):')
print()
for i in ten_tencv_corrected_t_test:
    print(i,f'{ten_tencv_corrected_t_test[i][3]:.2f}')
    if ten_tencv_corrected_t_test[i][3]<0.05:
        exec('print("\tWrapper:",np.mean(scores_weka["{KEY}"]),"New:",np.mean(scores_new["{KEY}"]))'.format(KEY = i))

Corrected t test statistic (Wrapper x New Model):

iris 0.72
labor_part1 0.73
labor_part2 0.07
glass 0.37
breast_cancer 0.49
contact_lenses 0.05
	Wrapper: 83.66666666666669 New: 65.83333333333334
credit_g_part1 0.03
	Wrapper: 74.29 New: 72.01
credit_g_part2 0.21
diabetes 0.78
hypothyroid_part1 0.74
hypothyroid_part2 0.30
hypothyroid_part3 0.97
segment_challenge_part1 0.00
	Wrapper: 83.07333333333334 New: 76.36
segment_challenge_part2 0.63
soybean_part1 0.76
soybean_part2 0.17
soybean_part3 0.00
	Wrapper: 78.76555839727195 New: 71.519820971867
vote_part1 0.81
vote_part2 0.77
weather_nominal 1.00
abalone_classification 0.08
balance_scale 1.00
data_banknote_authentication 0.00
	Wrapper: 98.62937691738074 New: 88.33973341796255
DishonestInternetUsers 1.00
ecoli 0.15
hayes_roth_trainAndtest 0.00
	Wrapper: 84.125 New: 61.75
hcvdat0 0.19
mammographic_masses 0.66
monks_1_train 0.67
monks_2_train 1.00
wilt 0.99
seeds_dataset 0.13
Surveillance 0.29
wine 0.17
winequality_red_classification 0.92
w

In [30]:
print('Proportion of runs where results of a simple logistic regression applied over a subset defined by Proposed model versus  subset defined by Wrapper didn\'t have significative difference :')
print()
print(sum(table2['Difference']==False) / len(table2['Difference']))

Proportion of runs where results of a simple logistic regression applied over a subset defined by Proposed model versus  subset defined by Wrapper didn't have significative difference :

0.8148148148148148


### New Model x Complete Dataset 

In [31]:
table4

Unnamed: 0,Dataset,Complete,New Model,Difference
0,iris,96.27+-4.65,94.87+-5.40,False
1,labor_part1,90.47+-12.11,88.20+-14.00,False
2,labor_part2,86.10+-14.06,82.20+-16.37,False
3,glass,64.70+-10.17,62.91+-10.66,False
4,breast_cancer,75.18+-7.87,75.15+-8.13,False
5,contact_lenses,71.67+-26.72,65.83+-32.09,False
6,credit_g_part1,72.96+-4.33,72.01+-4.23,False
7,credit_g_part2,71.79+-4.31,71.44+-4.36,False
8,diabetes,76.93+-4.51,76.24+-4.93,False
9,hypothyroid_part1,93.51+-1.15,93.51+-1.15,False


In [32]:
print('Corrected t test statistic (Complete Model x New Model):')
print()
for i in ten_tencv_corrected_t_test2:
    print(i,f'{ten_tencv_corrected_t_test2[i][3]:.2f}')
    if ten_tencv_corrected_t_test2[i][3]<0.05:
        exec('print("\tComplete:",np.mean(scores_complete["{KEY}"]),"New:",np.mean(scores_new["{KEY}"]))'.format(KEY = i))

Corrected t test statistic (Complete Model x New Model):

iris 0.44
labor_part1 0.44
labor_part2 0.48
glass 0.31
breast_cancer 0.94
contact_lenses 0.42
credit_g_part1 0.36
credit_g_part2 0.69
diabetes 0.50
hypothyroid_part1 1.00
hypothyroid_part2 0.44
hypothyroid_part3 0.98
segment_challenge_part1 0.03
	Complete: 79.24 New: 76.36
segment_challenge_part2 1.00
soybean_part1 1.00
soybean_part2 1.00
soybean_part3 0.00
	Complete: 78.18307757885762 New: 71.519820971867
vote_part1 0.79
vote_part2 0.77
weather_nominal 0.12
abalone_classification 1.00
balance_scale 1.00
data_banknote_authentication 0.00
	Complete: 98.62937691738074 New: 88.33973341796255
DishonestInternetUsers 1.00
ecoli 1.00
hayes_roth_trainAndtest 0.00
	Complete: 84.125 New: 61.75
hcvdat0 0.19
mammographic_masses 0.66
monks_1_train 0.62
monks_2_train 0.96
wilt 1.00
seeds_dataset 0.49
Surveillance 0.04
	Complete: 46.0 New: 70.0
wine 0.23
winequality_red_classification 1.00
winequality_white_classification 0.18
ionosphere_part1

In [33]:
print('Proportion of runs where results of a simple logistic regression applied over a subset defined by Proposed model versus the complete dataset didn\'t have significative difference :')
print()
print(sum(table4['Difference']==False) / len(table4['Difference']))

Proportion of runs where results of a simple logistic regression applied over a subset defined by Proposed model versus the complete dataset didn't have significative difference :

0.8703703703703703
