# <center> Evaluate ML Algorithms With Resampling Techniques </center>

In [1]:
# Suppress all warnings for the rest of the session.
# NOTE(review): this filter ignores every warning category, including any
# raised by scikit-learn while fitting — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')
# CSV reader
from pandas import read_csv
# Hold-out splitting helper
from sklearn.model_selection import train_test_split
# Classifier evaluated in every section of this notebook
from sklearn.linear_model import LogisticRegression

# Dataset path, relative to the current working directory.
filename = './datasets/pima-indians-diabetes.data.csv'
# Column labels are supplied explicitly to read_csv; 'class' is the last one.
col_names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
data = read_csv(filename, names=col_names)

## I. Train and Test Split

In [2]:
# Pull the raw values out of the DataFrame
array = data.values
# Inputs: every column except the last one
X = array[:, :-1]
# Output: the last column (the class label)
Y = array[:, -1]

# Hold-out configuration
test_proportion = 0.30
seed = 7  # Fixes the randomness of the data split
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=test_proportion, random_state=seed
)

# Fit the classifier on the training portion only
model = LogisticRegression(solver='newton-cg')
model.fit(X_train, Y_train)
print("Training completed successfully!")

# Score reported by the model on the held-out portion
result = model.score(X_test, Y_test)
print('Accuracy : %.3f%%' % (result*100))

# Recompute the accuracy by hand from the individual predictions
print('Manual evaluation\n-----------------')
Y_predicted = model.predict(X_test)
Y_proba = model.predict_proba(X_test)

# One line per test sample: actual label, predicted label, class probabilities
for reel, predict, proba in zip(Y_test, Y_predicted, Y_proba):
    print(f'Reel: {reel} | Predicted: {predict} | > Probaility: {proba}')

# Confusion-matrix counts via boolean masks (1.0 = positive, 0.0 = negative)
actual_pos, actual_neg = (Y_test == 1.0), (Y_test == 0.0)
pred_pos, pred_neg = (Y_predicted == 1.0), (Y_predicted == 0.0)
TP = int((actual_pos & pred_pos).sum())
FN = int((actual_pos & pred_neg).sum())
TN = int((actual_neg & pred_neg).sum())
FP = int((actual_neg & pred_pos).sum())

# Accuracy = correct predictions / all predictions, as a percentage
Acc = ((TP+TN)/(TP+TN+FP+FN))*100
print(f'Manual Accuracy: {Acc}')

Training completed successfully!
Accuracy : 77.922%
Manual evaluation
-----------------
Reel: 0.0 | Predicted: 0.0 | > Probaility: [0.93286382 0.06713618]
Reel: 1.0 | Predicted: 1.0 | > Probaility: [0.1224743 0.8775257]
Reel: 1.0 | Predicted: 1.0 | > Probaility: [0.24880202 0.75119798]
Reel: 0.0 | Predicted: 0.0 | > Probaility: [0.85500372 0.14499628]
Reel: 1.0 | Predicted: 0.0 | > Probaility: [0.62185733 0.37814267]
Reel: 1.0 | Predicted: 0.0 | > Probaility: [0.50296573 0.49703427]
Reel: 0.0 | Predicted: 0.0 | > Probaility: [0.92790713 0.07209287]
Reel: 1.0 | Predicted: 0.0 | > Probaility: [0.72737007 0.27262993]
Reel: 0.0 | Predicted: 1.0 | > Probaility: [0.0594588 0.9405412]
Reel: 0.0 | Predicted: 0.0 | > Probaility: [0.86946249 0.13053751]
Reel: 1.0 | Predicted: 1.0 | > Probaility: [0.16048137 0.83951863]
Reel: 0.0 | Predicted: 0.0 | > Probaility: [0.89622661 0.10377339]
Reel: 0.0 | Predicted: 1.0 | > Probaility: [0.28553362 0.71446638]
Reel: 1.0 | Predicted: 1.0 | > Probaility: [0

## II. K-fold Cross Validation

In [10]:
from sklearn.model_selection import KFold, cross_val_score

# K-fold settings: number of folds and the seed that drives the shuffle
num_fold = 17
seed = 10
kfold = KFold(
    n_splits=num_fold,
    shuffle=True,
    random_state=seed,
)

# Evaluate the classifier once per fold; `results` holds one score per fold
model = LogisticRegression(solver='newton-cg')
results = cross_val_score(model, X, Y, cv=kfold)

# Mean and standard deviation of the fold scores, as percentages
mean_pct = results.mean()*100.0
std_pct = results.std()*100.0
print("Accuracy: %.3f%% (%.3f%%)" % (mean_pct, std_pct))

Accuracy: 77.477% (6.377%)


## III. Leave-One-Out Cross Validation

In [11]:
from sklearn.model_selection import LeaveOneOut, cross_val_score

# Leave-one-out splitter: each split holds out a single sample for testing
loocv = LeaveOneOut()

# NOTE(review): the default solver is used here, whereas sections I and II
# pass solver='newton-cg' — confirm the difference is intended.
model = LogisticRegression()
results = cross_val_score(model, X, Y, cv=loocv)

# Mean and standard deviation of the per-split scores, as percentages
mean_pct = results.mean()*100.0
std_pct = results.std()*100.0
print("Accuracy: %.3f%% (%.3f%%)" % (mean_pct, std_pct))

Accuracy: 77.865% (41.516%)


## IV. Repeated Random Train-test Split

In [16]:
from sklearn.model_selection import ShuffleSplit, cross_val_score

# Repeated random split settings
test_proportion = 0.30
seed = 7
# NOTE(review): 100000 splits means 100000 model fits — expect a long run time.
num_fold = 100000
kflod = ShuffleSplit(
    n_splits=num_fold,
    test_size=test_proportion,
    random_state=seed,
)

# NOTE(review): the default solver is used here, whereas sections I and II
# pass solver='newton-cg' — confirm the difference is intended.
model = LogisticRegression()
results = cross_val_score(model, X, Y, cv=kflod)

# Mean and standard deviation of the per-split scores, as percentages
mean_pct = results.mean()*100.0
std_pct = results.std()*100.0
print("Accuracy: %.3f%% (%.3f%%)" % (mean_pct, std_pct))

Accuracy: 76.897% (2.435%)
