In [175]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import pandas as pd
data = load_wine()
df = pd.DataFrame(data=data.data,columns=data.feature_names)
df_targets = pd.DataFrame(data=data.target,columns=['target'])
df_wine = pd.concat([df,df_targets],axis=1)
df_wine

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2


In [176]:
#1. split
X_train,X_test,y_train,y_test = train_test_split(df,df_targets,test_size=0.3)
print(f'Shape X train {X_train.shape}')
print(f'Shape y train {y_train.shape}')
print(f'Shape X test {X_test.shape}')
print(f'Shape y test {y_test.shape}')


Shape X train (124, 13)
Shape y train (124, 1)
Shape X test (54, 13)
Shape y test (54, 1)


In [177]:
#2. decision tree for a single split
from sklearn.tree import DecisionTreeClassifier
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train,y_train)

In [178]:
#calculate 1-error ->accuracy 
predicted = decision_tree.predict(X_test)
correct_ones = (y_test['target']==predicted).sum()
total_test = y_test.shape[0]
mistakes = total_test-correct_ones
error = mistakes/total_test
accuracy = 1-error
print(f'Accuracy : {accuracy}')

Accuracy : 0.8888888888888888


The probability that on the $N=54$ trials there are e errors is:
$P\{x\leq e\}=\sum_{j=1}^{e}\binom{N}{j}p_0^{j}(1-p_0)^{N-j} $

Accept if $P< 1-\alpha$

In [179]:
import math
p0 = error
N = total_test
probability_confidence_interval = 0
alpha=0.05
for i in range(1,mistakes+1):
    probability_confidence_interval += math.comb(N,i)*(p0**i)*(accuracy**(N-i))

print(f'P of Confidende Interval {probability_confidence_interval}')

if probability_confidence_interval<1-alpha:
    print(f'Accuracy is {accuracy}, with 95% confidence')


P of Confidende Interval 0.6047596483541414
Accuracy is 0.8888888888888888, with 95% confidence


In [180]:
#2 boostrap
df_train = pd.concat([X_train,y_train],axis=1)
accuracies =[]
N_train = y_train.shape[0]
# x_t = []
for i in range(30):
    boost_train = df_train.sample(n=int(N_train*0.63),replace=True)
    boost_val = df_train[~df_train.index.isin(boost_train.index)]
    decision_tree = DecisionTreeClassifier()
    decision_tree.fit(boost_train[data.feature_names],boost_train[['target']])
    predicted = decision_tree.predict(boost_val[data.feature_names])
    correct_ones = (boost_val['target']==predicted).sum()
    # x_t.append(((boost_val['target']==predicted)*1).to_list())
    total_test = boost_val.shape[0]
    mistakes = total_test-correct_ones
    error = mistakes/total_test
    accuracy = 1-error
    accuracies.append(accuracy)
print(accuracies)

[0.8955223880597015, 0.8064516129032258, 0.8153846153846154, 0.9230769230769231, 0.9692307692307692, 0.9393939393939394, 0.8181818181818181, 0.8405797101449275, 0.9090909090909091, 0.9384615384615385, 0.8732394366197183, 0.782608695652174, 0.953125, 0.921875, 0.8939393939393939, 0.8382352941176471, 0.8888888888888888, 0.8615384615384616, 0.8472222222222222, 0.9411764705882353, 0.9076923076923077, 0.9090909090909091, 0.9402985074626866, 0.8857142857142857, 0.9117647058823529, 0.873015873015873, 0.8461538461538461, 0.9411764705882353, 0.921875, 0.875]


In [181]:
#t test
import numpy as np
import scipy.stats
t_critical_value = scipy.stats.t.ppf(0.025, 29)
m = sum(accuracies)/len(accuracies)
S = sum(np.array(accuracies-m)**2)/(len(accuracies)-1)
interval_left = m - t_critical_value*S/math.sqrt(30)
interval_right = m + t_critical_value*S/math.sqrt(30)
print('interval -> [{:.4f},{:.4f}]'.format(interval_left,interval_right))
print('Mean is in this interval with 95% confidence')

interval -> [0.8898,0.8881]
Mean is in this interval with 95% confidence


In [None]:
# 3. same t test but with RF
from sklearn.ensemble import RandomForestClassifier

df_train = pd.concat([X_train,y_train],axis=1)
accuracies =[]
N_train = y_train.shape[0]
for i in range(30):
    boost_train = df_train.sample(n=int(N_train*0.63),replace=True)
    boost_val = df_train[~df_train.index.isin(boost_train.index)]
    decision_tree = RandomForestClassifier()
    decision_tree.fit(boost_train[data.feature_names],boost_train[['target']])
    predicted = decision_tree.predict(boost_val[data.feature_names])
    correct_ones = (boost_val['target']==predicted).sum()
    total_test = boost_val.shape[0]
    mistakes = total_test-correct_ones
    error = mistakes/total_test
    accuracy = 1-error
    accuracies.append(accuracy)


In [183]:
print(accuracies)

[0.9552238805970149, 0.9705882352941176, 0.9402985074626866, 0.9848484848484849, 0.9836065573770492, 0.9375, 0.9857142857142858, 0.9841269841269842, 0.9696969696969697, 1.0, 0.953125, 0.9545454545454546, 0.984375, 0.9130434782608696, 0.9411764705882353, 0.984375, 1.0, 0.9841269841269842, 0.9696969696969697, 1.0, 0.9393939393939394, 0.9857142857142858, 0.9076923076923077, 0.9722222222222222, 0.9855072463768116, 0.9855072463768116, 0.9402985074626866, 0.9710144927536232, 0.9545454545454546, 0.9696969696969697]


In [184]:
#t test
import numpy as np
import scipy.stats
t_critical_value = scipy.stats.t.ppf(0.025, 29)
m = sum(accuracies)/len(accuracies)
S = sum(np.array(accuracies-m)**2)/(len(accuracies)-1)
interval_left = m - t_critical_value*S/math.sqrt(30)
interval_right = m + t_critical_value*S/math.sqrt(30)
print('interval -> [{:.4f},{:.4f}]'.format(interval_left,interval_right))
print('Mean is in this interval with 95% confidence')

interval -> [0.9671,0.9667]
Mean is in this interval with 95% confidence


In [220]:
#4. McNemar's test only one split needed

#classifier 1
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train,y_train)

predicted_tree = decision_tree.predict(X_test)

# classifier 2
decision_trees = RandomForestClassifier()
decision_trees.fit(X_train,y_train.to_numpy().ravel())

predicted_trees = decision_trees.predict(X_test)

e_11 = ((y_test['target']==predicted_tree)&(y_test['target']==predicted_trees )).sum()
e_00 = ((y_test['target']!=predicted_tree)&(y_test['target']!=predicted_trees )).sum()
e_01 = ((y_test['target']!=predicted_tree)&(y_test['target']==predicted_trees )).sum()
e_10 = ((y_test['target']==predicted_tree)&(y_test['target']!=predicted_trees )).sum()
value = (abs(e_01-e_10)-1)**2/(e_01+e_10)
xi2 = scipy.stats.chi2.ppf(0.05, df=1)
print(f'value : {value}')
print(f'chi square critical value: {xi2}')
if value<xi2:
    print('Hypothesis accepted')
else:
    print('Hypothesis rejected')  
    print('The Error is significantly different in both classifiers') 

value : 6.125
chi square critical value: 0.003932140000019522
Hypothesis rejected
The Error is significantly different in both classifiers
