# Determining if there is a significant difference between models 

## 1. Import Libraries

In [58]:
from sklearn import metrics
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from scipy.stats import chi2

## 2. Load Actual and Predicted data for each model and calculate confusion matrix for each

### 2a. SVMSmote

In [107]:
# Read the SVMSmote results and extract the actual (Acc) and predicted (Pred) labels.
svmsmote_df = pd.read_csv('svmsmote_standard_Model.csv')
y_act, y_pred = list(svmsmote_df['Acc']), list(svmsmote_df['Pred'])

# Class 1 is listed first, so it occupies the first row/column of the matrix
# and the first line of the classification report.
class_order = [1, 0]
conf_svmsmote = metrics.confusion_matrix(y_act, y_pred, labels=class_order)
print(conf_svmsmote)
print(metrics.classification_report(y_act, y_pred, labels=class_order))

# Show the matrix flattened row by row as the cell's output.
conf_svmsmote.flatten()

[[ 4 16]
 [11 69]]
              precision    recall  f1-score   support

           1       0.27      0.20      0.23        20
           0       0.81      0.86      0.84        80

    accuracy                           0.73       100
   macro avg       0.54      0.53      0.53       100
weighted avg       0.70      0.73      0.71       100



array([ 4, 16, 11, 69], dtype=int64)

### 2b. Randomsampler

In [108]:
# Read the random-sampler results and extract the actual (Acc) and predicted (pred) labels.
randomsampler_df = pd.read_csv('randomsampler_standard_Model.csv')
y_act, y_pred = list(randomsampler_df['Acc']), list(randomsampler_df['pred'])

# Class 1 is listed first, so it occupies the first row/column of the matrix
# and the first line of the classification report.
class_order = [1, 0]
conf_rand = metrics.confusion_matrix(y_act, y_pred, labels=class_order)
print(conf_rand)
print(metrics.classification_report(y_act, y_pred, labels=class_order))

[[ 5 15]
 [ 9 71]]
              precision    recall  f1-score   support

           1       0.36      0.25      0.29        20
           0       0.83      0.89      0.86        80

    accuracy                           0.76       100
   macro avg       0.59      0.57      0.57       100
weighted avg       0.73      0.76      0.74       100



### 2c. Smote

In [109]:
# Read the SMOTE results and extract the actual (Acc) and predicted (pred) labels.
smote_df = pd.read_csv('Smote_standard_Model.csv')
y_act, y_pred = list(smote_df['Acc']), list(smote_df['pred'])

# Class 1 is listed first, so it occupies the first row/column of the matrix
# and the first line of the classification report.
class_order = [1, 0]
conf_smote = metrics.confusion_matrix(y_act, y_pred, labels=class_order)
print(conf_smote)
print(metrics.classification_report(y_act, y_pred, labels=class_order))

[[ 4 16]
 [ 9 71]]
              precision    recall  f1-score   support

           1       0.31      0.20      0.24        20
           0       0.82      0.89      0.85        80

    accuracy                           0.75       100
   macro avg       0.56      0.54      0.55       100
weighted avg       0.71      0.75      0.73       100



### 2d. Adasyn

In [110]:
# Read the ADASYN results and extract the actual (Acc) and predicted (Pred) labels.
adasyn_df = pd.read_csv('adasyn_standard_Model.csv')
y_act, y_pred = list(adasyn_df['Acc']), list(adasyn_df['Pred'])

# Class 1 is listed first, so it occupies the first row/column of the matrix
# and the first line of the classification report.
class_order = [1, 0]
conf_adasyn = metrics.confusion_matrix(y_act, y_pred, labels=class_order)
print(conf_adasyn)
print(metrics.classification_report(y_act, y_pred, labels=class_order))



[[ 4 16]
 [ 9 71]]
              precision    recall  f1-score   support

           1       0.31      0.20      0.24        20
           0       0.82      0.89      0.85        80

    accuracy                           0.75       100
   macro avg       0.56      0.54      0.55       100
weighted avg       0.71      0.75      0.73       100



## 3. Convert confusion matrices above into a contingency matrix (stored as DataFrame)

In [97]:
# Pair every model label with its own confusion matrix so a row can never be
# built from the wrong model's counts.
confusion_by_model = {
    'svmsmote': conf_svmsmote,
    'random': conf_rand,
    'smote': conf_smote,
    'adasyn': conf_adasyn,  # must be the adasyn matrix, not a second copy of smote's
}

# The matrices were computed with labels=[1, 0], so flattening each one row by
# row yields its counts in the order TP, FN, FP, TN.
contingency = pd.DataFrame(
    np.array([conf.flatten() for conf in confusion_by_model.values()]),
    index=list(confusion_by_model),
    # TP = True +ve, FN = False -ve, FP = False +ve, TN = True -ve
    columns=['TP', 'FN', 'FP', 'TN'],
)

# Exclude TN (the last column): with the number of samples fixed, it is a
# function of TP, FN and FP.
contingency = contingency.iloc[:, :3]
contingency

Unnamed: 0,TP,FN,FP
svmsmote,4,16,11
random,5,15,9
smote,4,16,9
adasyn,4,16,9


In [111]:
# Optional: save the contingency table to Excel in case you want to store it
# and re-check the procedure there.
contingency.to_excel(excel_writer='cont1.xlsx')

## 4. Calculate Chi-Square parameters

In [99]:
# Run the chi-square test on the contingency table and unpack the statistic,
# p-value, degrees of freedom and expected frequencies.
stat, p, dof, expected = chi2_contingency(contingency)

# Echo each returned value on its own line, in the order they were returned.
for result_part in (stat, p, dof, expected):
    print(result_part)

0.4339755143710005
0.9985516783166436
6
[[ 4.46610169 16.55084746  9.98305085]
 [ 4.1779661  15.48305085  9.33898305]
 [ 4.1779661  15.48305085  9.33898305]
 [ 4.1779661  15.48305085  9.33898305]]


## 5. Hypothesis Testing

In [112]:
'''
Null Hypothesis (Ho): Performance  and type of model are independent
Alternative Hypothesis (H1): Performance and type of model are not independent
'''

# Significance level alpha = 0.05, i.e. a probability of 0.95 on the left side
# of the critical value.
alpha = 0.05
prob = 1 - alpha

# Approach 1: compare the test statistic X2 against the critical value (cv).
cv = chi2.ppf(prob, dof)
print(f'Critical Value = {cv}')
print('Reject Ho, dependent' if abs(stat) >= cv else 'Fail to reject Ho, independent')

# Approach 2: compare the p-value against the significance level.
print(f'Significance = {alpha}, p-value = {p}')
print('Reject Ho, dependent' if p < alpha else 'Fail to reject Ho, independent')

Critical Value = 12.591587243743977
Fail to reject Ho, independent
Significance = 0.05, p-value = 0.9985516783166436
Fail to reject Ho, independent


# Conclusion

#### The results mean that we fail to reject the null hypothesis: there is no statistically significant evidence that performance depends on the model used
#### i.e. the differences between the given models are not strong enough to impact the results
#### However:
#### This does not say whether the models are performing well or otherwise; it only means that the models have a similar level of performance