## Expectation Reflection

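Test the Expectation Reflection (ER) package: fit an ER classifier on the kidney data set, tune its regularization strength with a cross-validated grid search, and report test-set metrics.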

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score,precision_score,recall_score,roc_curve,auc

from sklearn.preprocessing import MinMaxScaler

from expectation_reflection import classification as ER
from expectation_reflection import function

#from ER import classification as ER
#from ER import classification_self as ER
#from ER import function

In [2]:
# Seed NumPy's global random number generator so repeated runs draw the
# same random numbers.
np.random.seed(seed=1)

In [3]:
# Load the kidney data: the last column holds the class label and all
# preceding columns are the features.
Xy = np.loadtxt('kidney.dat')
X = Xy[:, :-1]
# 2020.07.15: relabel the classes from {-1,+1} to {0,1}.
y = (Xy[:, -1] + 1) / 2.0

# Balance the classes with the ER helper, then show the per-class counts.
# NOTE(review): how make_data_balance resamples is not visible here - confirm.
X, y = function.make_data_balance(X, y)
class_counts = np.unique(y, return_counts=True)
print(class_counts)

# Shuffle features and labels together, then hold out half for testing.
X, y = shuffle(X, y, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=1
)

# Fit the min-max scaling on the training half only and reuse it on the
# test half, so the scaler is never fitted on test data.
sc = MinMaxScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

(array([0., 1.]), array([149, 149]))


In [4]:
# Baseline ER classifier with a fixed regularization strength.
model = ER.model(
    max_iter=100,
    regu=0.01,
    random_state=1,
)

In [5]:
# Train the baseline model on the scaled training half; the cell output is
# the repr of whatever fit() returns.
model.fit(X_train,y_train)

<expectation_reflection.classification.model at 0x7f9b6ae961d0>

In [6]:
# Predict labels for the held-out test half with the baseline model.
y_pred = model.predict(X_test)

In [7]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

In [8]:
# Show the baseline test accuracy.
print(acc)

0.9731543624161074


In [9]:
from sklearn.model_selection import GridSearchCV

In [10]:
# Fresh ER classifier for the grid search; `regu` is left unset here
# because the search supplies it.
model = ER.model(
    max_iter=100,
    random_state=1,
)

In [11]:
# Candidate regularization strengths to try in the grid search.
regu = [0.001, 0.01, 0.1, 0.5]

# Parameter grid keyed by the estimator argument name.
hyper_parameters = {'regu': regu}

In [12]:
# Display the parameter grid (bare expression so the notebook renders it).
hyper_parameters

{'regu': [0.001, 0.01, 0.1, 0.5]}

In [13]:
# Create grid search using cross validation (cv=4) over `hyper_parameters`.
# `iid` is deliberately not passed: scikit-learn deprecated it in 0.22
# (where 'deprecated' was already the default, so omitting it changes
# nothing) and removed it in 0.24, after which passing it raises a TypeError.
clf = GridSearchCV(model, hyper_parameters, cv=4)

In [14]:
# Fit the grid search on the scaled training half. Later cells read
# best_params_ and best_estimator_ from the returned object.
best_model = clf.fit(X_train, y_train)

In [15]:
 # Report the hyper-parameter setting selected by the grid search.
print('best_hyper_parameters:',best_model.best_params_)

best_hyper_parameters: {'regu': 0.01}


In [16]:
# Performance: score the test half with the estimator the grid search chose.
best_er = best_model.best_estimator_
y_pred = best_er.predict(X_test)        # predicted labels
p_pred = best_er.predict_proba(X_test)  # predicted probabilities
# print('predicted probability:', p_pred)

In [17]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,\
roc_auc_score,roc_curve,auc

In [18]:
# Label-based metrics compare the predicted labels with the true labels.
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1score = f1_score(y_test, y_pred)

# ROC AUC is computed from p_pred (the predict_proba output), not from y_pred.
# NOTE(review): assumes p_pred is in a form roc_auc_score accepts - confirm.
roc_auc = roc_auc_score(y_test, p_pred)

# Print the metrics in a fixed order, one per line.
for metric_label, metric_value in (
    ('accuracy:', acc),
    ('precision:', precision),
    ('recall:', recall),
    ('f1score:', f1score),
    ('roc auc:', roc_auc),
):
    print(metric_label, metric_value)

accuracy: 0.9731543624161074
precision: 0.9523809523809523
recall: 1.0
f1score: 0.975609756097561
roc auc: 1.0


In [19]:
# Cross-check the ROC AUC by building the ROC curve explicitly (keeping
# every threshold) and passing its points to auc().
fp, tp, thresholds = roc_curve(
    y_true=y_test,
    y_score=p_pred,
    drop_intermediate=False,
)
roc_auc = auc(x=fp, y=tp)
print('roc auc:', roc_auc)

roc auc: 1.0


In [20]:
# Show the fitted parameters of the estimator selected by the grid search.
selected_er = best_model.best_estimator_
print('intercept:', selected_er.intercept_)
print('interaction weights:', selected_er.coef_)

intercept: -4.650109026584381
interaction weights: [-0.00815286 -0.17548152  0.010372   -2.6403905  -2.62953412  2.22325146
  3.5263851   2.95018099 -0.22568444 -0.06151069 -0.55264658 -0.40159625
  0.          0.20910532  0.01741422  0.03186429 -0.12614174 -0.13897804
  0.00754123  0.51000303 -0.34501901 -0.30999637 -0.48745579 -0.08638279
 -0.4446416   2.06650661  1.17528435 -2.67367338 -2.05940181 -0.33405106
 -1.14161845 -0.61911912 -0.80590147]
