## Expectation Reflection

Test the Expectation Reflection (ER) package.

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score,precision_score,recall_score,roc_curve,auc
from sklearn.preprocessing import MinMaxScaler

# pypi:
#from expectation_reflection import classification as ER
#from expectation_reflection import function

# local:
from expectation_reflection_local import classification as ER
from expectation_reflection_local import function

In [2]:
# Seed NumPy's global random number generator so that anything drawing from
# np.random gives the same results on every run.
np.random.seed(1)

In [3]:
# Load the kidney data set: every column except the last is a feature,
# the last column is the class label.
Xy = np.loadtxt('kidney.dat') 
X = Xy[:,:-1]
# 2020.07.15: convert y from {-1,+1} to {0,1}:
y = (Xy[:,-1]+1)/2. 

# NOTE(review): make_data_balance is a project helper that is not shown here;
# judging by the class counts printed right after it, it presumably equalises
# the number of samples per class -- confirm against its implementation.
X,y = function.make_data_balance(X,y)
print(np.unique(y,return_counts=True))

# Shuffle features and labels together, with a fixed seed.
X, y = shuffle(X, y, random_state=1)

# Hold out half of the samples as the test split.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.5,random_state = 1)

# Fit the min-max scaling on the training split only and reuse it unchanged on
# the test split, so no test-set statistics enter the preprocessing.
sc = MinMaxScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

(array([0., 1.]), array([149, 149]))


## Hyperparameter Optimization

In [4]:
from sklearn.model_selection import GridSearchCV

In [5]:
# Expectation Reflection classifier used as the base estimator for the grid
# search; its `regu` parameter is not set here because the search tunes it.
# NOTE(review): max_iter presumably caps the number of fitting iterations -- confirm.
model = ER.model(max_iter=200,random_state=2)

In [6]:
# Candidate regularisation strengths to try.
regu = [1e-3, 1e-2, 1e-1, 0.5]

# Parameter grid in {parameter name: candidate values} form.
hyper_parameters = {'regu': regu}

In [7]:
hyper_parameters

{'regu': [0.001, 0.01, 0.1, 0.5]}

In [8]:
# Create grid search using 4-fold cross validation, running on all available cores.
# The `iid` argument is intentionally not passed: it was deprecated in
# scikit-learn 0.22 and removed in 0.24, so supplying it raises TypeError on
# current releases. On 0.22/0.23 'deprecated' was already the default value,
# so omitting it does not change the result there.
clf = GridSearchCV(model, hyper_parameters, cv=4, n_jobs=-1)

In [9]:
# Fit grid search
# GridSearchCV.fit returns the fitted search object itself, so best_model
# refers to the same object as clf.
best_model = clf.fit(X_train, y_train)

In [10]:
# Best hyperparameters selected by the grid search:
print('best_hyper_parameters:',best_model.best_params_)

best_hyper_parameters: {'regu': 0.01}


In [11]:
# performance:
# Predict on the held-out test split with the best estimator found by the
# search, and report the fraction of correctly classified samples.
y_test_pred = best_model.best_estimator_.predict(X_test)
acc = accuracy_score(y_test,y_test_pred)
print(acc)

0.9731543624161074


In [12]:
y

array([1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1.,
       0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0.,
       1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1.,
       1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0.,
       0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1.,
       0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1.,
       0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0.,
       1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1., 0., 1., 1.,
       1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1.,
       1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 1.,
       0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0.,
       0., 0., 1., 0., 0.

In [22]:
def convert_labels(y):
    """Encode arbitrary class labels as 0.0 / 1.0.

    The smallest label (in ``np.unique`` sort order) is mapped to 0 and every
    other label is mapped to 1, so ``{'no', 'yes'}`` becomes ``{0, 1}`` and
    ``{-1, +1}`` becomes ``{0, 1}``.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Labels of any comparable type (strings, ints, floats, ...). Plain
        Python sequences are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray of shape (n_samples,), dtype float
        The encoded labels. An empty input yields an empty array.
    """
    # Accept lists/tuples too: elementwise `==` and `.shape` need an ndarray.
    y = np.asarray(y)
    y_new = np.ones(y.shape[0])

    # Nothing to encode; np.unique(y)[0] would raise IndexError on empty input.
    if y.size == 0:
        return y_new

    # np.unique returns the labels sorted, so index 0 is the smallest label.
    y_new[y == np.unique(y)[0]] = 0

    return y_new

In [23]:
y = np.array(['yes','no','yes','no','no'])

In [24]:
y_new = convert_labels(y)
y_new

array([1., 0., 1., 0., 0.])

In [25]:
from sklearn.preprocessing import OneHotEncoder

In [40]:
# One-hot encode the labels, then invert the encoding to check that the round
# trip reproduces the original labels.
# The encoder is left at its default (sparse) output and the result is
# densified with .toarray(): the `sparse=` keyword was renamed to
# `sparse_output=` in scikit-learn 1.2 and removed in 1.4, so neither spelling
# works on every release, whereas this form does.
onehot_encoder = OneHotEncoder(categories='auto')
y_onehot = onehot_encoder.fit_transform(y.reshape(-1,1)).toarray()  # one row per sample, a single 1 per row

y_pred = onehot_encoder.inverse_transform(y_onehot)
y_pred = y_pred.reshape(-1,)
print(y_pred)

# Accuracy of 1.0 means every label was recovered exactly.
acc = accuracy_score(y,y_pred)
print(acc)

['yes' 'no' 'yes' 'no' 'no']
1.0


In [41]:
y = np.array([1, 2, 3, 1, 2])
y

array([1, 2, 3, 1, 2])

In [42]:
# Same one-hot round trip as for the string labels, this time with three
# integer classes.
# The encoder is left at its default (sparse) output and the result is
# densified with .toarray(): the `sparse=` keyword was renamed to
# `sparse_output=` in scikit-learn 1.2 and removed in 1.4, so neither spelling
# works on every release, whereas this form does.
onehot_encoder = OneHotEncoder(categories='auto')
y_onehot = onehot_encoder.fit_transform(y.reshape(-1,1)).toarray()  # one row per sample, a single 1 per row

y_pred = onehot_encoder.inverse_transform(y_onehot)
y_pred = y_pred.reshape(-1,)
print(y_pred)

# Accuracy of 1.0 means every label was recovered exactly.
acc = accuracy_score(y,y_pred)
print(acc)

[1 2 3 1 2]
1.0


In [59]:
y = np.array(['yes','no','yes','no','no'])

In [60]:
# Binary-encode the labels inline: the first label in sorted order maps to 0,
# every other label maps to 1.
y_uniques = np.unique(y)
n_labels = y_uniques.size
n_samples = len(y)
y_new = np.where(y == y_uniques[0], 0.0, 1.0)

In [61]:
y_new

array([1., 0., 1., 0., 0.])

In [64]:
# Start the decoding from an array filled entirely with the second unique
# label; np.full takes its dtype from that fill value.
y_recovered = np.full(n_samples,y_uniques[1])
y_recovered

array(['yes', 'yes', 'yes', 'yes', 'yes'], dtype='<U3')

In [65]:
# Put the first unique label back wherever the encoded value is 0.
y_recovered[y_new== 0] = y_uniques[0]
# Entries encoded as 1 already hold y_uniques[1] from the np.full
# initialisation, so the assignment below is not needed:
#y_recovered[y_new== 1] = y_uniques[1]
y_recovered

array(['yes', 'no', 'yes', 'no', 'no'], dtype='<U3')

In [66]:
y = np.array(['yes','no','yes','no','no'])

In [67]:
# Build an encoder that returns dense arrays. The keyword for this is
# `sparse_output` from scikit-learn 1.2 onwards (`sparse` was removed in 1.4)
# and `sparse` before that, so try the new name first and fall back.
# The encoder itself is configured (rather than densifying one result) because
# later cells call this same encoder again and display its output.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False, categories='auto')
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False, categories='auto')
y_onehot = onehot_encoder.fit_transform(y.reshape(-1,1))  # one row per sample, a single 1 per row

In [68]:
y_onehot

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.]])

In [72]:
# Collapse each one-hot row to the column index of its largest entry,
# giving one integer class index per sample.
y1 = np.argmax(y_onehot,axis=1)
y1

array([1, 0, 1, 0, 0])

In [71]:
# NOTE: fit_transform refits the same encoder object on the integer indices,
# replacing the categories it learned from the original string labels. A later
# inverse_transform therefore returns these integers, not the strings.
y1_onehot = onehot_encoder.fit_transform(y1.reshape(-1,1))  # y[t,:] = {0,1,...,0}
y1_onehot

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.]])

In [75]:
# Decode the one-hot rows back to labels and flatten the result to 1-D.
y1_inv = onehot_encoder.inverse_transform(y1_onehot).reshape(-1,)

In [76]:
y1_inv

array([1, 0, 1, 0, 0])

In [None]:
y = 