In [1]:
import os
import shutil
import math
import pandas as pd
import sqlite3
import pymongo
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from sklearn import cross_validation, preprocessing, decomposition
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score, fbeta_score, recall_score, precision_score, confusion_matrix
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE



In [2]:
receipts = [
    'MED_CM_TBL_2016',
    'MED_CO_TBL_2016',
    'MED_GR_TBL_2016',
    'MED_HOKO_TBL_2016',
    'MED_IY_TBL_2016',
    'MED_SI_TBL_2016',
    'MED_SJ_TBL_2016',
    'MED_SY_TBL_2016',
    'MED_TO_TBL_2016',
    'PHA_CM_TBL_2016',
    'PHA_CO_TBL_2016',
    'PHA_HOKO_TBL_2016',
    'PHA_IY_TBL_2016',
    'PHA_TO_TBL_2016']

In [3]:
client = pymongo.MongoClient('localhost', 27017)
db = client.kikin

In [4]:
dbname = 'kikin.sqlite3'
con = sqlite3.connect(dbname)
c = con.cursor()
dfs = {}
for receipt in receipts:
    q = 'select * from {}'.format(receipt)
    df = pd.io.sql.read_sql(q,con)
    dfs[receipt] = df
c.close()

### 傷病、診療、医薬から説明変数を取り出す

In [5]:
df = dfs['MED_SY_TBL_2016']
sy = df['shobyo_code'].drop_duplicates()
n_sy = df['shobyo_code'].drop_duplicates().count()
n_sy

2930

In [6]:
df = dfs['MED_SI_TBL_2016']
si = df['s_tekiyo_code'].drop_duplicates()
n_si = df['s_tekiyo_code'].drop_duplicates().count()
n_si

1371

In [7]:
df = dfs['MED_IY_TBL_2016']
iy = df['s_tekiyo_code'].drop_duplicates()
n_iy = df['s_tekiyo_code'].drop_duplicates().count()
n_iy

2908

In [8]:
dd = pd.concat([sy,iy,si]).dropna()
dd.count()

7209

In [9]:
x_values = dd.values#.tolist()
x_size = x_values.size
x_dic = { v:k for (k,v) in enumerate(x_values)}

### 素性をつくる

In [10]:
xs = []
ys = []
doc = db.med.find()
for row in doc:
    x = np.zeros(x_size)
    
    # 該当する傷病があれば、その傷病に対応するindexの値を1, なければ0
    if 'MED_SY_TBL_2016' in row:
        for d in row['MED_SY_TBL_2016']:
            idx = x_dic[d['shobyo_code']]
            x[idx] = 1
            
    # 医薬品に対応するindexに点数を挿入
    if 'MED_IY_TBL_2016' in row:
        for d in row['MED_IY_TBL_2016']:
            s_code = d['s_tekiyo_code']
            k_code = d['k_tekiyo_code']
            if math.isnan(s_code) and math.isnan(k_code):
                continue
            
            code = s_code if not math.isnan(s_code) else k_code
            idx = x_dic[code]
            s = d['s_tensu']
            k = d['k_tensu']
            x[idx] =\
                s if not math.isnan(s) else\
                k if not math.isnan(k) else\
                0
    
    # 診療に対応するindexに点数を挿入
    if 'MED_SI_TBL_2016' in row:
        for d in row['MED_SI_TBL_2016']:
            s_code = d['s_tekiyo_code']
            k_code = d['k_tekiyo_code']
            if math.isnan(s_code) and math.isnan(k_code):
                continue
            
            code = s_code if not math.isnan(s_code) else k_code
            idx = x_dic[code]
            s = d['s_tensu']
            k = d['k_tensu']
            x[idx] =\
                s if not math.isnan(s) else\
                k if not math.isnan(k) else\
                0
    

    # 保険者レコードか公費レコード、どちらかに請求点数と決定点数に差があれば異常として0, 正常な値に1
    diff =  sum([d['diff_tensu'] for d in row['MED_HOKO_TBL_2016']])
    y = 1 if diff else 0
    
    xs.append(x)
    ys.append(y)
x_data = np.array(xs)
y_data = np.array(ys)

In [11]:
y_data.sum()

138

In [15]:
def recall(y_true, y_pred):
    from keras import backend as K
    # Calculates the recall
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision(y_true, y_pred):
    from keras import backend as K
    # Calculates the precision
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1(y_true, y_pred):
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * p * r / (p+r)

### PCAのち、MLP

In [None]:
sc= preprocessing.StandardScaler()
sc.fit(x_data)
X = sc.transform(x_data)
pca = decomposition.PCA(n_components=100)
X_transformed = pca.fit_transform(X)

from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y_data, test_size=0.3, random_state=666)

# resampling
rus = RandomUnderSampler(random_state=0)
ros = RandomOverSampler(random_state=0)
smt = SMOTE(random_state=0)
X_train_under, y_train_under = rus.fit_sample(X_train, y_train)
X_train_over, y_train_over = ros.fit_sample(X_train, y_train)
X_train_smote, y_train_smote = smt.fit_sample(X_train, y_train)

In [14]:
from keras.utils import to_categorical
Y_train = to_categorical(y_train)
Y_train_under = to_categorical(y_train_under)
Y_train_over = to_categorical(y_train_over)
Y_train_smote = to_categorical(y_train_smote)
Y_test = to_categorical(y_test)

Using TensorFlow backend.


In [16]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(200, input_dim=n_in, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])

In [23]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9ead999e80>

In [24]:
f1_score(y_test, np.argmax(model.predict(X_test),axis=1))

0.98007153806847203

In [25]:
recall_score(y_test, np.argmax(model.predict(X_test),axis=1))

0.97327469553450607

In [36]:
model.predict(X_test)

array([[  0.00000000e+00,   1.00000000e+00],
       [  5.79111893e-21,   1.00000000e+00],
       [  0.00000000e+00,   1.00000000e+00],
       ..., 
       [  2.06120245e-30,   1.00000000e+00],
       [  0.00000000e+00,   1.00000000e+00],
       [  0.00000000e+00,   1.00000000e+00]], dtype=float32)

In [35]:
confusion_matrix(y_test, np.argmax(,axis=1))

array([[   0,   44],
       [   0, 2956]])

### PCAなしでMLP

In [12]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=666)

# resampling
rus = RandomUnderSampler(random_state=0)
ros = RandomOverSampler(random_state=0)
smt = SMOTE(random_state=0)
X_train_under, y_train_under = rus.fit_sample(X_train, y_train)
X_train_over, y_train_over = ros.fit_sample(X_train, y_train)
X_train_smote, y_train_smote = smt.fit_sample(X_train, y_train)

In [13]:
from keras.utils import to_categorical
Y_train = to_categorical(y_train)
Y_train_under = to_categorical(y_train_under)
Y_train_over = to_categorical(y_train_over)
Y_train_smote = to_categorical(y_train_smote)
Y_test = to_categorical(y_test)

Using TensorFlow backend.


In [22]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1200, input_dim=n_in, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=6,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f18c0f1aac8>

In [23]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.308943089431
recall: 0.431818181818


(array([[2896,   60],
        [  25,   19]]), 2896, 60, 25, 19)

In [47]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(300, input_dim=n_in, activation='elu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.0338983050847
recall: 0.0227272727273


(array([[2942,   14],
        [  43,    1]]), 2942, 14, 43, 1)

In [50]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(300, input_dim=n_in, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.149892933619
recall: 0.795454545455


(array([[2568,  388],
        [   9,   35]]), 2568, 388, 9, 35)

In [55]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.304093567251
recall: 0.590909090909


(array([[2855,  101],
        [  18,   26]]), 2855, 101, 18, 26)

In [57]:
model.save('f0.3.elu.adam.model.h5')
model.save_weights('f0.3.elu.adam.weights.h5')

In [63]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(300, input_dim=n_in, activation='elu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.134020618557
recall: 0.886363636364


(array([[2457,  499],
        [   5,   39]]), 2457, 499, 5, 39)

In [64]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.130952380952
recall: 0.75


(array([[2529,  427],
        [  11,   33]]), 2529, 427, 11, 33)

In [96]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(64, input_dim=n_in, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.315789473684
recall: 0.409090909091


(array([[2904,   52],
        [  26,   18]]), 2904, 52, 26, 18)

In [99]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(64, input_dim=n_in, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.338709677419
recall: 0.477272727273


(array([[2897,   59],
        [  23,   21]]), 2897, 59, 23, 21)

In [118]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import Input, concatenate

from keras.models import Sequential, Model
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

inputs = Input(shape=(n_in,))
x = Dense(64, input_dim=n_in, activation='sigmoid')(inputs)
x = Dense(64, activation='sigmoid')(concatenate([x,inputs]))
x = Dense(2, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=x)
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.321167883212
recall: 0.5


(array([[2885,   71],
        [  22,   22]]), 2885, 71, 22, 22)

In [120]:
## modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import Input, concatenate

from keras.models import Sequential, Model
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

inputs = Input(shape=(n_in,))
sig = Dense(64, input_dim=n_in, activation='sigmoid')(inputs)
relu = Dense(64, input_dim=n_in, activation='relu')(inputs)
elu = Dense(64, input_dim=n_in, activation='elu')(inputs)
x = Dense(64, activation='sigmoid')(concatenate([sig,relu,elu]))
x = Dense(2, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=x)
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.293103448276
recall: 0.386363636364


(array([[2901,   55],
        [  27,   17]]), 2901, 55, 27, 17)

In [121]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.350877192982
recall: 0.454545454545


(array([[2906,   50],
        [  24,   20]]), 2906, 50, 24, 20)

In [122]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.323232323232
recall: 0.363636363636


(array([[2917,   39],
        [  28,   16]]), 2917, 39, 28, 16)

In [92]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(64, input_dim=n_in, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.309090909091
recall: 0.386363636364


(array([[2907,   49],
        [  27,   17]]), 2907, 49, 27, 17)

In [94]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.291262135922
recall: 0.340909090909


(array([[2912,   44],
        [  29,   15]]), 2912, 44, 29, 15)

In [34]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(100, input_dim=n_in, activation='sigmoid'))
model.add(Dropout(0.1))
model.add(Dense(100, input_dim=n_in))
model.add(Dropout(0.1))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.225
recall: 0.613636363636


(array([[2787,  169],
        [  17,   27]]), 2787, 169, 17, 27)

In [28]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(200, input_dim=n_in, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.219047619048
recall: 0.522727272727


(array([[2813,  143],
        [  21,   23]]), 2813, 143, 21, 23)

In [29]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(200, input_dim=n_in, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer=SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)
print()
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3

f1: 0.165374677003
recall: 0.727272727273


(array([[2645,  311],
        [  12,   32]]), 2645, 311, 12, 32)

In [42]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1200, input_dim=n_in, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])

In [43]:
model.fit(
    X_train_over, Y_train_over,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9e9c7f8c50>

In [44]:
f1_score(y_test, np.argmax(model.predict(X_test),axis=1))

0.20666666666666669

In [47]:
recall_score(y_test, np.argmax(model.predict(X_test),axis=1))

0.70454545454545459

In [45]:
confusion_matrix(y_test, np.argmax(model.predict(X_test),axis=1))

array([[2731,  225],
       [  13,   31]])

In [56]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1000, input_dim=n_in, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])

In [57]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9e78af8e48>

In [59]:
confusion_matrix(y_test, np.argmax(model.predict(X_test),axis=1))

array([[2722,  234],
       [  13,   31]])

In [86]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='relu'))
model.add(Dense(256, input_dim=n_in, activation='relu'))
model.add(Dense(64, input_dim=n_in, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])

In [87]:
model.fit(
    X_train_smote, Y_train_smote,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9e4f5e1a90>

In [88]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

(array([[2707,  249],
        [   7,   37]]), 2707, 249, 7, 37)

In [43]:

tp = 37
fn = 7
fp = 249
tn = 2707
r= tp/(tp+fn)
p = tp/(tp+fp)
f = 2*r*p/(r+p)
f, r

(0.22424242424242424, 0.8409090909090909)

In [90]:
A = model.predict(X_test)*np.array([1,1000])
recall_score(y_test, A.argmax(axis=1))

0.84090909090909094

In [95]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='elu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9e4c0c6f28>

In [96]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

(array([[2588,  368],
        [   5,   39]]), 2588, 368, 5, 39)

In [98]:
f1_score(y_test, A.argmax(axis=1))

0.17294900221729492

In [97]:
recall_score(y_test, A.argmax(axis=1))

0.88636363636363635

In [168]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='elu'))
model.add(Dropout(0.2))
#model.add(Dense(64, input_dim=n_in, activation='elu'))
#model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9bde287f60>

In [115]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.195718654434
recall: 0.727272727273


(array([[2705,  251],
        [  12,   32]]), 2705, 251, 12, 32)

In [138]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='elu'))
#model.add(Dropout(0.2))
#model.add(Dense(512, input_dim=n_in, activation='elu'))
#model.add(Dropout(0.2))
model.add(Dense(256, input_dim=n_in, activation='elu'))
#model.add(Dropout(0.2))
model.add(Dense(64, input_dim=n_in, activation='elu'))
#model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_smote, Y_train_smote,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9c97437828>

In [139]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.190709046455
recall: 0.886363636364


(array([[2630,  326],
        [   5,   39]]), 2630, 326, 5, 39)

In [41]:

tp = 39
fn = 5
fp = 326
r= tp/(tp+fn)
p = tp/(tp+fp)
f = 2*r*p/(r+p)
f, r

(0.19070904645476772, 0.8863636363636364)

In [154]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='elu'))
model.add(Dense(256, input_dim=n_in, activation='elu'))
model.add(Dense(64, input_dim=n_in, activation='elu'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_over, Y_train_over,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9c2d743b70>

In [155]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.107438016529
recall: 0.886363636364


(array([[2313,  643],
        [   5,   39]]), 2313, 643, 5, 39)

In [171]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='elu',kernel_initializer='uniform'))
model.add(Dense(256, activation='elu', kernel_initializer='uniform'))
model.add(Dense(64, activation='elu', kernel_initializer='uniform'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_over, Y_train_over,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f9bde0a5940>

In [173]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('f0.5:',fbeta_score(y_test, A.argmax(axis=1), average='binary', beta=0.5))
print('f0.5:',fbeta_score(y_test, A.argmax(axis=1), average='weighted', beta=0.5))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.142322097378
f0.5: 0.0948103792415
f0.5: 0.950634422138
recall: 0.863636363636


(array([[2504,  452],
        [   6,   38]]), 2504, 452, 6, 38)

In [19]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(256, input_dim=n_in, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, input_dim=n_in, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy', f1, recall])
model.fit(
    X_train_over, Y_train_over,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test))

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f4fb1530eb8>

In [28]:
y_true = [0, 1, 0, 0, 1, 0]
y_pred = [0, 1, 1, 0, 0, 1]
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
confusion_matrix(y_true, y_pred), tn, fp, fn, tp

(array([[2, 2],
        [1, 1]]), 2, 2, 1, 1)

In [30]:
r = tp/(tp+fn)
p = tp/(tp+fp)
f05 = 1.25*p*r/(0.25*p+r)
f05

0.3571428571428571

In [26]:
fbeta_score(y_true, y_pred, 0.5)

0.3571428571428571

In [27]:
f1_score(y_test, A.argmax(axis=1), 0.5)

0.15903614457831325

In [31]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('f0.5:',fbeta_score(y_test, A.argmax(axis=1), average='binary', beta=0.5))
print('f0.5:',fbeta_score(y_test, A.argmax(axis=1), average='weighted', beta=0.5))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.159036144578
f0.5: 0.107984293194
f0.5: 0.95897787098
recall: 0.75


(array([[2618,  338],
        [  11,   33]]), 2618, 338, 11, 33)

In [37]:
print('f1:',f1_score(y_test, A.argmax(axis=1), average='weighted'))

f1: 0.92609355668


In [51]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1200, input_dim=n_in, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='binary_crossentropy', 
    optimizer='adam',#SGD(lr=0.01),
    metrics=['accuracy'])#, f1, recall])
model.fit(
    X_train_over, Y_train_over,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test))

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f4f83225cf8>

In [52]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.307692307692
recall: 0.318181818182


(array([[2923,   33],
        [  30,   14]]), 2923, 33, 30, 14)

In [60]:
# modeling & evaluation
import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from sklearn import datasets, preprocessing
import numpy as np

n_in = X_train.shape[-1]

model = Sequential()
model.add(Dense(1024, input_dim=n_in, activation='elu'))
model.add(Dropout(0.2))
model.add(Dense(256, input_dim=n_in, activation='elu'))
model.add(Dropout(0.2))
model.add(Dense(64, input_dim=n_in, activation='elu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', 
    optimizer='adam',
    metrics=['accuracy'])#, f1, recall])
model.fit(
    X_train_over, Y_train_over,
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test))

Train on 13812 samples, validate on 3000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f4ed61eefd0>

In [61]:
A = model.predict(X_test)#*np.array([1,1])
tn, fp, fn, tp = confusion_matrix(y_test, A.argmax(axis=1)).ravel()
print('f1:',f1_score(y_test, A.argmax(axis=1)))
print('recall:',recall_score(y_test, A.argmax(axis=1)))
confusion_matrix(y_test, A.argmax(axis=1)),tn, fp, fn, tp

f1: 0.152866242038
recall: 0.818181818182


(array([[2565,  391],
        [   8,   36]]), 2565, 391, 8, 36)

In [128]:
#171101
model.fit(
    X_train_smote, Y_train_smote,
    epochs=3,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, Y_test)
)

Train on 13812 samples, validate on 3000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f608c3f10f0>

In [122]:
f1_score(y_test, np.argmax(model.predict(X_test),axis=1))

0.71042471042471045

In [136]:
recall_score(y_test, np.argmax(model.predict(X_test),axis=1))

0.45454545454545453

In [135]:
confusion_matrix(y_test, np.argmax(model.predict(X_test),axis=1))

array([[2837,  119],
       [  24,   20]])

### グリッドサーチする

In [68]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=666)

# resampling
rus = RandomUnderSampler(random_state=0)
ros = RandomOverSampler(random_state=0)
smt = SMOTE(random_state=0)
X_train_under, y_train_under = rus.fit_sample(X_train, y_train)
X_train_over, y_train_over = ros.fit_sample(X_train, y_train)
X_train_smote, y_train_smote = smt.fit_sample(X_train, y_train)

In [80]:
def mlp(activation="relu", learning_rate=0.001,
        h1=128, h2=128):
    model = Sequential()
    model.add(Dense(h1, input_dim=n_in))
    model.add(Activation(activation))
    model.add(Dense(h2))
    model.add(Activation(activation))
    model.add(Dense(2))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy',
              optimizer=SGD(lr=learning_rate),
              metrics=['accuracy',])
    return model

In [81]:
param_grid = {
    "activation": ["relu", "tanh", "sigmoid", 'elu'],
    "learning_rate": [0.0001, 0.001, 0.01],
    "h1": [256, 1024, 2048],
    "h2": [64, 128, 256],
    #"h3": [8, 32, 64],
}

In [82]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

model = KerasClassifier(build_fn=mlp, nb_epoch=50, batch_size=20, verbose=0)
clf = GridSearchCV(estimator=model, param_grid=param_grid, cv=4, scoring='f1')
res = clf.fit(X_train_over, y_train_over)

ValueError: Error when checking target: expected activation_15 to have shape (None, 2) but got array with shape (10359, 1)

In [None]:
doc = co.med.find()

In [None]:
doc[3]