In [0]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


Using TensorFlow backend.


In [0]:
seed = 7
np.random.seed(seed)


In [0]:
# loading the dataset already downloaed in route folder

df = pd.read_csv("sonar.csv", header=None)

# limit to categorical data using df.select_dtypes()
y=df.select_dtypes(include=[object])
x=df.select_dtypes(include=[np.number])


In [0]:

le = LabelEncoder()


# 2/3. FIT AND TRANSFORM
# use df.apply() to apply le.fit_transform to all columns
y = y.apply(le.fit_transform)


In [0]:
# baseline model
def create_baseline():
    
	# create model, write code below
    model=Sequential()
    model.add(Dense(60,activation='relu',input_shape=(60,)))
    model.add(Dense(1,activation='sigmoid'))
    
    
	# Compile model, write code below
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
	
    return model


In [0]:
# evaluate model

estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, x, y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Results: 83.71% (6.13%)


In [0]:
# evaluate baseline model with standardized dataset

np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))





Standardized: 85.59% (7.46%)


In [0]:
# smaller model
def create_smaller():
    # create model
    model=Sequential()
    #model.add(Dense(60,activation='relu',input_shape=(60,)))
    model.add(Dense(30,activation='relu',input_shape=(60,)))
    model.add(Dense(1,activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

#without data preparation
estimator=KerasClassifier(build_fn=create_smaller, epochs=100,batch_size=5,verbose=0)
kfold=StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results=cross_val_score(estimator,x,y,cv=kfold)
print("Smaller(without data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

#with data preparation
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("Smaller(with data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Smaller(without data prep): 81.21% (4.78%)
Smaller(with data prep): 85.59% (8.05%)


In [0]:
# larger model
def create_larger():
    # create model
    model=Sequential()
    model.add(Dense(60,activation='relu',input_shape=(60,)))
    model.add(Dense(30,activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

#with data preparation
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("larger(with data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


larger(with data prep): 83.18% (5.76%)


In [0]:
# overfitting model
def create_overitting_model():
    # create model
    model=Sequential()
    model.add(Dense(120,activation='relu',input_shape=(60,)))
    model.add(Dense(60,activation='relu'))
    model.add(Dense(60,activation='relu'))
    model.add(Dense(30,activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

#with data preparation
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_overitting_model, epochs=200, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("larger(with data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
larger(with data prep): 87.92% (5.45%)


In [0]:
# tuned model
def tuned_model():
    # create model
    model=Sequential()
    model.add(Dense(60,activation='relu',input_shape=(60,)))
    model.add(Dense(30,activation='relu'))
    model.add(Dense(15,activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

#with data preparation
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=tuned_model, epochs=80, batch_size=15, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("larger(with data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


larger(with data prep): 87.04% (6.67%)


In [0]:
#functional api

from keras import Model
from keras import Input

def func_model():
  inputs=Input(shape=(60,))
  x=Dense(60,activation='relu')(inputs)
  x=Dense(30,activation='relu')(x)
  x=Dense(15,activation='relu')(x)
  x=Dense(1,activation='sigmoid')(x)
  
  model=Model(inputs=inputs,outputs=x)
  model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
  
  return model

#with data preparation
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=func_model, epochs=80, batch_size=15, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("larger(with data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


larger(with data prep): 86.11% (7.85%)


In [0]:
import tensorflow as tf

class c_model(Model):
  
  def __init__(self):
    super(c_model,self).__init__()
    self.dense1= Dense(60,activation='relu')
    self.dense2= Dense(30,activation='relu')
    self.dense3= Dense(15,activation='relu')
    self.dense4= Dense(1,activation='softmax')
    
  def call(self, inputs):
    x=self.dense1(inputs)
    x=self.dense2(x)
    x=self.dense3(x)
    x=self.dense4(x)
    return x
  
  def compute_output_shape(self, input_shape):
    # You need to override this function if you want to use the subclassed model
    # as part of a functional-style model.
    # Otherwise, this method is optional.
    shape = tf.TensorShape(input_shape).as_list()
    shape[-1] = 1
    return tf.TensorShape(shape)
  
def get_model():
  
  model = c_model()

  model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
  inputs=Input(shape=(60,))
  outputs= model(inputs)
  return Model(inputs,outputs)

print(dir(c_model()))
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=get_model, epochs=80, batch_size=15)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, y, cv=kfold)
print("model subclassing(with data prep): %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_add_inbound_node', '_base_init', '_built', '_check_trainable_weights_consistency', '_expects_training_arg', '_get_node_attribute_at_index', '_inbound_nodes', '_init_graph_network', '_init_subclassed_network', '_initial_weights', '_is_compiled', '_is_graph_network', '_layers', '_losses', '_make_predict_function', '_make_test_function', '_make_train_function', '_node_key', '_outbound_nodes', '_per_input_losses', '_per_input_updates', '_set_inputs', '_standardize_user_data', '_updated_config', '_updates', '_uses_dynamic_learning_phase', '_uses_inputs_arg', 'add_loss', 'add_update', 'add_weight', 'assert_i



AttributeError: ignored

In [0]:
k = 10
num_val_samples = len(x) // k    #floor division
num_epochs = 80
all_scores = []

df = df.sample(frac=1,random_state=seed).reset_index(drop=True)
shuffled_y=df.select_dtypes(include=[object])
shuffled_x=df.select_dtypes(include=[np.number])

shuffled_y = shuffled_y.apply(le.fit_transform)

for i in range(k):
    print('processing fold #', i)
    val_data = shuffled_x[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = shuffled_y[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate( [shuffled_x[:i * num_val_samples], shuffled_x[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([shuffled_y[:i * num_val_samples], shuffled_y[(i + 1) * num_val_samples:]], axis=0)
    model = func_model()
    model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=15, verbose=0)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)
    
print(all_scores)
print(np.mean(all_scores))

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9
[0.8500000238418579, 0.800000011920929, 0.949999988079071, 0.800000011920929, 0.8999999761581421, 0.8500000238418579, 0.949999988079071, 0.8999999761581421, 0.75, 0.8500000238418579]
0.8600000023841858
