NN 调参

In [None]:
import os
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')
 
# Switch the working directory to the project folder on Drive so that the
# relative "./result/..." paths used further down resolve inside it.
path = "/content/gdrive/My Drive/Independent"
os.chdir(path)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import numpy
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from scipy import sparse

def save(users_posts, users_label):
    """Persist the post matrix and the user labels under ``./result/``.

    Args:
        users_posts: SciPy sparse matrix, written to ``./result/posts.npz``.
        users_label: Iterable of labels; each one is converted with ``str()``
            and written on its own line of ``./result/label.txt``.
    """
    # The third positional argument of save_npz is `compressed`; False keeps
    # the archive uncompressed.
    sparse.save_npz("./result/posts.npz", users_posts, False)

    # The context manager closes the file even when str() or write() raises,
    # which the previous manual open()/close() pair did not guarantee.
    with open("./result/label.txt", "w") as label_file:
        for user_label in users_label:
            label_file.write(str(user_label) + "\n")

def read_list(path):
    """Load a text file that holds one integer per line.

    Args:
        path: Location of the file to read.

    Returns:
        A list with the integer parsed from each line, in file order.
    """
    with open(path, "r") as handle:
        # Only newline characters are trimmed here; int() does the parsing.
        return [int(raw_line.strip("\n")) for raw_line in handle]

def read_npz(path):
    """Return the SciPy sparse matrix stored in the ``.npz`` file at ``path``."""
    return sparse.load_npz(path)

In [None]:
# Tune batch_size and epochs
def create_model():
  """Build and compile the binary classifier evaluated by the grid search.

  The width of the first layer and of the input is taken from the
  module-level ``d``, which must be set before this function is called.

  Returns:
      A compiled ``Sequential`` model: Dense(d) -> Dropout(0.1) -> Dense(64)
      -> Dense(16) -> Dense(1, sigmoid), trained with binary cross-entropy
      and the Adam optimizer, reporting accuracy.
  """
  init = 'random_normal'
  layers = [
      Dense(d, input_dim=d, activation='relu', kernel_initializer=init),
      Dropout(0.1),
      Dense(64, activation='relu', kernel_initializer=init),
      Dense(16, activation='relu', kernel_initializer=init),
      # Single sigmoid unit: probability of the positive class.
      Dense(1, activation='sigmoid', kernel_initializer=init),
  ]
  net = Sequential(layers)
  net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
  return net

# Upper bound on the number of leading feature columns kept for training.
MAX_FEATURES = 4000

# load dataset
X = read_npz("./result/v_count_posts.npz")
y = read_list("./result/v_label.txt")

scaler = StandardScaler()
X = X.toarray()
X = X[:, 0:MAX_FEATURES]
# NOTE(review): the scaler is fit on every row before cross-validation, so the
# held-out folds contribute to the scaling statistics. Move the scaler into a
# Pipeline with the classifier if unbiased CV scores are required.
X = scaler.fit_transform(X)
# create_model() reads `d` as the network's input width, so it has to be the
# real column count: the slice above yields fewer than MAX_FEATURES columns
# when the stored matrix is narrower, and a hard-coded 4000 would then make
# the model's input_dim disagree with the data.
n, d = X.shape

# create model
model = KerasClassifier(build_fn=create_model, verbose=0)

# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))



Best: 0.872000 using {'batch_size': 40, 'epochs': 10}
0.867000 (0.044565) with: {'batch_size': 10, 'epochs': 10}
0.842000 (0.042849) with: {'batch_size': 10, 'epochs': 50}
0.863000 (0.020149) with: {'batch_size': 10, 'epochs': 100}
0.859000 (0.056868) with: {'batch_size': 20, 'epochs': 10}
0.820000 (0.062048) with: {'batch_size': 20, 'epochs': 50}
0.870000 (0.022136) with: {'batch_size': 20, 'epochs': 100}
0.872000 (0.040447) with: {'batch_size': 40, 'epochs': 10}
0.818000 (0.037630) with: {'batch_size': 40, 'epochs': 50}
0.829000 (0.053423) with: {'batch_size': 40, 'epochs': 100}
0.798000 (0.041785) with: {'batch_size': 60, 'epochs': 10}
0.807000 (0.050259) with: {'batch_size': 60, 'epochs': 50}
0.779000 (0.073512) with: {'batch_size': 60, 'epochs': 100}
0.749000 (0.038131) with: {'batch_size': 80, 'epochs': 10}
0.766000 (0.054351) with: {'batch_size': 80, 'epochs': 50}
0.757000 (0.039573) with: {'batch_size': 80, 'epochs': 100}
0.739000 (0.041158) with: {'batch_size': 100, 'epochs': 

In [None]:
def create_model(optimizer='adam'):
  """Build and compile the binary classifier with a configurable optimizer.

  The width of the first layer and of the input is taken from the
  module-level ``d``, which must be set before this function is called.

  Args:
      optimizer: Optimizer name (or instance) passed to ``model.compile``.
          Defaults to ``'adam'``.

  Returns:
      The compiled ``Sequential`` model.
  """
  model = Sequential()
  model.add(Dense(d, input_dim=d, activation='relu', kernel_initializer='random_normal'))
  model.add(Dropout(0.1))
  model.add(Dense(64, activation='relu', kernel_initializer='random_normal'))
  model.add(Dense(16, activation='relu', kernel_initializer='random_normal'))
  model.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
  # Use the argument instead of a hard-coded 'adam'; otherwise every candidate
  # in the optimizer grid search trains exactly the same model.
  model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
  return model

# Wrap the Keras builder as a scikit-learn estimator; epochs and batch size
# stay fixed for this search.
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=40, verbose=0)

# Candidate optimizers for the grid search
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = {'optimizer': optimizer}
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, y)

# Report the best configuration, then the mean/std score of every candidate
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.841000 using {'optimizer': 'RMSprop'}
0.789000 (0.077421) with: {'optimizer': 'SGD'}
0.841000 (0.045431) with: {'optimizer': 'RMSprop'}
0.776000 (0.035693) with: {'optimizer': 'Adagrad'}
0.812000 (0.050754) with: {'optimizer': 'Adadelta'}
0.790000 (0.076354) with: {'optimizer': 'Adam'}
0.811000 (0.034986) with: {'optimizer': 'Adamax'}
0.814000 (0.037336) with: {'optimizer': 'Nadam'}


In [None]:
def create_model(activation='relu'):
  """Build and compile the binary classifier with a configurable activation.

  The width of the first layer and of the input is taken from the
  module-level ``d``, which must be set before this function is called.

  Args:
      activation: Activation used by the three hidden Dense layers.
          Defaults to ``'relu'``. The output layer always uses ``'sigmoid'``
          because the model is trained with binary cross-entropy.

  Returns:
      The compiled ``Sequential`` model (RMSprop optimizer, accuracy metric).
  """
  model = Sequential()
  # Use the argument instead of a hard-coded 'relu'; otherwise every candidate
  # in the activation grid search trains exactly the same network.
  model.add(Dense(d, input_dim=d, activation=activation, kernel_initializer='random_normal'))
  model.add(Dropout(0.1))
  model.add(Dense(64, activation=activation, kernel_initializer='random_normal'))
  model.add(Dense(16, activation=activation, kernel_initializer='random_normal'))
  model.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
  model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])
  return model

# Wrap the Keras builder as a scikit-learn estimator; epochs and batch size
# stay fixed for this search.
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=40, verbose=0)

# Candidate activation functions for the grid search
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = {'activation': activation}
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, y)

# Report the best configuration, then the mean/std score of every candidate
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))



Best: 0.890000 using {'activation': 'relu'}
0.875000 (0.038079) with: {'activation': 'softmax'}
0.863000 (0.032031) with: {'activation': 'softplus'}
0.863000 (0.027677) with: {'activation': 'softsign'}
0.890000 (0.013038) with: {'activation': 'relu'}
0.875000 (0.009487) with: {'activation': 'tanh'}
0.889000 (0.016248) with: {'activation': 'sigmoid'}
0.845000 (0.016733) with: {'activation': 'hard_sigmoid'}
0.889000 (0.012806) with: {'activation': 'linear'}
