In [4]:
#import 分類用模型
from keras.models import Sequential
from keras.layers import Dense
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import sys
import warnings
%matplotlib inline
import matplotlib.pyplot as plt
import time
if not sys.warnoptions:
    warnings.simplefilter("ignore")

# 測試

In [5]:
# 分類測試用的數據
# 生成一個二分類問題的數據集
# n_samples 總數量；n_features：X一組內的資料數量；centers：數據的中間值(Y會根據這個產生int
X, y = make_blobs(n_samples=100, n_features=2, centers=2, random_state=1)
# n_samples是待生成的樣本的總數、n_features是每個樣本的特徵數、 centers表示類別數
scalar = MinMaxScaler()  #標準化數據-1~1
scalar.fit(X)
X = scalar.transform(X)

In [7]:
# 定義並擬合模型

model = Sequential() #

# 第一層要設定 輸入的物件的要求，範例為2D的矩陣X
# Dense 全連接層，activation 激勵函數，4是指定輸出的空間維度(只能是正整數)
model.add(Dense(4, input_dim=2, activation='relu')) # relu 整流線性單元

model.add(Dense(4, activation='relu'))

model.add(Dense(1, activation='hard_sigmoid'))

# 配置學習過程 (損失函數、優化契)
model.compile(loss='binary_crossentropy', optimizer='adam')

# 訓練起來  (epochs = 網絡更新數
model.fit(X, y, epochs=500, verbose=0)

<keras.callbacks.History at 0x1eac3b92940>

In [8]:
Xnew, _ = make_blobs(n_samples=3, centers=2, n_features=2, random_state=1)
Xnew = scalar.transform(Xnew)
ynew = model.predict_classes(Xnew)

for i in range(len(Xnew)):
    print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))

X=[0.89337759 0.65864154], Predicted=[0]
X=[0.29097707 0.12978982], Predicted=[1]
X=[0.78082614 0.75391697], Predicted=[0]


In [None]:
# 保存模型

model.save(filepath)#将Keras模型和权重保存在一个HDF5
keras.models.load_model(filepath) # 匯入模型


## 在二分類問題下 預測可能分類為該類別的機率--

In [9]:
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)
scalar = MinMaxScaler()
scalar.fit(X)
X = scalar.transform(X)

model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
# sigmoid  這層作為輸出層，用於預測概率
model.add(Dense(1, activation='sigmoid'))
# 編譯層
model.compile(loss='binary_crossentropy', optimizer='adam')

model.fit(X, y, epochs=500, verbose=0)

# 新的未知數據
Xnew, _ = make_blobs(n_samples=3, centers=2, n_features=2, random_state=1)
Xnew = scalar.transform(Xnew)

# 做预测
ynew = model.predict_proba(Xnew)


for i in range(len(Xnew)):
    print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))

X=[0.89337759 0.65864154], Predicted=[0.00582727]
X=[0.29097707 0.12978982], Predicted=[0.99573165]
X=[0.78082614 0.75391697], Predicted=[0.00613513]


# 回歸預測

In [10]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler

# 製作隨機數據
X, y = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=1)

scalarX, scalarY = MinMaxScaler(), MinMaxScaler()
scalarX.fit(X)
scalarY.fit(y.reshape(100,1))
X = scalarX.transform(X)
y = scalarY.transform(y.reshape(100,1))

# 定義並擬合模型

model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam')
model.fit(X, y, epochs=1000, verbose=0)

# 未知的新數據
Xnew, a = make_regression(n_samples=3, n_features=2, noise=0.1, random_state=1)

Xnew = scalarX.transform(Xnew)

# K-Fold 

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import cross_val_score
import numpy
import pandas
# Function to create model, required for KerasClassifier
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, init='uniform', activation='relu')) 
    model.add(Dense(8, init='uniform', activation='relu')) 
    model.add(Dense(1, init='uniform', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 
    return model

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load pima indians dataset
# dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10)
# evaluate using 10-fold cross validation
kfold = StratifiedKFold(y=Y, n_folds=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())

### 找到最適合神經網絡的參數

In [None]:
# MLP for Pima Indians Dataset with grid search via sklearn
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.grid_search import GridSearchCV
import numpy
import pandas
# Function to create model, required for KerasClassifier
def create_model(optimizer='rmsprop', init='glorot_uniform'):
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, init=init, activation='relu')) 
    model.add(Dense(8, init=init, activation='relu')) 
    model.add(Dense(1, init=init, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) 
    return model
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load pima indians dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(build_fn=create_model)
# grid search epochs, batch size and optimizer
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = numpy.array([50, 100, 150])
batches = numpy.array([5, 10, 20])

param_grid = dict(optimizer=optimizers, nb_epoch=epochs, batch_size=batches,
                  init=init) 

grid = GridSearchCV(estimator=model, param_grid=param_grid)

grid_result = grid.fit(X, Y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for params, mean_score, scores in grid_result.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

In [None]:
# 類別變數設定
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# 實例測試 iris

In [19]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import train_test_split , KFold,cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
df = pd.read_csv(r'D:\AYA\test_file\iris.csv')
X = df[list(df)[0:4]].values.astype(float)
Y = df[list(df)[-1]].values

encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)# one hot encoded

# 把Y轉成1,0,0  0,1,0  這類的one hot encoded
df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [20]:
# 設計大綱
# 4個神經元 輸入層 -> [4個神經元 隱層] -> 3個神經元 輸出層
# 因為有三種類的花

def baseline_model():
    # create model
    model = Sequential()
    # 數據用整流函數 所以激勵用relu
    model.add(Dense(4, input_dim=4, init='normal', activation='relu')) 

    model.add(Dense(3, init='normal', activation='sigmoid'))
    # Compile model
    # 優化算法選擇ADAM隨機梯度下降，損失函數是對數函數
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 
    return model

# verbose =0 是為了關閉調適訊息
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200,
                            batch_size=5, verbose=0)

In [21]:
# K-Fold交叉檢驗
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Accuracy: 31.33% (9.91%)


# 波士頓房屋為例 簡單的神經網絡 做回歸預測

In [22]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection  import cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.datasets import load_boston
boston = load_boston()

In [23]:
boston.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [24]:
X = boston['data']
Y = boston['target']

In [25]:
'''有一個全連接層，神經元數量和輸入變量數一致，激活函數還是整流函數。輸出層沒有激活
函數，因為在回歸問題中我們希望直接取結果。優化函數是Adam，損失函數是MSE，和我們
要優化的函數一致'''
# MSE越小，說明模型的擬合實驗數據能力強

def baseline_model():
    # create model
    model = Sequential()
    # 全连接层，因為X每筆是13個變數，所以全連階層這邊要設定一樣
    model.add(Dense(13, input_dim=13, kernel_initializer="normal", activation='relu')) 
    model.add(Dense(1, kernel_initializer="normal"))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam') 
    return model

# 使用KerasRegressor封裝這個模型，任何其他的變量都會傳入fit()函數中，例如
# 訓練次數和批次大小，這裡我們取默認值。為了可以復現結果，指定隨機數種子
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=100, batch_size=5, verbose=0)

# 10折測試
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Results: -114.95 (83.24) MSE


In [27]:
'''預處理數據以增加性能，
這個數據集的特點是變量的尺度不一致，所以標準化很有用。 scikit-learn的Pipeline可以
直接進行均一化處理並交叉檢驗，這樣模型不會預先知道新的數據。代碼如下：'''

numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', 
                   KerasRegressor(build_fn=baseline_model,
                                  nb_epoch=50,
                                  batch_size=5,
                                  verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))


Standardized: -567.25 (277.43) MSE


In [28]:
# 增加神经网络的层数可以提高效果，这样模型可以提取并组合更多的特征。

def larger_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu')) 
    #多這層
    model.add(Dense(6, kernel_initializer='normal', activation='relu')) 
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam') 
    return model

# 測試並正則化數據
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=larger_model, nb_epoch=50, batch_size=5,
    verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Larger: -571.95 (283.38) MSE


In [30]:
# 加寬模型可以增加網絡容量。我們減去一層，把隱層的神經元數量加大，從13加到20：

def wider_model():
    # create model
    model = Sequential()
    model.add(Dense(20, input_dim=13, kernel_initializer='normal', activation='relu')) 
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam') 
    return model

numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=wider_model, nb_epoch=100, batch_size=5,
    verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Wider: -560.86 (273.34) MSE


In [31]:
REGR= larger_model()
REGR

<keras.engine.sequential.Sequential at 0x1ead447fef0>

In [None]:
# 用validation_split參數可以指定驗證數據的比例，一般是總數據的20%或者33%
history = REGR.fit(X,Y, validation_split=0.33, epochs=40, batch_size=10)