In [19]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OrdinalEncoder

In [20]:
X = datasets.fetch_covtype().data[:5000]
y = datasets.fetch_covtype().target[:5000]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

print(X_train.shape, y_train.shape)

enc = OrdinalEncoder()
y_train = enc.fit_transform(y_train.reshape(-1, 1)).reshape(-1, )
y_test = enc.transform(y_test.reshape(-1, 1)).reshape(-1, )
print(np.unique(y_train))  # 7分类任务

X_train, X_test, y_train, y_test = tf.constant(X_train), tf.constant(X_test), tf.constant(y_train), tf.constant(y_test)

(4500, 54) (4500,)
[0. 1. 2. 3. 4. 5. 6.]


In [21]:
inputs = tf.keras.Input(shape=(54,))
x = tf.keras.layers.Dense(256, activation='tanh')(inputs)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(128)(x)
predictions = tf.keras.layers.Dense(7)(x)
model = tf.keras.Model(inputs=inputs, outputs=predictions)

model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 54)]              0         
                                                                 
 dense_8 (Dense)             (None, 256)               14080     
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_9 (Dense)             (None, 128)               32896     
                                                                 
 dense_10 (Dense)            (None, 128)               16512     
                                                                 
 dense_11 (Dense)            (None, 7)                 903       
                                                                 
Total params: 64,391
Trainable params: 64,391
Non-trainable

In [22]:
# Configures the model for training.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),  # 优化器
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # 损失函数
              metrics=['accuracy'])  # 评估函数

In [23]:
# Trains the model for a fixed number of epochs (iterations on a dataset).
model.fit(X_train,  # Input data
          y_train,  # Target data
          # x是生成器或 tf.data.Dataset 的对象时,将忽略此参数
          # Boolean (whether to shuffle the training data before each epoch) or str (for 'batch'). This argument is ignored when x is a generator or an object of tf.data.
          shuffle=True,
          # Integer or None. Number of samples per gradient update. If unspecified, batch_size will default to 32.
          batch_size=64,
          epochs=10,  # Integer. Number of epochs to train the model.
          # 'auto', 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. 'auto' defaults to 1 for most cases
          verbose=2,
          # Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data.
          validation_data=(X_test, y_test),
          # Float between 0 and 1. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch.
          # validation_split=0.2 # 验证数据集占训练数据集的比例,取值范围为0~1
          )

Epoch 1/10
71/71 - 1s - loss: 1.4717 - accuracy: 0.4180 - val_loss: 1.2283 - val_accuracy: 0.5040 - 1s/epoch - 17ms/step
Epoch 2/10
71/71 - 0s - loss: 1.2364 - accuracy: 0.5007 - val_loss: 1.1889 - val_accuracy: 0.4900 - 255ms/epoch - 4ms/step
Epoch 3/10
71/71 - 0s - loss: 1.2003 - accuracy: 0.5016 - val_loss: 1.1062 - val_accuracy: 0.5380 - 279ms/epoch - 4ms/step
Epoch 4/10
71/71 - 0s - loss: 1.1677 - accuracy: 0.5211 - val_loss: 1.0849 - val_accuracy: 0.5520 - 264ms/epoch - 4ms/step
Epoch 5/10
71/71 - 0s - loss: 1.1824 - accuracy: 0.5256 - val_loss: 1.1241 - val_accuracy: 0.5320 - 280ms/epoch - 4ms/step
Epoch 6/10
71/71 - 0s - loss: 1.1419 - accuracy: 0.5444 - val_loss: 1.0926 - val_accuracy: 0.5500 - 273ms/epoch - 4ms/step
Epoch 7/10
71/71 - 0s - loss: 1.1190 - accuracy: 0.5489 - val_loss: 1.0357 - val_accuracy: 0.5580 - 271ms/epoch - 4ms/step
Epoch 8/10
71/71 - 0s - loss: 1.1016 - accuracy: 0.5478 - val_loss: 1.0889 - val_accuracy: 0.5700 - 253ms/epoch - 4ms/step
Epoch 9/10
71/71 -

<keras.callbacks.History at 0x28aa1dfd040>

In [24]:
model.history.history.items()

dict_items([('loss', [1.471740484237671, 1.2363803386688232, 1.2002722024917603, 1.1676579713821411, 1.1824363470077515, 1.1418557167053223, 1.1189563274383545, 1.1016052961349487, 1.0886876583099365, 1.0690973997116089]), ('accuracy', [0.4180000126361847, 0.5006666779518127, 0.5015555620193481, 0.5211111307144165, 0.5255555510520935, 0.5444444417953491, 0.5488888621330261, 0.5477777719497681, 0.5475555658340454, 0.5671111345291138]), ('val_loss', [1.2283138036727905, 1.1888833045959473, 1.106217622756958, 1.0848801136016846, 1.1241456270217896, 1.0925740003585815, 1.0357253551483154, 1.0888891220092773, 1.0139648914337158, 0.9796877503395081]), ('val_accuracy', [0.5040000081062317, 0.49000000953674316, 0.5379999876022339, 0.5519999861717224, 0.5320000052452087, 0.550000011920929, 0.5580000281333923, 0.5699999928474426, 0.5759999752044678, 0.5860000252723694])])

In [25]:
# Returns the loss value & metrics values for the model in test mode.
'''
batch_size:Integer or None. Number of samples per batch of computation. If unspecified, batch_size will default to 32. Do not specify the batch_size if your data is in the form of a dataset, generators, or keras.utils.Sequence instances (since they generate batches).
verbose:0 or 1. Verbosity mode. 0 = silent, 1 = progress bar.
'''
loss, metric = model.evaluate(X_test, y_test, batch_size=32, verbose=0)

print("test loss:", loss)
print("test acc:", metric)

test loss: 0.9796877503395081
test acc: 0.5860000252723694


In [26]:
# Generates output predictions for the input samples.
'''
batch_size:Integer or None. Number of samples per batch. If unspecified, batch_size will default to 32. Do not specify the batch_size if your data is in the form of dataset, generators, or keras.utils.Sequence instances (since they generate batches).
verbose:Verbosity mode, 0 or 1.
'''
predictions = model.predict(X_test[:10],  # 预测10条样本
                            batch_size=32, verbose=0)
print('predictions shape:', predictions)

predictions shape: [[-3.5170515  -0.6050846   4.145291    3.2827766   0.10767475  3.1674106
  -6.2613826 ]
 [-4.278873   -1.6365532   5.038116    5.8987365  -0.33728406  3.3239233
  -7.3653684 ]
 [-2.0400677   0.06075101  3.1786692   2.3567138   0.12059799  2.6694014
  -4.585171  ]
 [-3.5209007  -1.448727    3.5494657   6.5712767   0.35043392  2.4523618
  -7.2593274 ]
 [ 0.38359606  2.1430786  -2.2262008   1.4163525   5.915867   -2.7572339
  -6.6106715 ]
 [ 3.889718    3.9759626  -3.5077465  -4.3880277   2.7897003  -2.5785437
   1.0711036 ]
 [-2.8535764  -0.66872084  2.174545    3.783858    0.8995967   1.9356933
  -5.1803555 ]
 [ 2.4447854   2.229871   -3.2365537  -3.4057086   1.7557681  -2.3464875
   2.8480449 ]
 [ 3.141587    3.027517   -2.8610396  -3.2871904   2.8824985  -2.2985315
   1.2145189 ]
 [ 2.1436365   2.0264874   0.02591743 -0.37070662  2.6864734  -0.5036128
  -2.603583  ]]
