In [1]:
import tensorflow as tf 
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import models, Model
from tensorflow import keras
from tensorflow.keras import layers, Sequential, datasets, optimizers
import matplotlib.pyplot as plt
import numpy as np 
import glob
import os
from tqdm import tqdm

In [2]:
import time
import random

In [3]:
# Order GPUs by PCI bus id and make only device "0" visible to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [4]:
# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
# With no GPU present the loop body simply never runs.
gpus = tf.config.experimental.list_physical_devices("GPU")
try:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Report the RuntimeError raised while configuring the devices and stop.
    print(err)
    exit(-1)

# data

In [5]:
# Global hyper-parameters shared by the input pipelines and the training cells.
BATCHSZ, EPOCHS = 64, 100  # mini-batch size, number of epochs

In [6]:
# Directory holding the pre-processed .npy arrays.
# A raw string keeps the Windows backslashes literal; the previous non-raw
# literal relied on "\E", "\h" and "\S" being unrecognised escape sequences.
data_dir = r"E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D"

# Sort the listing so its order does not depend on the file system, and
# reuse the same list for printing instead of globbing a second time.
data_root = sorted(glob.glob(os.path.join(data_dir, '*')))
for name in data_root:
    print(name)

E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D\data.npy
E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D\data_label.npy
E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D\test.npy
E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D\test_label.npy
E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D\train.npy
E:\Eric_HSI\hyper_data_preprocess\Salinas_w_size_9_num_200_for_2D\train_label.npy


In [7]:
# Load each split by file name rather than by its position in the glob
# result, so a different directory ordering or an extra file in the folder
# cannot silently swap the arrays.
train = np.load(os.path.join(data_dir, "train.npy"))
train_label = np.load(os.path.join(data_dir, "train_label.npy"))
test = np.load(os.path.join(data_dir, "test.npy"))
test_label = np.load(os.path.join(data_dir, "test_label.npy"))

# Display the shapes as the cell output.
train.shape, train_label.shape, test.shape, test_label.shape

((3200, 9, 9, 204), (3200,), (50929, 9, 9, 204), (50929,))

In [30]:
# One-hot encode the integer class labels.
# - `num_classes` is fixed so both splits get the same width even if the
#   highest class id is missing from one of them.
# - The `ndim` guard makes the cell idempotent: re-running it no longer
#   encodes labels that are already one-hot.
NUM_CLASSES = 16
if train_label.ndim == 1:
    train_label = tf.keras.utils.to_categorical(train_label, num_classes=NUM_CLASSES)
if test_label.ndim == 1:
    test_label = tf.keras.utils.to_categorical(test_label, num_classes=NUM_CLASSES)

In [31]:
# Problem sizes used by the later cells.
class_num = 16                                        # number of target classes
im_height, im_width = 9, 9                            # spatial size of one patch
train_num, val_num = train.shape[0], test.shape[0]    # samples per split
im_channel = train.shape[3]                           # size of the last (channel) axis

In [32]:
# Let tf.data choose the degree of parallelism dynamically based on the
# available CPU; intended for `num_parallel_calls` / `prefetch`.
AUTOTUNE = tf.data.experimental.AUTOTUNE

- prefetch(AUTOTUNE)
- 当GPU在当前批次上执行前向或反向传播时，我们希望CPU同时准备下一个批次的数据，以便GPU能够立即使用它，从而让GPU始终满负荷地用于训练。这种机制称为生产者/消费者重叠：生产者是CPU，消费者是GPU。

# dataset顺序：

- 创建实例                             from_tensor_slices                       
- 重组（较大的buffer_size）             shuffle
- 重复                                  repeat
- 数据预处理、数据扩增，使用多线程等                  map
- 批次化                                batch
- 预取数据                             prefetch

In [33]:
# Training pipeline: slice the arrays per sample, shuffle over the whole
# training set, batch, and prefetch so the next batch is prepared in advance.
train_db = (
    tf.data.Dataset.from_tensor_slices((train, train_label))
    .shuffle(buffer_size=train_num)
    .batch(BATCHSZ)
    .prefetch(AUTOTUNE)
)

# Evaluation pipeline: same as above but without shuffling.
test_db = (
    tf.data.Dataset.from_tensor_slices((test, test_label))
    .batch(BATCHSZ)
    .prefetch(AUTOTUNE)
)

In [34]:
# Display both pipelines to check their element shapes and dtypes.
train_db, test_db 

(<PrefetchDataset shapes: ((None, 9, 9, 204), (None, 16)), types: (tf.float32, tf.float32)>,
 <PrefetchDataset shapes: ((None, 9, 9, 204), (None, 16)), types: (tf.float32, tf.float32)>)

# model

In [13]:
class VGG(keras.Model):
    """VGG-style CNN classifier that returns raw class logits.

    The feature extractor is five units of (Conv3x3 -> Conv3x3 -> MaxPool/2)
    with 64, 128, 256, 512 and 512 filters, followed by a 256 -> 128 ->
    `num_calsses` dense head.

    The final Dense layer has no activation: the model outputs logits, so use
    it with a loss built with `from_logits=True` (or apply softmax yourself).
    The previous ReLU on the output layer clamped every negative logit to
    zero, which distorts the softmax and can zero the gradient of a class.

    Args:
        num_calsses: Number of output classes (units of the last Dense layer).
            The misspelled name is kept for backward compatibility.
    """

    # Filters of the five conv units, in order.
    _UNIT_FILTERS = (64, 128, 256, 512, 512)

    def __init__(self, num_calsses=16):
        super(VGG, self).__init__()

        # Each unit: two 3x3 'same' convolutions, then a stride-2 max pooling.
        conv_stack = []
        for filters in self._UNIT_FILTERS:
            conv_stack.extend([
                layers.Conv2D(filters, kernel_size=[3, 3], padding='same', activation='relu'),
                layers.Conv2D(filters, kernel_size=[3, 3], padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=[2, 2], strides=2, padding='same'),
            ])
        self.conv_layers = Sequential(conv_stack)

        self.flatten = layers.Flatten()
        self.fc_net = Sequential([
            layers.Dense(256, activation='relu'),
            layers.Dense(128, activation='relu'),
            # No activation here: emit logits for `from_logits=True` losses.
            layers.Dense(num_calsses),
        ])

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of image patches, e.g. built with (None, 9, 9, 204).
            training: Forwarded to the sub-networks so that any
                training-dependent layer added later behaves correctly.

        Returns:
            Tensor of shape (batch, num_calsses) holding unnormalised logits.
        """
        x = self.conv_layers(inputs, training=training)
        x = self.flatten(x)
        x = self.fc_net(x, training=training)
        return x

In [14]:
def VGG13():
    """Factory returning a `VGG` instance built with its default arguments."""
    net = VGG()
    return net

In [15]:
# Instantiate the network and create its weights for batches of
# 9x9 patches with 204 channels (batch dimension left unspecified).
model = VGG()
model.build((None, 9, 9, 204))

In [16]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "vgg"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, 1, 1, 512)         9520768   
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_________________________________________________________________
sequential_1 (Sequential)    (None, 16)                166288    
Total params: 9,687,056
Trainable params: 9,687,056
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Adam optimizer for the manual training loop. A small learning rate is used
# because (per the original note) the loss is already very small and the
# problem is fairly complex.
# `learning_rate` replaces the deprecated `lr` alias, matching the compile cell.
optimizer = optimizers.Adam(learning_rate=1e-4)

# 训练1

In [21]:
def _onehot_and_index(labels, depth):
    """Return `(one_hot, class_index)` for sparse or already one-hot labels.

    The datasets are built from one-hot labels when the notebook is run top to
    bottom, but this loop was written for integer labels; accepting both keeps
    the loop working in either case.
    """
    if len(labels.shape) == 1:
        # Sparse integer labels: [b] -> [b, depth]
        index = tf.cast(labels, tf.int32)
        return tf.one_hot(index, depth=depth), index
    # Already one-hot: [b, depth] -> class ids [b]
    return tf.cast(labels, tf.float32), tf.argmax(labels, axis=1, output_type=tf.int32)


for epoch in tqdm(range(20)):
    # ---- one pass over the training set ----
    for step, (x, y) in enumerate(train_db):
        with tf.GradientTape() as tape:
            # [b, 9, 9, 204] -> [b, 16] logits
            logits = model(x, training=True)
            y_onehot, _ = _onehot_and_index(y, logits.shape[-1])
            loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
            loss = tf.reduce_mean(loss)

        # Back-propagation and parameter update.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, 'loss', float(loss))

    # ---- evaluate on the full test set after every epoch ----
    # How often to evaluate is a trade-off: evaluation time slows training down.
    total_num = 0
    total_correct = 0

    for x, y in test_db:
        logits = model(x, training=False)
        _, y_true = _onehot_and_index(y, logits.shape[-1])
        # argmax of the logits equals argmax of their softmax, so no softmax is needed.
        pred = tf.argmax(logits, axis=1, output_type=tf.int32)
        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y_true), dtype=tf.int32))

        total_num += x.shape[0]
        total_correct += int(correct)

    acc = total_correct / total_num
    print(epoch, 'acc', acc)

  0%|          | 0/20 [00:00<?, ?it/s]0 0 loss 2.572810649871826
  5%|▌         | 1/20 [00:07<02:20,  7.38s/it]0 acc 0.10671719452571227
1 0 loss 2.276338577270508
 10%|█         | 2/20 [00:14<02:12,  7.35s/it]1 acc 0.19756916491586327
2 0 loss 1.9665740728378296
 15%|█▌        | 3/20 [00:21<02:04,  7.30s/it]2 acc 0.42205815939837815
3 0 loss 1.8189773559570312
 20%|██        | 4/20 [00:29<01:56,  7.28s/it]3 acc 0.2711421783266901
4 0 loss 1.6107630729675293
 25%|██▌       | 5/20 [00:36<01:48,  7.25s/it]4 acc 0.4230006479608867
5 0 loss 1.756507158279419
 30%|███       | 6/20 [00:43<01:41,  7.24s/it]5 acc 0.46150523277503974
6 0 loss 1.491671085357666
 35%|███▌      | 7/20 [00:50<01:33,  7.22s/it]6 acc 0.5392605391819985
7 0 loss 1.5345306396484375
 40%|████      | 8/20 [00:57<01:26,  7.23s/it]7 acc 0.47085157768658326
8 0 loss 1.4169940948486328
 45%|████▌     | 9/20 [01:05<01:19,  7.24s/it]8 acc 0.3544149698600012
9 0 loss 1.4634439945220947
 50%|█████     | 10/20 [01:12<01:12,  7.23

# 训练2

In [27]:
# Hyper-parameters for the second (Keras `fit`) training run.
EPOCHS = 10
BATCHSZ = 64

In [28]:
# Start the second run from a freshly initialised network, built for
# batches of 9x9 patches with 204 channels.
model = VGG()
model.build((None, 9, 9, 204))

In [38]:
# The network's output layer does not produce softmax probabilities, so the
# loss must normalise the outputs itself: use `from_logits=True`.
# With `from_logits=False` the raw outputs were treated as probabilities,
# which is why training got progressively worse.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])

# Multiply the learning rate by 0.6 when val_loss has not improved for 3 epochs.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.6, patience=3, min_lr=0.000001)

# Keep only the best weights (lowest val_loss). The target directory must
# exist before the first checkpoint is written.
os.makedirs('./save_weights', exist_ok=True)
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG.h5',
                                                save_best_only=True,
                                                save_weights_only=True,
                                                monitor='val_loss')]

# Both datasets are finite and already batched, so `fit` can iterate each of
# them completely once per epoch. Omitting `steps_per_epoch` and
# `validation_steps` avoids dropping the last partial validation batch and
# removes the dependency on `BATCHSZ` matching the batch size the datasets
# were built with.
# The callbacks are passed as one flat list instead of a nested one.
history = model.fit(train_db,
                    epochs=EPOCHS,
                    validation_data=test_db,
                    callbacks=callbacks + [reduce_lr])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
