# 在上一个代码的基础上，改进ResNet网络，并加一些注释
- 此代码来源于CNN1D， CNN1D 来源于github源码

In [1]:
import pathlib,random
import scipy.io as sio
import numpy as np
from tensorflow.keras.callbacks import ReduceLROnPlateau
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential, datasets, optimizers
import os

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# 数据标准化
def max_min(x):
    return (x-np.min(x))/(np.max(x)-np.min(x))

In [4]:
# 定义所有常数    
# 这里完成获取数据操作
BATCHSZ = 32
train_num = 200
seed = 666
data_name = r'Salinas_corrected'
data_gt_name = r'Salinas'
result = 'result'
fix_seed = False
num_calsses = 16 
cube_size = 9
data_dict = sio.loadmat(r"E:\Eric_HSI\hyperspectral_datasets\Salinas_corrected.mat")
data_gt_dict = sio.loadmat(r"E:\Eric_HSI\hyperspectral_datasets\Salinas_gt.mat")

# startswith 检查字符串是否以 "————" 开头, 取出数据集
data_name = [t for t in list(data_dict.keys()) if not t.startswith('__')][0]
data_gt_name = [t for t in list(data_gt_dict.keys()) if not t.startswith('__')][0]

data = data_dict[data_name]

# 标准化
data = max_min(data).astype(np.float32)
data_gt = data_gt_dict[data_gt_name].astype(np.int64)

dim = data.shape[2]
print('DataSet %s shape is %s'%(data_name,data.shape))

DataSet salinas_corrected shape is (512, 217, 204)


In [5]:
# 给出 row，col，返回 w_size 大小的cube???
# row, col 为像素值的位置索引

# 此处为1D CNN， 所定义的w_size 为 1
def neighbor_add(row, col, w_size=3):  
    t = w_size // 2
    # 初始化立方体 shape = 1, 1, 204
    cube = np.zeros(shape=[w_size, w_size, data.shape[2]])  # 9*9*204
    for i in range(-t, t + 1):
        for j in range(-t, t + 1):
            # 如果创建的 cube 在图像之外
            # 整个cube的图像都是由这一点的像素值得到
            if i + row < 0 or i + row >= data.shape[0] or j + col < 0 or j + col >= data.shape[1]:
                cube[i + t, j + t] = data[row, col]
            # 否则，这点的像素值由这点周围的值共同得到
            else:
                cube[i + t, j + t] = data[i + row, j + col]
    return cube

# 得到全部数据

In [6]:
# # 这个是未分类版本
# class_num = np.max(data_gt)
# data_pos = {i: [] for i in range(1, 2)}
# print(data_pos)

# for i in range(data_gt.shape[0]):
#     for j in range(data_gt.shape[1]):
#         if data_gt[i, j]:
#             data_pos[1].append([i, j])
# data_t = 0
# data_pos_all = list()

# for k,v in data_pos.items():
#     print('data-ID %s: %s'%(k,len(v)))
#     data_t += len(v)
#     for t in v:
#         data_pos_all.append([k,t])
# print('total data %s'%data_t)

In [7]:
# # 创建一个空的ndarray 用于装数据
# data_all = np.zeros((54129, cube_size, cube_size, 204))
# data_label_all = np.zeros((54129)).astype("int")

# k = 0
# for i in data_pos_all:
#     # print(i)
#     # 取出训练集中的一个数，随着 i 的改变， 所取的数也会发生改变
#     [r,c] = i[1]
#     # print(i[1])
#     # print(r, c)
#     # pixel_t = neighbor_add(r,c,w_size=cube_size).astype(np.float32).tostring()
#     pixel_t = neighbor_add(r,c,w_size=cube_size).astype(np.float32)
#     data_all[k] = pixel_t
#     # print(pixel_t.shape)
#     # print(train[1000])
    
#     # 标签值 - 1
#     label_t = np.array(np.array(i[0] - 1).astype(np.int64))
#     data_label_all[k] = label_t
#     k = k+ 1

# data_all.shape, data_label_all.shape

In [8]:
# for k, v in data_pos.items():
#     print(k, end=",")

# print()
# for k, v in data_pos.items():
#     print(len(v), end=",")

# 划分训练集和测试集

In [9]:
# 得到类别数 16类，从1开始
# DataSet salinas_corrected shape is (512, 217, 204)
# 在这里已经把标签为 0 的背景给删除了

class_num = np.max(data_gt)    

data_pos = {i: [] for i in range(1, class_num + 1)}
train_pos = {i: [] for i in range(1, class_num + 1)}
test_pos = {i: [] for i in range(1, class_num + 1)}

print(data_pos)

for i in range(data_gt.shape[0]):
    for j in range(data_gt.shape[1]):
        for k in range(1, class_num + 1):
            if data_gt[i, j] == k:
                data_pos[k].append([i, j])

{1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 13: [], 14: [], 15: [], 16: []}


In [10]:
# 是否用随机种子
if fix_seed:
    random.seed(seed)

# 划分训练集和测试集, 一共 3200 个
for k, v in data_pos.items():
    if len(v)<train_num:
        train_num = 15
    else:
        train_num = train_num
    train_pos[k] = random.sample(v, int(train_num))
    test_pos[k] = [i for i in v if i not in train_pos[k]]

In [11]:
train_pos_all = list()
test_pos_all = list()
for k,v in train_pos.items():
    for t in v:
        train_pos_all.append([k,t])
for k,v in test_pos.items():
    for t in v:
        test_pos_all.append([k,t])

In [12]:
train_t = 0
test_t = 0
for (k1,v1),(k2,v2) in zip(train_pos.items(), test_pos.items()):
    print('traindata-ID %s: %s; testdata-ID %s: %s'%(k1,len(v1),k2,len(v2)))
    train_t += len(v1)
    test_t += len(v2)
print('total train %s, total test %s'%(train_t,test_t))
# for k,v in self.test_pos.items():
#     print('testdata-ID %s: %s'%(k,len(v)))

traindata-ID 1: 200; testdata-ID 1: 1809
traindata-ID 2: 200; testdata-ID 2: 3526
traindata-ID 3: 200; testdata-ID 3: 1776
traindata-ID 4: 200; testdata-ID 4: 1194
traindata-ID 5: 200; testdata-ID 5: 2478
traindata-ID 6: 200; testdata-ID 6: 3759
traindata-ID 7: 200; testdata-ID 7: 3379
traindata-ID 8: 200; testdata-ID 8: 11071
traindata-ID 9: 200; testdata-ID 9: 6003
traindata-ID 10: 200; testdata-ID 10: 3078
traindata-ID 11: 200; testdata-ID 11: 868
traindata-ID 12: 200; testdata-ID 12: 1727
traindata-ID 13: 200; testdata-ID 13: 716
traindata-ID 14: 200; testdata-ID 14: 870
traindata-ID 15: 200; testdata-ID 15: 7068
traindata-ID 16: 200; testdata-ID 16: 1607
total train 3200, total test 50929


In [13]:
# 创建一个空的ndarray 用于装数据
train = np.zeros((3200, cube_size, cube_size, 204)).astype(np.float32)
train_label = np.zeros((3200)).astype(np.int32)

test = np.zeros((50929, cube_size, cube_size, 204)).astype(np.float32)
test_label = np.zeros((50929)).astype(np.int32)

In [14]:
# train data
k = 0
for i in train_pos_all:
    # print(i)
    # 取出训练集中的一个数，随着 i 的改变， 所取的数也会发生改变
    [r,c] = i[1]
    # print(i[1])
    # print(r, c)
    # pixel_t = neighbor_add(r,c,w_size=cube_size).astype(np.float32).tostring()
    pixel_t = neighbor_add(r,c,w_size=cube_size).astype(np.float32)
    train[k] = pixel_t
    # print(pixel_t.shape)
    # print(train[1000])
    
    # 标签值 - 1
    label_t = np.array(np.array(i[0] - 1).astype(np.int32))
    train_label[k] = label_t
    k = k+ 1

In [15]:
# test data
k = 0
for i in test_pos_all:
    # 取出测试集中的一个数，随着 i 的改变， 所取的数也会发生改变
    [r, c] = i[1]
    # pixel_t = neighbor_add(r,c,w_size=cube_size).astype(np.float32).tostring()
    pixel_t = neighbor_add(r,c,w_size=cube_size).astype(np.float32)
    test[k] = pixel_t

    label_t = np.array(np.array(i[0] - 1).astype(np.int32))
    test_label[k] = label_t
    # print('.', end='')
    k = k+ 1

In [16]:
train.shape, test.shape,train_label.shape, test.shape

((3200, 9, 9, 204), (50929, 9, 9, 204), (3200,), (50929, 9, 9, 204))

# 创建dataset

In [17]:
db_train = tf.data.Dataset.from_tensor_slices((train, train_label))
db_test = tf.data.Dataset.from_tensor_slices((test, test_label))

# db_train = db_train.shuffle(3800).batch(batch_size=BATCHSZ).repeat()
db_train = db_train.shuffle(3800).batch(batch_size=BATCHSZ)    ###############
db_test = db_test.batch(batch_size=BATCHSZ)

In [18]:
db_train, db_test

(<BatchDataset shapes: ((None, 9, 9, 204), (None,)), types: (tf.float32, tf.int32)>,
 <BatchDataset shapes: ((None, 9, 9, 204), (None,)), types: (tf.float32, tf.int32)>)

# 构建 ResNet

- add（）：直接对张量求和,add层将dense_1层的输入和dense_2层的输入加在了一起，是张量元素内容相加。
- conatenate（）：串联一个列表的输入张量。对一维进行了串联，通道数变成了x + x = 2x，可以指指定 axis = x 来指定空间的第 x 维串联。

In [19]:
# 实现残差块
# (3,3) (1,1) 是卷积核的大小

class BasicBlock(layers.Layer):

    def __init__(self, filter_num, stride=1):   
        super(BasicBlock, self).__init__()

        # padding 对于能够整除的四周均匀补全，对于不能整除的，自适应补全，保证能够用到原始图像的全部像素信息，
        # 并不是保证 padding 后图像大小一致，当 stride = 2， 使用 padding = same时，必使结果的图像大小变为一半
        # “第一层” strides=stride，有时进行下采样（stride > 1）,有时不进行下采样
        self.conv1 = layers.Conv2D(filter_num, (3,3), strides=stride, padding='same', use_bias=False)
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')

        # “第二层”
        self.conv2 = layers.Conv2D(filter_num, (3,3), strides=1, padding='same', use_bias=False)
        self.bn2 = layers.BatchNormalization()
        
        # "分支层"
        # 保证从 x 直接连到下面的线两端能够之间相加，如果上一个 Residual Block 的输出维度和当前的 Residual Block 的维度不一样，
        # 那就对这个 x 进行 downSample 操作，使得维度一致
        if stride != 1:
            # 短接层， identity layer，这里的 strides 与第一层的 strides 相同，保证结果可以和第二层直接相加
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, (1, 1), strides=stride))
        else:
            # 如果 strides = 1，保证结果可以和第二层直接相加，就不需要 downsample
            self.downsample = lambda x:x

    # 残差块内正向传播过程，包含两个卷积层
    def call(self, inputs, training=None):
        # 前向传播
        # [b, h ,w, c]
        out = self.conv1(inputs)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        
        # 此处调用 downsample  有两种形式，看stride 的值而定
        identity = self.downsample(inputs)

        output = layers.add([out, identity])    # 这里的相加是对应元素相加
        output = self.relu(output)              # 没有参数的层可以定义一个层用两次

        return output

In [20]:
# ResNet 是多个 BasicBlock 顿叠而成
class ResNet(keras.Model):
    
    # layer_dims [2,2,2,2]
    def __init__(self, layer_dims, num_calsses=16):   # layer_dims [2,2,2,2] 每一层的basic block个数
        super(ResNet, self).__init__()
        
        # 设置预处理层
        self.stem = Sequential([layers.Conv2D(64, (3,3), strides=(1, 1)),
                               layers.BatchNormalization(),
                               layers.Activation('relu'),
                               layers.MaxPooling2D(pool_size=(2,2), strides=(1, 1), padding='same')
                               ])

        # 中间层
        self.layers1 = self.build_resblock(64, layer_dims[0])   # 调用数组layer_dims中的第 0 维定义的 basicBlock个数
        
        # h, w 维会变小，这里stride 等于 2，使得 feature size 越来越小，channel 越来越多
        self.layers2 = self.build_resblock(128, layer_dims[1], stride=2)
        self.layers3 = self.build_resblock(256, layer_dims[2], stride=2)
        self.layers4 = self.build_resblock(512, layer_dims[3], stride=2)

        # 分类层
        # 全连接层 output : [b, 512, h, w]，自适应输出用于输出
        self.avgpool = layers.GlobalAveragePooling2D()  # 具体大小为6 × 6 × 3，经过GAP转换后，变成了大小为 1 × 1 × 3 的输出值，每一层 h × w 会被平均化成一个值
        self.fc = layers.Dense(16)  # TODO


    def call(self, inputs, training=None):
        # 前向运算过程，预处理
        x = self.stem(inputs)
        # 4 个 resBlock
        x = self.layers1(x)
        x = self.layers2(x)
        x = self.layers3(x)
        x = self.layers4(x)

        # [b, c]
        x = self.avgpool(x)
        # [b, 16]
        x = self.fc(x)
        return x
    
    # 实现 resblock
    # 创建一个resBlock， 一个 resBlock 中包含多个 basicBlock
    def build_resblock(self, filter_num, blocks, stride=1):    # 通道数，ResNet 会堆叠多少，默认步长为 1
        res_blocks = Sequential()
        # 添加第一层， may down sample
        res_blocks.add(BasicBlock(filter_num, stride))
        # 后面的 BasicBlock 不会下采样，因为此处定义了 stride 为 1
        for _ in range(1, blocks):
            res_blocks.add(BasicBlock(filter_num, stride=1))
        return res_blocks

- 传给类中的参数，都被 \__init__ ()中的形式变量接收了
- 因为类中已经实现，\__call__() 魔法方法,因此不用外部调用就可以运行build()、call()等函数？

In [21]:
def resnet18():
    return ResNet([2, 2, 2, 2])   #   1 + 4 * 4 + 1

In [22]:
sample = next(iter(db_train))
print(sample[0].shape, sample[1].shape, tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

(32, 9, 9, 204) (32,) tf.Tensor(0.0006509004, shape=(), dtype=float32) tf.Tensor(0.90160555, shape=(), dtype=float32)


In [23]:
def main():
    model = resnet18()
    model.build(input_shape=(None, 9, 9, 204))  # TODO 需要修改
    model.summary()
    optimizer = optimizers.Adam(lr=1e-3)
    
    for epoch in range(30):
        for step, (x, y) in enumerate(db_train):
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3]  =>  [b, 100]
                logits = model(x)
                # print(logits.shape)
                # [b] => [b, 100]
                y_onthot = tf.one_hot(y, depth=num_calsses)         # TODO 需要修改
                # print(y_onthot.shape) 
                loss = tf.losses.categorical_crossentropy(y_onthot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if step % 300 ==0:
                print(epoch, step, 'loss', float(loss))

        total_num = 0
        total_correct = 0

        for x, y in db_test:
            logits = model(x)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            
            total_num += x.shape[0]
            
            total_correct += int(correct)

        acc = total_correct / total_num
        print(epoch, 'acc', acc)

In [24]:
main()

Model: "res_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, 7, 7, 64)          117824    
_________________________________________________________________
sequential_1 (Sequential)    (None, 7, 7, 64)          148736    
_________________________________________________________________
sequential_2 (Sequential)    (None, 4, 4, 128)         526976    
_________________________________________________________________
sequential_4 (Sequential)    (None, 2, 2, 256)         2102528   
_________________________________________________________________
sequential_6 (Sequential)    (None, 1, 1, 512)         8399360   
_________________________________________________________________
global_average_pooling2d (Gl multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  8208