In [1]:
from __future__ import print_function

'''Trains a simple convnet on the MSTAR CSV data (mstar_train.csv / mstar_test.csv);
the script was originally written for the MNIST dataset.
Implements the flat increment from T. Xiao et al., "Error-Driven Incremental Learning in Deep
Convolutional Neural Network for Large-Scale Image Classification".
Starts with just 3 classes and trains for 12 epochs, then
incrementally trains the rest of the classes by reusing
the trained weights.
with theano 0.9.0
     keras 2.0.6
'''


import numpy as np
np.random.seed(1)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D,Conv2D
from keras.utils import np_utils
import csv

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K20c (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5110)


# 准备数据

In [2]:
def build_data(classes,total_classes,X_train_all,y_train_all,X_test_all,y_test_all):
    '''
    Pick the samples whose label is listed in `classes` and prepare them for the network.

    The selected samples are grouped class by class, in the order of `classes`,
    and every original label is replaced by its position in `classes`.

    Arguments:
        classes: list of original labels to keep; position i becomes new label i.
        total_classes: width of the one-hot target vectors.
        X_train_all, X_test_all: image arrays indexed as (sample, row, col).
        y_train_all, y_test_all: 1-D arrays of original labels.

    Returns:
        X_train, Y_train, X_test, Y_test, where each X is a float32 array of
        shape (n_selected, 1, rows, cols) scaled by 1/255 and each Y is
        np_utils.to_categorical(new_labels, total_classes).
    '''
    def _prepare(X_all, y_all):
        # Shared train/test pipeline: select, relabel, reshape, scale, one-hot.
        X_all = np.asarray(X_all)
        y_all = np.asarray(y_all)

        # Positions of the wanted samples, collected one class at a time.
        picked = []
        for c in classes:
            picked.extend(np.where(y_all == c)[0])
        picked = np.array(picked, dtype=np.intp)

        # Map each original label to its index in `classes`.
        y_true = y_all[picked]
        y_new = np.zeros(y_true.shape)
        for new_label, c in enumerate(classes):
            y_new[y_true == c] = new_label

        # Insert a single channel axis (channels-first). The image size is
        # taken from the data itself, so this function does not depend on
        # module-level img_rows / img_cols being defined before it is called.
        X = X_all[picked]
        X = X.reshape(X.shape[0], 1, X.shape[1], X.shape[2]).astype('float32')
        X /= 255

        # convert class vectors to binary class matrices
        return X, np_utils.to_categorical(y_new, total_classes)

    X_train, Y_train = _prepare(X_train_all, y_train_all)
    X_test, Y_test = _prepare(X_test_all, y_test_all)

    return X_train, Y_train, X_test, Y_test

In [3]:


def _read_labeled_images(csv_path, image_shape):
    '''Read one CSV file in which every row is `label, pixel, pixel, ...`.

    Returns a tuple (images, labels): images is a float32 array of shape
    (n_rows, rows, cols) and labels is an int32 array of length n_rows.
    Completely blank rows are skipped. A row whose pixel count does not
    match `image_shape` raises ValueError.
    '''
    rows, cols = image_shape
    images = []
    labels = []
    with open(csv_path, 'r') as csv_file:
        for line_no, record in enumerate(csv.reader(csv_file), 1):
            if not record:
                # csv.reader yields [] for an empty line (e.g. a trailing newline)
                continue
            # The first column is the label, the remaining columns are the pixels
            labels.append(int(record[0]))
            pixels = np.array(record[1:], dtype='float32')
            if pixels.size != rows * cols:
                raise ValueError('%s, row %d: expected %d pixel values, found %d'
                                 % (csv_path, line_no, rows * cols, pixels.size))
            images.append(pixels.reshape((rows, cols)))

    if images:
        image_array = np.array(images)
    else:
        # Keep a well-formed 3-D array even when the file holds no samples
        image_array = np.zeros((0, rows, cols), dtype='float32')
    return image_array, np.array(labels, dtype='int32')


def load_data(train_path='mstar_train.csv', test_path='mstar_test.csv', image_shape=(128, 128)):
    '''Load the training and test sets from two CSV files.

    Arguments:
        train_path: CSV file with the training samples.
        test_path: CSV file with the test samples.
        image_shape: (rows, cols) each flattened pixel row is reshaped to.

    Returns:
        (trainX, trainY), (testX, testY), where each X is a float32 array of
        shape (n, rows, cols) and each Y is an int32 label vector.

    Raises:
        ValueError: if a row does not contain rows * cols pixel values.
    '''
    train_set = _read_labeled_images(train_path, image_shape)
    test_set = _read_labeled_images(test_path, image_shape)
    return train_set, test_set

# 建立模型

In [4]:
def build_model(old_model=None):
    '''Assemble the network up to (and excluding) the final classifier layer.

    Stack: Conv2D -> relu -> Conv2D -> relu -> MaxPooling2D -> Dropout(0.25)
    -> Flatten -> Dense(128) -> relu -> Dropout(0.5).

    When `old_model` is given, the two Conv2D layers and the Dense(128) layer
    are created with the weights of old_model.layers[0], [2] and [7];
    otherwise they use the layers' default initialisation.
    The caller is expected to append the output layer and compile the model.
    '''
    def _seed_from(layer_index):
        # Extra constructor kwargs that pre-load a layer with the weights of
        # the corresponding layer of old_model (nothing when training from scratch).
        if old_model is None:
            return {}
        return {'weights': old_model.layers[layer_index].get_weights()}

    model = Sequential()

    # Convolutional feature extractor; the input is a single-channel image, channels first.
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv), padding='valid',
                     input_shape=(1, img_rows, img_cols), **_seed_from(0)))
    model.add(Activation('relu'))
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv), **_seed_from(2)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))

    # Fully connected part (index 7 of the layer list is this Dense layer).
    model.add(Flatten())
    model.add(Dense(128, **_seed_from(7)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    return model

# 训练

## 参数设置

In [5]:
# Input image dimensions (rows, columns)
img_rows = img_cols = 128

# Training schedule
batch_size = 10
nb_epoch = 12

# Total number of classes in the data set
total_classes = 10

# Number of convolution filters used by each Conv2D layer
nb_filters = 32
# Side length of the (square) convolution kernel
nb_conv = 3
# Side length of the pooling window for max pooling
nb_pool = 2

# Classes the first model is trained on
classes = [9, 1, 6]

## 准备数据

In [6]:
# Load the complete train and test sets from the CSV files.
(X_train_all, y_train_all), (X_test_all, y_test_all) = load_data()

# Keep only the initial classes, relabelled 0..len(classes)-1 in the order of
# `classes`. The one-hot width is taken from `classes` itself rather than a
# hard-coded 3, so it always matches the Dense(len(classes)) output layer.
X_train, Y_train, X_test, Y_test = build_data(classes, len(classes),
                                              X_train_all, y_train_all, X_test_all, y_test_all)

array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       ..., 
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.]])

## 建立模型

In [7]:
# First-stage network: the layer stack from build_model() followed by a
# softmax classifier with one output unit per initial class.
model1 = build_model()
model1.add(Dense(len(classes)))
model1.add(Activation('softmax'))

model1.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 126, 126)      320       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 126, 126)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 124, 124)      9248      
_________________________________________________________________
activation_2 (Activation)    (None, 32, 124, 124)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 62, 62)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 62, 62)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 123008)            0         
__________

## 训练并保存网络参数

In [8]:
# Train the first-stage model; the test split is passed as validation data.
model1.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# Save this model for later interrogation
json_string = model1.to_json()
# Use a context manager so the file is flushed and closed right away instead
# of leaving the open handle to the garbage collector.
with open('model1_incremental_architecture.json', 'w') as json_file:
    json_file.write(json_string)
model1.save_weights('model1_incremental_weights.h5')

Train on 853 samples, validate on 743 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


## 评估网络

In [10]:
# Loss and accuracy of the first-stage model on the test samples of the initial classes.
score = model1.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Test score: 0.744953671022
Test accuracy: 0.773889636729


# 建立新模型

## 建立模型并载入参数

In [11]:
# Inspect the layer list of the trained model: the indices shown here are the
# ones build_model() reads (0, 2 and 7) when it copies weights from an old model.
model1.layers

[<keras.layers.convolutional.Conv2D at 0x7fac2d8f65d0>,
 <keras.layers.core.Activation at 0x7fac1828c050>,
 <keras.layers.convolutional.Conv2D at 0x7fac1828c1d0>,
 <keras.layers.core.Activation at 0x7fac181cbb90>,
 <keras.layers.pooling.MaxPooling2D at 0x7fac181cb5d0>,
 <keras.layers.core.Dropout at 0x7fac18212d10>,
 <keras.layers.core.Flatten at 0x7fac17d64b50>,
 <keras.layers.core.Dense at 0x7fac17d0c390>,
 <keras.layers.core.Activation at 0x7fac80cc03d0>,
 <keras.layers.core.Dropout at 0x7fac17d1d350>,
 <keras.layers.core.Dense at 0x7fac2d8f6550>,
 <keras.layers.core.Activation at 0x7fac1710bed0>]

In [12]:
# The last layer is the softmax Activation, so the Dense layer holding the
# classifier weights is the second to last one (index -2).
model1.layers[-1]

<keras.layers.core.Activation at 0x7fac1710bed0>

In [13]:
# The new model has all `total_classes` classes in its softmax layer. Its
# feature layers start from the weights of the trained network; the output
# units of the new classes keep their fresh random initialisation.
model2 = build_model(old_model=model1)
output_layer = Dense(total_classes)
model2.add(output_layer)

# Overwrite the LAST len(classes) output units of the new classifier with the
# previously trained class weights (kernel columns and bias entries).
n_old_classes = len(classes)
old_weights = model1.layers[-2].get_weights()  # last Dense layer sits just before the softmax
weights = output_layer.get_weights()
weights[0][:, -n_old_classes:] = old_weights[0]
weights[1][-n_old_classes:] = old_weights[1]
output_layer.set_weights(weights)

model2.add(Activation('softmax'))
model2.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 32, 126, 126)      320       
_________________________________________________________________
activation_5 (Activation)    (None, 32, 126, 126)      0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 124, 124)      9248      
_________________________________________________________________
activation_6 (Activation)    (None, 32, 124, 124)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 32, 62, 62)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 32, 62, 62)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 123008)            0         
__________

## 准备7类数据

In [14]:
# Classes introduced in the incremental stage; build_data relabels them
# 0..len(new_classes)-1 in this order.
new_classes = [7, 0, 3, 5, 2, 8, 4]

# class_mapping[i] is the original label handled by output unit i of model2:
# the new classes first, the initial classes in the last len(classes) slots.
class_mapping = new_classes + classes
# Every output unit must stand for exactly one distinct class, otherwise the
# label lookup through class_mapping is ambiguous or incomplete.
assert len(set(class_mapping)) == len(class_mapping) == total_classes, \
    'class_mapping must list total_classes distinct labels'

# One-hot width comes from total_classes (not a hard-coded 10) so that it
# matches the Dense(total_classes) output layer of model2.
X_train, Y_train, X_test, Y_test = build_data(new_classes, total_classes, X_train_all, y_train_all, X_test_all, y_test_all)



## 训练新网络，保存参数

In [15]:
# Train the extended model on the new classes only; the test split is passed as validation data.
model2.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

# Save the incrementally trained model
json_string = model2.to_json()
# Use a context manager so the file is flushed and closed right away instead
# of leaving the open handle to the garbage collector.
with open('model2_incremental_architecture.json', 'w') as json_file:
    json_file.write(json_string)
model2.save_weights('model2_incremental_weights.h5')

Train on 1894 samples, validate on 1682 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


## 评估新网络

In [16]:
# Loss and accuracy of the incrementally trained model on the test samples of the new classes.
score = model2.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Test score: 0.374709201602
Test accuracy: 0.895362663283


# 测试新网络的泛化性能

In [17]:
# Evaluate on the complete test set (all classes): add the channel axis,
# convert to float32 and scale by 1/255.
X_test = X_test_all.reshape((X_test_all.shape[0], 1, img_rows, img_cols)).astype('float32')
X_test /= 255

# Convert class vectors to binary class matrices.
# Output unit i of this network stands for the original label class_mapping[i],
# so every true label is translated to its position in class_mapping here
# (and a predicted index must be mapped back through class_mapping[i]).
y_test = np.array(list(map(class_mapping.index, y_test_all)))
Y_test = np_utils.to_categorical(y_test, total_classes)

score = model2.evaluate(X_test, Y_test, verbose=1)
total_loss, total_accuracy = score[0], score[1]

print('\nTotal Test score:', total_loss)
print('Total Test accuracy:', total_accuracy)


Total Test score: 4.04302053612
Total Test accuracy: 0.621030927835
