In [50]:
# keras相关包
from tensorflow.keras.applications import ResNet50
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Input
from keras.models import Model
from tensorflow.keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img

import random
# sklearn相关包
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelBinarizer
from sklearn.feature_extraction.image import extract_patches_2d

import numpy as np
import cv2
import os
from imutils import paths
import progressbar

# 作图
import matplotlib.pyplot as plt
%matplotlib inline

#Basic Tool
import pandas as pd
import sys
import io

# Data Loading

In [51]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [52]:
# Set the random seeds.
# NumPy's global generator is seeded, and so is Python's `random` module,
# because later cells draw samples with `random.sample`; seeding only NumPy
# would leave that resampling different on every run.
np.random.seed(161)
random.seed(161)

from keras.datasets import cifar10

# PreProcessing

### Normalization

In [53]:
# Load the CIFAR-10 train/test split.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize: divide both image arrays by 255.0.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [54]:
def imbalanceData(__format,X_train,y_train):
    """
    Subsample a dataset so that each class keeps only a requested number of samples.

    __format: sequence of per-class sample counts; entry k is how many samples
              of class k are kept (the first ones in their original order).
              Classes without an entry in __format are dropped.
    X_train: samples, indexed along the first axis
    y_train: labels aligned with X_train; classes are looked up as the
             integers 0 .. (number of distinct labels - 1)

    Returns (X, y): the selected samples and their labels, grouped by class.
    """
    nb_train_classes = len(np.unique(y_train))
    # Row positions of every class, in class order.
    class_indices = [np.where(y_train == i)[0] for i in range(nb_train_classes)]
    # Keep only the first `class_count` rows of each class.
    imbal_class_indices = np.hstack(
        [class_idx[:class_count] for class_idx, class_count in zip(class_indices, __format)]
    )
    y = y_train[imbal_class_indices]
    # Index the X_train argument (not a global array) so the function works on any input.
    X = X_train[imbal_class_indices]
    return X,y

In [55]:
# Per-class sample counts handed to imbalanceData: [500, 1000] repeated 5 times (10 entries).
imbal_class_counts=[500,1000]*5 # other possible formats: [500,1000]*5 or [100,200,300,400,500,...,1000]
# Replace x_train / y_train with the subsampled arrays returned by imbalanceData.
x_train,y_train=imbalanceData(imbal_class_counts,x_train,y_train)

### One-Hot

In [56]:
# One-hot encode the train and test labels.
y_train_oh = to_categorical(y_train)
y_test_oh = to_categorical(y_test)

Keras offers three ways to save and load a model: save the entire model, save only the model's architecture, or save only the model's weights.
Using the models built in the previous few weeks, write the code for saving and loading the model.

# ResNet-50

In [57]:
# Load ResNet50 without its top layers (include_top=False), using ImageNet weights
# and a 32x32x3 input tensor, as baseModel.
baseModel = ResNet50(weights="imagenet", include_top=False, input_tensor=Input(shape=(32, 32, 3)))

In [58]:
def headmodel(baseModel):
    """
    Build a new top (classification head) on baseModel.

    baseModel: model whose `output` tensor the head is attached to

    Returns the output tensor of the head: baseModel.output passed through
    Flatten and then a 10-unit softmax Dense layer.
    """
    flattened = Flatten()(baseModel.output)
    return Dense(10, activation="softmax")(flattened)

In [59]:
# Attach the new head to baseModel and wrap input -> head output in a single Model.
headModel = headmodel(baseModel)
model = Model(inputs=baseModel.input, outputs=headModel)
# Image generator used during training: random rotation, width/height shifts,
# shear, zoom and horizontal flips; empty pixels are filled with "nearest".
aug00 = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1, 
                         shear_range=0.2, 
                         zoom_range=0.2,
                         horizontal_flip=True,
                         fill_mode="nearest")

In [60]:
# Callback that saves model snapshots; with save_best_only=True a file is written
# only when the monitored val_loss improves (mode="min").
modelname = "cifar10-resnet50-weights-{epoch:03d}-{val_loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(modelname, monitor="val_loss", mode="min", save_best_only=True)
# Freeze every layer of baseModel so its parameters are not trained.
for layer in baseModel.layers:
    layer.trainable = False
# Compile. `learning_rate` is used instead of `lr`, which the optimizer reports
# as deprecated.
sgd = SGD(learning_rate=0.01, decay=0.01 / 20, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd,metrics=["accuracy"])

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Train the model on augmented batches from aug00 and validate on the test split.
# Model.fit accepts generators directly; fit_generator is deprecated.
H = model.fit(aug00.flow(x_train, y_train_oh, batch_size=32),
              validation_data=(x_test, y_test_oh),
              callbacks=[checkpoint],
              steps_per_epoch=len(x_train) // 32,
              epochs=20)



Epoch 1/20




Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20

In [None]:
# Plot the loss and accuracy curves.
plt.style.use("ggplot")
plt.figure()
# Take the x-axis from the number of epochs actually recorded instead of a
# hard-coded 20, so the plot still works if training stopped early or the
# epoch count is changed.
epochs_run = np.arange(len(H.history["loss"]))
plt.plot(epochs_run, H.history["loss"], label="train_loss")
plt.plot(epochs_run, H.history["val_loss"], label="val_loss")
plt.plot(epochs_run, H.history["accuracy"], label="train_acc")
plt.plot(epochs_run, H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.show()

# F1-Score

In [None]:
# Classification report: predict on the test images, then compare the argmax of
# the one-hot test labels with the argmax of the predictions (4 digits shown).
preds = model.predict(x_test, batch_size=32)
print(classification_report(y_test_oh.argmax(axis=1),preds.argmax(axis=1), digits=4))

In [None]:
# Text version of the report (same content as the printed one).
final_report = classification_report(y_test_oh.argmax(axis=1),preds.argmax(axis=1), digits=4)
# Build the table from the structured report instead of re-parsing the printed
# text: parsing relied on whitespace splitting and on silently skipping bad
# lines, and used a pandas keyword that is deprecated.
report_dict = classification_report(y_test_oh.argmax(axis=1),preds.argmax(axis=1), output_dict=True)
# Keep only entries that hold per-row metrics (dicts); the scalar "accuracy"
# entry is skipped. Rows are the report labels, columns are
# precision / recall / f1-score / support, and values are unrounded.
report_rows = {name: metrics for name, metrics in report_dict.items() if isinstance(metrics, dict)}
report_pd = pd.DataFrame(report_rows).T

In [None]:
# First ten rows of the report, reduced to the row label and its F1 score.
report_re = report_pd[:10]
report_re = report_re.reset_index()
# .copy() makes this an independent frame, so the column assignments below do
# not write into a slice of report_pd (SettingWithCopyWarning).
report_re = report_re[["index", "f1-score"]].copy()
report_re.columns=['class_name','f1_score']
# 1 - F1 score is used for updating the sample size: a lower F1 gives a larger weight.
report_re['1-f1_score'] = 1-report_re['f1_score']
ds_sum = report_re['1-f1_score'].sum() 
report_re["class_weight"] = report_re["1-f1_score"]/ds_sum
# Target sample count per class: its weight times the current training-set size.
report_re["re_class_counts"] = (report_re["1-f1_score"]/ds_sum)*len(x_train)
# Current per-class counts, aligned on the 0..9 row index.
report_re["row_distribution"] = pd.Series(imbal_class_counts)
# tt = target - current. Where the target is below the current count (negative
# difference) tt is set to the target count itself, otherwise it stays the difference.
report_re['tt'] = report_re['re_class_counts'] - report_re['row_distribution']
report_re.loc[report_re["tt"]<0,"tt"] = report_re["re_class_counts"]
report_re002 = report_re[["class_name","class_weight","re_class_counts","row_distribution","tt"]].copy()
report_re002["tt"] = report_re002["tt"].round()
report_re002.columns=['class_name',"class_weight","re_class_counts","row_distribution","update_num"]

In [None]:
def choosingImages(x_train,y_train,num_classes=10):
  """
  Split the training images into one array per class.

  x_train: training images, indexed along the first axis
  y_train: training labels aligned with x_train (one label per row)
  num_classes: number of classes; labels 0 .. num_classes-1 are looked up

  Returns a list of length num_classes whose i-th entry holds the images
  labelled i, with shape (count_i, ...) (count_i may be 0).
  """
  c_X_train=[]
  for i in range(num_classes):
    # Index with the plain index array. Wrapping it in a list makes NumPy treat
    # it as a 2-D index and adds a leading axis of length 1 to the result.
    class_indices_ = np.where(y_train == i)[0]
    c_X_train.append(x_train[class_indices_])
  return c_X_train

In [None]:
# Group the training images by class with choosingImages.
X_train_list=choosingImages(x_train,y_train)

In [None]:
def data_augmentation(img, number):
  """
  Generate `number` randomly augmented variants of a single image.

  img: one image array; a leading batch axis is added before augmentation
  number: how many augmented images to produce; values <= 0 give an empty list

  Returns a list of `number` arrays, each a batch holding one augmented image
  (leading axis of length 1).
  """
  # The iterator returned by flow() keeps producing batches, so a request for
  # zero (or fewer) images has to be answered before entering the loop.
  if number <= 0:
    return []

  image = np.expand_dims(img, axis=0)
  # Instantiate the image generator class to specify some common image augmentation parameters
  aug = ImageDataGenerator(rotation_range=30,
                width_shift_range=0.1,
                height_shift_range=0.1, 
                shear_range=0.2, 
                zoom_range=0.2,
                horizontal_flip=True,
                fill_mode="nearest")

  imageGen = aug.flow(image, batch_size=1)

  imglist = []
  for x in imageGen:
    imglist.append(x)
    # ">=" (not "==") so a non-integer `number` still terminates the loop.
    if len(imglist) >= number:
      break

  return imglist

In [None]:
def img_add(x_train_i, num, per_image=20):
  """
  Extend one class's images with `num` randomly chosen augmented images.

  x_train_i: images of a single class, stacked along the first axis
  num: number of augmented images to append (must be >= 0)
  per_image: augmented variants generated from every original image; the
             `num` appended images are sampled from this pool

  Returns a new array: the original images followed by `num` augmented ones.
  Raises ValueError when num is negative or larger than the augmented pool.
  """
  if num < 0:
    raise ValueError("num must be non-negative, got %r" % (num,))
  if num == 0:
    # Nothing to add: return a copy of the originals without any augmentation.
    return np.array(x_train_i)

  img_list = []
  for image in x_train_i:
    img_list.extend(data_augmentation(image, per_image))

  if num > len(img_list):
    raise ValueError(
        "cannot sample %d images from a pool of %d augmented images"
        % (num, len(img_list)))

  imgback = random.sample(img_list, num)
  # Every pool entry is a batch with a leading axis of length 1, so joining
  # along that axis gives (num, ...). A bare squeeze() would also drop the
  # sample axis when num == 1 and make the stack below fail.
  imgback = np.concatenate(imgback, axis=0)
  final_x_train_i = np.vstack((x_train_i, imgback))
  return final_x_train_i


In [None]:
# Number of images available in each class. The original expression had an
# empty subscript, which is a syntax error.
[len(class_images) for class_images in X_train_list]

In [None]:
# Build the rebalanced image array for every class.
# Classes 0, 2, 4 and 6 are grown with img_add (original images plus N augmented ones);
# the remaining classes are shrunk by drawing N images without replacement via random.sample.
# NOTE(review): the counts are hard-coded — presumably taken from the update_num column
# of the F1-based table; confirm and re-derive them whenever the model is retrained.
# The statement order matters: every call below consumes the `random` module's state.
ds_change_0 = img_add(X_train_list[0], 188)
ds_change_1 = np.array(random.sample(list(X_train_list[1]), 502))
ds_change_2 = img_add(X_train_list[2], 345)
ds_change_3 = np.array(random.sample(list(X_train_list[3]), 886))
ds_change_4 = img_add(X_train_list[4], 362)
ds_change_5 = np.array(random.sample(list(X_train_list[5]), 858))
ds_change_6 = img_add(X_train_list[6], 359)
ds_change_7 = np.array(random.sample(list(X_train_list[7]), 665))
ds_change_8 = np.array(random.sample(list(X_train_list[8]), 477))
ds_change_9 = np.array(random.sample(list(X_train_list[9]), 857))

In [None]:
# Stack the per-class arrays, in class order 0..9, into the new training set.
# This assembly was commented out, leaving x_train_new undefined for the
# training cell that uses it. The class order here has to match the order of
# the labels built for y_train_new_oh.
x_train_new = np.vstack((ds_change_0, ds_change_1, ds_change_2, ds_change_3,
                         ds_change_4, ds_change_5, ds_change_6, ds_change_7,
                         ds_change_8, ds_change_9))


In [None]:
def new_array(row,nmb):
  """
  Create a float column vector of shape (row, 1) in which every entry is nmb.

  row: number of rows
  nmb: value written into every row
  """
  return np.full((row, 1), nmb, dtype=np.float64)

def change_y(update__):
  """
  Build one-hot labels for a dataset whose samples are grouped by class.

  update__: per-class sample counts; entry k is the number of consecutive
            samples that belong to class k

  Returns the one-hot label matrix with one row per sample and
  len(update__) columns; rows are ordered class 0 first, then class 1, ...
  """
  class_counts = list(update__)
  # Label k repeated class_counts[k] times, as a column vector. Building it in
  # one step also works when only a single class is given.
  y_train_new = np.repeat(
      np.arange(len(class_counts), dtype=np.float64), class_counts
  ).reshape(-1, 1)

  # One-hot coding. The width is fixed to the number of classes so trailing
  # classes with a count of 0 do not shrink the matrix.
  y_train_new_oh = to_categorical(y_train_new, num_classes=len(class_counts))

  return y_train_new_oh

In [None]:
# One-hot labels for the rebalanced set; entry k of the list is the number of samples of class k.
# NOTE(review): these counts presumably have to equal the per-class sizes of the
# ds_change_* arrays (e.g. 500 + 188 = 688 for class 0) — confirm when the counts change.
y_train_new_oh = change_y([688,502,845,886,862,858,859,665,477,857])

In [None]:
# Make the parameters of every baseModel layer trainable again.
for layer in baseModel.layers:
    layer.trainable = True

In [None]:
# Callback that saves model snapshots for the second training run; with
# save_best_only=True a file is written only when val_loss improves (mode="min").
modelname = "final_cifar10-resnet50-weights-{epoch:03d}-{val_loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(modelname, monitor="val_loss", mode="min", save_best_only=True)

In [None]:
# Compile again so the changed `trainable` flags take effect, with a smaller
# learning rate. `learning_rate` is used instead of the deprecated `lr` keyword.
sgd = SGD(learning_rate=0.001, decay=0.001 / 20, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd,metrics=["accuracy"])

In [None]:
# Train on the rebalanced data (augmented batches from aug00), validating on the test split.
# steps_per_epoch is derived from the array actually being fed (x_train_new),
# not from the original x_train, so one epoch covers the new dataset.
# Model.fit accepts generators directly; fit_generator is deprecated.
H = model.fit(aug00.flow(x_train_new, y_train_new_oh, batch_size=32),
              validation_data=(x_test, y_test_oh),
              callbacks=[checkpoint],
              steps_per_epoch=len(x_train_new) // 32,
              epochs=20)

In [None]:

# Plot the loss and accuracy curves.
plt.style.use("ggplot")
plt.figure()
# Take the x-axis from the number of epochs actually recorded instead of a
# hard-coded 20, so the plot still works if training stopped early or the
# epoch count is changed.
epochs_run = np.arange(len(H.history["loss"]))
plt.plot(epochs_run, H.history["loss"], label="train_loss")
plt.plot(epochs_run, H.history["val_loss"], label="val_loss")
plt.plot(epochs_run, H.history["accuracy"], label="train_acc")
plt.plot(epochs_run, H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.show()

In [None]:
# Classification report: predict on the test images, then compare the argmax of
# the one-hot test labels with the argmax of the predictions (4 digits shown).
preds = model.predict(x_test, batch_size=32)
print(classification_report(y_test_oh.argmax(axis=1),preds.argmax(axis=1), digits=4))