In [None]:
import matplotlib
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from keras.optimizers import SGD
from keras import initializers
from keras import regularizers

import sys

# Directory of the Kaggle input dataset that is put on the import path so the
# `my_utils` package imported right below can be found.
MY_UTILS_DIR = '../input/train-model/'
if MY_UTILS_DIR not in sys.path:
    sys.path.append(MY_UTILS_DIR)
print(sys.path)
from my_utils import utils_paths
#del utils_paths

import matplotlib.pyplot as plt
import numpy as np

import random
import pickle
import cv2
import os


In [None]:
# Input parameters (for using argparse inside Jupyter see
# https://blog.csdn.net/bagba/article/details/122087316).
#
# Every key the notebook reads from `args` is declared here with a default, so
# the same dictionary is produced whether the code runs as a script or inside a
# Jupyter kernel.
import argparse

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset",
                default="../input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database",
                help="path to input dataset of images")
ap.add_argument("-m", "--model",
                default="/kaggle/working/",
                help="path to output trained model ")
# The long option was spelled "-label-bin" (single dash), which made argparse
# store the value under the key "l" instead of "label_bin".
# NOTE(review): the default 114 is not a path; confirm the intended location.
ap.add_argument("-l", "--label-bin",
                default=114,
                help="path to output label binarizer")
ap.add_argument("-p", "--plot",
                default="/kaggle/working/",
                help="path to output accuracy/loss plot")
# Tunable hyperparameters, stored under the keys the training cells read.
ap.add_argument("--init-lr", dest="INIT_LR", type=float, default=0.01,
                help="initial learning rate")
ap.add_argument("--epochs", dest="EPOCHS", type=int, default=200,
                help="number of training epochs")

try:
    # parse_known_args ignores arguments it does not know, such as the
    # "-f <kernel.json>" pair a Jupyter kernel is started with.
    known_args, _ignored = ap.parse_known_args()
except SystemExit:
    # argparse exits on malformed arguments (and on -h); keep the notebook
    # running with the defaults instead of stopping the kernel.
    known_args = ap.parse_args([])
args = vars(known_args)
print(args)



In [None]:
# Progress message marking the start of the data-loading stage.
print("[INFO] 开始读数据")

In [None]:
# Collect the paths of all images under the dataset directory, in sorted order,
# so they can be read later on.
imagePaths = sorted(utils_paths.list_images(args["dataset"]))

In [None]:
# Seed Python's `random` module so the shuffle of imagePaths is reproducible.
random.seed(42)

In [None]:
# Shuffle the image paths in place.
random.shuffle(imagePaths)

In [None]:
# Number of images found and the first ten (shuffled) paths.
print(len(imagePaths), imagePaths[:10])

In [None]:
# Read every image and derive its label from the name of its parent directory.
data = []
labels = []
for imagePath in imagePaths:

    # The fully connected network needs a one-dimensional input per sample, so
    # each image is resized to 32x32 and flattened.
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it here rather than crash inside cv2.resize.
        print("[WARN] could not read image, skipping: {}".format(imagePath))
        continue
    image = cv2.resize(image, (32, 32)).flatten()
    data.append(image)

    # The label is the second-to-last path component (the containing folder).
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

In [None]:
# Sanity check of the loaded data: first labels, first sample and its length,
# sample/label counts, and the expected flattened length for comparison.
print(labels[:10])
print(data[0], len(data[0]), len(data), len(labels))
print(32 * 32 * 3)

In [None]:
# Scale the image data by 1/255 (presumably 8-bit pixel values, giving [0, 1]).
# Experiment note: dividing by 1.0 instead (no scaling) was tried to check
# whether scaling matters; gradient descent was unstable early on and training
# ended up stuck in a local optimum.
# NOTE: `data` is rebound, so re-running only this cell divides by 255 again.
data = np.array(data, dtype=np.float64)
data = data / 255.0
labels = np.array(labels)

In [None]:
# Split the dataset: 10% is held out as the test set, with a fixed random_state
# so the split is reproducible.
trainX, testX, trainY, testY = train_test_split(
    data, labels, test_size=0.1, random_state=42
)

In [None]:
# Peek at the top-left corner of each feature matrix, the first labels and the
# sizes of both splits.
print(trainX[:5, :5], len(trainX), testX[:5, :5], len(testX),
      trainY[:5], len(trainY), testY[:5], len(testY))

# Shapes of the four arrays produced by the split.
print(trainX.shape, testX.shape,
      trainY.shape, testY.shape)


In [None]:
# Encode the string labels. The binarizer is fitted on the training labels only
# and the same mapping is then applied to both splits.
lb = LabelBinarizer().fit(trainY)

trainY = lb.transform(trainY)
testY = lb.transform(testY)

In [None]:
# For a two-class problem the binarized labels have shape (n_samples, 1) rather
# than (n_samples, 2), which makes model.fit fail later, so they are expanded
# to two columns here.
# The expansion is applied only to single-column label arrays: this keeps the
# cell safe to re-run and leaves already one-hot (multi-class) labels untouched.
from keras.utils import to_categorical
if trainY.ndim == 2 and trainY.shape[1] == 1:
    trainY = to_categorical(trainY, 2)
if testY.ndim == 2 and testY.shape[1] == 1:
    testY = to_categorical(testY, 2)

In [None]:
# Inspect the encoded label arrays and their shapes.
print(trainY,trainY.shape)
print(testY,testY.shape)

In [None]:
# Shapes of the feature and label arrays after label encoding.
print(trainX.shape,testX.shape,
      trainY.shape,testY.shape)

In [None]:
## Build the model
# Fully connected network: <n_features>-512-256-<n_classes>
# (n_features is 3072 for flattened 32x32x3 images).
# Ideas left for experimentation: kernel_regularizer=regularizers.l2(0.01),
# initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=None),
# initializers.random_normal, and a Dropout layer.
model = Sequential()
# The input size is taken from the training matrix so it always matches the
# length of the flattened images instead of a hard-coded 3072.
model.add(Dense(512, input_shape=(trainX.shape[1],), activation="relu"))  # tunable: first layer width (512)
model.add(Dense(256, activation="relu"))  # tunable: hidden layer width (256)
# One softmax output unit per class known to the label binarizer.
model.add(Dense(len(lb.classes_), activation="softmax"))

In [None]:
# Show the class names held by the label binarizer and how many there are.
print(lb.classes_)
print(len(lb.classes_))

In [None]:
# Set the hyperparameters: both values are read from the `args` dictionary.
INIT_LR=args["INIT_LR"]
EPOCHS=args["EPOCHS"]

In [None]:
# Show the hyperparameter values that will be used for training.
print(INIT_LR,EPOCHS)

In [None]:
# Progress message marking the start of the training stage.
print("[INFO] 准备训练网络罢！（喜）")


In [None]:
# Define the loss function, optimizer and evaluation metric.
from tensorflow.keras.optimizers import Adam
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
opt = Adam(learning_rate=INIT_LR)  # other optimizers could be used instead
model.compile(loss="categorical_crossentropy",
              optimizer=opt,
              metrics=["accuracy"])  # more metrics can be added here or computed by hand
# Train the network.
# NOTE(review): the held-out test split doubles as validation data, so the
# validation curves are not independent of a later evaluation on testX/testY.
H = model.fit(trainX, trainY, validation_data=(testX, testY), epochs=EPOCHS, batch_size=32)
# Evaluate the network.
print("[INFO] 正在评估模型")