# In [None]:
#!/usr/bin/env python
# coding: utf-8

import gc
import time 
import scipy.io 
import numpy as np
import pandas as pd
import seaborn as sns 
import tensorflow as tf
import tqdm as notebook_tqdm
import matplotlib.pyplot as plt
from numpy import ravel
from sklearn.preprocessing import OneHotEncoder,KBinsDiscretizer,LabelEncoder,OrdinalEncoder
from sklearn.preprocessing import normalize
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix,classification_report
from tensorflow.keras import layers, models
from keras.callbacks import EarlyStopping, ModelCheckpoint


print("=" * 100)
print("Now keras tensorflow 1D CNN for MRL+STD strians classification is carried out...\n")

# Select one GPU and cap the memory TensorFlow may reserve on it.
using_gpu_index = 0  # index of the GPU to use
gpu_list = tf.config.experimental.list_physical_devices('GPU')
if not gpu_list:
    print("Got no GPUs")
else:
    try:
        selected_gpu = gpu_list[using_gpu_index]
        # memory_limit=5120 (megabytes, per the TensorFlow API docs).
        memory_cap = tf.config.LogicalDeviceConfiguration(memory_limit=5120)
        tf.config.experimental.set_virtual_device_configuration(
            selected_gpu, [memory_cap]
        )
        print("We Got GPUs")
    except RuntimeError as err:
        # Best-effort: report the problem and continue with default settings.
        print(err)


# Load the preprocessed spectra table: every column but the last is a
# feature, the last column is the class label.
data2 = pd.read_csv(
    '../Raman_MRL+STD_data/Raman_MRL+STD_data_400to1799_preprocessed_strains_1.csv',
    sep=',',
    header=0,
    names=None,
    index_col=None,
)

X2 = data2.iloc[:, :-1]
Y = data2.iloc[:, -1]

# Turn the raw labels into integer codes and keep a code -> label lookup,
# ordered by code, for later inspection.
encoder = LabelEncoder()
Y2 = encoder.fit_transform(ravel(Y))
label_code = dict(sorted(set(zip(Y2, Y)), key=lambda pair: pair[0]))
#print("\nlabel_code:\n",label_code)

# One-hot encode the integer codes; this is the target the network trains on.
# NOTE: `encoder` now refers to the OneHotEncoder, not the LabelEncoder above.
encoder = OneHotEncoder()
Y3 = encoder.fit_transform(Y2.reshape(-1, 1)).toarray()
print(Y3)

X = np.array(X2)
Y = np.array(Y)
Y_CNN = Y3
n_class = 199

for caption, value in (
    ('Shape of Input Data =', X.shape),
    ('Shape of Label Y_CNN =', Y_CNN.shape),
    ('Shape of Label Y =', Y.shape),
    ('Number of Classification n_class =', n_class),
):
    print(caption, value)

gc.collect()
print("\nDataset is Loaded.\n")
print("=" * 100)
print("\nStart training...")

kSplits = 15
kfold = KFold(n_splits=kSplits, shuffle=True, random_state=32)

# Conv1D expects (samples, steps, channels); each sample is reshaped to
# 467 steps with a single channel.
Input_1D = X.reshape((-1, 467, 1))

# Hold out 20% of the samples as the test set; the K-fold split later
# operates on the remaining 80%.
X_1D_train, X_1D_test, y_1D_train, y_1D_test = train_test_split(
    Input_1D,
    Y_CNN,
    train_size=0.8,
    test_size=0.2,
    random_state=101,
)

class CNN_1D():
    """Thin wrapper that builds and holds a compiled 1D-CNN Keras classifier.

    The network is five Conv1D + BatchNormalization stages (16 to 256
    filters), a Flatten, three Dense + BatchNormalization stages
    (1024/512/256 units) and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple or None, optional
        Per-sample input shape ``(steps, channels)``. Defaults to
        ``(467, 1)``, the shape the script reshapes its data to. Pass
        ``None`` to leave the input undeclared so Keras builds the model
        lazily on the first batch.
    n_class : int, optional
        Number of output classes (width of the final Dense layer).
        Defaults to 199.

    Attributes
    ----------
    model : keras Sequential model
        The compiled model returned by :meth:`CreateModel`.
    """

    def __init__(self, input_shape=(467, 1), n_class=199):
        self.input_shape = None if input_shape is None else tuple(input_shape)
        self.n_class = int(n_class)
        self.model = self.CreateModel()

    def CreateModel(self):
        """Build and compile the network; return the compiled model."""
        att = 'relu'  # activation used by every Conv1D and hidden Dense layer

        stack = []
        # The input declaration must be the FIRST entry of a Sequential
        # model. The previous code placed a shapeless InputLayer after
        # Flatten, where it declared nothing.
        if self.input_shape is not None:
            stack.append(layers.Input(shape=self.input_shape))

        # Convolutional feature extractor.
        for n_filters in (16, 32, 64, 128, 256):
            stack.append(layers.Conv1D(filters=n_filters, kernel_size=3,
                                       strides=1, activation=att))
            stack.append(layers.BatchNormalization())

        stack.append(layers.Flatten())
        stack.append(layers.BatchNormalization())

        # Fully connected classifier head.
        for n_units in (1024, 512, 256):
            stack.append(layers.Dense(n_units, activation=att))
            stack.append(layers.BatchNormalization())

        stack.append(layers.Dense(self.n_class))
        stack.append(layers.Softmax())

        model = models.Sequential(stack)

        # Targets are one-hot vectors, hence CategoricalCrossentropy.
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.CategoricalCrossentropy(),
                      metrics=['accuracy'])

        #model.summary()

        return model
        
# Stop a fold when val_accuracy has not improved by at least min_delta for
# `patience` consecutive epochs, then restore the best weights seen.
earlystop = EarlyStopping(monitor="val_accuracy",
                          min_delta=0.0001,
                          patience=200,
                          restore_best_weights=True)

# Model training: K-fold over the training portion. Each fold continues
# from the best checkpoint saved so far.
best_model_path = 'Raman-keras-tf-1D-CNN-MRL+STD_data1_Strains_model.h5'
i = 0
loss_1D = []
accuracy_1D = []
best_accuracy = 0.0
best_model_saved = False  # True once best_model_path has been written in this run

# Keep ONE wrapper object for the whole run so that `.model` is always
# available; load_model() returns a bare Keras model, not a CNN_1D.
Classification_1D = CNN_1D()

for train, validation in kfold.split(X_1D_train, y_1D_train):
    i += 1
    print("\nLoops: ", i)
    if best_model_saved:
        # Resume from the best checkpoint produced by an earlier fold.
        Classification_1D.model = tf.keras.models.load_model(best_model_path)

    history = Classification_1D.model.fit(
        X_1D_train[train], y_1D_train[train],
        verbose=2,
        epochs=2000,
        batch_size=32,
        validation_data=(X_1D_train[validation], y_1D_train[validation]),
        callbacks=[earlystop],
        validation_freq=1,
    )

    # NOTE(review): folds are scored (and the best model chosen) on the
    # held-out test split, so the reported test accuracy is optimistic.
    kf_loss, kf_accuracy = Classification_1D.model.evaluate(
        X_1D_test, y_1D_test,
        verbose=1,
        batch_size=32,
    )
    loss_1D.append(kf_loss)
    accuracy_1D.append(kf_accuracy)

    # Per-fold snapshot, tagged with the fold number and its accuracy.
    Classification_1D.model.save(
        "Raman-keras-tf-1D-CNN-MRL+STD_data1_Strains_model-loops"
        + str(i) + "-" + str(kf_accuracy)[:6] + ".h5"
    )

    if kf_accuracy > best_accuracy:
        # Record the new best BEFORE anything is released, and remember the
        # score so later, worse folds do not overwrite the checkpoint.
        best_accuracy = kf_accuracy
        Classification_1D.model.save(best_model_path)
        best_model_saved = True

    # Classification_1D is still needed after the loop, so it is not
    # deleted; just let Python reclaim whatever the fold left behind.
    gc.collect()
        
        
print("\nThe training is normally terminated.\n")
print("=" * 100)
print("\nShow finial training results:")

# NOTE: loss_1D / accuracy_1D collect each fold's evaluation on
# X_1D_test, so the "train" figures below are fold-averaged metrics on
# the held-out split, not metrics on the training data.
CNN_1D_train_loss = np.mean(loss_1D)
print('CNN 1D train loss =', CNN_1D_train_loss)

CNN_1D_train_accuracy = 100 * np.mean(accuracy_1D)
print('CNN 1D train accuracy =', CNN_1D_train_accuracy)

# Final evaluation of the model currently held by Classification_1D;
# accuracy is converted from a fraction to a percentage.
CNN_1D_test_loss, test_accuracy_fraction = Classification_1D.model.evaluate(
    X_1D_test, y_1D_test
)
CNN_1D_test_accuracy = test_accuracy_fraction * 100
print('CNN 1D test loss =', CNN_1D_test_loss)
print('CNN 1D test accuracy =', CNN_1D_test_accuracy)

# Save the model
# Classification_1D.model.save('Raman-keras-tf-1D-CNN-MRL+STD_data1_Strains_model.h5')
# filepath = "Raman-keras-tf-1D-CNN-MRL+STD_data1_Strains_model-last.h5" # the checkpoint file name may embed values from the logs dict such as epoch, val_accuracy, or val_loss
# checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
 
# Load the model
# best_model=tf.keras.models.load_model('Raman-keras-tf-1D-CNN-MRL+STD_data1_Strains_model.h5')

def _save_confusion_heatmap(matrix, job, fmt, png_path):
    """Draw *matrix* as an annotated heatmap titled with *job* and save it to *png_path*."""
    # Half an inch per class, but never a zero-sized figure.
    fig_w = max(1, matrix.shape[0] // 2)
    fig_h = max(1, matrix.shape[1] // 2)
    fig = plt.figure(figsize=(fig_w, fig_h))
    plt.title('Confusion Matrix - '+job)
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)
    sns.heatmap(matrix, annot=True, fmt=fmt, annot_kws={"fontsize":7}, cmap="Greens")
    plt.savefig(png_path, dpi=300)
    #plt.show()
    # Release the figure; these plots can be very large.
    plt.close(fig)


def ConfusionMatrix(model,X,Y,job="train"):
    '''
    Report and save the confusion matrix of *model* on (*X*, *Y*).

    Prints the sklearn classification report, then writes four files whose
    names start with ``ConfusionMatrix_<job>_<YYYYmmddHHMM>``:

    * ``-1.png`` / ``-1.csv``: raw counts.
    * ``-2.png`` / ``-2.csv``: each row as a percentage of that row's total
      (i.e. per true class).

    Parameters
    ----------
    model : object with a ``predict(X)`` method returning per-class scores
    X : model inputs, passed straight to ``model.predict``
    Y : 2-D array of per-class targets (argmax over axis 1 gives the class)
    job : str, label used in the plot title and the output file names

    Returns
    -------
    None
    '''
    import time

    localtime = time.strftime("%Y%m%d%H%M", time.localtime())
    Y_true = np.argmax(Y, axis=1)
    Y_pred = np.argmax(model.predict(X), axis=1)
    print(classification_report(Y_true,Y_pred))

    file_stem = "ConfusionMatrix_"+job+"_"+localtime

    # Raw counts.
    ConfusionMat1 = confusion_matrix(Y_true,Y_pred)
    print(ConfusionMat1.shape)
    print(ConfusionMat1)
    _save_confusion_heatmap(ConfusionMat1, job, 'd', file_stem+"-1.png")
    pd.DataFrame(ConfusionMat1).to_csv(file_stem+"-1.csv")

    # Row-normalised percentages. keepdims=True keeps the totals as a
    # column vector so every row is divided by ITS OWN total; a flat
    # vector would broadcast across columns and divide entry (i, j) by
    # the total of row j instead.
    row_totals = ConfusionMat1.sum(axis=1, keepdims=True).astype(np.float64)
    ConfusionMat2 = np.divide(
        ConfusionMat1,
        row_totals,
        out=np.zeros(ConfusionMat1.shape, dtype=np.float64),
        where=row_totals != 0,  # a class with no true samples stays 0, not NaN
    )
    ConfusionMat2 *= 100
    print(ConfusionMat2)
    _save_confusion_heatmap(ConfusionMat2, job, "5.2f", file_stem+"-2.png")
    pd.DataFrame(ConfusionMat2).to_csv(file_stem+"-2.csv")

    #return ConfusionMat1,ConfusionMat2
        
# Module-level timestamp; ConfusionMatrix computes its own for file names.
localtime = time.strftime("%Y%m%d%H%M", time.localtime())

# Produce confusion-matrix reports for the test split first, then for the
# full training split.
for eval_inputs, eval_targets, eval_job in (
    (X_1D_test, y_1D_test, "keras-tf-1D-CNN-test"),
    (X_1D_train, y_1D_train, "keras-tf-1D-CNN-train"),
):
    ConfusionMatrix(Classification_1D.model, eval_inputs, eval_targets, job=eval_job)

