In [1]:
import csv
import numpy as np
import tensorflow.keras
import tensorflow as tf
import keras.backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import tensorflow_addons as tfa
#from tensorflow.keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from datetime import date, datetime
import matplotlib.pyplot as plt
# Not referenced by any visible cell; read_datasets() takes its own `path` argument.
path = ''
# path = 'data 2'
# Size of the softmax output layer and of the one-hot label encoding.
num_classes = 11
# Input width of the first Dense layer; test rows are truncated to this many features.
input_dim = 31
# Number of epochs passed to model.fit().
train_epochs = 6200
# Set TensorFlow's global random seed.
tf.random.set_seed(1000)


def read_datasets(path):
    """Load a CSV file and return every row except the first one.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list of rows (each a list of strings) with the first row dropped.
    """
    with open(path, newline='') as handle:
        records = list(csv.reader(handle))
        print('load success from', path)
    return records[1:]


def split_data(train_data):
    """Separate the last column of every row (the label) from the other columns.

    The input rows are not modified, so the function can be called repeatedly
    on the same data (e.g. when a notebook cell is re-run) and always returns
    the same split.

    Args:
        train_data: Sequence of rows; each row is a non-empty list whose last
            element is the label.

    Returns:
        A tuple ``(x_train, y_train)`` where ``x_train`` holds a new list per
        row without its last element and ``y_train`` holds the last elements.

    Raises:
        IndexError: If a row is empty.
    """
    print(len(train_data))
    # Read the label first so an empty row fails loudly instead of yielding [].
    y_train = [row[-1] for row in train_data]
    # Slice into fresh lists rather than pop() so the caller's rows stay intact.
    x_train = [list(row[:-1]) for row in train_data]
    return x_train, y_train


def process_data(x_train, x_test):
    """Select the feature columns of every row and convert them to floats.

    For each row, column 1 followed by columns 4 onward is kept. Test rows are
    additionally truncated to the first ``input_dim`` values after conversion.

    Args:
        x_train: List of training rows (lists of numeric strings).
        x_test: List of test rows (lists of numeric strings).

    Returns:
        A tuple ``(new_x_train, new_x_test)`` of lists of float lists.
    """
    train_features = [
        [float(cell) for cell in [record[1]] + record[4:]]
        for record in x_train
    ]
    test_features = [
        [float(cell) for cell in [record[1]] + record[4:]][:input_dim]
        for record in x_test
    ]
    return train_features, test_features

def predict(x_test, model):
    """Return the index of the highest score for every sample.

    Args:
        x_test: Samples passed unchanged to ``model.predict``.
        model: Object exposing ``predict(x)`` that yields one score vector
            per sample.

    Returns:
        A list with one ``np.argmax`` result per score vector.
    """
    class_scores = model.predict(x_test)
    return [np.argmax(scores) for scores in class_scores]

def train_model_and_predict(x_train, x_test, y_train):
    """Build a dense softmax classifier, train it and predict the test rows.

    The network is four ReLU Dense layers (128, 128, 256, 64 units) followed
    by a softmax layer with ``num_classes`` units. It is trained with Adam
    (learning rate 1e-4) on categorical cross-entropy, holding out 10% of the
    training data for validation, and tracks accuracy and Cohen's kappa.

    Args:
        x_train: Training features, ``input_dim`` values per sample.
        x_test: Test features to predict after training.
        y_train: One-hot encoded training labels.

    Returns:
        A tuple ``(y_test, model, train_history)``: predicted class indices
        for ``x_test``, the trained model and the object returned by ``fit``.
    """
    relu_layer = dict(kernel_initializer='normal', activation='relu')

    model = Sequential()
    model.add(Dense(units=128, input_dim=input_dim, **relu_layer))
    for width in (128, 256, 64):
        model.add(Dense(units=width, **relu_layer))
    model.add(Dense(units=num_classes,
                    kernel_initializer='normal', activation='softmax'))

    kappa_metric = tfa.metrics.CohenKappa(num_classes=num_classes, sparse_labels=False)
    optimizer = tensorflow.keras.optimizers.Adam(
        learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
        amsgrad=False, name='Adam')
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer, metrics=['accuracy', kappa_metric])

    train_history = model.fit(x=x_train, y=y_train, validation_split=0.1,
                              epochs=train_epochs, batch_size=100, verbose=1)

    return predict(x_test, model), model, train_history


def plot(history):
    """Plot training and validation accuracy, loss and Cohen's kappa.

    Three panels are drawn side by side, the figure is saved as ``Result``
    and then shown.

    Args:
        history: Object whose ``history`` dict holds the per-epoch series
            ``accuracy``, ``loss``, ``cohen_kappa`` and their ``val_``
            counterparts (as returned by ``model.fit``).

    Raises:
        KeyError: If one of the expected series is missing.
    """
    records = history.history
    # (history key, label used in legend/title, legend position)
    panels = [
        ('accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Loss', 'upper right'),
        ('cohen_kappa', 'Kappa', 'upper right'),
    ]
    # Take the x-axis from the recorded series, not the global train_epochs,
    # so the plot still works when the two differ (e.g. the constant was
    # edited after training).
    epochs_range = range(len(records['loss']))

    fig, axes = plt.subplots(1, 3, figsize=(16, 6))
    for ax, (key, label, legend_loc) in zip(axes, panels):
        ax.plot(epochs_range, records[key], label=f'Training {label}')
        ax.plot(epochs_range, records[f'val_{key}'], label=f'Validation {label}')
        ax.legend(loc=legend_loc)
        ax.set_title(f'Training and Validation {label}')
    fig.savefig('Result')
    plt.show()

def print_kappa_matrix(y_train, y_pred):
    """Print Cohen's kappa between true and predicted labels.

    Args:
        y_train: Integer class labels (not one-hot), one per sample.
        y_pred: Predicted class indices, one per sample.

    Prints the kappa value followed by whatever ``update_state`` returns
    under the heading 'Kappa Matrix' (presumably the accumulated confusion
    matrix — confirm against the tensorflow_addons version in use).
    """
    # -1 lets TensorFlow infer the length, so any number of samples works
    # instead of a hard-coded sample count.
    yt = tf.reshape(tf.cast(K.round(y_train), dtype=tf.int32), shape=(-1,))
    yp = tf.reshape(K.round(y_pred), shape=(-1,))
    akap = tfa.metrics.CohenKappa(num_classes=num_classes, sparse_labels=True)
    a = akap.update_state(yt, yp)
    print(f'Kappa Value = {float(akap.result())}')
    print('Kappa Matrix')
    print(np.array(a))



In [4]:
print('Loading datasets...')
# read_datasets() drops the first CSV row and returns the remaining rows.
train_data = read_datasets('preprocess_train拷貝.csv')
# The last column of every training row is split off as its label.
x_train, y_train_ = split_data(train_data)
test_data = read_datasets('preprocess_test拷貝.csv')




Loading datasets...
load success from preprocess_train拷貝.csv
319
load success from preprocess_test拷貝.csv


In [8]:
# Debug check of the container type returned by read_datasets().
print(type(train_data))

<class 'list'>


In [None]:
# y_test starts empty; the training cell later rebinds it to the predictions.
x_test, y_test = test_data, []

print(len(x_train), len(x_test), len(y_train_), len(y_test))
# Keep column 1 plus columns 4 onward of each row, converted to float.
# NOTE(review): x_train/x_test are rebound here, so re-running this cell feeds
# the already-processed arrays back into process_data().
x_train, x_test = process_data(x_train, x_test)

print('Reshaping data...')
x_train = np.array(x_train, dtype=float)
print('x_train', x_train.shape)
x_test = np.array(x_test, dtype=float)
print('x_test', x_test.shape)
y_train_ = np.array(y_train_, dtype=int)
print('y_train', y_train_.shape)

# One-hot encode the integer labels; y_train_ keeps the integer form, which
# print_kappa_matrix() receives later.
y_train = to_categorical(y_train_, num_classes)

In [4]:
# Debug check: after the preprocessing cell, x_train is a NumPy array.
print(type(x_train))

<class 'numpy.ndarray'>


In [None]:
# Min-max scales the "concavity_mean" column and multiplies the result by 20.
# NOTE(review): `data` is not assigned at top level in any visible cell, so this
# cell would raise NameError on a fresh kernel — confirm where it should come from.
data["concavity_mean"]=((data["concavity_mean"]-data["concavity_mean"].min())/(data["concavity_mean"].max()-data["concavity_mean"].min()))*20

In [None]:
def NormalizeData(data, axis=None):
    """Min-max scale ``data`` into the range [0, 1].

    Args:
        data: Array-like of numbers.
        axis: Axis along which the minimum and maximum are taken. The default
            ``None`` uses one global minimum/maximum over the whole array;
            ``axis=0`` scales every column of a 2-D array independently.

    Returns:
        An array with the same shape as ``data``. Slices whose values are all
        equal (zero range) map to 0 instead of NaN.

    Raises:
        ValueError: If ``data`` is empty (raised by ``np.min``).
    """
    values = np.asarray(data)
    # keepdims keeps the reduced axes so the results broadcast against `values`.
    lowest = np.min(values, axis=axis, keepdims=True)
    span = np.max(values, axis=axis, keepdims=True) - lowest
    # A constant slice has zero span; divide by 1 so it yields 0 rather than 0/0.
    safe_span = np.where(span == 0, 1, span)
    return (values - lowest) / safe_span

# Scale x_train with a single minimum/maximum taken over the whole array.
# NOTE(review): scaled_x is not used by any later visible cell; the training
# cell still receives the unscaled x_train — confirm whether that is intended.
scaled_x = NormalizeData(x_train)

print(scaled_x)

In [None]:
print('Training data...')
# y_test receives the predicted class index for every test row.
y_test, model, history = train_model_and_predict(x_train, x_test, y_train)
# Predictions on the training rows themselves, used for the kappa report below.
y_pred = predict(x_train, model)
plot(history)
# Compares the integer training labels with the predictions on the same rows.
print_kappa_matrix(y_train_, y_pred)
print('Predict_result: ', y_test)


In [None]:
# import csv
# import numpy as np
# import tensorflow.keras

# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Activation
# import tensorflow_addons as tfa
# #from tensorflow.keras.utils import np_utils
# from tensorflow.keras.utils import to_categorical
# from datetime import date, datetime

# num_classes = 11
# input_dim = 30
# train_epochs = 175


# def read_datasets(path):
#     with open(path, newline='') as csvfile:
#         rows = csv.reader(csvfile)
#         data = []
#         for row in rows:
#             data.append(row)
#         print('load success from', path)
#         return data[1:]

# def print_kappa_matrix(y_train, y_pred):
#     yt = tf.reshape(tf.cast(K.round(y_train), dtype=tf.int32), shape=(319,))
#     yp = tf.reshape(K.round(y_pred), shape=(319,))
#     akap = tfa.metrics.CohenKappa(num_classes=num_classes, sparse_labels=True)
#     a = akap.update_state(yt, yp)
#     print(f'Kappa Value = {float(akap.result())}')
#     print('Kappa Matrix')
#     print(np.array(a))


# def split_data(train_data):
#     x_train = train_data
#     y_train = []
#     print(len(train_data))
#     for row in x_train:
#         result = row.pop()
#         y_train.append(result)
#     return x_train, y_train


# def process_data(x_train, x_test):
#     new_x_train = []
#     for data in x_train:
#         new_data = []
#         data = [data[1]]+data[4:]
#         for value in data:
#             new_data.append(float(value))
#         new_x_train.append(new_data)

#     new_x_test = []
#     for data in x_test:
#         new_data = []
#         data = [data[1]]+data[4:]
#         for value in data_:    
#             new_data.append(float(value))
#         new_data = new_data[:30]
#         new_x_test.append(new_data)

#     return new_x_train, new_x_test


# def train_model_and_predict(x_train, x_test, y_train):
#     model = Sequential()
#     model.add(Dense(units=256, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
#     model.add(Dense(units=256, kernel_initializer='normal', activation='relu'))
#     model.add(Dense(units=256, kernel_initializer='normal', activation='relu'))

#     model.add(Dense(units=num_classes, kernel_initializer='normal', activation='softmax'))
#     kap = tfa.metrics.CohenKappa(num_classes=num_classes, sparse_labels=False)
#     #adam = tensorflow.keras.optimizers.Adam( learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, name='Adam')
#     model.compile(loss='categorical_crossentropy',
#                   optimizer='adam', metrics=['accuracy'])
#     train_history = model.fit(x=x_train, y=y_train, validation_split=0.2, epochs=train_epochs, batch_size=32, verbose=1)

#     y_test = []
#     predict_results = model.predict(x_test)
#     for result in predict_results:
#         y_test.append(np.argmax(result))
#     return y_test, model


# def write_submission_file(result):
#     submission_rows = []
#     with open('./data 2/submission.csv', newline='') as csvfile:
#         rows = csv.reader(csvfile)
#         for row in rows:
#             submission_rows.append(row)

#     for i in range(len(result)):
#         submission_rows[i+1][1] = result[i]

#     now = datetime.now()
#     path = './submission_' + now.strftime("%Y-%m-%d_%H:%M:%S") + '.csv'
#     file = open(path, 'w')
#     writer = csv.writer(file)
#     for row in submission_rows:
#         writer.writerow(row)
#     file.close()



    
# print('Loading datasets...')
# train_data = read_datasets('preprocess_train.csv')
# x_train, y_train = split_data(train_data)
# test_data = read_datasets('preprocess_test.csv')
# x_test, y_test = test_data, []

# print(len(x_train), len(x_test), len(y_train), len(y_test))
# x_train, x_test = process_data(x_train, x_test)

# print('Reshaping data...')
# x_train = np.array(x_train, dtype=float)
# print('x_train', x_train.shape)
# x_test = np.array(x_test, dtype=float)
# print('x_test', x_test.shape)
# y_train = np.array(y_train, dtype=int)
# print('y_train', y_train.shape)

# y_train = to_categorical(y_train, num_classes)

# x_train[:,1] /= np.max(np.abs(x_train[:,1]))
# x_test[:,1] /= np.max(np.abs(x_test[:,1]))

# print('Training data...')
# y_test, model = train_model_and_predict(x_train, x_test, y_train)
# print('Predict_result: ', y_test)
    
    



In [None]:
# Show the test-set predictions again (the training cell prints the same list).
print('Predict_result: ', y_test)

In [None]:
# Read the CSV file
# NOTE(review): the commented-out legacy cell reads './data 2/submission.csv'
# (with a space) — confirm which directory name is correct.
import pandas as pd # import the package under the alias pd
output = pd.read_csv('data2/submission.csv')
  

In [None]:
# NOTE(review): this rebinds the builtin `len` to an int for the rest of the
# kernel session; any later len(...) call — including re-running this cell —
# raises TypeError.
len = len(y_test)

In [None]:
# Copy each prediction into the LEVEL column of the row with the same index label.
# A single .loc call writes into `output` itself; the chained form
# output['LEVEL'][i] = ... assigns through an intermediate Series, which pandas
# warns about and which leaves `output` unchanged under Copy-on-Write.
for row_label, level in enumerate(y_test):
    output.loc[row_label, 'LEVEL'] = level

In [None]:
# Print the submission table for a visual check before it is written out.
print(output)

In [None]:
# Write the table to submission.csv in the working directory, omitting the index column.
output.to_csv("submission.csv", index = False)