In [1]:
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, Conv2D, ReLU, Flatten, Dense, Softmax,SimpleRNN
from tensorflow.python.keras.layers import BatchNormalization,Activation,Add,Dropout,Reshape
from tensorflow.python.keras.layers import GlobalAveragePooling2D,MaxPooling2D
from tensorflow.python.keras.optimizers import adam_v2
from tensorflow.keras.optimizers import SGD,Nadam
from tensorflow.python.keras.callbacks import EarlyStopping,ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split
# Bare expression: Jupyter only echoes the LAST expression of a cell, so this
# line produces no output in its current position.
tf.__version__

# Read the training CSV into a list of lines. The context manager closes the
# file handle as soon as the contents are read (a bare open().read() leaves it
# to the garbage collector).
with open('./CSVs/dan_train.csv') as csv_file:
    df = csv_file.read().splitlines()

# Keep only what follows the second comma of each line, i.e. the
# comma-separated move list; the first two fields are dropped.
games = [i.split(',',2)[-1] for i in df]

# The 19 letters used to address board columns and rows.
chars = 'abcdefghijklmnopqrs'
# letter -> 0-based index
coordinates = {k:v for v,k in enumerate(chars)}
# 0-based index -> letter (note: the name suggests the opposite direction)
chartonumbers = {k:v for k,v in enumerate(chars)}
def prepare_input(moves):
    """Encode a sequence of moves as a (19, 19, 4) feature array.

    Planes along the last axis:
        0 -- 1 at every point where a 'B' move was played
        1 -- 1 at every point where a 'W' move was played
        2 -- 1 at every point where neither colour has played (empty)
        3 -- 1 at the point of the most recent move only

    Args:
        moves: sequence of move strings. Character 0 is the colour
            ('B' or 'W'); characters 2 and 3 are the column and row letters,
            looked up in the module-level `coordinates` table.
            (Presumably SGF-style strings such as 'B[pd]' -- confirm against
            the CSV.)

    Returns:
        numpy float array of shape (19, 19, 4), indexed as [row, column, plane].

    Raises:
        KeyError / IndexError: if a move is too short or uses a letter that is
            not in `coordinates`.

    NOTE(review): stones are only ever added, never removed, so captures are
    not reflected in the encoding.
    """
    x = np.zeros((19,19,4))
    for move in moves:
        color = move[0]
        column = coordinates[move[2]]
        row = coordinates[move[3]]
        if color == 'B':
            x[row,column,0] = 1
            x[row,column,2] = 1  # temporarily marks "occupied"; inverted below
        elif color == 'W':
            x[row,column,1] = 1
            x[row,column,2] = 1
    if moves:
        # Index the last-move plane from moves[-1] explicitly instead of
        # relying on the loop variables still holding the final iteration.
        last_move = moves[-1]
        last_move_column = coordinates[last_move[2]]
        last_move_row = coordinates[last_move[3]]
        x[last_move_row,last_move_column,3] = 1
    # Plane 2 was filled as "occupied"; flip it so that it marks empty points.
    x[:,:,2] = np.where(x[:,:,2] == 0, 1, 0)
    return x

def prepare_label(move):
    """Return the class index of `move`, computed as column * 19 + row.

    Characters 2 and 3 of `move` are the column and row letters; both are
    translated to 0-based indices through the module-level `coordinates` table.
    """
    col_idx, row_idx = (coordinates[move[pos]] for pos in (2, 3))
    return 19 * col_idx + row_idx

# Data generator: loads the training data batch by batch while the network trains
#---------------------------------------------------------------------------
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that yields (board encoding, next-move one-hot) batches.

    Every move of every game is one sample: the input is `prepare_input` of
    all moves played before it and the label is `prepare_label` of the move
    itself. Samples are numbered consecutively across `games`, and batch `i`
    covers samples [i * batch_size, (i + 1) * batch_size), so the result of
    `__getitem__(i)` depends only on `i` and on the current order of `games`.
    The trailing samples that do not fill a whole batch are dropped.

    Args:
        games: list of game records, each a comma-separated string of moves.
            The list is shuffled IN PLACE at the end of every epoch.
        batch_size: number of samples per batch.

    If `self.games` is replaced or mutated from outside, call `_reindex()`
    so that the cached offsets match again.
    """

    def __init__(self, games, batch_size):
        super().__init__()
        self.games = games
        self.batch_size = batch_size
        # Kept only for backward compatibility; batching no longer uses it.
        self.index = 0
        self._reindex()

    def _reindex(self):
        """Cache cumulative move counts: game g owns samples [_offsets[g], _offsets[g + 1])."""
        # game.count(',') + 1 equals len(game.split(',')) without building the list.
        counts = np.fromiter(
            (game.count(',') + 1 for game in self.games),
            dtype=np.int64,
            count=len(self.games),
        )
        self._offsets = np.concatenate(([0], np.cumsum(counts)))

    def __len__(self):
        """Number of complete batches available per epoch."""
        return int(self._offsets[-1]) // self.batch_size

    def __getitem__(self, index):
        """Return batch number `index` as (x_batch, y_batch_one_hot).

        x_batch has shape (batch_size, 19, 19, 4); y_batch_one_hot is a
        tf.one_hot tensor of depth 19 * 19.

        Raises:
            IndexError: if `index` is outside [-len(self), len(self)).
        """
        n_batches = len(self)
        if index < 0:
            index += n_batches
        if not 0 <= index < n_batches:
            raise IndexError(f"batch index out of range (have {n_batches} batches)")

        first_sample = index * self.batch_size
        last_sample = first_sample + self.batch_size  # exclusive
        # Locate the game that contains `first_sample`.
        game_idx = int(np.searchsorted(self._offsets, first_sample, side='right')) - 1

        x_batch, y_batch = [], []
        # last_sample never exceeds the total sample count (index < n_batches),
        # so game_idx stays within range while the batch is being filled.
        while len(x_batch) < self.batch_size:
            moves_list = self.games[game_idx].split(',')
            game_start = int(self._offsets[game_idx])
            move_from = max(first_sample - game_start, 0)
            move_to = min(len(moves_list), last_sample - game_start)
            for count in range(move_from, move_to):
                x_batch.append(prepare_input(moves_list[:count]))
                y_batch.append(prepare_label(moves_list[count]))
            game_idx += 1

        x_batch = np.array(x_batch)
        y_batch = np.array(y_batch)
        y_batch_one_hot = tf.one_hot(y_batch, depth=19*19)

        return x_batch, y_batch_one_hot

    def on_epoch_end(self):
        """Shuffle the game order in place and rebuild the sample offsets."""
        np.random.shuffle(self.games)
        self._reindex()
#---------------------------------------------------------------------------

# Tally the available data: every move of every game is one training sample.
n_games = len(games)
n_moves = sum(len(record.split(',')) for record in games)
print(f"Total Games: {n_games}, Total Moves: {n_moves}")

Total Games: 100160, Total Moves: 22853380


In [2]:
def Conv_BN_Relu(filters, kernel_size, strides, input_layer):
    """Apply Conv2D ('same' padding) -> BatchNormalization -> ReLU to `input_layer`.

    Args:
        filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution stride.
        input_layer: tensor the stack is applied to.

    Returns:
        The tensor produced by the final ReLU activation.
    """
    conv_out = Conv2D(filters, kernel_size, strides=strides, padding='same')(input_layer)
    normed = tf.keras.layers.BatchNormalization()(conv_out)
    return Activation('relu')(normed)

# ResNet18-style residual blocks
def resiidual_a_or_b(input_x, filters, flag):
    """Build one residual module on top of `input_x`.

    Args:
        input_x: input tensor.
        filters: number of filters used by every convolution in the module.
        flag: 'a' for the identity-shortcut module (two stride-1 3x3
            Conv-BN-ReLU stacks added to the unchanged input), or 'b' for the
            downsampling module (main path starts with a stride-2 3x3 stack;
            the shortcut is a stride-2 1x1 Conv-BN-ReLU stack).

    Returns:
        The tensor produced by adding the main path and the shortcut.

    Raises:
        ValueError: if `flag` is neither 'a' nor 'b' (previously the function
            silently returned None in that case).

    NOTE(review): module 'a' adds the main path to `input_x` unchanged, so
    `input_x` presumably must already have `filters` channels -- confirm.
    """
    # Validate before any layer is created so a typo fails fast and loudly.
    if flag not in ('a', 'b'):
        raise ValueError(f"flag must be 'a' or 'b', got {flag!r}")

    # Residual module a
    if flag == 'a':
        # Main path
        x = Conv_BN_Relu(filters, (3, 3), 1, input_x)
        x = Conv_BN_Relu(filters, (3, 3), 1, x)
        # Output: main path + identity shortcut
        return Add()([x, input_x])

    # Residual module b
    # Main path
    x = Conv_BN_Relu(filters, (3, 3), 2, input_x)
    x = Conv_BN_Relu(filters, (3, 3), 1, x)
    # Shortcut path, downsampled with the same stride as the main path
    shortcut = Conv_BN_Relu(filters, (1, 1), 2, input_x)
    # Output: main path + downsampled shortcut
    return Add()([x, shortcut])
def create_model():
    """Build and compile the move-prediction network.

    Structure, in layer-creation order:
      * two 128-filter 3x3 Conv-BN-ReLU stacks on the (19, 19, 4) input;
      * two residual stages (256 then 512 filters), each made of module 'b',
        Dropout(0.2) and module 'a';
      * global average pooling;
      * an RNN branch that reshapes the raw input to (361, 4) and runs
        SimpleRNN(512) followed by Dense(512, relu);
      * the pooled features projected with Dense(512) and added to the RNN branch;
      * a head of Flatten, Dense(600), Dropout(0.5), Dense(400), Dropout(0.5),
        Dense(361) and Softmax.

    Returns:
        A Model compiled with the 'adam' optimizer, categorical cross-entropy
        loss and the accuracy metric.
    """
    # Stem
    input_layer = Input((19, 19, 4))
    features = Conv_BN_Relu(128, (3, 3), 1, input_layer)
    features = Conv_BN_Relu(128, (3, 3), 1, features)

    # conv2_x (256 filters) and conv3_x (512 filters)
    for stage_filters in (256, 512):
        features = resiidual_a_or_b(features, stage_filters, 'b')
        features = Dropout(0.2)(features)
        features = resiidual_a_or_b(features, stage_filters, 'a')

    pooled = GlobalAveragePooling2D()(features)

    # RNN branch
    rnn_units = 512  # number of units in the RNN layer
    # Reshape the input into a sequence the RNN can consume
    rnn_branch = Reshape(target_shape=(19 * 19, 4))(input_layer)
    rnn_branch = SimpleRNN(units=rnn_units)(rnn_branch)
    rnn_branch = Dense(512, activation='relu')(rnn_branch)

    # Merge the RNN output with the convolutional features
    merged = Dense(rnn_units)(pooled)
    merged = Add()([merged, rnn_branch])

    # Classification head
    head = Flatten()(merged)
    for width in (600, 400):
        head = Dense(width)(head)
        head = Dropout(0.5)(head)
    logits = Dense(361)(head)
    y = Softmax(axis=-1)(logits)

    model = Model([input_layer], [y])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Build the compiled network once and print its layer-by-layer summary.
model = create_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 19, 19, 4)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 19, 19, 128)  4736        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 19, 19, 128)  512         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 19, 19, 128)  0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [3]:
# --- Training configuration --------------------------------------------------
start = 1        # 1-based number of the first game in the current chunk (inclusive)
end = 30000      # 1-based number of the last game in the current chunk (inclusive)
batch_size = 1024
results_dir = './results/Dan/'
# Set to True after an interrupted run to reload the best saved weights
# before training continues.
resume_from_checkpoint = False

# Per-epoch metrics accumulated over all chunks.
train_losses = []
val_losses = []
train_accuracy = []
val_accuracy = []


def find_best_checkpoint(directory, suffix='.h5'):
    """Return the path of the checkpoint with the lowest val_loss, or None.

    Only files named 'Dan_loss-accuracy_<val_loss>_<val_accuracy>_...' that
    end with `suffix` are considered; the val_loss is parsed from the file
    name. Files whose name cannot be parsed are skipped, and None is returned
    when nothing matches.
    """
    if not os.path.isdir(directory):
        return None
    best_path, best_loss = None, float('inf')
    for file_name in os.listdir(directory):
        if not (file_name.startswith('Dan_loss-accuracy') and file_name.endswith(suffix)):
            continue
        try:
            loss = float(file_name.split('_')[2])
        except (IndexError, ValueError):
            continue
        if loss < best_loss:  # a NaN loss never compares lower, so it is skipped
            best_path, best_loss = os.path.join(directory, file_name), loss
    return best_path


# ModelCheckpoint does not need to create the directory if it already exists.
os.makedirs(results_dir, exist_ok=True)

if resume_from_checkpoint:
    best_model_path = find_best_checkpoint(results_dir)
    if best_model_path is not None:
        print("加载模型:",best_model_path)
        model.load_weights(best_model_path)

# EarlyStopping callback: stop after 3 epochs without val_loss improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

for chunk_no in range(3):
    # start/end are 1-based and inclusive, Python slices are 0-based and
    # half-open: games[start - 1:end] is exactly games start..end.
    games_chunk = games[start - 1:end]
    if not games_chunk:
        # Ran past the end of the data; train_test_split would raise on an empty list.
        print("No games left in range:", start, "-", end)
        break

    print("开始训练:", start, "-", end)

    # ModelCheckpoint callback: keep the weights with the lowest val_loss.
    # {val_loss}/{val_accuracy} are filled in by Keras, start/end here.
    chunk_suffix = f'_{start}_{end}.h5'
    checkpoint = ModelCheckpoint(
        os.path.join(
            results_dir,
            f'Dan_loss-accuracy_{{val_loss:.4f}}_{{val_accuracy:.4f}}{chunk_suffix}'),
        monitor='val_loss',
        save_best_only=True)

    # Split the chunk into training and validation games.
    games_train, games_val = train_test_split(games_chunk, test_size=0.10,random_state=42)

    # Create the data generators.
    train_generator = DataGenerator(games_train, batch_size)
    val_generator = DataGenerator(games_val, batch_size)

    # Train from the generators.
    history = model.fit(
        x=train_generator,
        epochs=100,
        validation_data=val_generator,
        callbacks=[checkpoint, early_stopping],
        shuffle=True
    )

    # Reload the best weights saved for THIS chunk. Each chunk has its own
    # validation set, so val_loss values from other chunks are not comparable
    # and could otherwise roll the model back to an earlier chunk.
    best_model_path = find_best_checkpoint(results_dir, chunk_suffix)
    if best_model_path is not None:
        print("加载模型:",best_model_path)
        model.load_weights(best_model_path)

    # Append this chunk's per-epoch metrics.
    train_losses.extend(history.history['loss'])
    val_losses.extend(history.history['val_loss'])
    train_accuracy.extend(history.history['accuracy'])
    val_accuracy.extend(history.history['val_accuracy'])

    # Advance to the next chunk; the third chunk is widened to take in all
    # remaining games (the slice is clipped to the list length).
    start = start + 30000
    end = end + 30000
    if end == 90000:
        end = 110000

加载模型: results/Dan_loss-accuracy_2.6725_0.4313_1_30000.h5
开始训练: 1 - 30000
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
加载模型: results/Dan_loss-accuracy_2.3816_0.4681_1_30000.h5
开始训练: 30001 - 60000
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
加载模型: results/Dan_loss-accuracy_2.3171_0.4787_30001_60000.h5
开始训练: 60001 - 110000
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
 155/8028 [..............................] - ETA: 4:12:33 - loss: 2.2593 - accuracy: 0.4917

KeyboardInterrupt: 

# 以下內容為訓練中斷後重新讀取權重、繼續訓練用的重複程式碼,無需執行

In [3]:
# Disabled cell: the whole body is wrapped in a string literal, so nothing in
# it executes. It is a copy of the training cell that was used to resume after
# an interruption. Differences from the live cell: it starts at chunk
# 60001-110000, reloads the lowest-val_loss checkpoint before training, uses
# EarlyStopping patience=6, and adds a ReduceLROnPlateau callback.
# NOTE(review): the recorded output of this cell shows the loop moving on to
# chunk 120001-170000 and train_test_split raising ValueError (n_samples=0),
# i.e. range(3) walks past the end of `games` when starting from 60001.
'''
start = 60001
end = 110000
batch_size = 1024
train_losses = []
val_losses = []
train_accuracy = []
val_accuracy = []


# 尋找所有文件
saved_models = os.listdir('./results/Dan/')
# 找到具有最低验证损失的模型文件
best_model = min(saved_models, key=lambda x: float(x.split('_')[2]) if x.startswith('Dan_loss-accuracy') else float('inf'))
# 构建最佳模型的路径
best_model_path = os.path.join('./results/Dan/', best_model)
print("加载模型:",best_model_path)
# 加载最佳模型的权重
model.load_weights(best_model_path)

# 定义 EarlyStopping 回调 :回调参数
early_stopping = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)

for i in range(3):
    print("开始训练:", start, "-", end)
    
    # 定义 ModelCheckpoint 回调 :回调保存
    checkpoint = ModelCheckpoint(
        f'./results/Dan/Dan_loss-accuracy_{{val_loss:.4f}}_{{val_accuracy:.4f}}_{start}_{end}.h5',
        monitor='val_loss',
        save_best_only=True)
    
    # 將數據分成訓練和驗證集
    games_train, games_val = train_test_split(games[start:end], test_size=0.10,random_state=42)

    # 創建數據生成器實例
    train_generator = DataGenerator(games_train, batch_size)
    val_generator = DataGenerator(games_val, batch_size) 
    

    
    #用数据生成器进行训练
    history = model.fit(
        x=train_generator,
        epochs=100,
        validation_data=val_generator, 
        callbacks=[checkpoint, early_stopping,
                  ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-7)],
        shuffle=True
    )
    
    # 尋找所有文件
    saved_models = os.listdir('./results/Dan/')
    # 找到具有最低验证损失的模型文件
    best_model = min(saved_models, key=lambda x: float(x.split('_')[2]) if x.startswith('Dan_loss-accuracy') else float('inf'))
    # 构建最佳模型的路径
    best_model_path = os.path.join('./results/Dan/', best_model)
    print("加载模型:",best_model_path)
    # 加载最佳模型的权重
    model.load_weights(best_model_path)
    
    
    
    
    #將每個 epoch 的值添加到列表中
    train_losses.extend(history.history['loss'])
    val_losses.extend(history.history['val_loss'])
    train_accuracy.extend(history.history['accuracy'])
    val_accuracy.extend(history.history['val_accuracy'])
    
    
    start=start+30000;
    end=end+30000;
    if (end==90000):
        end=110000
'''

加载模型: results/Dan_loss-accuracy_2.2397_0.4887_60001_110000.h5
开始训练: 60001 - 110000
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
加载模型: results/Dan_loss-accuracy_2.1903_0.4957_60001_110000.h5
开始训练: 90001 - 140000
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
加载模型: results/Dan_loss-accuracy_1.8483_0.5526_90001_140000.h5
开始训练: 120001 - 170000


ValueError: With n_samples=0, test_size=0.1 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.