In [12]:
import numpy as np
import chess
import chess.pgn
from chess.engine import PovScore, Cp
from io import StringIO,TextIOWrapper
import h5py
import sys,os
import zstandard as zstd

We prepare the data in several stages.

1. Extract valid games (see below) into pgn files using string operations.
2. Create different hdf5 files for training, validation and testing containing (game,label) tensors for different time controls from multiple pgn files, filtering out invalid games using the python chess library.
3. Create oversampled training files.

Valid games are selected based on the `TimeControl "{a time control}"` field, whether the term `[%eval` appears in the game string, and whether the absolute values of `BlackRatingDiff` and `WhiteRatingDiff` are below a certain value, as well as whether `Termination` is *not* `Abandoned` or `Rules infraction`.

If all the above conditions are met we utilise the `Board` class to parse the pgn, also checking that game length is above some minimum.

In [2]:
# Maps a PGN `TimeControl` header value to the base name of the HDF5 file that
# collects games played at that control. Keys are looked up verbatim
# (plain dictionary membership), so only the exact strings listed here match.
file_dict = {
    "300+0": "blitz",
    "300+3": "blitz",
    "60+0": "ultrabullet",
    "120+1": "bullet",
    "180+0": "superblitz",
    "180+2": "superblitz",
    "600+0": "rapid",
    "600+5": "rapid",
    "900+10": "rapid",
}

# A game is dropped when either player's absolute rating change reaches this value.
MAX_RATING_DIFF = 40

# A game is dropped when its text contains any of these termination strings.
TERMINATION_STRINGS = {"Abandoned", "Rules infraction"}

# Number of moves encoded per player for each game.
NUM_MOVES = 40

In [3]:
def _header_value(game_string, tag):
    """Return the quoted value of the first `tag "..."` occurrence in the raw PGN text, or None if the tag is absent."""
    pieces = game_string.split(tag + ' "', 1)
    if len(pieces) < 2:
        return None
    return pieces[1].split('"', 1)[0]


def _encode_board(board, pov_color, row, offset):
    """Write the 64 squares of `board` into row[offset:offset+64].

    Empty squares stay 0, pieces of `pov_color` are stored as their piece type and
    the opponent's pieces as piece type + 7. `row` must be zero in that range.
    """
    for square, piece in board.piece_map().items():
        if piece.color == pov_color:
            row[offset + square] = piece.piece_type
        else:
            row[offset + square] = piece.piece_type + 7


def _encode_eval(score, pov_color, row, value_index, flag_index):
    """Store an engine evaluation as a (value, flag) pair seen from `pov_color`.

    flag 1 / value 0  : no evaluation available (treated as "mate in 0")
    flag 1 / value n  : mate in n
    flag 0 / value cp : centipawn score
    """
    if score is None:
        row[flag_index] = 1
        row[value_index] = 0
        return
    relative = score.pov(pov_color)
    if relative.is_mate():
        row[flag_index] = 1
        row[value_index] = relative.mate()
    else:
        row[flag_index] = 0
        row[value_index] = relative.score()


def get_game_tensor(game_string):
    """Convert the raw PGN text of one game into two per-player tensors.

    Returns None when the game is rejected: unknown or missing time control, no
    `[%eval` annotations, missing/unparseable or too large rating diffs, an ignored
    termination string, an unparseable game, or missing/non-numeric Elo headers.

    Otherwise returns `(white_tensor, black_tensor, white_elo, black_elo, file_name)`.
    Each tensor has shape (NUM_MOVES, 136) and dtype int16; row k describes the
    k-th move of that player, always from the mover's point of view:

        0..63    board before the move
        64..127  board after the move
        128      move index (k)
        129      mover's clock before the move
        130      mover's clock after the move
        131      opponent's clock
        132,133  evaluation before the move (value, flag)
        134,135  evaluation after the move (value, flag)

    Rows beyond the end of the game are left as zeros.
    """
    # Cheap string-level filters first, so rejected games never reach the PGN parser.
    time_control = _header_value(game_string, 'TimeControl')
    if time_control is None or time_control not in file_dict:
        return None

    if '[%eval' not in game_string:
        return None

    try:
        white_diff = int(_header_value(game_string, 'WhiteRatingDiff'))
        black_diff = int(_header_value(game_string, 'BlackRatingDiff'))
    except (TypeError, ValueError):
        # header missing (None) or not an integer: reject instead of crashing
        return None
    if abs(white_diff) >= MAX_RATING_DIFF or abs(black_diff) >= MAX_RATING_DIFF:
        return None

    if any(term in game_string for term in TERMINATION_STRINGS):
        return None

    game = chess.pgn.read_game(StringIO(game_string))
    if game is None:
        return None

    try:
        white_elo = int(game.headers['WhiteElo'])
        black_elo = int(game.headers['BlackElo'])
    except (KeyError, ValueError):
        return None

    ########prepare the game tensors
    gt1 = np.zeros((NUM_MOVES,136),dtype=np.int16)
    gt2 = np.zeros((NUM_MOVES,136),dtype=np.int16)

    board = game.board()
    clocks = {chess.WHITE: 0, chess.BLACK: 0}
    current_eval = PovScore(Cp(0), chess.WHITE)
    mover = chess.WHITE
    node = game

    for move_number in range(NUM_MOVES * 2):
        t = np.zeros(136)

        # state before the move
        _encode_board(board, mover, t, 0)
        t[128] = move_number // 2
        t[129] = clocks[mover]
        t[131] = clocks[not mover]
        # The "before" evaluation lives in columns 132/133; a missing evaluation
        # is flagged there as well rather than in the "after" columns.
        _encode_eval(current_eval, mover, t, 132, 133)

        node = node.next()
        if node is None:
            break

        # A move without a clock annotation keeps the last known clock so that
        # None never ends up in the numeric row.
        clock = node.clock()
        if clock is not None:
            clocks[mover] = clock

        current_eval = node.eval()
        board = node.board()

        # state after the move
        _encode_board(board, mover, t, 64)
        t[130] = clocks[mover]
        _encode_eval(current_eval, mover, t, 134, 135)

        if mover == chess.WHITE:
            gt1[move_number // 2] = t
        else:
            gt2[move_number // 2] = t

        mover = not mover

    return gt1,gt2,white_elo,black_elo,file_dict[time_control]


We want to read from an input file compressed using zst and write all the resultant game tensors and ratings to a hdf5 file.

In [4]:
CHUNKSIZE = 1000

def write_to_hdf5(reader):
    """Convert every game yielded by `reader` and append it to the per-time-control HDF5 files.

    `reader` is an iterable of PGN text lines; a line starting with "[Event" marks
    the start of a new game. Each accepted game contributes two rows (white, black)
    to the "game_tensors" and "ratings" datasets of "<name>.hdf5", where <name> is
    the value returned by get_game_tensor for the game's time control. Files are
    opened in append mode, so repeated calls keep extending the same datasets.

    The datasets grow in steps of CHUNKSIZE rows and are trimmed to the number of
    rows actually written before the files are closed, even if an error occurs.
    """
    files = {}
    file_indexes = {}

    def store_game(game_text):
        """Encode one game and append its two rows to the matching file."""
        game_tensors = get_game_tensor(game_text)
        if game_tensors is None:
            return

        gt1,gt2,white_rating,black_rating,file_name = game_tensors
        f = files[file_name]
        if "game_tensors" not in f:
            f.create_dataset("game_tensors",shape=(CHUNKSIZE,40,136),maxshape=(None,40,136),chunks=True,compression='lzf')
            f.create_dataset("ratings",shape=(CHUNKSIZE,1),chunks=True,maxshape=(None,1))
            file_indexes[file_name] = 0

        games_ds = f["game_tensors"]
        ratings_ds = f["ratings"]
        row = file_indexes[file_name]

        # +1 as we are writing 2 rows at a time
        if row + 1 >= games_ds.shape[0]:
            print("enlarging chunk for file",file_name)
            games_ds.resize(games_ds.shape[0] + CHUNKSIZE, axis=0)
            ratings_ds.resize(ratings_ds.shape[0] + CHUNKSIZE, axis=0)

        games_ds[row] = gt1
        games_ds[row + 1] = gt2
        ratings_ds[row] = np.array([white_rating])
        ratings_ds[row + 1] = np.array([black_rating])
        file_indexes[file_name] = row + 2

    try:
        #open all the files so that we don't have to keep doing it.
        for file_name in set(file_dict.values()):
            files[file_name] = h5py.File(f"{file_name}.hdf5","a")

        # Each file resumes after its own existing rows (a file without datasets starts at 0).
        for file_name, f in files.items():
            file_indexes[file_name] = len(f["game_tensors"]) if "game_tensors" in f else 0

        game = ""
        count = 0

        for line in reader:
            if line.startswith("[Event"):
                if game != "":
                    # a new game begins: the accumulated text is a complete game
                    if count % 1000 == 0:
                        print("read",count,"games")
                    count += 1
                    store_game(game)
                game = line
            else: #continue reading the game
                game += line

        # The final game has no following "[Event" line to trigger it, so flush it here.
        if game.startswith("[Event"):
            store_game(game)
    finally:
        for file_name, f in files.items():
            try:
                #reshape the datasets to remove the extra space
                if file_name in file_indexes and "game_tensors" in f:
                    f["game_tensors"].resize(file_indexes[file_name], axis=0)
                    f["ratings"].resize(file_indexes[file_name], axis=0)
            finally:
                f.close()

Allow processing of "plain" files or "zst" files passed in from the command line.

In [5]:
def read_file(fn):
    """Feed the games stored in `fn` to write_to_hdf5.

    A name ending in ".pgn" is read as plain UTF-8 text; a name ending in ".zst"
    is decompressed as a stream and decoded as UTF-8, so the archive is never
    held in memory as a whole.

    Raises:
        ValueError: if `fn` has neither of the two supported extensions.
    """
    if fn.endswith(".pgn"):
        # explicit encoding so both input kinds are decoded the same way on every platform
        with open(fn,"r",encoding="utf-8") as f:
            write_to_hdf5(f)
    elif fn.endswith(".zst"):
        with open(fn,"rb") as f:
            dctx = zstd.ZstdDecompressor()
            with dctx.stream_reader(f) as reader:
                text_stream = TextIOWrapper(reader, encoding='utf-8')
                write_to_hdf5(text_stream)
    else:
        raise ValueError(f"unsupported file type (expected .pgn or .zst): {fn}")

In [None]:
# Convert one compressed PGN archive. write_to_hdf5 opens its output files in
# append mode, so each archive processed adds rows to the same HDF5 files.
#read_file("data/all_data/lichess09.pgn.zst")
read_file("data/all_data/lichess05.pgn.zst")

In [13]:

def create_bins(f,start_index,end_index,min_rating,max_rating,path):
    """Distribute rows start_index:end_index of `f` into rating bins of width 50.

    `f` is an open h5py file with "game_tensors" and "ratings" datasets ("ratings"
    is indexed as a 2-D (N, 1) dataset). One file "bin_<i>.hdf5" per bin is created
    (overwritten) under `path`, which is created if it does not exist. Ratings at or
    below `min_rating` go to the first bin and ratings at or above `max_rating` to
    the last one. Row order inside each bin follows the order in `f`.

    Raises:
        ValueError: if the rating range does not span at least one bin.
    """
    slab_rows = 2048  # source rows read per HDF5 access; bounds memory use

    os.makedirs(path, exist_ok=True)

    num_bins = int((max_rating//50)-(min_rating//50))
    if num_bins <= 0:
        raise ValueError("min_rating and max_rating must span at least one 50-point bin")
    start_bin_rating = (min_rating//50)*50

    # Resolve the requested range to absolute row numbers of the source datasets.
    first_row, last_row, _ = slice(start_index, end_index).indices(len(f["ratings"]))

    files = []
    try:
        for i in range(num_bins):
            fl = h5py.File(f"{path}/bin_{i}.hdf5","w")
            files.append(fl)
            fl.create_dataset("game_tensors",shape=(0,40,136),maxshape=(None,40,136),compression='lzf',chunks=True)
            fl.create_dataset("ratings",shape=(0,1),maxshape=(None,1),chunks=True)

        for lo in range(first_row, last_row, slab_rows):
            hi = min(lo + slab_rows, last_row)
            slab_ratings = f["ratings"][lo:hi]
            slab_games = f["game_tensors"][lo:hi]

            # Clipping sends out-of-range ratings to the first/last bin and also
            # keeps ratings just below a max_rating that is not a multiple of 50
            # inside the last bin.
            bin_ids = np.clip((slab_ratings[:, 0] - start_bin_rating)//50, 0, num_bins-1).astype(int)

            for bin_id in np.unique(bin_ids):
                rows = np.flatnonzero(bin_ids == bin_id)
                games_ds = files[int(bin_id)]["game_tensors"]
                ratings_ds = files[int(bin_id)]["ratings"]
                old_len = games_ds.shape[0]
                games_ds.resize(old_len + len(rows), axis=0)
                ratings_ds.resize(old_len + len(rows), axis=0)
                games_ds[old_len:] = slab_games[rows]
                ratings_ds[old_len:] = slab_ratings[rows]

            print(f"done {hi - first_row}")
    finally:
        for fl in files:
            fl.close()

####################################################
#split_file(ORIGDATA,TESTDATA,int(num_tensors*(split[0]+split[1]),num_tensors))
def split_file(original_file_path,new_file_path,start_index,end_index):
    """Copy rows start_index:end_index of the "game_tensors" and "ratings" datasets
    from the file at `original_file_path` into a new file at `new_file_path`
    (created or overwritten)."""
    rows = slice(start_index, end_index)
    with h5py.File(original_file_path,"r") as source, h5py.File(new_file_path,"w") as target:
        for dataset_name in ("game_tensors", "ratings"):
            target.create_dataset(dataset_name, data=source[dataset_name][rows])
 

In [49]:
from tensorflow.keras.utils import Sequence
import random

class InMemoryOverSamplngGenerator(Sequence):
    """Keras Sequence that serves batches from rating-bin files held fully in memory.

    `path` must contain files named bin_0.hdf5, bin_1.hdf5, ... (one per directory
    entry), each with "game_tensors" and "ratings" datasets. Batches are filled by
    walking through the current bin row by row and moving on to the next bin
    (cyclically) when it is exhausted; the `index` passed to __getitem__ is ignored.

    Keyword arguments:
        shuffle   -- shuffle every bin at the end of each epoch (default True)
        num_items -- if given, __len__ reports num_items // batch_size batches
    """
    def __init__(self,path,batch_size,**kwargs):
        super().__init__()
        self.path = path
        self.batch_size = batch_size
        self.shuffle = kwargs.get("shuffle",True)
        self.num_items = kwargs.get("num_items",None)

        #read the data into memory; each file is closed as soon as it has been read
        self.bins = []
        for i in range(len(os.listdir(path))):
            with h5py.File(f"{path}/bin_{i}.hdf5","r") as f:
                self.bins.append((f["game_tensors"][:],f["ratings"][:]))
        self.num_bins = len(self.bins)

        self.current_bin = 0
        self.bin_indexes = [0 for i in range(self.num_bins)]

    def __len__(self):
        """Number of batches per epoch."""
        if self.num_items is None:
            return sum([len(b[0]) for b in self.bins])//self.batch_size
        else:
            return self.num_items//self.batch_size

    def __getitem__(self,index):
        """Return the next (game_tensors, ratings) batch; `index` is ignored."""
        if not any(len(games) for games, _ in self.bins):
            raise ValueError("all bins are empty")

        x_batch = []
        y_batch = []

        num_items = self.batch_size
        while num_items > 0:
            # Skip exhausted bins; a loop (not a single check) so that empty bins
            # following an exhausted one are skipped as well.
            while self.bin_indexes[self.current_bin] >= len(self.bins[self.current_bin][0]):
                self.bin_indexes[self.current_bin] = 0
                self.current_bin += 1
                self.current_bin %= self.num_bins

            games, ratings = self.bins[self.current_bin]
            position = self.bin_indexes[self.current_bin]
            x_batch.append(games[position])
            y_batch.append(ratings[position])

            self.bin_indexes[self.current_bin] += 1
            num_items -= 1

        return np.array(x_batch),np.array(y_batch)

    def on_epoch_end(self):
        """Shuffle every bin, keeping tensors and ratings aligned, if shuffling is enabled."""
        if self.shuffle:
            for i, (games, ratings) in enumerate(self.bins):
                # Shuffle a list of row numbers and apply it to both arrays:
                # random.shuffle applied directly to a multi-dimensional numpy
                # array swaps views and ends up duplicating rows.
                order = list(range(len(games)))
                random.shuffle(order)
                self.bins[i] = (games[order], ratings[order])
    


class TrainingGenerator(Sequence):
    """Keras Sequence that draws training rows round-robin from rating-bin files on disk.

    `path` must contain files named bin_0.hdf5, bin_1.hdf5, ... (one per directory
    entry), each with "game_tensors" and "ratings" datasets. Every batch takes one
    row from each file in turn. For each file a window of `cache_size` consecutive
    rows is kept in memory and refilled from disk when used up; reading wraps
    around at the end of a file, so small bins are repeated (oversampled). Empty
    bin files are skipped.

    Keyword arguments:
        shuffle    -- shuffle the rows of every file on disk at the end of each epoch (default True)
        cache_size -- rows cached per file (default 512)
        num_items  -- if given, __len__ reports num_items // batch_size batches
    """
    def __init__(self,path,batch_size,**kwargs):

        super().__init__()
        self.path = path
        self.batch_size = batch_size
        self.shuffle = kwargs.get("shuffle",True)
        self.cache_size = kwargs.get("cache_size",512)
        self.num_items = kwargs.get("num_items",None)

        # opened in append mode because on_epoch_end may shuffle the rows in place
        self.files = [h5py.File(f"{path}/bin_{i}.hdf5","a") for i in range(len(os.listdir(path)))]
        self.num_files = len(self.files)
        # row counts never change (shuffling only reorders), so read them once
        self.file_lengths = [len(f["ratings"]) for f in self.files]
        self.file_indexes = [0 for i in range(self.num_files)]
        self.game_cache = [np.zeros((self.cache_size,40,136),dtype=np.int16) for i in range(self.num_files)]
        self.rating_cache = [np.zeros((self.cache_size,1),dtype=np.int16) for i in range(self.num_files)]

        self.cache_index = [0 for i in range(self.num_files)]

        self.current_file = 0

    def __len__(self):
        """returns the number of batches in the generator. This is the sum of the number of elements in each file divided by the batch size."""
        if self.num_items is None:
            return sum(self.file_lengths)//self.batch_size
        else:
            return self.num_items//self.batch_size

    def __getitem__(self,index):
        """returns the next batch. The batch is a tuple containing the game tensors and the ratings. We ignore the index as we will just iterate through the files in order."""
        if not any(self.file_lengths):
            raise ValueError("all bin files are empty")

        x_batch = []
        y_batch = []

        num_items = self.batch_size

        while num_items>0:
            current = self.current_file

            # an empty bin has nothing to contribute: move on to the next file
            if self.file_lengths[current] == 0:
                self.current_file = (current + 1) % self.num_files
                continue

            #check if we need to load more data into the cache
            if self.cache_index[current] == 0:
                self.__load_data(current)

            #get the next element from the cache
            x_batch.append(self.game_cache[current][self.cache_index[current]])
            y_batch.append(self.rating_cache[current][self.cache_index[current]])

            self.cache_index[current] = (self.cache_index[current] + 1) % self.cache_size
            self.current_file = (current + 1) % self.num_files
            num_items -= 1

        return np.array(x_batch),np.array(y_batch)

    def __load_data(self,file_index):
        """loads the next cache_size elements from the file at file_index into the cache. If the end of the file is reached, reading continues from the start of the file, as many times as needed to fill the cache."""
        f = self.files[file_index]
        games_ds = f["game_tensors"]
        ratings_ds = f["ratings"]
        total = len(ratings_ds)
        if total == 0:
            raise ValueError(f"cannot fill the cache from empty file {f.filename}")

        # Fresh buffers in the datasets' own dtypes, so a refill yields the same
        # dtype whether or not it wraps around the end of the file.
        games = np.empty((self.cache_size,) + games_ds.shape[1:], dtype=games_ds.dtype)
        ratings = np.empty((self.cache_size,) + ratings_ds.shape[1:], dtype=ratings_ds.dtype)

        position = self.file_indexes[file_index] % total
        filled = 0
        while filled < self.cache_size:
            take = min(self.cache_size - filled, total - position)
            games[filled:filled + take] = games_ds[position:position + take]
            ratings[filled:filled + take] = ratings_ds[position:position + take]
            filled += take
            position = (position + take) % total

        self.game_cache[file_index] = games
        self.rating_cache[file_index] = ratings
        self.file_indexes[file_index] = position


    def on_epoch_end(self):
        """shuffles the rows of every file on disk if shuffle is set to True. Also clears the caches and resets the file indexes."""
        self.file_indexes = [0 for i in range(self.num_files)]
        self.game_cache = [np.zeros((self.cache_size,40,136),dtype=np.int16) for i in range(self.num_files)]
        self.rating_cache = [np.zeros((self.cache_size,1),dtype=np.int16) for i in range(self.num_files)]
        self.cache_index = [0 for i in range(self.num_files)]
        self.current_file = 0

        if self.shuffle:
            print("shuffling files")

            for f in self.files:
                print("shuffling file ",f.filename)
                ratings = f["ratings"]
                games = f["game_tensors"]
                # Fisher-Yates shuffle applied to both datasets in lockstep so
                # that row i of "ratings" stays paired with row i of "game_tensors".
                # Reading a row from an h5py dataset returns a copy, so the tuple
                # swap below is safe.
                for i in range(len(ratings) - 1, 0, -1):
                    j = random.randint(0, i)
                    if i != j:
                        ratings[i], ratings[j] = ratings[j], ratings[i]
                        games[i], games[j] = games[j], games[i]

            print("done shuffling files")


    def __del__(self):
        # __init__ may have failed before self.files was assigned
        for f in getattr(self, "files", []):
            try:
                f.close()
            except Exception:
                # best effort during garbage collection / interpreter shutdown
                pass

class InMemoryGenerator(Sequence):
    """Keras Sequence serving batches from a single HDF5 file loaded fully into memory.

    `file` must contain "game_tensors" and "ratings" datasets of equal length.
    When `shuffle` is true the rows are reshuffled at the end of every epoch.
    """
    def __init__(self,file,batch_size,shuffle=False):
        super().__init__()

        self.batch_size = batch_size
        self.shuffle=shuffle

        with h5py.File(file,"r") as f:
            self.game_tensors = f["game_tensors"][:]
            self.ratings = f["ratings"][:]

    def __len__(self):
        """Number of complete batches."""
        return len(self.ratings)//self.batch_size

    def __getitem__(self, index):
        """Return batch number `index` as (game_tensors, ratings); row numbers wrap around at the end of the data."""
        rows = (index*self.batch_size + np.arange(self.batch_size)) % len(self.ratings)
        return self.game_tensors[rows],self.ratings[rows]

    def on_epoch_end(self):
        """Shuffle tensors and ratings with one shared permutation, if shuffling is enabled."""
        if self.shuffle:
            # Shuffle a list of row numbers and apply it to both arrays:
            # random.shuffle applied directly to a multi-dimensional numpy array
            # swaps views and ends up duplicating rows.
            order = list(range(len(self.ratings)))
            random.shuffle(order)
            self.game_tensors = self.game_tensors[order]
            self.ratings = self.ratings[order]

        
class HDF5FileGenerator(Sequence):
    """Keras Sequence serving batches straight from an HDF5 file on disk.

    `file` must contain "game_tensors" and "ratings" datasets of equal length. The
    file is opened in append mode because shuffling (if enabled) reorders the rows
    on disk at the end of every epoch.
    """
    def __init__(self, file, batch_size, shuffle=False):
        super().__init__()
        self.f = h5py.File(file,"a",rdcc_nbytes=5*10**8) #500MB cache
        self.batch_size = batch_size
        self.shuffle=shuffle

    def __len__(self):
        """Number of complete batches."""
        return (len(self.f["ratings"]))//self.batch_size


    def __getitem__(self, index):
        """Return batch number `index` as (game_tensors, ratings); row numbers wrap around at the end of the data."""
        games = self.f["game_tensors"]
        ratings = self.f["ratings"]
        total = len(ratings)

        start = (index*self.batch_size) % total
        stop = start + self.batch_size
        if stop <= total:
            # the usual case: one contiguous read per dataset instead of one per row
            return games[start:stop],ratings[start:stop]

        # the batch wraps around the end of the file: gather it row by row
        x_batch=[]
        y_batch=[]
        for i in range(self.batch_size):
            x_batch.append(games[(index*self.batch_size+i)%total])
            y_batch.append(ratings[(index*self.batch_size+i)%total])

        return np.array(x_batch),np.array(y_batch)

    def on_epoch_end(self):
        """Shuffle the rows of both datasets on disk, keeping them aligned, if shuffling is enabled."""
        if self.shuffle:
            games = self.f["game_tensors"]
            ratings = self.f["ratings"]
            # Fisher-Yates shuffle applied to both datasets in lockstep. Reading a
            # row from an h5py dataset returns a copy, so the tuple swap is safe.
            for i in range(len(ratings) - 1, 0, -1):
                j = random.randint(0, i)
                if i != j:
                    games[i], games[j] = games[j], games[i]
                    ratings[i], ratings[j] = ratings[j], ratings[i]

    def __del__(self):
        # __init__ may have failed before self.f was assigned
        f = getattr(self, "f", None)
        if f is not None:
            try:
                f.close()
            except Exception:
                # best effort during garbage collection / interpreter shutdown
                pass

Define the neural network

In [8]:
import keras

from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Input

from keras.layers import TimeDistributed

# One input sample is the (NUM_MOVES, 136) tensor of a single player.
inputs = Input(shape=(NUM_MOVES, 136)) #full tensor

# TimeDistributed applies the same Dense(80) layer to each of the NUM_MOVES rows
# independently, giving a (NUM_MOVES, 80) sequence. That sequence is fed into the
# stacked LSTM layers below.

x = TimeDistributed(Dense(80,activation = 'relu'))(inputs)

# The first LSTM returns the full sequence so the second one can consume it; the
# second returns only its final output vector.
x = LSTM(40,return_sequences = True)(x)
x = LSTM(32)(x)
#x = LSTM(40)(x)
x = Dense(60,activation='relu')(x)

# Single non-negative regression output named "Elo"; the name is the key used in
# the loss/metrics dictionaries below.
output = Dense(1,activation='relu',name="Elo")(x)

model = keras.Model(inputs=inputs,outputs=[output])

# Mean absolute error is used both as the training loss and as the reported metric.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                    loss={'Elo':'mae'},
                    metrics={'Elo':'mae'})

Prepare the data

In [51]:
import os

# Fractions of the original file used for training, validation and testing.
split=(0.8,0.1,0.1) #train,validation,test

ORIGDATA = "data/blitz.hdf5"
VALDATA = "data/blitz_val.hdf5"
TESTDATA = "data/blitz_test.hdf5"
OVERSAMPLEDPATH = "data/blitz/"

# Each derived artefact (training bins, validation file, test file) is built only
# if it does not exist yet.
num_tensors = 0
with h5py.File(ORIGDATA,"r",rdcc_nbytes=5*10**8) as f:
    num_tensors = f["game_tensors"].shape[0]

    # row boundaries of the train | validation | test partitions
    train_end = int(num_tensors*split[0])
    val_end = int(num_tensors*(split[0]+split[1]))

    if not os.path.exists(OVERSAMPLEDPATH):
        create_bins(f,0,train_end,800,2500,OVERSAMPLEDPATH)

    if not os.path.exists(VALDATA):
        split_file(ORIGDATA,VALDATA,train_end,val_end)

    if not os.path.exists(TESTDATA):
        split_file(ORIGDATA,TESTDATA,val_end,num_tensors)

train_gen = TrainingGenerator(OVERSAMPLEDPATH,32,shuffle=False)
val_gen = HDF5FileGenerator(VALDATA,32,shuffle=False)
test_gen = HDF5FileGenerator(TESTDATA,32,shuffle=False)


train the NN

In [None]:
import tensorflow as tf

# Stop training once val_loss has not improved for 5 consecutive epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
# Keep only the checkpoint with the best val_loss seen so far.
save = tf.keras.callbacks.ModelCheckpoint('modelO1.keras', save_best_only=True,mode='auto',monitor='val_loss')

model.fit(train_gen,validation_data=val_gen,epochs=100,callbacks=[stop_early,save])

# Evaluates the model as it stands after the last training epoch.
model.evaluate(test_gen)

#N.B., the model saved with the save callback is the best model according to the validation loss. We can load this model and evaluate it on the test data.

model = keras.models.load_model('modelO1.keras')
model.evaluate(test_gen)

In [None]:
def model_builder(hp):
    """Build and compile one candidate Elo-regression model for the hyperparameter tuner.

    `hp` is the tuner's hyperparameter object. The searched hyperparameters are the
    number of LSTM layers (0-3) and their units, the number of Dense layers (1-3)
    with their units and activations, the width and activation of the per-move
    TimeDistributed Dense layer, and the learning rate.

    The network is: TimeDistributed(Dense) -> LSTM stack (or Flatten when there is
    no LSTM layer) -> Dense stack -> Dense(1, name="Elo"). The returned model is
    compiled with Adam and mean absolute error, and always produces one value per
    input sample.
    """
    # imported locally: only needed for the "no LSTM layer" configuration
    from keras.layers import Flatten

    #inputs = Input(shape=(NUM_MOVES, 132)) #if no eval is used
    inputs = Input(shape=(NUM_MOVES, 136)) #full tensor
    #inputs = Input(shape=(NUM_MOVES,8)) #if only the eval is used

    x = inputs

    #prepare hyperparameter tuning

    num_LSTM_layers = hp.Int('num_LSTM_layers',0,3)
    num_LSTM_units=[]
    for i in range(num_LSTM_layers):
        num_LSTM_units.append(hp.Int('lstm'+str(i+1)+'_units',
                                     min_value = 32,
                                     max_value = 64,
                                     step=8))


    num_dense_layers = hp.Int('num_dense_layers',1,3)
    num_dense_units = []
    dense_activation = []

    for i in range(num_dense_layers):
        num_dense_units.append(hp.Int('dense'+str(i+1)+'_units',
                                     min_value = 32,
                                     max_value = 128,
                                     step=16))
        dense_activation.append(hp.Choice("dense"+str(i+1)+"_activation",["relu", "leaky_relu"]))

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-2])

    #make the NN
    x = TimeDistributed(Dense(hp.Int('td_dense_units',min_value=32,max_value=128,step=16),activation=hp.Choice("td_dense_activation",["relu","leaky_relu"])))(x)

    for i in range(num_LSTM_layers):
        # Every LSTM except the last passes the whole sequence on to the next LSTM;
        # the last one returns only its final output so the Dense layers receive
        # one vector per sample.
        is_last_lstm = i == num_LSTM_layers - 1
        x = LSTM(num_LSTM_units[i],return_sequences=not is_last_lstm)(x)

    if num_LSTM_layers == 0:
        # Without an LSTM the per-move features are still a sequence; flatten them
        # so the output is a single value per sample rather than one per move.
        x = Flatten()(x)

    for i in range(num_dense_layers):
        x = Dense(num_dense_units[i],activation = dense_activation[i])(x)


    output = Dense(1,activation='relu',name="Elo")(x)


    #Alternative: set outputs to be hot encoded between 48 values
    #output1 = Dense(48,activation='softmax',name="WhiteElo")(x)
    #output2 = Dense(48,activation='softmax',name="BlackElo")(x)

    model = keras.Model(inputs=inputs,outputs=[output])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                    loss={'Elo':'mae'},
                    metrics={'Elo':'mae'})

    return model

# NOTE(review): `kt` is not imported in any visible cell — presumably
# `import keras_tuner as kt`; confirm and add it to the imports cell.
# Hyperband search over model_builder's hyperparameters, minimising val_loss.
tuner = kt.Hyperband(model_builder,
                     objective='val_loss',
                     max_epochs=100,
                     factor=5)

# These rebind the `stop_early` and `save` names used by the earlier training
# cell; checkpoints now go to 'modelCP.keras'. The same checkpoint callback is
# passed to the search and to the final fit below.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
save = tf.keras.callbacks.ModelCheckpoint('modelCP.keras', save_best_only=True,mode='auto',monitor='val_loss')

tuner.search(train_gen,validation_data=val_gen,epochs=100,callbacks=[stop_early,save])

# Take the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(best_hps.values)

# Rebuild a fresh (untrained) model from the best hyperparameters and train it.
model = tuner.hypermodel.build(best_hps)

model.fit(train_gen,validation_data=val_gen,epochs=100,callbacks=[stop_early,save])