# Bayesian Optimization

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
import random
import tensorflow as tf
plt.style.use('ggplot')
import warnings
warnings.filterwarnings("ignore")

2024-08-07 03:24:11.664824: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-07 03:24:11.664970: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-07 03:24:11.809914: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# Load the training labels table. The preview below shows an `id` column
# (the sample identifier used later to locate each .npy file) and a `target`
# column with 0/1 values.
labels = pd.read_csv("../input/g2net-gravitational-wave-detection/training_labels.csv")
# Show only the first rows to check that the file was parsed as expected.
display(labels.head())

Unnamed: 0,id,target
0,00000e74ad,1
1,00001f4945,0
2,0000661522,0
3,00007a006a,0
4,0000a38978,1


In [None]:
! pip install optree
! pip install nnAudio

Collecting nnAudio
  Downloading nnAudio-0.3.3-py3-none-any.whl.metadata (771 bytes)
Downloading nnAudio-0.3.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nnAudio
Successfully installed nnAudio-0.3.3


In [None]:
from sklearn.model_selection import train_test_split

from tensorflow import keras
from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPool1D, BatchNormalization
from keras.optimizers import RMSprop,Adam
import torch # For deep learning
from nnAudio.Spectrogram import CQT1992v2 # For creating Constant-Q Transform spectrograms
import math
from random import shuffle

import warnings
warnings.filterwarnings("ignore")

In [None]:
class DataGenerator(Sequence):
    """Keras batch generator that converts stored signals into CQT spectrograms.

    For every requested batch the generator loads one ``.npy`` file per sample,
    concatenates its rows into a single series, normalizes it, and applies a
    Constant-Q transform. Each spectrogram becomes one ``(69, 193)`` input for
    the network.

    Parameters
    ----------
    path : str
        Root folder of the ``.npy`` files. Files are looked up under
        ``path/<c0>/<c1>/<c2>/<id>.npy`` where ``c0..c2`` are the first three
        characters of the sample id.
    list_IDs : sequence
        Index labels of ``data`` that this generator serves.
    data : pandas.DataFrame
        Table providing the ``id`` and ``target`` columns for every label in
        ``list_IDs``.
    batch_size : int
        Maximum number of samples per batch. The last batch may be smaller.
    **kwargs
        Forwarded to the Keras base class (e.g. ``workers``).
    """

    # Spectrogram size (frequency bins, time frames) that the CQT settings
    # below are expected to produce for the files used in this notebook.
    IMAGE_SHAPE = (69, 193)

    def __init__(self, path, list_IDs, data, batch_size, **kwargs):
        # Keras 3 expects subclasses to initialize the base class.
        super().__init__(**kwargs)
        self.path = path
        self.list_IDs = list_IDs
        self.data = data
        self.batch_size = batch_size
        self.indexes = np.arange(len(self.list_IDs))
        # The transform does not depend on the sample, so build it once instead
        # of once per loaded file.
        self._transform = CQT1992v2(sr=2048, fmin=20, fmax=1024, hop_length=64, verbose = False)

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        n_samples = len(self.list_IDs)
        return (n_samples + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def __data_generation(self, list_IDs_temp):
        """Load and transform the samples whose labels are in ``list_IDs_temp``.

        Returns ``X`` with shape ``(n, 69, 193)`` and ``y`` with shape
        ``(n, 1)``, where ``n == len(list_IDs_temp)``.
        """
        # Size the batch by the samples actually present. Allocating
        # ``batch_size`` rows would pad the last batch with all-zero images
        # labelled 0, which pollutes training and validation metrics.
        n_samples = len(list_IDs_temp)
        X = np.zeros((n_samples, *self.IMAGE_SHAPE))
        y = np.zeros((n_samples, 1))
        for i, ID in enumerate(list_IDs_temp):
            id_ = self.data.loc[ID, 'id']
            file_name = id_+'.npy'
            path_in = '/'.join([self.path, id_[0], id_[1], id_[2]])+'/'
            data_array = np.load(path_in+file_name)
            # np.hstack concatenates the rows of the loaded array end to end
            # (assumes each file holds a 2-D array — TODO confirm).
            waves = np.hstack(data_array)
            # Normalize by the largest value of the concatenated series.
            waves = waves / np.max(waves)
            # The transform works on float32 torch tensors.
            waves = torch.from_numpy(waves).float()
            # Q-transform, then back to NumPy. The result has a leading
            # dimension of size 1; dropping it leaves the (69, 193) image.
            image = np.array(self._transform(waves))
            X[i, ] = image[0]
            y[i, ] = self.data.loc[ID, 'target']
        return X, y

In [None]:
import pandas as pd
import random
from itertools import cycle


# The training set is spread over 16 top-level folders named after the hex
# digits 0-9 and a-f. This endless iterator hands them out one at a time so
# that each optimization step can work on a different folder.
directorio = cycle('0123456789abcdef')

In [None]:
#this is the Bayesian Optimization library
!pip install scikit-optimize



In [None]:
import skopt
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical

# Search space: the base-2 logarithm of the layer width (number of neurons)
# and the number of dense layers.
dim_log_width = Integer(4, 8, name='log_width')
dim_layers = Integer(1, 5, name='layers')
dimensions = [dim_log_width, dim_layers]

# Widths are limited to the range 2^4 .. 2^8 because the model becomes
# unstable for larger widths: each call only sees a small fragment of the
# dataset. Using the whole dataset on every call might give better results,
# but that is too expensive for the computational resources available here.

In [None]:
# Per-trial bookkeeping: the log-width and layer count that were tried and the
# validation accuracy they reached. These values are meant for plotting how
# gp_minimize explores the hyperparameter space.
log_width_history, layers_history, acc_history = [], [], []


def train(params):
    """Objective function for ``gp_minimize``: train one model on one data shard.

    Each call takes the next folder prefix from ``directorio``, splits that
    shard 80/20 into training and validation rows, trains a small
    Conv1D + dense network for one epoch and records the outcome.

    Parameters
    ----------
    params : sequence of two ints
        ``(log_width, layers)``: base-2 logarithm of the number of
        filters/neurons, and the number of hidden dense layers.

    Returns
    -------
    float
        The negated validation accuracy (``gp_minimize`` minimizes).
    """
    # Every call uses the next shard of the dataset.
    shard_prefix = next(directorio)
    print(params)
    log_width, layers = params
    log_width, layers = int(log_width), int(layers)
    width = 2**log_width

    # Select the shard once and reuse it for the split and both generators.
    shard = labels[labels['id'].str.startswith(shard_prefix)]
    train_rows, val_rows = train_test_split(list(shard.index), test_size=0.2, random_state=2021)
    train_generator = DataGenerator('/kaggle/input/g2net-gravitational-wave-detection/train/', train_rows, shard, 256)
    val_generator = DataGenerator('/kaggle/input/g2net-gravitational-wave-detection/train/', val_rows, shard, 256)

    # Building the model
    model = Sequential()
    model.add(Conv1D(width, input_shape=(69, 193,), kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Flatten())
    for _ in range(layers):
        model.add(Dense(width, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=2e-4), loss='binary_crossentropy', metrics=['accuracy'])

    # Training the model
    history = model.fit(train_generator, validation_data=val_generator, epochs=1)

    # fit() already evaluated the validation generator at the end of the
    # epoch, so read that result instead of running a second pass over it.
    val_accuracy = float(history.history['val_accuracy'][-1])

    # Record the trial only once it has finished, so the three history lists
    # always stay the same length even if a trial raises.
    log_width_history.append(log_width)
    layers_history.append(layers)
    acc_history.append(val_accuracy)

    # gp_minimize minimizes, so return the negated accuracy.
    return -val_accuracy

In [None]:
# Starting point of the search, given as [log_width, layers]. It is passed to
# gp_minimize as x0, so it is the first set of hyperparameters evaluated
# (here: width 2**5 and one dense layer).
default_parameters = [5, 1]

In [None]:
# The EI (expected improvement) acquisition function is the safest choice in
# this context. PI (probability of improvement) can also be studied: it is a
# bit more aggressive, but might work correctly as well. Other acquisition
# functions exist, but they have not been checked here.
#
# n_calls is the number of times gp_minimize calls the training function, each
# time with one set of hyperparameters. It is set to 16 to match the 16 data
# folders, so no folder is used twice; with more calls the folder iterator
# would wrap around and reuse data.
#
# random_state fixes the sequence of points proposed by the optimizer so that
# the search can be repeated (the training itself remains stochastic).
search_result = gp_minimize(
    func=train,
    dimensions=dimensions,
    acq_func='EI',
    n_calls=16,
    x0=default_parameters,
    random_state=2021)

[5, 1]
[1m  1/110[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:30:15[0m 50s/step - accuracy: 0.5352 - loss: 0.7683

I0000 00:00:1723001169.345729      82 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1723001169.369747      82 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m108/110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m10s[0m 5s/step - accuracy: 0.5282 - loss: 0.6970

W0000 00:00:1723001761.038405      81 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m788s[0m 7s/step - accuracy: 0.5290 - loss: 0.6966 - val_accuracy: 0.6374 - val_loss: 0.6858
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.6382 - loss: 0.6851
[5, 4]
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:27:24[0m 48s/step - accuracy: 0.4727 - loss: 0.7105

W0000 00:00:1723002083.404165      82 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/111[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m5s[0m 5s/step - accuracy: 0.5038 - loss: 0.6945 

W0000 00:00:1723002675.139127      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m784s[0m 7s/step - accuracy: 0.5041 - loss: 0.6945 - val_accuracy: 0.5116 - val_loss: 0.6922
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 4s/step - accuracy: 0.5098 - loss: 0.6924
[7, 3]
[1m  1/109[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:28:36[0m 49s/step - accuracy: 0.4805 - loss: 0.7110

W0000 00:00:1723002995.010139      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m107/109[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m10s[0m 5s/step - accuracy: 0.5211 - loss: 0.6930

W0000 00:00:1723003579.518407      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m782s[0m 7s/step - accuracy: 0.5223 - loss: 0.6924 - val_accuracy: 0.6519 - val_loss: 0.6780
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.6401 - loss: 0.6785
[8, 3]
[1m  1/110[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:29:50[0m 49s/step - accuracy: 0.4922 - loss: 0.7124

W0000 00:00:1723003904.988230      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m107/110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m16s[0m 5s/step - accuracy: 0.5278 - loss: 0.6980

W0000 00:00:1723004492.427918      81 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m786s[0m 7s/step - accuracy: 0.5299 - loss: 0.6964 - val_accuracy: 0.7026 - val_loss: 0.6745
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.7154 - loss: 0.6743
[4, 3]
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m873s[0m 8s/step - accuracy: 0.5119 - loss: 0.6922 - val_accuracy: 0.5664 - val_loss: 0.6894
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.5533 - loss: 0.6898
[4, 2]
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m774s[0m 7s/step - accuracy: 0.5000 - loss: 0.6945 - val_accuracy: 0.5158 - val_loss: 0.6929
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 4s/step - accuracy: 0.5096 - loss: 0.6930
[5, 2]
[1m  1/109[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:27:33[0m 49s/step - accuracy: 0.4648 - loss: 0.7585

W0000 00:00:1723006738.143264      82 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m108/109[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m5s[0m 5s/step - accuracy: 0.5188 - loss: 0.6965 

W0000 00:00:1723007325.684594      82 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m789s[0m 7s/step - accuracy: 0.5194 - loss: 0.6962 - val_accuracy: 0.5144 - val_loss: 0.6915
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 4s/step - accuracy: 0.4956 - loss: 0.6939
[6, 3]
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m791s[0m 7s/step - accuracy: 0.5481 - loss: 0.6838 - val_accuracy: 0.6821 - val_loss: 0.6803
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 4s/step - accuracy: 0.6695 - loss: 0.6808
[5, 4]
[1m  1/110[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:25:44[0m 47s/step - accuracy: 0.5234 - loss: 0.6898

W0000 00:00:1723008568.856343      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m108/110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m11s[0m 6s/step - accuracy: 0.5081 - loss: 0.6931

W0000 00:00:1723009175.372596      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m807s[0m 7s/step - accuracy: 0.5087 - loss: 0.6930 - val_accuracy: 0.5732 - val_loss: 0.6910
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 4s/step - accuracy: 0.5730 - loss: 0.6911
[5, 5]
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:31:45[0m 50s/step - accuracy: 0.4844 - loss: 0.7014

W0000 00:00:1723009506.219362      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m109/111[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m11s[0m 6s/step - accuracy: 0.5172 - loss: 0.6914

W0000 00:00:1723010135.670017      81 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m846s[0m 7s/step - accuracy: 0.5182 - loss: 0.6911 - val_accuracy: 0.5884 - val_loss: 0.6864
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 4s/step - accuracy: 0.5780 - loss: 0.6872
[7, 4]
[1m  1/110[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:40:43[0m 55s/step - accuracy: 0.4961 - loss: 0.7084

W0000 00:00:1723010521.787447      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m108/110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m12s[0m 6s/step - accuracy: 0.5382 - loss: 0.6847

W0000 00:00:1723011189.818704      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m906s[0m 8s/step - accuracy: 0.5396 - loss: 0.6839 - val_accuracy: 0.6415 - val_loss: 0.6756
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.6348 - loss: 0.6759
[7, 3]
[1m  1/109[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:31:12[0m 51s/step - accuracy: 0.5039 - loss: 0.6952

W0000 00:00:1723011573.888343      82 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m107/109[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m11s[0m 6s/step - accuracy: 0.5514 - loss: 0.6790

W0000 00:00:1723012225.445128      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m863s[0m 8s/step - accuracy: 0.5529 - loss: 0.6780 - val_accuracy: 0.6791 - val_loss: 0.6689
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 4s/step - accuracy: 0.6642 - loss: 0.6710
[5, 4]
[1m  1/110[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:38:56[0m 54s/step - accuracy: 0.4883 - loss: 0.7106

W0000 00:00:1723012574.508298      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m108/110[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m11s[0m 6s/step - accuracy: 0.5180 - loss: 0.6935

W0000 00:00:1723013196.895355      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m831s[0m 7s/step - accuracy: 0.5189 - loss: 0.6933 - val_accuracy: 0.5882 - val_loss: 0.6889
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 4s/step - accuracy: 0.5844 - loss: 0.6892
[7, 1]
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:31:54[0m 50s/step - accuracy: 0.4688 - loss: 0.7903

W0000 00:00:1723013537.874719      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m109/111[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m13s[0m 7s/step - accuracy: 0.5253 - loss: 0.7333

W0000 00:00:1723014271.958006      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m956s[0m 8s/step - accuracy: 0.5265 - loss: 0.7319 - val_accuracy: 0.5392 - val_loss: 0.6804
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.5296 - loss: 0.6816
[5, 2]
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:29:01[0m 49s/step - accuracy: 0.4922 - loss: 0.7212

W0000 00:00:1723014622.732646      83 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m110/111[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m5s[0m 6s/step - accuracy: 0.5184 - loss: 0.6987 

W0000 00:00:1723015268.768599      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m874s[0m 8s/step - accuracy: 0.5189 - loss: 0.6984 - val_accuracy: 0.6014 - val_loss: 0.6884
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 4s/step - accuracy: 0.5872 - loss: 0.6892
[6, 1]
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m835s[0m 7s/step - accuracy: 0.5470 - loss: 0.6846 - val_accuracy: 0.6108 - val_loss: 0.6774
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 4s/step - accuracy: 0.6009 - loss: 0.6779


In [None]:
# search_result.x holds the best point found as [log_width, layers].
best_log_width, best_layers = search_result.x
best_width = 2 ** best_log_width  # the best value of width obtained
print(search_result.x)

# One line per trial: log-width, number of layers, validation accuracy.
for tried_log_width, tried_layers, tried_acc in zip(log_width_history, layers_history, acc_history):
    print(tried_log_width, tried_layers, tried_acc)

[8, 3]
5 1 0.6374163031578064
5 4 0.5115792155265808
7 3 0.6519252061843872
8 3 0.7025669813156128
4 3 0.56640625
4 2 0.5157696604728699
5 2 0.5143694281578064
6 3 0.6820591688156128
5 4 0.5732421875
5 5 0.5884486436843872
7 4 0.6414620280265808
7 3 0.6791294813156128
5 4 0.5881696343421936
7 1 0.5392020344734192
5 2 0.6014229655265808
6 1 0.6107701063156128


In [None]:
# The sample submission lists the ids of the test samples to predict.
sample_submission = pd.read_csv('../input/g2net-gravitational-wave-detection/sample_submission.csv')
y = labels['target'].values

# The generators look rows up by index label, so the split is done on the
# index of `labels` (95% training, 5% validation) rather than on the id
# strings. The earlier assignments of id arrays to train_idx/test_idx were
# overwritten immediately and have been dropped.
train_idx, train_Valx = train_test_split(list(labels.index), test_size=0.05, random_state=2021)
test_idx = list(sample_submission.index)

In [None]:
# Batch generators for the final run: training and validation rows come from
# the labelled table, test rows from the sample submission.
train_generator = DataGenerator(
    path='/kaggle/input/g2net-gravitational-wave-detection/train/',
    list_IDs=train_idx,
    data=labels,
    batch_size=256,
)
val_generator = DataGenerator(
    path='/kaggle/input/g2net-gravitational-wave-detection/train/',
    list_IDs=train_Valx,
    data=labels,
    batch_size=256,
)
test_generator = DataGenerator(
    path='/kaggle/input/g2net-gravitational-wave-detection/test/',
    list_IDs=test_idx,
    data=sample_submission,
    batch_size=256,
)

In [None]:
# Final network, using the width and depth selected by the search:
# one Conv1D block, batch normalization, flatten, `best_layers` hidden dense
# layers of `best_width` units, and a sigmoid output unit.
layer_stack = [
    Conv1D(best_width, input_shape=(69, 193,), kernel_size=3, activation='relu'),
    BatchNormalization(),
    Flatten(),
]
layer_stack += [Dense(best_width, activation='relu') for _ in range(best_layers)]
layer_stack.append(Dense(1, activation='sigmoid'))

classical_model = Sequential()
for layer in layer_stack:
    classical_model.add(layer)

classical_model.compile(
    optimizer=Adam(learning_rate=2e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
classical_model.summary()

# Train the selected architecture for one epoch on the full training split.
history_classical = classical_model.fit(train_generator, validation_data=val_generator, epochs = 1)

# Predict the test set. The model returns one column per output unit, so the
# result is flattened to a 1-D vector before it is stored as a column. The
# slice guards against a generator that pads its last batch.
predict = classical_model.predict(test_generator, verbose=1)
sample_submission['target'] = predict[:len(sample_submission)].ravel()
sample_submission.to_csv('submission.csv', index=False)

# Read the file back and show its first rows as a sanity check. A bare
# expression in the middle of a cell is not displayed, hence display().
my_submission = pd.read_csv("./submission.csv")
display(my_submission.head())

# Confirm that the submission file was written to the working directory.
print(os.listdir("."))
print(os.listdir("/kaggle/working"))

[1m   1/2079[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m31:23:57[0m 54s/step - accuracy: 0.5352 - loss: 0.6923

W0000 00:00:1723016601.176779      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m2076/2079[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m17s[0m 6s/step - accuracy: 0.7061 - loss: 0.5392

W0000 00:00:1723028448.108699      80 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m2079/2079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12517s[0m 6s/step - accuracy: 0.7062 - loss: 0.5391 - val_accuracy: 0.7500 - val_loss: 0.4849
[1m  1/883[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:02:29[0m 4s/step

W0000 00:00:1723029079.666915      81 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m883/883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5350s[0m 6s/step
['submission.csv', '__notebook__.ipynb']
['submission.csv', '__notebook__.ipynb']
