# Append submit

Predict on different sets and append the results to a single submission file.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import sys
import os

# Turn off pandas' chained-assignment check, i.e. silence SettingWithCopyWarning.
pd.set_option('mode.chained_assignment', None)


# ----------------------------------------
# Flags for working on my different machines.
# Leave exactly one flag uncommented. The other flag names then do not exist,
# so their sections below raise NameError and are skipped. If several flags
# are enabled, the last matching section wins.
# flag_kaggle = True
flag_FW = True
# flag_LN = True

# Each section sets: the library search path, the competition data directory
# (base_dir), the prepared dataset directory (devset_dir) and the prefix used
# for output files (output_dir).
try:
    if flag_kaggle:
        sys.path.insert(0, '/kaggle/input/hms-lib')
        base_dir = '/kaggle/input/hms-harmful-brain-activity-classification'
        devset_dir = '/kaggle/input/hms-cwt-scalograms-single-numpy-v1'
        output_dir = ''
except NameError:
    # flag_kaggle is commented out above; any other error should surface.
    pass

try:
    if flag_FW:
        sys.path.insert(0, '../lib')
        base_dir = '../../kaggle_data/hms'
        devset_dir = '../data'
        output_dir = 'results/'
except NameError:
    # flag_FW is commented out above; any other error should surface.
    pass

try:
    if flag_LN:
        sys.path.insert(0, '../lib')
        base_dir = '../../data/hms'
        devset_dir = '../data'
        output_dir = 'results/'
except NameError:
    # flag_LN is commented out above; any other error should surface.
    pass
# ----------------------------------------

from KLmetric import score

# Paths of the prepared train / val / test arrays inside devset_dir. Every
# split has a data file and a companion "_items" file; all six names share
# the same "05_single_cwt_v1_" prefix.
(path_train, path_train_items,
 path_val, path_val_items,
 path_test, path_test_items) = (
    f'{devset_dir}/05_single_cwt_v1_{part}.npy'
    for part in ('train', 'train_items', 'val', 'val_items', 'test', 'test_items')
)

2024-03-25 19:09:59.751288: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-25 19:09:59.840396: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# model_filename = f'{output_dir}hms-keras-12-cwt-final.keras'
# model.save(model_filename)

In [2]:

#
# Test Data generator for predicting
# 

class TestDataGenerator(keras.utils.Sequence):
    """Keras Sequence that yields input batches only (no labels) for prediction.

    Samples are served in the order of the rows of the items array; each row
    points, through its column 2, at one entry along the first axis of the
    data array.
    """

    def __init__(self, path_to_items, path_to_data, batch_size=32, n_classes=6, shuffle=False):
        """Load the item table and the data array from disk.

        Args:
            path_to_items: Path of a ``.npy`` file with one row per sample.
                Row layout as noted by the author:
                [eeg_id, eeg_sub_id, idx in sgrams (1st index), target,
                seizure_vote, lpd_vote, gpd_vote, lrda_vote,
                grda_vote, other_vote]. Only column 2 is read by this class.
            path_to_data: Path of a ``.npy`` file holding the input arrays,
                indexed on its first axis by column 2 of the items.
            batch_size: Maximum number of samples per batch.
            n_classes: Stored on the instance; not used by this class.
            shuffle: Stored on the instance but never applied; the order
                always follows the items array.
        """
        # Keras 3 expects Sequence/PyDataset subclasses to initialise the base
        # class; on older versions this is a harmless no-op.
        super().__init__()
        # Hard-coded channel count. NOTE(review): presumably equals the size
        # of the last axis of the data array -- confirm against the dataset.
        self.n_channels = 5
        self.data = np.load(path_to_data)
        self.items = np.load(path_to_items)
        self.dim = (self.data.shape[1], self.data.shape[2])
        self.batch_size = batch_size
        # Batches are built by walking the items, so the sample count is the
        # number of item rows, not the number of entries in the data array.
        self.len = self.items.shape[0]
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be short)."""
        return int(np.ceil(self.len / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as a single array of inputs."""
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(batch_indexes)

    def get_dim(self):
        """Return (height, width, n_channels), the shape for the input layer."""
        return (self.dim[0], self.dim[1], self.n_channels)

    def on_epoch_end(self):
        """Reset the sample order to 0..len-1; no shuffling is performed."""
        self.indexes = np.arange(self.len)

    def __data_generation(self, indexes):
        """Assemble the inputs for the given item positions.

        Args:
            indexes: Positions into the items array.

        Returns:
            Array of shape (len(indexes), *dim, n_channels) with NumPy's
            default float dtype.
        """
        batch = np.empty((len(indexes), *self.dim, self.n_channels))
        # Column 2 of each item is the position of its sample in self.data.
        data_rows = self.items[indexes, 2].astype(np.int32)
        batch[:, :, :, :] = self.data[data_rows, :, :, :]
        return batch


In [10]:
# Load the saved Keras model used for the predictions below.
# Alternative model files kept for reference (Kaggle working dir / local results dir):
# loaded_model = keras.models.load_model('/kaggle/working/hms-keras-12-cwt-final.keras')
# loaded_model = keras.models.load_model('results/hms-keras-12-cwt-final.keras')
# NOTE(review): this path is hard-coded and does not use output_dir, so it
# only resolves where a local 'results/' directory exists.
model = keras.models.load_model('results/checkpoint1.model.keras')

In [11]:
# Keyword arguments forwarded to the generator's constructor.
params = dict(batch_size=32, n_classes=6)

# Wrap the test arrays in a generator and run the model over all batches.
test_generator = TestDataGenerator(path_test_items, path_test, **params)
y_pred = model.predict(test_generator)



In [12]:
# Names of the six vote columns; the columns of y_pred are assigned to these
# names in this order.
TARGETS = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']

# Reload the item table to get the eeg_id (column 0) of every predicted row.
test_items = np.load(path_test_items)
df_test_items = pd.DataFrame(test_items)
# Use an explicit 64-bit integer: ids such as 4233815620 do not fit in int32,
# and plain `int` maps to a 32-bit type on some platform/NumPy combinations.
df_test_items[0] = df_test_items[0].astype(np.int64)

# One row per item: the id followed by the predictions rounded to 6 decimals.
sub = pd.DataFrame({'eeg_id': df_test_items[0]})
sub[TARGETS] = np.round(y_pred, 6)
# sub.to_csv('submission.csv',index=False)


In [20]:
sub

Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,4233815620,0.075590,0.110301,0.182943,0.077463,0.200211,0.353492
1,3403533082,0.098262,0.153067,0.072883,0.171067,0.145180,0.359542
2,3965487042,0.110641,0.119595,0.073578,0.096455,0.284444,0.315287
3,2391349997,0.058712,0.103725,0.050791,0.118276,0.315982,0.352513
4,3572672408,0.130737,0.153516,0.116511,0.110104,0.170550,0.318583
...,...,...,...,...,...,...,...
1243,1366416656,0.070139,0.120307,0.112481,0.224289,0.180304,0.292480
1244,1353894913,0.084653,0.170651,0.100513,0.193428,0.156617,0.294138
1245,4000022002,0.045042,0.022166,0.000193,0.002838,0.004871,0.924889
1246,1256215657,0.067536,0.113160,0.074910,0.132595,0.121191,0.490609


In [16]:
# Extra eeg_ids handled outside the model predictions. A later cell rebinds
# other_items to a shorter list before it is used.
other_items = [
    4030290342,
    2482901265,
    342782614,
    3129576740,
    1384820659,
    2099984829,
    1129801459,
    2882719839,
    3441128830,
    1641054670,
]

other_items

[4030290342,
 2482901265,
 342782614,
 3129576740,
 1384820659,
 2099984829,
 1129801459,
 2882719839,
 3441128830,
 1641054670]

In [21]:
# Rebind other_items to a three-id list.
other_items = [4030290342, 2482901265, 342782614]
other_items

[4030290342, 2482901265, 342782614]

In [22]:
# Fixed prediction for every id in other_items: zero everywhere except the
# last target column ('other_vote' in TARGETS), which gets 1.
y_pred2 = np.zeros((len(other_items), len(TARGETS)), dtype=float)
y_pred2[:, -1] = 1

# Same column layout as the model-based submission frame.
sub2 = pd.DataFrame({'eeg_id': other_items})
sub2[TARGETS] = y_pred2


In [24]:
# Stack the model predictions and the fixed rows. ignore_index renumbers the
# rows so newsub has no duplicate index labels; the CSV is written without
# the index either way.
newsub = pd.concat([sub, sub2], ignore_index=True)
newsub.to_csv('submission.csv', index=False)


In [None]:

# Reference frame for scoring: column 0 (eeg_id) plus columns 4-9 of the
# item table, relabelled with the submission's column names. The copy keeps
# the relabelling independent of df_test_items.
df_test_scoring = df_test_items[[0, 4, 5, 6, 7, 8, 9]].copy()
df_test_scoring.columns = sub.columns

# NOTE(review): score() comes from the local KLmetric module, which is not
# visible here. If it edits its arguments in place (e.g. drops the id
# column), passing `sub` directly would alter the submission frame, so a
# copy is handed over instead -- confirm against KLmetric.
score(df_test_scoring, sub.copy(), 'eeg_id')

In [29]:
# Reload the test items and display column 0 (the eeg_id values) as integers.
# An explicit 64-bit type is used because the ids exceed the int32 range and
# plain `int` is 32-bit on some platform/NumPy combinations.
items = np.load(path_test_items)
items[:, 0].astype(np.int64)

array([4233815620, 3403533082, 3965487042, ..., 4000022002, 1256215657,
       4170380205])