In [64]:
import gc
import glob
from pathlib import Path

import cupy as cp
import numpy as np
import pandas as pd
import soundfile as sf
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.metrics import roc_auc_score
from skmultilearn.model_selection import IterativeStratification
from tensorflow.keras import layers,regularizers,Sequential,backend,callbacks,optimizers,metrics,losses
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tqdm.notebook import tqdm

In [39]:
# Paths of every .flac file directly under data/test.
# NOTE(review): glob yields paths in arbitrary (filesystem) order; the
# submission built later pairs these paths row-by-row with TEST — confirm that
# TEST was generated from the same ordering.
testfiles = list(glob.iglob('data/test/*.flac'))

In [53]:
# Load the saved feature matrices and the training label table.
# np.load on an .npz archive returns an NpzFile that keeps the underlying file
# open until it is closed, so each archive is read inside a `with` block and
# only the extracted array outlives it.
with np.load("savedata/TF1/TRAIN.npz") as data_train:
    TRAIN = data_train['arr_0']

with np.load("savedata/TF1/TEST.npz") as data_test:
    TEST = data_test['arr_0']

y_train_all_classes = pd.read_hdf("savedata/TF1/y_train.hd5")

# Fail early if features and labels are not row-aligned, or if train and test
# do not share the same number of feature columns.
assert TRAIN.shape[0] == y_train_all_classes.shape[0], \
    "TRAIN and y_train_all_classes must have the same number of rows"
assert TRAIN.shape[1] == TEST.shape[1], \
    "TRAIN and TEST must have the same number of feature columns"

TRAIN.shape, TEST.shape, y_train_all_classes.shape

((8997, 1000), (1992, 1000), (8997, 26))

In [49]:
# Preview the first five rows of the label table.
y_train_all_classes.head(n=5)

Unnamed: 0,recording_id,species_id,s0,s1,s2,s3,s4,s5,s6,s7,...,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23
0,003bec244,14,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,006ab765f,23,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,007f87ba2,12,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0099c367b,17,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,009b760e6,10,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [51]:
#Define Model
input_shape = 1000
output_shape = 24


def build_model(input_dim=None, n_outputs=None):
    """Build and compile the fully connected multi-label classifier.

    The network is a stack of four identical stages, each made of
    BatchNormalization -> Dropout -> weight-normalised Dense. The first three
    stages use ReLU; the last one is the sigmoid output layer.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to the module-level ``input_shape``
        (looked up at call time).
    n_outputs : int, optional
        Number of sigmoid output units. Defaults to the module-level
        ``output_shape`` (looked up at call time).

    Returns
    -------
    tf.keras.Sequential
        Model compiled with Lookahead-wrapped Adam, binary cross-entropy loss
        and the 'AUC' metric.
    """
    if input_dim is None:
        input_dim = input_shape
    if n_outputs is None:
        n_outputs = output_shape

    # (dropout rate, dense units, activation) for each stage, in order.
    stages = (
        (0.2, 2048, "relu"),
        (0.2, 1048, "relu"),
        (0.5, 500, "relu"),
        (0.2, n_outputs, "sigmoid"),
    )

    # Keras documents `shape` as a tuple that excludes the batch dimension, so
    # pass it explicitly as a 1-tuple rather than a bare integer.
    stack = [tf.keras.layers.Input(shape=(input_dim,))]
    for drop_rate, units, activation in stages:
        stack.append(tf.keras.layers.BatchNormalization())
        stack.append(tf.keras.layers.Dropout(drop_rate))
        stack.append(
            tfa.layers.WeightNormalization(
                tf.keras.layers.Dense(units, activation=activation)
            )
        )

    model = tf.keras.Sequential(stack)
    model.compile(
        # Lookahead wraps Adam with sync_period=10.
        optimizer=tfa.optimizers.Lookahead(tf.optimizers.Adam(), sync_period=10),
        loss=losses.BinaryCrossentropy(label_smoothing=0.000000001),
        metrics=['AUC'],
    )
    return model

In [59]:
# Names of the 24 label columns: 's0', 's1', ..., 's23'.
target_cols = [f's{tgt}' for tgt in range(24)]

# The following cells refer to this list as `tar_col`; define that name here so
# the notebook runs on a fresh kernel instead of relying on leftover state.
tar_col = target_cols

In [60]:
# Label matrix for the columns listed in tar_col, copied into a float64 array.
TRAIN_TARGETS = np.array(y_train_all_classes[tar_col].values, dtype=np.float64)

In [40]:
# Start the submission frame with one recording_id per test file.
# The id is the file name without directory or extension; taking the stem
# avoids depending on the exact length of the directory prefix and of the id.
submission = pd.DataFrame(
    {"recording_id": [Path(path).stem for path in testfiles]}
)

In [61]:
N_SPLITS = 5
RNG_SEED = 42
EPOCHS = 40
BATCH_SIZE=128

# Seed TensorFlow's global random generator so repeated runs start from the
# same state (the splitter below receives RNG_SEED separately).
tf.random.set_seed(RNG_SEED)

# ytrain collects each fold's predictions on its held-out rows;
# ytest accumulates the average of the per-fold predictions on TEST.
ytrain = np.zeros((TRAIN.shape[0],len(tar_col)))
ytest = np.zeros((TEST.shape[0],len(tar_col)))

fold_labels = y_train_all_classes[tar_col]
splitter = IterativeStratification(n_splits=N_SPLITS, random_state=RNG_SEED)

# The label table is passed as both X and y to the splitter.
for n, (tr, te) in enumerate(splitter.split(fold_labels, fold_labels)):

    # Release the Keras global state left by the previous fold's model before
    # building a new one, so models do not pile up across folds.
    backend.clear_session()

    model = build_model()
    print(f"Fold ==> {n}")
    # `min_delta` is the current name of the threshold formerly passed as the
    # deprecated `epsilon` keyword.
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3,
                                       verbose=0, min_delta=1e-4, mode='min')
    model.fit(TRAIN[tr,:],
              TRAIN_TARGETS[tr,:],
              validation_data=(TRAIN[te,:], TRAIN_TARGETS[te,:]),
              epochs=EPOCHS, batch_size=BATCH_SIZE,
              callbacks=[reduce_lr_loss], verbose=1,
             )

    # Held-out predictions for this fold, and this fold's share of the
    # averaged test prediction.
    ytrain[te,:] += model.predict(TRAIN[te,:])
    ytest +=  model.predict(TEST)/N_SPLITS

Fold ==> 0
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Fold ==> 1
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Fold ==> 2
Epoch 1/40
Epoch 2/40
Epo

Fold ==> 3
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Fold ==> 4
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
# Persist the object currently bound to `model`. When run after the
# cross-validation loop this is the model trained in the last fold only.
model.save(filepath='saved_model/my_model')

In [65]:
# ROC AUC of the held-out predictions in ytrain, computed separately for each
# target column, followed by the mean over all targets.
AUC_scores = [
    roc_auc_score(TRAIN_TARGETS[:, col], ytrain[:, col])
    for col in range(len(tar_col))
]
print(f'mean of AUC scores = {np.mean(AUC_scores)}')

mean of AUC scores = 0.7640451897590913


In [66]:
# Attach the fold-averaged test predictions, one column per name in tar_col.
# NOTE(review): rows of ytest follow the row order of TEST, while recording_id
# follows the order of testfiles — presumably the same ordering; confirm.
submission[tar_col] = ytest
submission.head()

Unnamed: 0,recording_id,s0,s1,s2,s3,s4,s5,s6,s7,s8,...,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23
0,000316da7,0.006161,0.005675,0.004037,0.005231,0.004419,0.006131,0.005999,0.006391,0.005704,...,0.009689,0.005181,0.003585,0.006545,0.006226,0.005239,0.007672,0.006276,0.009956,0.007983
1,003bc2cb2,0.003939,0.011657,0.003939,0.008894,0.003298,0.001888,0.002304,0.005089,0.003289,...,0.005366,0.007275,0.091355,0.021637,0.003819,0.005712,0.00775,0.003282,0.003248,0.002667
2,0061c037e,0.005391,0.005367,0.004225,0.004925,0.003714,0.006915,0.008413,0.00676,0.004611,...,0.007868,0.005261,0.003647,0.008345,0.005293,0.006108,0.009097,0.00492,0.011893,0.008345
3,010eb14d3,0.014188,0.004181,0.002437,0.004052,0.00301,0.003374,0.002041,0.003769,0.005537,...,0.008295,0.003531,0.002725,0.003229,0.005944,0.003025,0.004287,0.003747,0.003642,0.004799
4,011318064,0.0083,0.004755,0.003386,0.004891,0.004372,0.005797,0.003434,0.004998,0.0054,...,0.013242,0.004014,0.00288,0.004044,0.007807,0.003762,0.005487,0.005746,0.006298,0.008658


In [67]:
# Write the submission table to CSV without the DataFrame index column.
submission.to_csv(path_or_buf='savedata/TF1/submission-tf-fft-1.csv', index=False)

In [68]:
# Display (rows, columns) of the submission table as a final sanity check.
submission.shape

(1992, 25)