# 드라이브 연동

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
from google.colab import drive

drive.mount('/content/gdrive')

In [None]:
# Switch the working directory to the Drive root; the relative paths used by the
# following cells are resolved against it.
%cd /content/gdrive/My\ Drive

In [None]:
# The zip archives must already have been downloaded into this Drive folder.
# -p lets mkdir succeed when the directory already exists (cell can be re-run);
# -n makes unzip skip files that were already extracted instead of prompting.
!mkdir -p ./beep_data
!unzip -n XY_train.zip -d ./beep_data
!unzip -n XY_test.zip -d ./beep_data

# Data Check

In [13]:
# Passed to make_model as the last dimension of the model's input shape.
n_freq = 128  # spectrogram height

# Data Loader
- with Keras

In [14]:
import os 
import pandas as pd
import numpy as np
from keras.utils import Sequence

In [15]:
# Build a table pairing every training feature file (x_*) with its label file (y_*).
data_dir = os.path.join('.','data', 'XY_train')

# os.listdir gives no ordering guarantee, so both lists are sorted -- once, after
# the scan, rather than on every loop iteration. The sort is lexicographic
# (x_0, x_1, x_10, ...), which is fine because x and y are sorted the same way.
file_names = os.listdir(data_dir)
x_s = sorted(os.path.join(data_dir, name) for name in file_names if name.startswith('x_'))
y_s = sorted(os.path.join(data_dir, name) for name in file_names if name.startswith('y_'))

# Rows are paired by position, so every x_<id> must line up with the y_<id> of the
# same id; fail loudly here instead of silently training on mismatched pairs.
x_ids = [os.path.basename(path)[len('x_'):] for path in x_s]
y_ids = [os.path.basename(path)[len('y_'):] for path in y_s]
assert x_ids == y_ids, "x_* and y_* files in %s do not pair up one-to-one" % data_dir

df = pd.DataFrame({'x':x_s, 'y':y_s})
df.head()

Unnamed: 0,x,y
0,.\data\XY_train\x_0.npy,.\data\XY_train\y_0.npy
1,.\data\XY_train\x_1.npy,.\data\XY_train\y_1.npy
2,.\data\XY_train\x_10.npy,.\data\XY_train\y_10.npy
3,.\data\XY_train\x_100.npy,.\data\XY_train\y_100.npy
4,.\data\XY_train\x_101.npy,.\data\XY_train\y_101.npy


In [16]:
# Randomly split the file table into training and validation subsets.
train_ratio = 0.8
SPLIT_SEED = 42  # fixed seed so Restart & Run All reproduces the same split

# A dedicated generator keeps the split reproducible without touching the global
# NumPy random state that other cells draw from.
idxs = np.random.default_rng(SPLIT_SEED).permutation(len(df)).tolist()
n_train = int(len(df) * train_ratio)
train_idx = idxs[:n_train]
valid_idx = idxs[n_train:]

# idxs holds row positions, so select positionally (.iloc) rather than by label.
train_df = df.iloc[train_idx]
valid_df = df.iloc[valid_idx]

print(train_df.shape, valid_df.shape)

(657, 2) (165, 2)


In [30]:
# Build a table pairing every test feature file (x_*) with its label file (y_*).
# NOTE(review): the recorded run of this cell raised FileNotFoundError for
# '.\data\XY_test', while the setup cell extracts the archives into ./beep_data --
# confirm which directory actually holds the test set.
data_dir = os.path.join('.','data', 'XY_test')

# os.listdir gives no ordering guarantee, so both lists are sorted -- once, after
# the scan, rather than on every loop iteration.
file_names = os.listdir(data_dir)
x_s = sorted(os.path.join(data_dir, name) for name in file_names if name.startswith('x_'))
y_s = sorted(os.path.join(data_dir, name) for name in file_names if name.startswith('y_'))

# Rows are paired by position, so every x_<id> must line up with the y_<id> of the
# same id; fail loudly here instead of silently evaluating mismatched pairs.
x_ids = [os.path.basename(path)[len('x_'):] for path in x_s]
y_ids = [os.path.basename(path)[len('y_'):] for path in y_s]
assert x_ids == y_ids, "x_* and y_* files in %s do not pair up one-to-one" % data_dir

test_df = pd.DataFrame({'x':x_s, 'y':y_s})
test_df.head()

FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: '.\\data\\XY_test'

In [17]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads (x, y) ``.npy`` file pairs one batch at a time.

    Args:
        df: table with an ``x`` column and a ``y`` column of ``.npy`` file paths;
            row *i* of ``x`` is paired with row *i* of ``y``.
        batch_size: number of file pairs per batch; must be positive.
        shuffle: when True, the sample order is reshuffled at construction and
            again every time ``on_epoch_end`` is called.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """

    def __init__(self, df, batch_size, shuffle = True):
        # Give the Keras base class the chance to set up its own state.
        super().__init__()
        if batch_size <= 0:
            raise ValueError("batch_size must be positive, got %r" % (batch_size,))
        self.X = list(df.x)
        self.y = list(df.y)
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Builds the initial index order (shuffled when requested).
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.X))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.X) // self.batch_size

    def __data_generation(self, X_list, y_list):
        """Load the given files and stack them along a new leading batch axis.

        ``np.stack`` requires every array in a batch to have the same shape.
        """
        X = np.stack([np.load(path) for path in X_list], axis=0)
        y = np.stack([np.load(path) for path in y_list], axis=0)

        return X, y

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number ``index``."""
        indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]
        X_list = [self.X[k] for k in indexes]
        y_list = [self.y[k] for k in indexes]
        X, y = self.__data_generation(X_list, y_list)
        return X, y

In [18]:
# Mini-batch size shared by the training and validation generators.
BATCH_SIZE = 5

train_generator = DataGenerator(train_df, batch_size=BATCH_SIZE)
valid_generator = DataGenerator(valid_df, batch_size=BATCH_SIZE)
# The test generator stays disabled here: it needs `test_df`, and the cell that
# builds `test_df` fails when the ./data/XY_test directory is missing.
# test_generator = DataGenerator(test_df, 3)

# Train

In [19]:
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape, LSTM
from keras.optimizers import Adam

In [20]:
def make_model(input_shape, conv_filters=196, kernel_size=15, strides=4,
               gru_units=128, dropout_rate=0.8):
    """Build the Conv1D + stacked-GRU model that emits one sigmoid value per time step.

    Args:
        input_shape: shape of a single input sample, without the batch axis.
        conv_filters: number of filters in the Conv1D layer.
        kernel_size: width of the Conv1D kernel.
        strides: stride of the Conv1D layer; values above 1 shorten the time axis.
        gru_units: number of units in each of the two GRU layers.
        dropout_rate: rate passed to every Dropout layer. Keras interprets it as
            the fraction of units to drop.
            NOTE(review): the default 0.8 drops most activations -- confirm it is
            intended and not a keep-probability.

    Returns:
        An uncompiled Keras ``Model``.
    """
    X_input = Input(shape = input_shape)

    # Convolutional front end
    X = Conv1D(conv_filters, kernel_size=kernel_size, strides=strides)(X_input)
    X = BatchNormalization()(X)
    X = Activation('relu')(X)
    X = Dropout(dropout_rate)(X)

    # First recurrent block; return_sequences keeps one output per time step
    X = GRU(units = gru_units, return_sequences = True)(X)
    X = Dropout(dropout_rate)(X)
    X = BatchNormalization()(X)

    # Second recurrent block, with an additional dropout after normalisation
    X = GRU(units = gru_units, return_sequences = True)(X)
    X = Dropout(dropout_rate)(X)
    X = BatchNormalization()(X)
    X = Dropout(dropout_rate)(X)

    # Same single-unit sigmoid classifier applied independently at every time step
    X = TimeDistributed(Dense(1, activation = "sigmoid"))(X)

    model = Model(inputs = X_input, outputs = X)
    return model

In [21]:
# None leaves the first (per-sample) input dimension unspecified; n_freq fixes the last one.
model = make_model(input_shape = (None, n_freq))

In [22]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, 128)]       0         
                                                                 
 conv1d_1 (Conv1D)           (None, None, 196)         376516    
                                                                 
 batch_normalization_3 (Batc  (None, None, 196)        784       
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, None, 196)         0         
                                                                 
 dropout_4 (Dropout)         (None, None, 196)         0         
                                                                 
 gru_2 (GRU)                 (None, None, 128)         125184    
                                                           

In [23]:
# `learning_rate` replaces the deprecated `lr` alias, which triggered the warning
# recorded under this cell.
# NOTE(review): `decay` is a legacy-optimizer argument in newer Keras releases --
# confirm it is still accepted by the installed version.
opt = Adam(learning_rate=0.00001, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=["accuracy"])

  super().__init__(name, **kwargs)


In [25]:
# Model.fit accepts Sequence objects directly; fit_generator is deprecated.
# The returned History object is kept so the loss/metric curves can be inspected
# afterwards (and so its repr is not echoed as the cell output).
history = model.fit(train_generator,
                    validation_data=valid_generator,
                    epochs = 85)

Epoch 1/85


  This is separate from the ipykernel package so we can avoid doing imports until


Epoch 2/85
Epoch 3/85
Epoch 4/85
Epoch 5/85
Epoch 6/85
Epoch 7/85
Epoch 8/85
Epoch 9/85
Epoch 10/85
Epoch 11/85
Epoch 12/85
Epoch 13/85
Epoch 14/85
Epoch 15/85
Epoch 16/85
Epoch 17/85
Epoch 18/85
Epoch 19/85

KeyboardInterrupt: 

In [None]:
# Write the model to 'beta.h5', relative to the current working directory.
model.save('beta.h5')

# Test

In [None]:
def postprocessing(outputs, th):
    """Binarise predictions in place: values below ``th`` become 0, the rest 1.

    Args:
        outputs: iterable of NumPy arrays (for example a batch of predictions).
            Each array is modified in place.
        th: threshold; values ``>= th`` map to 1, values ``< th`` map to 0.
            NaN entries satisfy neither comparison and are left untouched.

    Returns:
        The same ``outputs`` object that was passed in.
    """
    for output in outputs:
        # Evaluate both masks on the untouched scores first. Thresholding in two
        # sequential in-place steps would re-test the zeros that were just written,
        # turning them into ones whenever th <= 0.
        below = output < th
        at_or_above = output >= th
        output[below] = 0
        output[at_or_above] = 1
    return outputs

In [None]:
import matplotlib.pyplot as plt

# Predictions at or above this value are plotted as 1, the rest as 0.
THRESHOLD = 0.6

# Created here because the cell that builds the train/validation generators leaves
# test_generator commented out, so the name would otherwise be undefined.
# shuffle=False keeps the plots in file order. Samples that do not fill a final
# batch of 3 are skipped by DataGenerator.
test_generator = DataGenerator(test_df, 3, shuffle=False)

# Index explicitly over len(...) so iteration stops after the last full batch.
for batch_idx in range(len(test_generator)):
    x, y = test_generator[batch_idx]
    output = postprocessing(model.predict(x), THRESHOLD)
    for y_true, y_pred in zip(y, output):
        fig, ax = plt.subplots()
        ax.plot(y_true, label='true')
        ax.plot(y_pred, label='predict')
        ax.legend()
        plt.show()
        # Release the figure so looping over many samples does not pile up memory.
        plt.close(fig)