In [1]:
import random
import logging
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
# from tqdm.auto import tqdm
from scipy.io.wavfile import read

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore')

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    tf.keras.utils.set_random_seed(seed)

seed_everything(42) # Seed 고정

In [3]:
train_df = pd.read_csv('./open/train.csv')
test_df = pd.read_csv('./open/test.csv')

train_df['path'] = train_df['path'].str[2:]
test_df['path'] = test_df['path'].str[2:]

train_df['path'] = './open/' + train_df['path']
test_df['path'] = './open/' + test_df['path']

In [4]:
def get_wav_arr(df, train=True):
    features = []
    labels = []
    for path in tqdm(df['path']):
        feature = read(path)
        feature = np.array(feature[1], dtype=np.float32)
        features.append(feature)
    if train:
        for label in tqdm(df['label']):
            labels.append(label)
        return features,labels
    else:
        return features

In [5]:
x_train, y_train = get_wav_arr(train_df)
x_test = get_wav_arr(test_df, False)

100%|██████████| 5001/5001 [00:02<00:00, 2277.53it/s]
100%|██████████| 5001/5001 [00:00<00:00, 4768291.50it/s]
100%|██████████| 1881/1881 [00:00<00:00, 2315.76it/s]


In [6]:
current_len = 0
for i in tqdm(range(len(x_train))):
    if len(x_train[i]) > current_len:
        current_len = len(x_train[i])

100%|██████████| 5001/5001 [00:00<00:00, 5008527.77it/s]


In [7]:
current_len

77945

In [8]:
def find_target_length(lst):
    current_len = 0
    for i in tqdm(range(len(lst))):
        if len(lst[i]) > current_len:
            current_len = len(lst[i])
    return current_len

def make_numpy_padding(lst, test):
    current_len = find_target_length(lst)
    current_len = find_target_length(test)

    new_lst = []
    new_lst2 = []

    for wav in tqdm(lst):
        if len(wav) < current_len:
            padding = np.zeros(current_len - len(wav))
            nn = np.concatenate((wav, padding))
            new_lst.append(nn)
        else:
            new_lst.append(wav)
        
    for wav in tqdm(test):
        if len(wav) < current_len:
            padding = np.zeros(current_len - len(wav))
            nn = np.concatenate((wav, padding))
            new_lst2.append(nn)
        else:
            new_lst2.append(wav)

    return np.array(new_lst), np.array(new_lst2)

In [9]:
train,test = make_numpy_padding(x_train, x_test)
train_y = np.array(y_train)

100%|██████████| 5001/5001 [00:00<00:00, 5008527.77it/s]
100%|██████████| 1881/1881 [00:00<00:00, 1879791.71it/s]
100%|██████████| 5001/5001 [00:00<00:00, 5453.32it/s]
100%|██████████| 1881/1881 [00:00<00:00, 4892.94it/s]


In [10]:
scaler = MinMaxScaler()
train = scaler.fit_transform(pd.DataFrame(train))
test = scaler.transform(pd.DataFrame(test))

In [11]:
print('reshape 전')
print('x : ', train.shape, test.shape)
print('y : ', train_y.shape)
print('reshape 후')
train = train.reshape(5001,80080,1)
test = test.reshape(1881,80080,1)
print('x : ', train.shape, test.shape)
print('y : ', train_y.shape)

reshape 전
x :  (5001, 80080) (1881, 80080)
y :  (5001,)
reshape 후
x :  (5001, 80080, 1) (1881, 80080, 1)
y :  (5001,)


In [12]:
num_classes = len(train_df['label'].unique())

def My_LSTM(input_size = (80080, 1), output_size = num_classes):
    inputs = layers.Input(shape = input_size)
    lstm_layer = layers.LSTM(128)(inputs)
    dense_layer = layers.Dense(64, activation='relu')(lstm_layer)
    outputs = layers.Dense(num_classes, activation='softmax')(dense_layer)

    model = keras.models.Model(inputs = inputs, outputs = [outputs])

    return model

model = My_LSTM()
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 80080, 1)]        0         
                                                                 
 lstm (LSTM)                 (None, 128)               66560     
                                                                 
 dense (Dense)               (None, 64)                8256      
                                                                 
 dense_1 (Dense)             (None, 6)                 390       
                                                                 
Total params: 75,206
Trainable params: 75,206
Non-trainable params: 0
_________________________________________________________________


In [13]:
hist = model.fit(train, train_y, batch_size=64, validation_split=.2, epochs=1)

In [122]:
num_classes = len(train_df['label'].unique())

class MyModel(keras.models.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.lstm_layer = layers.LSTM(128)
        self.dense_layer = layers.Dense(64, activation='relu')
        self.output_layer = layers.Dense(num_classes, activation='softmax')
        
    def call(self, inputs):
        x = self.lstm_layer(inputs)
        x = self.dense_layer(x)
        outputs = self.output_layer(x)
        return outputs
    
model = MyModel()

In [126]:
input_shape = (None, 1)
inputs = layers.Input(shape = input_shape)
lstm_layer = layers.LSTM(128)(inputs)
dense_layer = layers.Dense(64, activation='relu')(lstm_layer)
outputs = layers.Dense(num_classes, activation='softmax')(dense_layer)

model = keras.models.Model(inputs = inputs, outputs = outputs)

In [127]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 lstm_7 (LSTM)               (None, 128)               66560     
                                                                 
 dense_8 (Dense)             (None, 64)                8256      
                                                                 
 dense_9 (Dense)             (None, 6)                 390       
                                                                 
Total params: 75,206
Trainable params: 75,206
Non-trainable params: 0
_________________________________________________________________


In [131]:
np.asanyarray(x_train)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (5001,) + inhomogeneous part.

In [128]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='acc')
model.fit(x_train, y_train, epochs=1)

ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>"}), (<class 'list'> containing values of types {"<class 'int'>"})

In [95]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='acc')
model.fit(x_train, y_train, epochs=1)

ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>"}), (<class 'list'> containing values of types {"<class 'int'>"})

In [80]:
model.compile(optimizer='adam', loss='sparse_categorical_entropy', metrics='acc')

In [88]:
np.expand_dims(x_train[0], axis=0).shape

(1, 37904)

In [81]:
model.fit(x_train, y_train)

ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>"}), (<class 'list'> containing values of types {"<class 'int'>"})