<a href="https://colab.research.google.com/github/gchaewon/10th-Ewha-Festival-Front/blob/main/multi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
from scipy.io import wavfile
from collections import defaultdict, Counter
from scipy import signal
import numpy as np
import librosa
import sklearn
import random
from unicodedata import normalize
from keras.layers import Dense
from keras import Model
from keras import Input
from keras.utils import to_categorical
from keras.regularizers import l2
from keras.layers import Dense, TimeDistributed, Dropout, Bidirectional, GRU, BatchNormalization, Activation, LeakyReLU, LSTM, Flatten, RepeatVector, Permute, Multiply, Conv2D, MaxPooling2D

In [None]:
# Mount Google Drive into the Colab runtime; the dataset and model paths
# used below all live under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Load Data

In [None]:
# Train / test audio directories on the mounted Drive. Both sit in the same
# dataset folder, so they are built from one shared prefix.
DATA_TRAIN, DATA_TEST = (
    '/content/drive/My Drive/졸프/ML/STT/dataset/' + split
    for split in ('train', 'test')
)

# Preprocessing

In [None]:
# (padded_mfcc, label) pairs, filled by the loading cells below.
trainset, testset = [], []

# Per-split holders; the *_mfccs and *_y names are re-bound to arrays later on.
train_X, train_mfccs, train_y = [], [], []
test_X, test_mfccs, test_y = [], [], []


def pad1d(a, i):
  """Return `a` cut to its first `i` items, or right-padded with zeros up to length `i`."""
  if a.shape[0] > i:
    return a[0: i]
  return np.hstack((a, np.zeros(i - a.shape[0])))


def pad2d(a, i):
  """Return `a` with `i` columns: extra columns are dropped, missing ones are zero-filled."""
  if a.shape[1] > i:
    return a[:, 0:i]
  missing_cols = i - a.shape[1]
  return np.hstack((a, np.zeros((a.shape[0], missing_cols))))


# Framing parameters (presumably in seconds -- TODO confirm); the feature
# extraction cells in this section pass n_fft / hop_length directly instead.
frame_length = 0.025
frame_stride = 0.0010

In [None]:
from sklearn import preprocessing

# Build the training set: one (padded MFCC matrix, class index) pair per .wav file.
# Start from an empty list so that re-running this cell does not append
# every sample a second time.
trainset = []

# First character of the NFC-normalized file name -> class index.
train_label_of = {'어': 0, '음': 1, '그': 2}

for raw_name in os.listdir(DATA_TRAIN):
  # Normalize only for inspecting the name (Hangul may be listed in decomposed
  # form); the file itself is opened with the exact name the listing returned.
  filename = normalize('NFC', raw_name)
  try:
    if not filename.endswith('.wav'):
      continue

    # Decide the label before decoding audio, so files that belong to no
    # class are skipped without computing features for them.
    label = train_label_of.get(filename[0])
    if label is None:
      continue

    wav, sr = librosa.load(os.path.join(DATA_TRAIN, raw_name), sr=16000)

    mfcc = librosa.feature.mfcc(y=wav, sr=16000, n_mfcc=100, n_fft=400, hop_length=160)
    # Standardize each MFCC coefficient along axis 1, then force 40 columns.
    mfcc = sklearn.preprocessing.scale(mfcc, axis=1)
    padded_mfcc = pad2d(mfcc, 40)

    trainset.append((padded_mfcc, label))
  except Exception as e:
    # Report which file failed, then let the error stop the cell.
    print(filename, e)
    raise

random.shuffle(trainset)

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m


In [None]:
# Build the test set: one (padded MFCC matrix, class index) pair per .wav file.
# Start from an empty list so that re-running this cell does not append
# every sample a second time.
testset = []

# First character of the NFC-normalized file name -> class index.
test_label_of = {'어': 0, '음': 1, '그': 2}

for raw_name in os.listdir(DATA_TEST):
  # Normalize only for inspecting the name (Hangul may be listed in decomposed
  # form); the file itself is opened with the exact name the listing returned.
  filename = normalize('NFC', raw_name)
  try:
    # The original condition read `'.wav' not in filename in filename`, an
    # accidental chained comparison; a plain suffix check is what is meant.
    if not filename.endswith('.wav'):
      continue

    # Decide the label before decoding audio, so files that belong to no
    # class are skipped without computing features for them.
    label = test_label_of.get(filename[0])
    if label is None:
      continue

    wav, sr = librosa.load(os.path.join(DATA_TEST, raw_name), sr=16000)

    mfcc = librosa.feature.mfcc(y=wav, sr=16000, n_mfcc=100, n_fft=400, hop_length=160)
    # Standardize each MFCC coefficient along axis 1, then force 40 columns.
    mfcc = sklearn.preprocessing.scale(mfcc, axis=1)
    padded_mfcc = pad2d(mfcc, 40)

    testset.append((padded_mfcc, label))
  except Exception as e:
    # Report which file failed, then let the error stop the cell.
    print(filename, e)
    raise

random.shuffle(testset)



In [None]:
# Split the shuffled (mfcc, label) pairs into parallel feature / label sequences.
train_mfccs = [a for (a,b) in trainset]
train_y = [b for (a,b) in trainset]

test_mfccs = [a for (a,b) in testset]
test_y = [b for (a,b) in testset]

# One-hot encode with the class count pinned to 3 (labels 0, 1, 2). Without
# num_classes the width is inferred from the largest label present, so a split
# that happens to lack class 2 would no longer match the 3-unit softmax output.
train_mfccs = np.array(train_mfccs)
train_y = to_categorical(np.array(train_y), num_classes=3)

test_mfccs = np.array(test_mfccs)
test_y = to_categorical(np.array(test_y), num_classes=3)

print('train_mfccs:', train_mfccs.shape)
print('train_y:', train_y.shape)

print('test_mfccs:', test_mfccs.shape)
print('test_y:', test_y.shape)


train_mfccs: (2033, 100, 40)
train_y: (2033, 3)
test_mfccs: (150, 100, 40)
test_y: (150, 3)


# Training

In [None]:
# Conv2D expects a channel axis, so append one of size 1 to each feature array.
train_X_ex = train_mfccs[..., np.newaxis]
test_X_ex = test_mfccs[..., np.newaxis]

print('train X shape:', train_X_ex.shape)
print('test X shape:', test_X_ex.shape)

train X shape: (2033, 100, 40, 1)
test X shape: (150, 100, 40, 1)


In [None]:
# CNN classifier over the padded MFCC "image" (one sample of train_X_ex).
ip = Input(shape=train_X_ex[0].shape)

# Three conv/pool stages, each consuming the previous stage's output `m`.
# Previously the 2nd and 3rd Conv2D were applied to `ip`, which discarded the
# earlier stages and left a single conv layer in the model.
# Pooling is 2x2: with 4x4 pools the 40-wide axis would shrink to 1 after the
# second stage, too small for the third 4x4 convolution.
m = Conv2D(32, kernel_size=(4,4), activation='relu')(ip)
m = MaxPooling2D(pool_size=(2,2))(m)

m = Conv2D(32*2, kernel_size=(4,4), activation='relu')(m)
m = MaxPooling2D(pool_size=(2,2))(m)

m = Conv2D(32*3, kernel_size=(4,4), activation='relu')(m)
m = MaxPooling2D(pool_size=(2,2))(m)

m = Flatten()(m)

# Fully connected head with L2 weight regularization.
m = Dense(64, activation='relu', kernel_regularizer=l2(0.001))(m)

m = Dense(32, activation='relu', kernel_regularizer=l2(0.001))(m)

# One softmax unit per class (labels 0, 1, 2).
op = Dense(3, activation='softmax')(m)

model = Model(ip, op)

model.summary()

Model: "model_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 100, 40, 1)]      0         
                                                                 
 conv2d_35 (Conv2D)          (None, 97, 37, 96)        1632      
                                                                 
 max_pooling2d_33 (MaxPooli  (None, 24, 9, 96)         0         
 ng2D)                                                           
                                                                 
 flatten_9 (Flatten)         (None, 20736)             0         
                                                                 
 dense_23 (Dense)            (None, 64)                1327168   
                                                                 
 dense_24 (Dense)            (None, 32)                2080      
                                                           

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train on the full training split; `history` keeps the per-epoch metrics.
history = model.fit(
    x=train_X_ex,
    y=train_y,
    batch_size=128,
    epochs=50,
    verbose=1,
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

# Save model

In [None]:
# Persist the trained model to Drive as a single HDF5 (.h5) file.
model.save(
    filepath='/content/drive/My Drive/졸프/ML/STT/model/multi_model.h5'
)

  saving_api.save_model(
