In [None]:
# Mount Google Drive at /content/drive (Colab only); later cells read and
# write files under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **오디오 감정 분류 모델 생성**

# 라이브러리 호출

In [None]:
!pip install samplerate

Collecting samplerate
  Downloading samplerate-0.1.0-py2.py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 8.8 MB/s 
Installing collected packages: samplerate
Successfully installed samplerate-0.1.0


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import librosa
import librosa.display
from sklearn.preprocessing import scale
import math
import soundfile
import samplerate
import pickle
import joblib
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings("ignore")

# 모델 불러오기
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from tensorflow import keras
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Embedding, MaxPooling1D,Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping


# 모델 검증 및 평가
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

- 데이터 호출

In [None]:
# Load the label table; later cells use its "audio_name" and "emotion" columns.
data = pd.read_csv("/content/drive/MyDrive/kt/bigproject/nemotion/final_result.csv")

# 데이터 전처리

## 오디오 어그멘테이션
1. 오디오 노이즈 추가
2. 오디오 피치 조절

In [None]:
def noise(data, STD_N):
  """Write a copy of one recording with additive Gaussian noise.

  The recording is loaded from the nemotion project folder at 16 kHz and the
  noisy signal is saved as ``noise/n_<data>`` relative to the current working
  directory.

  Args:
    data: File name of the source recording (a string, not a DataFrame).
    STD_N: Standard deviation of the zero-mean Gaussian noise.

  Returns:
    None; the only effect is the written audio file.
  """
  xf, sr = librosa.load("/content/drive/MyDrive/kt/bigproject/nemotion/"+ data, sr = 16000)
  # One independent noise value per audio sample; unseeded, so every run differs.
  signal_noise = xf + np.random.normal(0, STD_N, xf.shape[0])
  # Make sure the output folder exists before writing into it.
  os.makedirs('noise', exist_ok=True)
  soundfile.write('noise/n_'+data, signal_noise, sr)

In [None]:
def pitch(data):
  """Write two resampled (pitch-shifted) copies of one recording.

  The recording is loaded from the nemotion project folder at 16 kHz,
  resampled with ratios 0.8 and 1.3, and both results are saved at the
  unchanged 16 kHz rate as ``pitch/h_<data>`` (ratio 0.8) and
  ``pitch/d_<data>`` (ratio 1.3) relative to the current working directory.
  The demo cell of this notebook labels ratio 0.8 "Higher pitch" and ratio
  1.3 "Lower pitch".

  Args:
    data: File name of the source recording (a string, not a DataFrame).

  Returns:
    None; the only effect is the two written audio files.
  """
  xx, sr = librosa.load("/content/drive/MyDrive/kt/bigproject/nemotion/"+data, sr=16000)
  # Make sure the output folder exists before writing into it.
  os.makedirs('pitch', exist_ok=True)
  for prefix, ratio in (('h_', 0.8), ('d_', 1.3)):
    soundfile.write('pitch/'+prefix+data, samplerate.resample(xx, ratio, 'sinc_best'), sr)

In [None]:
cd /content/drive/MyDrive/kt/bigproject/nemotion

/content/drive/MyDrive/kt/bigproject/nemotion


In [None]:
# Create a noisy copy (noise std 0.005) of every recording in the label table.
for audio_name in data["audio_name"]:
  noise(audio_name.strip(), 0.005)

In [None]:
# Create the two pitch-shifted copies of every recording in the label table.
for audio_name in data["audio_name"]:
  pitch(audio_name.strip())

- 실행 예시

In [None]:
# Noise demo: plot the first recording before and after adding Gaussian noise.
xf, sr = librosa.load("/content/drive/MyDrive/kt/bigproject/nemotion/"+ data["audio_name"][0].strip(), sr = 16000)
plt.plot(xf)
plt.show()
RMS = math.sqrt(np.mean(xf**2))  # signal RMS; computed for reference, not used below
STD_N = 0.01
# Deliberately not named `noise`: that would overwrite the noise() helper
# defined earlier and break any later call to it.
noise_samples = np.random.normal(0, STD_N, xf.shape[0])
signal_noise = xf+noise_samples
plt.plot(signal_noise)
plt.show()

In [None]:
# Pitch demo: resample the first recording and play both variants at the
# original sample rate.
import samplerate
import IPython.display as ipd
# The name is stripped here as in the batch loops, so a padded name in the
# CSV still resolves to the file on disk.
xx, sr = librosa.load("/content/drive/MyDrive/kt/bigproject/nemotion/"+data["audio_name"][0].strip(), sr=16000)
out_data1 = samplerate.resample(xx, 0.8, 'sinc_best')
out_data2 = samplerate.resample(xx, 1.3, 'sinc_best')

for caption, clip in (('Higher pitch:', out_data1), ('Lower pitch:', out_data2)):
  print(caption)
  ipd.display(ipd.Audio(clip, rate=sr))

Higher pitch:


Lower pitch:


In [None]:
# Save the noisy demo signal for the first recording.
# NOTE(review): this is the same path noise() writes for that recording, so
# the std-0.005 file produced by the batch loop is replaced by this std-0.01
# version -- confirm that is intended.
import soundfile
# The name is stripped so the path matches the one used by noise().
soundfile.write('noise/n_'+data["audio_name"][0].strip(),signal_noise,16000)

- 오디오 데이터 어그멘테이션 추가 라벨링

In [None]:
# os.listdir returns names in arbitrary order; sorting keeps the row order of
# the label table (and therefore the seeded splits later on) reproducible.
noise_list = sorted(os.listdir('./noise'))
pitch_list = sorted(os.listdir('./pitch'))

In [None]:
# Give every noise-augmented file the emotion of the recording it was made
# from. Augmented names are "n_<original name>", so the original name starts
# after the 2-character prefix.
known_names = set(data["audio_name"])
source_names = data["audio_name"].str.strip()
tmp = []
for noise_name in noise_list:
  if noise_name in known_names:
    # Already labelled (e.g. the CSV was saved by an earlier run); adding it
    # again would duplicate the row.
    continue
  # Exact comparison: a regex/substring match could pick up other file names.
  matches = data.loc[(source_names == noise_name[2:]).to_numpy(), "emotion"]
  if matches.empty:
    print("skipping file without a labelled source:", noise_name)
    continue
  tmp.append([noise_name, matches.iloc[0]])
if tmp:
  # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
  data = pd.concat([data, pd.DataFrame(tmp, columns=['audio_name','emotion'])], ignore_index=True)

In [None]:
# Give every pitch-shifted file the emotion of the recording it was made
# from. Augmented names are "h_<original name>" / "d_<original name>", so the
# original name starts after the 2-character prefix.
known_names = set(data["audio_name"])
source_names = data["audio_name"].str.strip()
tmp2 = []
for pitch_name in pitch_list:
  if pitch_name in known_names:
    # Already labelled (e.g. the CSV was saved by an earlier run); adding it
    # again would duplicate the row.
    continue
  # Exact comparison: a regex/substring match could pick up other file names.
  matches = data.loc[(source_names == pitch_name[2:]).to_numpy(), "emotion"]
  if matches.empty:
    print("skipping file without a labelled source:", pitch_name)
    continue
  tmp2.append([pitch_name, matches.iloc[0]])
if tmp2:
  # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
  data = pd.concat([data, pd.DataFrame(tmp2, columns=['audio_name','emotion'])], ignore_index=True)

In [None]:
# Renumber the rows 0..n-1 and discard the old index instead of keeping it
# as a column.
data = data.reset_index(drop=True)

In [None]:
# Persist the label table, which now also lists the augmented files.
# NOTE(review): the working directory was set to the nemotion folder earlier,
# so this appears to overwrite the same final_result.csv that is loaded at
# the top of the notebook -- confirm that is intended.
data.to_csv("final_result.csv", index=False, encoding='utf-8-sig')

## 오디오 피쳐 추출

- mfcc로 데이터 특징 추출
  - 파라미터
    - n_mfcc : 5
    - sr : 16000
    - n_fft : 400
    - hop_length : 160

In [None]:
# First character of the file name -> folder that holds that kind of
# augmented file ('n' = noise copies, 'h'/'d' = pitch copies).
augmented_dirs = {
  'n': "/content/drive/MyDrive/kt/bigproject/nemotion/noise/",
  'd': "/content/drive/MyDrive/kt/bigproject/nemotion/pitch/",
  'h': "/content/drive/MyDrive/kt/bigproject/nemotion/pitch/",
}

x = []
for i in tqdm(range(len(data))):
  audio_name = data["audio_name"][i]
  folder = augmented_dirs.get(audio_name[0])
  if folder is None:
    # Everything else is treated as an original recording; only these names
    # are stripped before building the path.
    audio_path = "/content/drive/MyDrive/kt/bigproject/nemotion/origin/"+ audio_name.strip()
  else:
    audio_path = folder + audio_name
  xf, sr = librosa.load(audio_path, sr = 16000)
  mfcc_1 = librosa.feature.mfcc(y=xf, sr=16000, n_mfcc=5, n_fft=400, hop_length = 160)
  # One row per file: the (unstripped) file name followed by the mean of each
  # MFCC coefficient; np.append turns the row into a single string-typed array.
  feature = np.append(audio_name, np.mean(mfcc_1.T, axis=0))
  x.append(feature)

100%|██████████| 1180/1180 [07:28<00:00,  2.63it/s]


In [None]:
# librosa.display.specshow(mfcc_1)

- 추출한 특징 저장

In [None]:
# Minute-resolution timestamp such as "2022-05-08_19_36", used as a suffix
# for the saved feature file.
today = datetime.datetime.today()
save_folder_name = today.strftime("%Y-%m-%d_%H_%M")

In [None]:
cd /content/drive/MyDrive/kt/bigproject/nemotion

/content/drive/MyDrive/kt/bigproject/nemotion


In [None]:
# Store the extracted feature rows under a timestamped file name.
feature_file_name = "base_x_feature"+save_folder_name+".pickle"
with open(feature_file_name, "wb") as out_file:
    pickle.dump(x, out_file)

## 데이터 정리

- mfcc로 데이터 특징 추출한 파일 호출

In [None]:
cd /content/drive/MyDrive/kt/bigproject/nemotion

/content/drive/MyDrive/kt/bigproject/nemotion


In [None]:
# Reload the feature rows from one specific earlier run (fixed file name).
# pickle.load can execute arbitrary code; only open files this project wrote.
with open('base_x_feature2022-05-08_19_36.pickle', 'rb') as in_file:
    x = pickle.load(in_file)

In [None]:
# Turn the list of feature rows into a table; per the preview, column 0 holds
# the file name and columns 1-5 the MFCC means.
x = pd.DataFrame(x)
x.head(2)

Unnamed: 0,0,1,2,3,4,5
0,1651753197166.wav,-566.843,70.81106,17.963993,10.04262,-11.183487
1,1651754164847.wav,-599.074,47.51433,15.505734,10.031389,-16.752226


In [None]:
# Re-file the name column (label 0) as "audio_name", keeping it as the last
# column, then attach the emotion label of each file by name.
x = x.assign(audio_name=x[0]).drop(columns=0)
dff = x.merge(data, on="audio_name")

In [None]:
# Target = emotion label; features = every remaining column except the name.
y = dff["emotion"]
x = dff.drop(columns=["audio_name", "emotion"])

In [None]:
# The feature values were stored as text next to the file name; convert them
# to 64-bit floats for the models.
x = x.astype(np.float64)

## 데이터 분할

In [None]:
# Stratified 80/20 split into train and test, followed by a stratified 80/20
# split of the training part into train and validation (fixed seed 42).
# NOTE(review): rows are split individually, so an original recording and its
# noise/pitch copies can end up in different subsets -- possible leakage;
# consider splitting by source recording.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

## 스케일링

In [None]:
# Work on copies so the unscaled splits stay available for the tree models.
x_train_s, x_val_s, x_test_s = (split.copy() for split in (x_train, x_val, x_test))

In [None]:
# Learn the min/max range from the training split only, then apply the same
# transformation to all three splits.
scaler = MinMaxScaler().fit(x_train_s)
x_train_s, x_val_s, x_test_s = (
    scaler.transform(split) for split in (x_train_s, x_val_s, x_test_s)
)

# 모델링

## 머신러닝

In [None]:
# Model name -> [accuracy], filled in by each model cell below.
val_pred_result, test_pred_result = {}, {}

### 1. SVM

In [None]:
# Tune an RBF-kernel SVM on the min-max scaled training features
# (GridSearchCV's default cross-validation; the log reports 5 folds).
svm_model = SVC()
params = {'C': [1, 10],
              'gamma': [1, 0.1],
              'kernel': ['rbf']}

model_svm = GridSearchCV(svm_model, params, refit = True, scoring='accuracy', verbose = 3)
model_svm.fit(x_train_s, y_train)
print(model_svm.best_params_)
print(model_svm.best_score_)

print("====="*20)
print("================== val 데이터로 평가 ====================")
y_pred = model_svm.predict(x_val_s)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
val_pred_result['SVM'] = [accuracy_score(y_val, y_pred)]

print("====="*20)
print("================== test 데이터로 평가 ====================")
# The model was fitted on scaled features, so the test features must go
# through the same scaling; predicting on raw x_test gives meaningless scores.
y_pred_test = model_svm.predict(x_test_s)
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
test_pred_result['SVM'] = [accuracy_score(y_test, y_pred_test)]

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.430 total time=   0.0s
[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.364 total time=   0.0s
[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.430 total time=   0.0s
[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.450 total time=   0.0s
[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.411 total time=   0.0s
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.371 total time=   0.0s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.377 total time=   0.0s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.377 total time=   0.0s
[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.377 total time=   0.0s
[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.377 total time=   0.0s
[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.404 total time=   0.0s
[CV 2/5] END .........C=10, gamma=1, kernel=rbf;,

### 2. RandomForest

In [None]:
# Grid-search a random forest on the unscaled features (3-fold CV), then
# report accuracy on the validation and test splits.
rf_model = RandomForestClassifier(random_state = 42)
params = dict(
    max_depth=range(3,6),
    n_estimators=range(50,500,50),
    min_samples_leaf=range(8,16,4),
)
model_rf = GridSearchCV(estimator=rf_model, param_grid=params, cv=3, scoring='accuracy', verbose = 3)
model_rf.fit(x_train,y_train)
print(model_rf.best_params_)
print(model_rf.best_score_)

separator = "====="*20

# Validation split
print(separator)
print("================== val 데이터로 평가 ====================")
y_pred = model_rf.predict(x_val)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
val_accuracy = accuracy_score(y_val, y_pred)
val_pred_result['RandomForest'] = [val_accuracy]

# Test split
print(separator)
print("================== test 데이터로 평가 ====================")
y_pred_test = model_rf.predict(x_test)
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
test_accuracy = accuracy_score(y_test, y_pred_test)
test_pred_result['RandomForest'] = [test_accuracy]

Fitting 3 folds for each of 54 candidates, totalling 162 fits
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=50;, score=0.401 total time=   0.1s
[CV 2/3] END max_depth=3, min_samples_leaf=8, n_estimators=50;, score=0.433 total time=   0.1s
[CV 3/3] END max_depth=3, min_samples_leaf=8, n_estimators=50;, score=0.458 total time=   0.1s
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=100;, score=0.397 total time=   0.2s
[CV 2/3] END max_depth=3, min_samples_leaf=8, n_estimators=100;, score=0.440 total time=   0.2s
[CV 3/3] END max_depth=3, min_samples_leaf=8, n_estimators=100;, score=0.458 total time=   0.2s
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=150;, score=0.401 total time=   0.3s
[CV 2/3] END max_depth=3, min_samples_leaf=8, n_estimators=150;, score=0.448 total time=   0.2s
[CV 3/3] END max_depth=3, min_samples_leaf=8, n_estimators=150;, score=0.450 total time=   0.3s
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=200;, score=0.413 

### 3. XGBoost

In [None]:
# Grid-search XGBoost on the unscaled features passed as plain arrays
# (3-fold CV), then report accuracy on the validation and test splits.
xgb_model = XGBClassifier(random_state = 42)

params = dict(
    max_depth=range(3,9),
    n_estimators=range(50,500,50),
    learning_rate=[0.1,0.5],
)

model_xgb = GridSearchCV(estimator=xgb_model, param_grid=params, cv=3, scoring='accuracy', verbose=3)
model_xgb.fit(x_train.values,y_train)
print(model_xgb.best_params_)
print(model_xgb.best_score_)

separator = "====="*20

# Validation split
print(separator)
print("================== val 데이터로 평가 ====================")
y_pred = model_xgb.predict(x_val.values)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
val_accuracy = accuracy_score(y_val, y_pred)
val_pred_result['XGB'] = [val_accuracy]

# Test split
print(separator)
print("================== test 데이터로 평가 ====================")
y_pred_test = model_xgb.predict(x_test.values)
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
test_accuracy = accuracy_score(y_test, y_pred_test)
test_pred_result['XGB'] = [test_accuracy]

Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.425 total time=   0.1s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.385 total time=   0.1s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.414 total time=   0.1s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.425 total time=   0.1s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.405 total time=   0.1s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.378 total time=   0.1s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.421 total time=   0.1s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.413 total time=   0.1s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.406 total time=   0.1s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=200;, score=0.409 total tim

### 4. LightGBM

In [None]:
# Grid-search LightGBM on the unscaled features passed as plain arrays
# (3-fold CV), then report accuracy on the validation and test splits.
lgbm_model = LGBMClassifier(random_state = 42)

params = {
    'max_depth': range(3,9),
    'n_estimators': range(30,200,50),
    'learning_rate': [0.1,0.5]
}

model_lgbm = GridSearchCV(lgbm_model,params, cv=3, scoring='accuracy', verbose=3)
model_lgbm.fit(x_train.values,y_train)
print(model_lgbm.best_params_)
print(model_lgbm.best_score_)

print("====="*20)
print("================== val 데이터로 평가 ====================")
# The model was fitted on a plain array, so predict on a plain array as well,
# exactly as the test evaluation below does.
y_pred = model_lgbm.predict(x_val.values)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
val_pred_result['LGBM'] = [accuracy_score(y_val, y_pred)]

print("====="*20)
print("================== test 데이터로 평가 ====================")
y_pred_test = model_lgbm.predict(x_test.values)
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
test_pred_result['LGBM'] = [accuracy_score(y_test, y_pred_test)]

Fitting 3 folds for each of 48 candidates, totalling 144 fits
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=30;, score=0.381 total time=   0.0s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=30;, score=0.381 total time=   0.0s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=30;, score=0.402 total time=   0.0s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=80;, score=0.385 total time=   0.0s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=80;, score=0.413 total time=   0.0s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=80;, score=0.394 total time=   0.0s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=130;, score=0.397 total time=   0.1s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=130;, score=0.417 total time=   0.1s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=130;, score=0.418 total time=   0.1s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=180;, score=0.397 total time=  

## 성능 비교

In [None]:
# One row per model with its accuracy on the test split.
test_result_df = pd.DataFrame.from_dict(
    test_pred_result, orient="index", columns=['test_accuracy']
)
test_result_df

Unnamed: 0,test_accuracy
SVM,0.377119
RandomForest,0.398305
XGB,0.389831
LGBM,0.398305
