# 오디오 감정 분류 모델 베이스 라인

## 라이브러리 호출

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import tensorflow as tf
import keras
from tqdm import tqdm
from glob import glob
from scipy.io import wavfile
import librosa
import librosa.display

## 데이터 전처리

In [2]:
data = pd.read_csv("./data.csv")

### mfcc로 데이터 특징 추출
- n_mfcc : 5
- 참고 : 파일명을 찾을 수 없는 경우가 있어서 제외 시켜줌

In [3]:
import pickle

with open('x.pickle', 'rb') as f:
    x = pickle.load(f)
with open('y.pickle', 'rb') as f:
    y = pickle.load(f)

In [4]:
print(len(x))

43975


In [5]:
x = pd.DataFrame(x)
x.head(2)

Unnamed: 0,0,1,2,3,4
0,-434.244049,80.879303,2.758545,24.256662,-10.964857
1,-414.942932,71.78363,2.073137,16.111912,-12.043771


### 재라벨링 & 라벨 인코딩
- y값 중 sad와 sadness는 sad로 통일
- 정답값을 라벨인코더를 통해 범주형 변수를 수치형 변수로 변경

In [6]:
y = pd.DataFrame(y)

In [7]:
y[0].value_counts()

sad          11138
anger         8370
disgust       4660
happiness     4548
fear          4131
angry         3263
neutral       3262
sadness       2848
surprise      1755
Name: 0, dtype: int64

In [8]:
y[y[0] == 'sadness'] = 'sad'

In [9]:
y[0].value_counts()

sad          13986
anger         8370
disgust       4660
happiness     4548
fear          4131
angry         3263
neutral       3262
surprise      1755
Name: 0, dtype: int64

In [10]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y = encoder.fit_transform(y[0])

### train test 셋 분리

In [11]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

In [12]:
print(x_train.shape, x_test.shape, x_val.shape, y_train.shape, y_test.shape, y_val.shape)

(28144, 5) (8795, 5) (7036, 5) (28144,) (8795,) (7036,)


### 스케일링

In [13]:
x_train_s = x_train.copy()
x_val_s = x_val.copy()
x_test_s = x_test.copy()

In [14]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
x_train_s = scaler.fit_transform(x_train_s)
x_val_s = scaler.transform(x_val_s)
x_test_s = scaler.transform(x_test_s)

## 모델링

In [15]:
# 모델 불러오기
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# 모델 검증 및 평가
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [16]:
pred_result = {}

#### SVM

In [17]:
svm_model = SVC()
params = {'C': [1, 10],
              'gamma': [1, 0.1],
              'kernel': ['rbf']}
 
model_svm = GridSearchCV(svm_model, params, refit = True, scoring='accuracy', verbose = 3)
model_svm.fit(x_train_s, y_train)
print(model_svm.best_params_)
print(model_svm.best_score_)
y_pred = model_svm.predict(x_val_s)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
pred_result['SVM'] = [accuracy_score(y_val, y_pred)]

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.318 total time=  38.7s
[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.318 total time=  38.2s
[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.318 total time=  38.2s
[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.318 total time=  38.2s
[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.318 total time=  38.3s
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.318 total time=  31.4s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.318 total time=  31.8s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.318 total time=  31.7s
[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.318 total time=  31.2s
[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.318 total time=  31.5s
[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.321 total time= 1.0min
[CV 2/5] END .........C=10, gamma=1, kernel=rbf;,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### RF

In [18]:
rf_model = RandomForestClassifier(random_state = 42)

params = {
    'max_depth': range(3,6),
    'n_estimators': range(50,200,50),
    'min_samples_leaf':range(8,16,4)
}

model_rf = GridSearchCV(rf_model,params, cv=3, scoring='accuracy', verbose = 3)
model_rf.fit(x_train,y_train)

print(model_rf.best_params_)
print(model_rf.best_score_)
y_pred = model_rf.predict(x_val)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
pred_result['RandomForest'] = [accuracy_score(y_val, y_pred)]

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=50;, score=0.321 total time=   0.7s
[CV 2/3] END max_depth=3, min_samples_leaf=8, n_estimators=50;, score=0.321 total time=   0.7s
[CV 3/3] END max_depth=3, min_samples_leaf=8, n_estimators=50;, score=0.322 total time=   0.7s
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=100;, score=0.321 total time=   1.4s
[CV 2/3] END max_depth=3, min_samples_leaf=8, n_estimators=100;, score=0.321 total time=   1.4s
[CV 3/3] END max_depth=3, min_samples_leaf=8, n_estimators=100;, score=0.322 total time=   1.4s
[CV 1/3] END max_depth=3, min_samples_leaf=8, n_estimators=150;, score=0.321 total time=   2.1s
[CV 2/3] END max_depth=3, min_samples_leaf=8, n_estimators=150;, score=0.321 total time=   2.1s
[CV 3/3] END max_depth=3, min_samples_leaf=8, n_estimators=150;, score=0.322 total time=   2.2s
[CV 1/3] END max_depth=3, min_samples_leaf=12, n_estimators=50;, score=0.321 t

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### XGB

In [19]:
xgb_model = XGBClassifier(random_state = 42)

params = {
    'max_depth': range(3,6),
    'n_estimators': range(50,200,50),
    'learning_rate': [0.1,0.05,0.01]
}

model_xgb = GridSearchCV(xgb_model,params, cv=3, scoring='accuracy', verbose=3)
model_xgb.fit(x_train,y_train)
print(model_xgb.best_params_)
print(model_xgb.best_score_)

y_pred = model_xgb.predict(x_val)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
pred_result['XGB'] = [accuracy_score(y_val, y_pred)]

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.330 total time=  11.3s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.330 total time=  10.9s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.328 total time=  11.8s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.333 total time=  22.1s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.330 total time=  22.5s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.328 total time=  22.6s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.332 total time=  33.3s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.333 total time=  33.7s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.329 total time=  35.1s
[CV 1/3] END learning_rate=0.1, max_depth=4, n_estimators=50;, score=0.333 total time= 

 #### LGBM

In [22]:
lgbm_model = LGBMClassifier(random_state = 42)

params = {
    'max_depth': range(3,6),
    'n_estimators': range(50,200,50),
    'learning_rate': [0.1,0.05,0.01]
}

model_lgbm = GridSearchCV(lgbm_model,params, cv=3, scoring='accuracy', verbose=3)
model_lgbm.fit(x_train,y_train)
print(model_lgbm.best_params_)
print(model_lgbm.best_score_)

y_pred = model_lgbm.predict(x_val)
print(confusion_matrix(y_val, y_pred))
print(classification_report(y_val, y_pred))
pred_result['LGBM'] = [accuracy_score(y_val, y_pred)]

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.335 total time=   9.0s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.332 total time=   9.2s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=50;, score=0.329 total time=   8.9s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.335 total time=  20.5s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.332 total time=  19.2s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.330 total time=  20.0s
[CV 1/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.334 total time=  27.2s
[CV 2/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.330 total time=  34.6s
[CV 3/3] END learning_rate=0.1, max_depth=3, n_estimators=150;, score=0.329 total time=  31.4s
[CV 1/3] END learning_rate=0.1, max_depth=4, n_estimators=50;, score=0.334 total time= 

In [23]:
pred_result

{'SVM': [0.3219158612848209],
 'RandomForest': [0.33186469584991474],
 'XGB': [0.3342808413871518],
 'LGBM': [0.334138715179079]}

### 성능 비교

In [24]:
result_df = pd.DataFrame(pred_result).T
result_df.columns = ['accuracy']
result_df

Unnamed: 0,accuracy
SVM,0.321916
RandomForest,0.331865
XGB,0.334281
LGBM,0.334139


- 가장 성능이 높게 나온 모델은 LGBM이고 accuracy는 0.334139다

### test 데이터로 평가

In [25]:
y_pred = model_lgbm.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 309   11    2    3   14    1 1333    1]
 [  57   32    7    1   62    2  488    4]
 [ 102   20    9    3   60    2  733    3]
 [ 108   16    5    7   23    3  660    4]
 [  89   17    4    2  121    3  669    5]
 [  49   13    4    1   56    1  525    3]
 [ 238   28   10    7   66    3 2444    1]
 [  26   29    3    3   35    1  252    2]]
              precision    recall  f1-score   support

           0       0.32      0.18      0.23      1674
           1       0.19      0.05      0.08       653
           2       0.20      0.01      0.02       932
           3       0.26      0.01      0.02       826
           4       0.28      0.13      0.18       910
           5       0.06      0.00      0.00       652
           6       0.34      0.87      0.49      2797
           7       0.09      0.01      0.01       351

    accuracy                           0.33      8795
   macro avg       0.22      0.16      0.13      8795
weighted avg       0.27      0.33      0.23      8795



In [30]:
import joblib

joblib.dump(model_lgbm, './model/model_lgbm.pkl')

['./model/model_lgbm.pkl']