# 사용하는 함수, 라이브러리

In [1]:
import os
import numpy as np
from scipy.io import wavfile
import librosa
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from joblib import dump
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# Convert a headerless 16-bit PCM (.raw) file into a mono WAV file.
def raw_to_wav(file_path, sample_rate, directory, filename):
    """Convert a raw int16 PCM file to a mono WAV file.

    The file is read as a flat stream of int16 samples, every two
    consecutive samples are averaged into one output sample, and the
    result is written to ``<directory>/<filename>.wav``.

    NOTE(review): pairing consecutive samples presumably assumes
    interleaved 2-channel input -- confirm against the actual data; if the
    recordings are already mono this halves the number of samples.

    Args:
        file_path: Path of the .raw file to read.
        sample_rate: Sample rate (Hz) stored in the WAV header.
        directory: Folder the WAV file is written into.
        filename: Output file name without the ``.wav`` extension.

    Raises:
        ValueError: If the file holds an odd number of samples, so it
            cannot be reshaped into two-sample frames.
    """
    raw_data = np.fromfile(file_path, dtype=np.int16)
    frames = raw_data.reshape(-1, 2)
    # Average in float64 (NumPy's default for integer input) and only then
    # cast back. Passing dtype=np.int16 to np.mean makes the pairwise sum
    # itself int16, which wraps around for loud samples
    # (e.g. 20000 + 20000 -> -25536 before the division).
    raw_data_mono = frames.mean(axis=1).astype(np.int16)
    output_file = os.path.join(directory, filename) + '.wav'
    wavfile.write(output_file, sample_rate, raw_data_mono)
    
# Extract fixed-size MFCC features from a WAV file.
def extract_mfcc(wav_file, duration_sec=7, n_mfcc=39):
    """Load a WAV file and return MFCCs computed over a fixed-length signal.

    The audio is loaded with ``librosa.load`` using its defaults (per the
    librosa documentation that means resampling to 22050 Hz mono), then
    truncated or zero-padded at the end to exactly ``duration_sec`` seconds
    so that every file yields an MFCC matrix of the same shape.

    Args:
        wav_file: Path of the WAV file to load.
        duration_sec: Length in seconds the signal is forced to.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        The array returned by ``librosa.feature.mfcc`` for the fixed-length
        signal.
    """
    audio, sr = librosa.load(wav_file)
    target_length = int(duration_sec * sr)
    # Truncate first: for a clip longer than the target the pad width below
    # would be negative and np.pad would raise ValueError.
    audio = audio[:target_length]
    audio_fixed = np.pad(audio, (0, target_length - len(audio))).astype(np.float32)
    mfcc = librosa.feature.mfcc(y=audio_fixed, sr=sr, n_mfcc=n_mfcc)
    return mfcc

# Convert a headerless 16-bit PCM (.raw) file into a mono WAV file (test-set variant).
def test_raw_to_wav(file_path, sample_rate, filename):
    """Convert a raw int16 PCM file to a mono WAV file at ``<filename>.wav``.

    Same conversion as the training helper, except that the output path is
    just ``filename`` plus ``.wav`` (no separate directory argument).
    Every two consecutive int16 samples are averaged into one output sample.

    NOTE(review): pairing consecutive samples presumably assumes
    interleaved 2-channel input -- confirm against the actual data.

    Args:
        file_path: Path of the .raw file to read.
        sample_rate: Sample rate (Hz) stored in the WAV header.
        filename: Output path without the ``.wav`` extension.

    Raises:
        ValueError: If the file holds an odd number of samples, so it
            cannot be reshaped into two-sample frames.
    """
    raw_data = np.fromfile(file_path, dtype=np.int16)
    frames = raw_data.reshape(-1, 2)
    # Average in float64 and cast afterwards; with dtype=np.int16 the
    # pairwise sum is accumulated in int16 and wraps around for loud samples.
    raw_data_mono = frames.mean(axis=1).astype(np.int16)
    # os.path.join() with a single argument returned it unchanged, so the
    # output path is simply the given name plus the extension.
    output_file = filename + '.wav'
    wavfile.write(output_file, sample_rate, raw_data_mono)


# 시작
##### fmcc_train.ctl에서 학습 파일명 목록을 읽어옴

In [2]:
# Read the training control file; each non-empty line is one entry.
with open('fmcc_train.ctl', 'r') as file:
    file_contents = file.read()
# splitlines() plus the blank-line filter keeps every entry whether or not
# the file ends with a newline. Unconditionally dropping the last element
# of split('\n') loses a real entry when the trailing newline is missing.
files = [line for line in file_contents.splitlines() if line.strip()]

FCJY0/FCJY0_pbw1001


##### 작업공간 변경

In [3]:
# Step into ./raw16k and then ./train, printing the working directory
# before and after the move.
print(os.getcwd())
for subdir in ('./raw16k', './train'):
    os.chdir(subdir)
print(os.getcwd())

c:\Users\hanja\ML_Project
c:\Users\hanja\ML_Project\raw16k\train


##### raw 파일 wav 형식으로 변환

In [4]:
# Split every control-file entry ("<folder>/<file>") into its parts and
# build the absolute path of the matching .raw file. The lists are sized
# from `files` itself instead of a hard-coded 10000, so they always line up
# with loops that iterate over len(files).
train_root = os.getcwd()  # the working directory does not change inside the loop

directory = []  # training-data folder name of each entry
file_name = []  # training-data file name (without extension) of each entry
fullpath = []   # absolute path of each .raw file

for entry in files:
    folder, name = entry.split('/')
    directory.append(folder)
    file_name.append(name)
    fullpath.append(os.path.join(train_root, folder, name) + '.raw')

In [5]:
# Convert every training .raw file to WAV with a 16000 Hz header; each WAV
# goes to <directory>/<file_name>.wav relative to the current working directory.
for idx, _ in enumerate(files):
    raw_to_wav(fullpath[idx], 16000, directory[idx], file_name[idx])

# 학습 과정에서의 Test
##### TrainSet에서 20%를 검증

In [6]:
# Extract MFCC features from every converted training WAV file, in
# control-file order (paths are relative to the current working directory).
mfccs = [
    extract_mfcc(os.path.join(folder, name) + '.wav')
    for folder, name in zip(directory, file_name)
]

###### 전체 데이터를 앞 80%는 학습, 뒤 20%는 테스트로 단순 분할하면 학습에 전혀 포함되지 않는 화자가 생기기 때문에
###### 화자 1인당 음성 200개 중 앞 160개는 학습, 뒤 40개는 테스트로 나눔

In [7]:
# The first 5000 feature matrices are treated as female, the next 5000 as male.
female_mfcc = mfccs[0:5000]
male_mfcc = mfccs[5000:10000]

female_train, female_test = [], []
male_train, male_test = [], []

# Walk each half in consecutive groups of 200 (one group per speaker):
# the first 160 items of a group go to training, the remaining 40 to testing.
for pool, train_bucket, test_bucket in (
    (female_mfcc, female_train, female_test),
    (male_mfcc, male_train, male_test),
):
    for start in range(0, 5000, 200):
        cut = start + 160
        train_bucket.extend(pool[start:cut])
        test_bucket.extend(pool[cut:start + 200])

# Label 0 = female, 1 = male; the order matches the concatenation below.
trainset_label = np.repeat([0.0, 1.0], 4000)
testset_label = np.repeat([0.0, 1.0], 1000)

# Stack the per-file MFCC matrices and flatten each one into a single
# feature vector (one row per file).
trainset_data = np.array(female_train + male_train)
trainset_data = trainset_data.reshape(len(trainset_data), -1)

testset_data = np.array(female_test + male_test)
testset_data = testset_data.reshape(len(testset_data), -1)

##### 정확도 평가

In [8]:
# One pipeline object: feature standardisation followed by an SVC with
# default settings.
pipeline_steps = (StandardScaler(), SVC())
svm_model = make_pipeline(*pipeline_steps)
svm_model.fit(trainset_data, trainset_label)

# Score the held-out part of the training set.
prediction = svm_model.predict(testset_data)
accuracy = accuracy_score(y_true=testset_label, y_pred=prediction)
print("정확도: ", accuracy)

정확도:  0.9475


# TestSet으로 검증

In [9]:
# Go two levels up, back to the folder holding the control files.
os.chdir("../")
os.chdir("../")

# Read the test control file; each non-empty line is one entry.
with open('fmcc_test900.ctl', 'r') as file:
    file_contents = file.read()
# splitlines() plus the blank-line filter keeps every entry whether or not
# the file ends with a newline. Unconditionally dropping the last element
# of split('\n') loses a real entry when the trailing newline is missing.
testFiles = [line for line in file_contents.splitlines() if line.strip()]

# Move into ./raw16k/test, printing the working directory at each step.
print(os.getcwd())
os.chdir("./raw16k")
print(os.getcwd())
os.chdir("./test")
print(os.getcwd())

c:\Users\hanja\ML_Project
c:\Users\hanja\ML_Project\raw16k
c:\Users\hanja\ML_Project\raw16k\test


In [10]:
# Absolute path of every test .raw file. The list is sized from testFiles
# itself instead of a hard-coded 900, so it always lines up with loops that
# iterate over len(testFiles).
test_root = os.getcwd()  # the working directory does not change while building the list
path = [os.path.join(test_root, name) + '.raw' for name in testFiles]

In [11]:
# Test-set layout: files 1-200 and 301-500 are female, 501-1000 are male
# (201-300 are excluded).
# Convert every test .raw file to WAV with a 16000 Hz header.
for idx, name in enumerate(testFiles):
    test_raw_to_wav(path[idx], 16000, name)

##### TestSet의 Mfcc 추출

In [12]:
# MFCC features of the first 900 test entries, in control-file order.
TestMfccs = [extract_mfcc(testFiles[k] + '.wav') for k in range(900)]

# Ground truth: the first 400 entries are labelled 0, the remaining 500 are labelled 1.
Test_label = np.repeat([0.0, 1.0], [400, 500])

# Stack the MFCC matrices and flatten each into one feature vector per file.
Test_data = np.array(TestMfccs)
Test_data = Test_data.reshape(len(Test_data), -1)

##### 기존 모델에 테스트

In [13]:
# Run the already-fitted pipeline on the test set and report its accuracy.
predict = svm_model.predict(Test_data)
Accuracy = accuracy_score(y_true=Test_label, y_pred=predict)
print("테스트셋 정확도: ", Accuracy)

테스트셋 정확도:  0.9444444444444444


##### 방금 실행된 테스트의 예측값을 바탕으로 테스트 결과 txt파일 작성

In [14]:
# Go two levels up so the result file is written there.
os.chdir('../')
os.chdir('../')

# Predicted class -> label text used in the result file.
label_names = {0: 'feml', 1: 'male'}

# One line per prediction: "<test file name> <label>". Predictions that are
# neither 0 nor 1 are skipped. The with-block closes the file on exit.
with open('MM_test_results.txt', 'w') as file:
    for idx, label in enumerate(predict):
        gender = label_names.get(label)
        if gender is None:
            continue
        file.write('%s %s\n' % (testFiles[idx], gender))

# Return to the test-data folder.
os.chdir('./raw16k')
os.chdir('./test')

# 학습과정의 손실 시각화


##### 모든 CPU 코어를 사용하는(n_jobs=-1) 교차검증, 폴드 수 = 2 (최소 교차 검증)
##### 1회당 추가되는 데이터는 500개 -> 총 20번 검증

In [17]:
# a = 250
# train_data = female_mfcc[:a] + male_mfcc[:a]
# train_data = np.array(train_data)
# train_data = train_data.reshape(train_data.shape[0],-1)
# train_label = np.concatenate((np.zeros(a), np.ones(a)))

# losses = [0 for i in range(20)]

# for i in range(20):
#     scores = cross_val_score(svm_model, train_data, train_label, cv=2, scoring='accuracy', n_jobs=-1)
#     losses[i] = 1 - scores
#     a += 250
#     train_data = female_mfcc[:a] + male_mfcc[:a]
#     train_data = np.array(train_data)
#     train_data = train_data.reshape(train_data.shape[0],-1)
#     train_label = np.concatenate((np.zeros(a), np.ones(a)))

In [18]:
# mean = [0 for i in range(20)]
# for i in range(len(mean)):
#     mean[i] = (losses[i][0] + losses[i][1]) / 2

In [19]:
# plt.plot(np.arange(500, 10001, 500), mean)
# plt.xlabel('Number of datas')
# plt.ylabel('Loss')
# plt.xlim([1500, 10000])
# plt.ylim([0.075, 0.27])
# plt.title('Loss Curve')
# plt.show()