In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
import tensorflow as tf
from tqdm import tqdm
from glob import glob
from google.colab import drive

import librosa
import librosa.display as dsp
import IPython.display as ipd

warnings.filterwarnings(action='ignore')
drive.mount('/content/drive')

%cd '/content/drive/MyDrive/deep-voice/data/'

Mounted at /content/drive
/content/drive/MyDrive/deep-voice/data


In [None]:
import torch

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') #GPU 할당

In [None]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

seed_everything(813)

## 원본 데이터 불러오기

In [None]:
# 저장된 데이터 불러오기
X_deepvoice_mel = np.load('X_deepvoice_mel.npy')
X_deepvoice_mfcc = np.load('X_deepvoice_mfcc.npy')
X_normal_mel = np.load('X_normal_mel.npy')
X_normal_mfcc = np.load('X_normal_mfcc.npy')

# RAW 데이터 변경 시에만 실행

In [None]:
print("Original shape:")
print(X_deepvoice_mel.shape, X_deepvoice_mfcc.shape, X_normal_mel.shape, X_normal_mfcc.shape)

Original shape:
(800, 128, 2970) (800, 100, 2970) (800, 128, 2970) (800, 100, 2970)


## 데이터 표준화

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# 데이터를 Standardize (표준화) 합니다.
# PCA를 적용하기 전에 각 특성의 평균을 0, 분산을 1로 조정합니다.
scaler = StandardScaler()

X_deepvoice_mel_std = scaler.fit_transform(X_deepvoice_mel.reshape(X_deepvoice_mel.shape[0], -1))
X_deepvoice_mfcc_std = scaler.fit_transform(X_deepvoice_mfcc.reshape(X_deepvoice_mfcc.shape[0], -1))
X_normal_mel_std = scaler.fit_transform(X_normal_mel.reshape(X_normal_mel.shape[0], -1))
X_normal_mfcc_std = scaler.fit_transform(X_normal_mfcc.reshape(X_normal_mfcc.shape[0], -1))

In [None]:
np.save('X_deepvoice_mel_std.npy', X_deepvoice_mel_std)
np.save('X_deepvoice_mfcc_std.npy', X_deepvoice_mfcc_std)
np.save('X_normal_mel_std.npy', X_normal_mel_std)
np.save('X_normal_mfcc_std.npy', X_normal_mfcc_std)

## STD 데이터 불러오기

In [None]:
# 저장된 데이터 불러오기
X_deepvoice_mel_std = np.load('X_deepvoice_mel_std.npy')
X_deepvoice_mfcc_std = np.load('X_deepvoice_mfcc_std.npy')
X_normal_mel_std = np.load('X_normal_mel_std.npy')
X_normal_mfcc_std = np.load('X_normal_mfcc_std.npy')

# RAW 데이터 변경 시에만 실행

In [None]:
# PCA 인스턴스 생성 및 적용
# 설명된 분산의 비율을 기반으로 차원 수를 선택할 수 있습니다.
# 예를 들어, n_components=0.95는 데이터 분산의 95%를 유지하도록 차원을 축소합니다.
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)

X_deepvoice_mel_pca = pca.fit_transform(X_deepvoice_mel_std)
X_deepvoice_mfcc_pca = pca.fit_transform(X_deepvoice_mfcc_std)
X_normal_mel_pca = pca.fit_transform(X_normal_mel_std)
X_normal_mfcc_pca = pca.fit_transform(X_normal_mfcc_std)

In [None]:
# 새로운 차원의 크기 확인
print("New shape after PCA:")
print(X_deepvoice_mel_pca.shape, X_deepvoice_mfcc_pca.shape, X_normal_mel_pca.shape, X_normal_mfcc_pca.shape)

New shape after PCA:
(800, 372) (800, 574) (800, 411) (800, 633)


In [None]:
np.save('X_deepvoice_mel_pca.npy', X_deepvoice_mel_pca)
np.save('X_deepvoice_mfcc_pca.npy', X_deepvoice_mfcc_pca)
np.save('X_normal_mel_pca.npy', X_normal_mel_pca)
np.save('X_normal_mfcc_pca.npy', X_normal_mfcc_pca)

## 데이터셋 병합 및 저장

In [None]:
# 데이터 병합 및 라벨 생성
X_mel = np.concatenate([X_deepvoice_mel, X_normal_mel])
X_mfcc = np.concatenate([X_deepvoice_mfcc, X_normal_mfcc])
y = np.concatenate([np.zeros(len(X_deepvoice_mel)), np.ones(len(X_normal_mel))]) # 0: deepvoice, 1: normal

# RAW 데이터 변경 시에만 실행

In [None]:
# # Mel Spectrogram 데이터 저장
# np.save('X_mel.npy', X_mel)
# np.save('X_mfcc.npy', X_mfcc)

# # 라벨 데이터 저장
# np.save('y.npy', y)

# RAW 데이터 변경 시에만 실행

## 저장한 데이터셋 불러오기

In [3]:
# Mel Spectrogram 데이터 불러오기
X_mel_loaded = np.load('X_mel.npy')
X_mfcc_loaded = np.load('X_mfcc.npy')

# 라벨 데이터 불러오기
y_loaded = np.load('y.npy')

In [4]:
from sklearn.model_selection import train_test_split

# Mel-spectrogram 데이터 분할
X_train_mel, X_test_mel, y_train, y_test = train_test_split(X_mel_loaded, y_loaded, test_size=0.2, random_state=42)
X_train_mel, X_val_mel, y_train, y_val = train_test_split(X_train_mel, y_train, test_size=0.25, random_state=42) # test_size=0.25로 설정하여 트레이닝 데이터의 20%가 검증 세트가 됨

# MFCC 데이터 분할
X_train_mfcc, X_test_mfcc = train_test_split(X_mfcc_loaded, test_size=0.2, random_state=42)[0:2]
X_train_mfcc, X_val_mfcc = train_test_split(X_train_mfcc, test_size=0.25, random_state=42)[0:2]

In [5]:
# X_train_mel, X_val_mel, y_train, y_val, X_train_mfcc, X_val_mfcc 데이터를 파일로 저장합니다.
np.save('X_train_mel.npy', X_train_mel)
np.save('X_test_mel.npy', X_test_mel)
np.save('X_val_mel.npy', X_val_mel)
np.save('y_train.npy', y_train)
np.save('y_test.npy', y_test)
np.save('y_val.npy', y_val)

In [6]:
np.save('X_train_mfcc.npy', X_train_mfcc)
np.save('X_test_mfcc.npy', X_test_mfcc)
np.save('X_val_mfcc.npy', X_val_mfcc)