In [1]:
import tensorflow as tf
import scipy
import scipy.signal as signal
import numpy as np
import os, random, sys
import scipy.io.wavfile as wav
import math
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
cd '/content/gdrive/My Drive/Data'

/content/gdrive/My Drive/Data


In [4]:
#filename을 입력받아 앞에서부터 len(filename)-11 까지만 남긴 뒤 _voice 추가
def formatFilename(filename):
    return filename[:len(filename) - 11] + "_voice.wav"


# Strip away the _xnoise.wav part of the filename, and append _voice.wav to obtain clean voice counterpart

def create_final_sequence(sequence, max_length):
    a, b = sequence.shape
    extra_len = max_length - b
    null_mat = np.zeros((len(sequence), extra_len), dtype=np.float32)
    sequence = np.concatenate((sequence, null_mat), axis=1)
    return sequence


def sequentialized_spectrum(batch):
    # Get maximum length of batch
    t = []
    t_vec = []
    Sxx_Vec = []
    for each in batch:
        _, t, Sxx_Vec_Temp = signal.stft(each, fs=rate_repository[0], nperseg=stft_size, return_onesided = False) #frequency, time, Zxx
        t_vec.append(t)
        Sxx_Vec.append(Sxx_Vec_Temp)
    maximum_length = findMaxlen(t_vec)

    max_run_total = int(math.ceil(float(maximum_length) / sequence_length))
    final_data = np.zeros([len(batch), max_run_total, stft_size, sequence_length])
    true_time = np.zeros([len(batch), max_run_total])

    # Read in a file and compute spectrum
    # for batch_idx, each_set in enumerate(batch):
    for batch_idx, Sxx in enumerate(Sxx_Vec):
        # f, t, Sxx = signal.stft(each_set, fs=rate_repository[0], nperseg=stft_size, return_onesided = False)

        # Magnitude and Phase Spectra
        Mag = Sxx.real
        t = t_vec[batch_idx]
        # Phase = Sxx.imag

        # Break up the spectrum in sequence_length sized data
        run_full_steps = float(len(t)) / sequence_length
        run_total = int(math.ceil(run_full_steps))

        # Run a loop long enough to break up all the data in the file into chunks of sequence_size
        for step in range(run_total):

            begin_point = step * sequence_length
            end_point = begin_point + sequence_length

            m, n = Mag[:, begin_point:end_point].shape

            # Store each chunk sequentially in a new array, accounting for zero padding when close to the end of the file
            if n == sequence_length:
                final_data[batch_idx, step, :, :] = np.copy(Mag[:, begin_point:end_point])
                true_time[batch_idx, step] = n
            else:
                final_data[batch_idx, step, :, :] = np.copy(create_final_sequence(Mag[:, begin_point:end_point], sequence_length))
                true_time[batch_idx, step] = n

    final_data = np.transpose(final_data, (0, 1, 3, 2))

    return final_data, true_time, maximum_length


def findMaxlen(data_vec):
    max_ = 0
    for each in data_vec:
        if len(each) > max_:
            max_ = len(each)
    return max_

In [5]:
# ----------------- Begin Vars --------------------- #

# Training data directories
traindata = os.getcwd() + "/Training/NoiseAdded/"
voicedata = os.getcwd() + "/Training/HumanVoices/"
checkpoints = os.getcwd() + "/TF_Checkpoints/"

# NormConstant
norm_factor = (1 / 32768.0)

# Spectrogram Parameters
stft_size = 1024

# RNN Specs
sequence_length = 100
batch_size = 1200
learning_rate = 0.001
epochs = 250
# number_of_layers = 3

# Temp_data_variables
no_of_files = 0
temp_list = []
final_data = []
sequence_length_id = 0

# Repositories
file_repository = []
rate_repository = []
clean_repository = []

# Selected vectors
files_vec = []
clean_files_fin_vec = []
clean_files_vec = []

# 원본데이터에 대해 정상적으로 동작하는지 확인

In [6]:
os.chdir(traindata)

In [7]:
# Buffer training data to memory for faster execution:
for root, _, files in os.walk(traindata):
    files = sorted(files)
    no_of_files = len(files)
    

    #files는 NoiseAdded data들을 의미하며 배치사이즈가 training data size보다 크면 말이 안되니까 여기서 에러처리
    if batch_size > no_of_files:
        sys.exit("Error: batch_size cannot be more than number of files in the training directory")

    #files를 읽어와 data와 samplingrate를 각각 리스트에 저장
    for f in files:
        if f.endswith(".wav"):
            temp_list.append(f)
            srate, data = wav.read(os.path.join(root, f))
            file_repository.append(data)
            rate_repository.append(srate)

data에 대한 file,rate 배열 생성

In [8]:
clean_files_vec = list(map(formatFilename, temp_list))

#clean한 목소리 데이터들을 clean_repasitory에 저장
for root, _, files in os.walk(voicedata):
    files = sorted(files)
    for each in files:
        if each.endswith(".wav"):
            for name in clean_files_vec:
                if each == name:
                    srate2, data2 = wav.read(os.path.join(root, name))
                    clean_repository.append(data2)

humanvoice 배열 생성

In [9]:
    #모든 배열의 원소에 norm_factor를 곱한다. sequentialized_spectrum함수를 
    for file_iter in range(len(file_repository)):
        i = random.randint(0, len(file_repository) - 1)   #랜덤으로 data를 섞기위함
        files_vec.append(file_repository[i] * norm_factor)    #data에 norm 상수를 곱해 저장.
        clean_files_fin_vec.append(clean_repository[i] * norm_factor)   #clean_file에 norm 상수를 곱해 저장.

norm 상수를 곱한뒤 저장

In [11]:
    #stft_bach, clean_voice_batch를 생성
    stft_batch, sequence_length_id, maximum_length = sequentialized_spectrum(files_vec)

In [15]:
stft_batch.shape

(2000, 4, 100, 1024)

STFT 적용, stft_batch, sequence_lengh_id, maxium_length 생성

stft_batch : 
sequence_lengh_id : 
maxium_length : repository에있던 data중 최대길이

# 역 변환 시작

In [16]:

testFileNum = len(files)

# Start Processing
for idx in range(testFileNum):
    nowNAFile = []
    nowNAFile.append(testNADataRepository[idx]) #testNADataRepository는 norm이 곱해진 stft전 data

    # Get NA stft repository.
    nowNAData_STFT, sequenceLengthID, maxLength = sequentialized_spectrum(nowNAFile)

    # Get Time Steps.
    maxTimeSteps = len(nowNAData_STFT[0])

    # Define outputData List to contain rnn_outputs_value.
    outputData = np.zeros([1,  maxTimeSteps, stft_size, sequence_length])           # Transpose, [0, 1, 3, 2]

    for timeStep in range(maxTimeSteps):
        #stft_batch[timeStep]가 이곳에 들어오면 됨!
        stft_batch[]
        rnn_outputs_value = np.transpose(rnn_outputs_value, [0, 2, 1])
        outputData[0][timeStep] = rnn_outputs_value

    # Define outputData_STFT, link outputData List by timeStep in 1 dimension.-----------------------------------------------------------------------------
    outputData_STFT = np.zeros([stft_size, maxLength])
    beginTime = 0
    endTime = 0
    
    for timeStep in range(maxTimeSteps):
        if(timeStep < maxTimeSteps - 1):
            endTime = beginTime + sequence_length
            outputData_STFT[:, beginTime : endTime] = outputData[0, timeStep, :, :]
        else:
            endTime = beginTime + int(sequenceLengthID[0, timeStep])
            outputData_STFT[:, beginTime : endTime] = outputData[0, timeStep, :, 0 : (endTime - beginTime)]

        beginTime = beginTime + sequence_length

    # Compute ISTFT
    _, outputData_ISTFT = signal.istft(outputData_STFT, fs=testNARateRepository[0], nperseg=stft_size, input_onesided = False)

    outputData_ISTFT = (outputData_ISTFT / norm_factor).real
    outputData_ISTFT = outputData_ISTFT.astype(np.int16)

    #wav.write(modelOutput + outputFileList[idx], testNARateRepository[idx], outputData_ISTFT)
    print("Index: " + str(idx))
    print("\tOutput File: " + str(outputFileList[idx]))

SyntaxError: ignored