<a href="https://colab.research.google.com/github/arjasc5231/Lingometer/blob/speaker_verification/speaker_verificaiton/batcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import os
import pickle
import random

import import_ipynb
from constants import NUM_FBANK, HOP_LENGTH, NUM_SPEC_PER_SPEAKER, NUM_FRAME, NUM_ENROLL_UTT, NUM_TRUE_UTT, NUM_FALSE_UTT

importing Jupyter notebook from constants.ipynb


In [None]:
class naive_batcher:
    """Batcher for the speaker classification task.

    Loads per-speaker spectrograms from ``data_path`` (per the original
    author's notes the dataset is laid out as
    ``[num_speakers (N), specs_per_speaker (M), 128, 128, 1]``) and flattens
    the leading speakers into a single training array of shape
    ``[N*M, 128, 128, 1]``.

    Parameters:
      data_path : path of the pickled dataset (specs_of_speakers).
      num_speakers : number of leading speakers to keep. Defaults to 200,
        the value that used to be hard-coded.

    Attributes:
      name : the string 'naive_batcher'.
      data_path : the path the dataset was loaded from.
      X : training data, all spectrograms of speaker 0 followed by all
        spectrograms of speaker 1, and so on
        ([spk1_spec1, ..., spk1_specM, spk2_spec1, ..., spk2_specM, ...]).
      Y : integer speaker label for every row of X
        ([spk1, ..., spk1, spk2, ..., spk2, ...]); always the same length
        as X.

    Raises:
      ValueError: if no speaker is selected (empty dataset or
        ``num_speakers <= 0``).
    """

    def __init__(self, data_path, num_speakers=200):
        self.name = 'naive_batcher'
        self.data_path = data_path
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # dataset files that come from a trusted source.
        with open(self.data_path, "rb") as f:
            specs_of_speakers = pickle.load(f)

        selected = specs_of_speakers[:num_speakers]
        del specs_of_speakers

        # Count the spectrograms actually present for each speaker so the
        # labels always line up with X, even when the file holds fewer
        # speakers than requested or a different number of spectrograms
        # per speaker than the project constant says.
        spec_counts = [len(specs) for specs in selected]
        if not spec_counts:
            raise ValueError(
                'no speakers selected from {!r}'.format(self.data_path))

        self.X = np.concatenate(selected)
        del selected  # drop the reference so the raw dataset can be freed
        self.Y = np.repeat(np.arange(len(spec_counts)), spec_counts)

        print('==================================================')
        print('[batcher:naive] load train dataset')
        print('shape of X :', self.X.shape)
        print('shape of Y :', self.Y.shape)

In [None]:
class simMat_batcher:
    """Batcher that samples mini-batches online for similarity-matrix training.

    Loads per-speaker spectrograms from ``data_path`` (per the original
    author's notes the dataset is laid out as
    ``[num_speakers (N), specs_per_speaker (M), 128, 128, 1]``) and keeps
    them grouped by speaker so that each batch can be drawn as
    "a few speakers x a few utterances each".

    Parameters:
      data_path : path of the pickled dataset (specs_of_speakers). The
        speaker count is read from the 4th '_'-separated field from the end
        of the path when that field is an integer.

    Attributes:
      name : the string 'simMat_batcher'.
      data_path : the path the dataset was loaded from.
      specs_of_speakers : the loaded dataset, indexed by speaker.
      num_speaker : number of speakers batches are sampled from.
    """

    def __init__(self, data_path):
        self.name = 'simMat_batcher'
        self.data_path = data_path
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # dataset files that come from a trusted source.
        with open(self.data_path, "rb") as f:
            self.specs_of_speakers = pickle.load(f)

        # Prefer the speaker count encoded in the file name, but fall back to
        # the loaded data when the name does not follow that convention, and
        # never exceed the number of speakers actually present (otherwise
        # get_batch could index past the end of the dataset).
        num_loaded = len(self.specs_of_speakers)
        try:
            num_declared = int(data_path.split('_')[-4])
        except (IndexError, ValueError):
            num_declared = num_loaded
        self.num_speaker = min(num_declared, num_loaded)

        print('==================================================')
        print('[batcher:simMat] load train dataset')
        print('shape of data :', self.specs_of_speakers.shape)

    def get_batch(self, num_speaker_per_batch=10, num_utt_per_speaker=5):
        """Draw one random batch.

        Parameters:
          num_speaker_per_batch : speakers per batch
            (paper: 64, reference code: 4).
          num_utt_per_speaker : utterances per speaker in a batch
            (paper: 10, reference code: 5).

        Returns:
          (batch, Y) where batch stacks the chosen spectrograms speaker by
          speaker (num_speaker_per_batch * num_utt_per_speaker rows) and Y is
          an all-zero dummy label vector with one entry per row.

        Raises:
          ValueError: raised by np.random.choice when more speakers or
            utterances are requested than are available.
        """
        speakers = np.random.choice(
            self.num_speaker, num_speaker_per_batch, replace=False)

        batch = []
        for speaker in speakers:
            speaker_specs = self.specs_of_speakers[speaker]
            # Sample from the utterances this speaker really has instead of
            # trusting a global constant that may not match the loaded file.
            utts_idx = np.random.choice(
                len(speaker_specs), num_utt_per_speaker, replace=False)
            batch.append(speaker_specs[utts_idx])
        batch = np.concatenate(batch)

        Y = np.zeros(batch.shape[0])  # dummy labels

        return batch, Y

    def train_generator(self, num_speaker_per_batch=10, num_utt_per_speaker=5):
        """Yield random batches from get_batch forever.

        The parameters are forwarded to get_batch; the defaults reproduce a
        plain ``get_batch()`` call.
        """
        while True:
            yield self.get_batch(num_speaker_per_batch, num_utt_per_speaker)
In [None]:
def get_batcher(batcher_name, data_path):
    """Build the batcher selected by name.

    Parameters:
      batcher_name : 'naive_batcher' or 'simMat_batcher'.
      data_path : dataset path handed to the batcher's constructor.

    Returns:
      A naive_batcher or simMat_batcher instance.

    Raises:
      ValueError: if batcher_name is not one of the supported names
        (previously this case silently returned None).
    """
    if batcher_name == 'naive_batcher':
        return naive_batcher(data_path)
    if batcher_name == 'simMat_batcher':
        return simMat_batcher(data_path)
    raise ValueError(
        "unknown batcher_name {!r}; expected 'naive_batcher' or "
        "'simMat_batcher'".format(batcher_name))
    
    

In [None]:
#simMat_batcher('C:/Users/LeeJunghun/Desktop/lingometer/data/NIKL_DIALOGUE_2020_PCM_v1.2_part1/NIKL_DIALOGUE_2020_PCM_v1.2_part1/npy/traintest','C:/Users/LeeJunghun/Desktop/lingometer/data/NIKL_DIALOGUE_2020_PCM_v1.2_part1/NIKL_DIALOGUE_2020_PCM_v1.2_part1/simMat_batcher')

number of speakers who has more than minimun: 167
total generated shape: (1000, 25, 128, 128, 1)
dummy Y shape: (1000, 25)


<__main__.simMat_batcher at 0x1adc5592520>