In [1]:
import os
import time
import wave
import random
import numpy as np
from typing import Dict, Tuple

import torch
import torchaudio

import json
import sys
sys.path.append('../training')
from utils import data_utils, utils, audio_utils
# hubert = torch.hub.load("bshall/hubert:main", "hubert_soft")

In [69]:
# main_dir = '/home/jaejun/nansy/
# Parse the Face2Voice training configuration (JSON) and wrap it in the
# project's HParams container so settings can be read as attributes.
config_path = '../training/configs/f2v.json'
with open(config_path, "r") as config_file:
    config = json.load(config_file)
args = utils.HParams(**config)

# Face2Voice

## Original Image version

In [35]:
# Speaker-list inputs: the train/valid and test speaker lists are both read
# from this one filelist directory.
train_valid_spkr_list_dir = '../training/filelists'
train_valid_spkr_list_file = 'voxceleb_train_speakerlist.txt'
test_spkr_list_file = 'voxceleb_test_speakerlist.txt'

# Image root: one sub-directory per speaker id is listed under this folder.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
image_dir = "/disk2/VGG_Face2/data/original"

In [36]:
# Build speaker-id -> gender lookup tables from the label CSVs.
# Every loaded entry is split on ',' : field 0 is used as the speaker id and
# field 1 as the gender label. A later duplicate id overwrites an earlier one.
train_valid_labels = np.loadtxt(os.path.join(train_valid_spkr_list_dir, 'voxceleb_train_labels.csv'), dtype=str)
train_valid_gender_dict = {
    label_row.split(',')[0]: label_row.split(',')[1]
    for label_row in train_valid_labels
}

test_labels = np.loadtxt(os.path.join(train_valid_spkr_list_dir, 'voxceleb_test_labels.csv'), dtype=str)
test_gender_dict = {
    label_row.split(',')[0]: label_row.split(',')[1]
    for label_row in test_labels
}

In [37]:
# Split the speaker list into train / validation sets and report the gender
# distribution of each split.
train_valid_spkrs = np.loadtxt(os.path.join(train_valid_spkr_list_dir, train_valid_spkr_list_file), dtype=str)
print(f'# of Train & Valid speakers:{len(train_valid_spkrs)}')

# Seeded, in-place shuffle: the first `valid_speaker_num` speakers after the
# shuffle become the validation set, the remainder the training set.
valid_speaker_num = 100
np.random.seed(17)
np.random.shuffle(train_valid_spkrs)
valid_spkrs = train_valid_spkrs[:valid_speaker_num]
train_spkrs = train_valid_spkrs[valid_speaker_num:]

# Speakers missing from the label table (or labelled neither 'm' nor 'f')
# contribute to neither counter.
male_cnt = sum(train_valid_gender_dict.get(s) == 'm' for s in train_spkrs)
female_cnt = sum(train_valid_gender_dict.get(s) == 'f' for s in train_spkrs)
print(f'Train spkr - male:{male_cnt}, female:{female_cnt}')

male_cnt = sum(train_valid_gender_dict.get(s) == 'm' for s in valid_spkrs)
female_cnt = sum(train_valid_gender_dict.get(s) == 'f' for s in valid_spkrs)
print(f'Valid spkr - male:{male_cnt}, female:{female_cnt}')

# of Train & Valid speakers:5991
Train spkr - male:3366, female:2131
Valid spkr - male:60, female:34


In [38]:
# Collect the path of every file found in each speaker's image folder, for
# the train and validation splits.
# os.listdir() returns entries in arbitrary order, so each listing is sorted
# to keep the resulting file lists identical across runs and machines.
train_image_paths = [
    os.path.join(image_dir, spkr, file)
    for spkr in train_spkrs
    for file in sorted(os.listdir(os.path.join(image_dir, spkr)))
]

valid_image_paths = [
    os.path.join(image_dir, spkr, file)
    for spkr in valid_spkrs
    for file in sorted(os.listdir(os.path.join(image_dir, spkr)))
]

print(f'Train files:{len(train_image_paths)}, Valid files:{len(valid_image_paths)}')

Train files:2211115, Valid files:38151


In [39]:
# Load the test speaker list and report its gender distribution.
test_spkrs = np.loadtxt(os.path.join(train_valid_spkr_list_dir, test_spkr_list_file), dtype=str)
print(f'# of Test speakers:{len(test_spkrs)}')

# Speakers absent from the test label table are not counted in either bucket.
male_cnt = sum(test_gender_dict.get(s) == 'm' for s in test_spkrs)
female_cnt = sum(test_gender_dict.get(s) == 'f' for s in test_spkrs)
print(f'Test spkr - male:{male_cnt}, female:{female_cnt}')

# of Test speakers:118
Test spkr - male:77, female:39


In [40]:
# Collect the path of every file found in each test speaker's image folder.
# os.listdir() returns entries in arbitrary order, so each listing is sorted
# to keep the resulting file list identical across runs and machines.
test_image_paths = [
    os.path.join(image_dir, spkr, file)
    for spkr in test_spkrs
    for file in sorted(os.listdir(os.path.join(image_dir, spkr)))
]

print(f'Test files:{len(test_image_paths)}')

Test files:53652


### File Save

In [41]:
# Write the train image list, one path per line.
# The context manager closes (and flushes) the file even if a write raises,
# which a bare open()/close() pair does not guarantee.
with open("../training/filelists/VGG_Face_train.txt","w") as f:
    f.writelines(image_path + '\n' for image_path in train_image_paths)

In [42]:
# Write the validation image list, one path per line.
# The context manager closes (and flushes) the file even if a write raises,
# which a bare open()/close() pair does not guarantee.
with open("../training/filelists/VGG_Face_valid.txt","w") as f:
    f.writelines(image_path + '\n' for image_path in valid_image_paths)

In [43]:
# Write the test image list, one path per line.
# The context manager closes (and flushes) the file even if a write raises,
# which a bare open()/close() pair does not guarantee.
with open("../training/filelists/VGG_Face_test.txt","w") as f:
    f.writelines(image_path + '\n' for image_path in test_image_paths)

## MT-CNN Emb version

### Audio

In [3]:
# Audio root directory (presumably the VoxCeleb2 dev split, going by the path).
# NOTE(review): absolute, machine-specific path; not referenced by any other
# cell visible in this notebook.
audio_dir = '/disk2/VoxCeleb2/VoxCeleb2/dev'

### Image

In [5]:
# Speaker-list inputs: the train/valid and test speaker lists are both read
# from this one filelist directory.
train_valid_spkr_list_dir = '../training/filelists'
train_valid_spkr_list_file = 'voxceleb_train_speakerlist.txt'
test_spkr_list_file = 'voxceleb_test_speakerlist.txt'

# Image root for this section: points at the "modified" data folder instead
# of the "original" one; one sub-directory per speaker id is listed under it.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
image_dir = "/disk2/VGG_Face2/data/modified"

In [6]:
# Speaker-id -> gender lookup tables, built from the label CSVs.
# Each loaded entry is split on ',' : the first field is used as the speaker
# id, the second as the gender label.
train_valid_labels = np.loadtxt(os.path.join(train_valid_spkr_list_dir, 'voxceleb_train_labels.csv'), dtype=str)
train_valid_gender_dict = {}
for label_row in train_valid_labels:
    fields = label_row.split(',')
    train_valid_gender_dict[fields[0]] = fields[1]

test_labels = np.loadtxt(os.path.join(train_valid_spkr_list_dir, 'voxceleb_test_labels.csv'), dtype=str)
test_gender_dict = {}
for label_row in test_labels:
    fields = label_row.split(',')
    test_gender_dict[fields[0]] = fields[1]

Train/Valid

In [25]:
# Train / validation speaker split plus a gender-balance report per split.
train_valid_spkrs = np.loadtxt(os.path.join(train_valid_spkr_list_dir, train_valid_spkr_list_file), dtype=str)
print(f'# of Train & Valid speakers:{len(train_valid_spkrs)}')

# The shuffle is seeded and done in place; the leading `valid_speaker_num`
# entries form the validation set and everything after them the train set.
valid_speaker_num = 100
np.random.seed(17)
np.random.shuffle(train_valid_spkrs)
valid_spkrs = train_valid_spkrs[:valid_speaker_num]
train_spkrs = train_valid_spkrs[valid_speaker_num:]

# Only speakers present in the label table are considered; labels other than
# 'm' / 'f' are ignored by the counts.
train_genders = [train_valid_gender_dict[s] for s in train_spkrs if s in train_valid_gender_dict]
male_cnt, female_cnt = train_genders.count('m'), train_genders.count('f')
print(f'Train spkr - male:{male_cnt}, female:{female_cnt}')

valid_genders = [train_valid_gender_dict[s] for s in valid_spkrs if s in train_valid_gender_dict]
male_cnt, female_cnt = valid_genders.count('m'), valid_genders.count('f')
print(f'Valid spkr - male:{male_cnt}, female:{female_cnt}')

# of Train & Valid speakers:5991
Train spkr - male:3366, female:2131
Valid spkr - male:60, female:34


In [26]:
# NOTE(review): `min_train_images` is assigned here but never read by any
# cell visible in this notebook; kept in case something else relies on it.
min_train_images = 999

# Collect the path of every file found in each speaker's folder, for the
# train and validation splits.
# os.listdir() returns entries in arbitrary order, so each listing is sorted
# to keep the resulting file lists identical across runs and machines.
train_image_paths = [
    os.path.join(image_dir, spkr, file)
    for spkr in train_spkrs
    for file in sorted(os.listdir(os.path.join(image_dir, spkr)))
]

valid_image_paths = [
    os.path.join(image_dir, spkr, file)
    for spkr in valid_spkrs
    for file in sorted(os.listdir(os.path.join(image_dir, spkr)))
]

print(f'Train files:{len(train_image_paths)}, Valid files:{len(valid_image_paths)}')

Train files:2204588, Valid files:38027


Test

In [27]:
# Test speaker list and its gender-balance report.
test_spkrs = np.loadtxt(os.path.join(train_valid_spkr_list_dir, test_spkr_list_file), dtype=str)
print(f'# of Test speakers:{len(test_spkrs)}')

# Only speakers present in the test label table are considered; labels other
# than 'm' / 'f' are ignored by the counts.
test_genders = [test_gender_dict[s] for s in test_spkrs if s in test_gender_dict]
male_cnt, female_cnt = test_genders.count('m'), test_genders.count('f')
print(f'Test spkr - male:{male_cnt}, female:{female_cnt}')

# of Test speakers:118
Test spkr - male:77, female:39


In [28]:
# Collect the path of every file found in each test speaker's folder.
# os.listdir() returns entries in arbitrary order, so each listing is sorted
# to keep the resulting file list identical across runs and machines.
test_image_paths = [
    os.path.join(image_dir, spkr, file)
    for spkr in test_spkrs
    for file in sorted(os.listdir(os.path.join(image_dir, spkr)))
]

print(f'Test files:{len(test_image_paths)}')

Test files:53251


### File save

In [29]:
# Write the train list for the embedding version, one path per line.
# The context manager closes (and flushes) the file even if a write raises,
# which a bare open()/close() pair does not guarantee.
with open("../training/filelists/VGG_Face_Emb_train.txt","w") as f:
    f.writelines(image_path + '\n' for image_path in train_image_paths)

In [30]:
# Write the validation list for the embedding version, one path per line.
# The context manager closes (and flushes) the file even if a write raises,
# which a bare open()/close() pair does not guarantee.
with open("../training/filelists/VGG_Face_Emb_valid.txt","w") as f:
    f.writelines(image_path + '\n' for image_path in valid_image_paths)

In [31]:
# Write the test list for the embedding version, one path per line.
# The context manager closes (and flushes) the file even if a write raises,
# which a bare open()/close() pair does not guarantee.
with open("../training/filelists/VGG_Face_Emb_test.txt","w") as f:
    f.writelines(image_path + '\n' for image_path in test_image_paths)

### Temp

In [71]:
# Sanity check against the audio data directory from the config:
#   1. total number of entries found under all speaker folders,
#   2. how many test / train-valid speakers have no folder there.
aud_spkrs = os.listdir(args.data.aud_datadir)
cnt_aud = sum(
    len(os.listdir(os.path.join(args.data.aud_datadir, aud_spkr)))
    for aud_spkr in aud_spkrs
)
print(cnt_aud)

# Use a set so each membership test is O(1) instead of scanning the whole
# directory listing once per speaker.
aud_spkr_set = set(aud_spkrs)

cnt = sum(1 for test_spkr in test_spkrs if test_spkr not in aud_spkr_set)
print(f"Test spkr #{cnt} of {len(test_spkrs)} doesn't match")

cnt = sum(1 for train_valid_spkr in train_valid_spkrs if train_valid_spkr not in aud_spkr_set)
print(f"Train-valid spkr #{cnt} of {len(train_valid_spkrs)} doesn't match")



1128246
Test spkr #0 of 118 doesn't match
Train-valid spkr #0 of 5991 doesn't match
