In [5]:
import sys, os
import torch
import numpy as np
import pandas as pd
from torch import Tensor
from scipy.io import wavfile
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import torchaudio

In [6]:
csvData = pd.read_csv('./data/data/target/vox1_meta.csv',sep="\t")
print(csvData.iloc[0:4, 0:5])
#for i, cls_name in enumerate(csvData["VoxCeleb1 ID"]):
   # print("==",i,"====",cls_name)

  VoxCeleb1 ID   VGGFace1 ID Gender Nationality  Set
0      id10001  A.J._Buckley      m     Ireland  dev
1      id10002   A.R._Rahman      m       India  dev
2      id10003    Aamir_Khan      m       India  dev
3      id10004   Aaron_Tveit      m         USA  dev


In [7]:
class RawNetDataset(Dataset):
    def __init__(self, transform=None, mode="train",files_dir=None, base_dir="",csv_file_dir="",nb_time=59049):
        self.base_dir = base_dir
        self.mode = mode
        self.transform = transform
        self.csv_file_dir = csv_file_dir
        self.files_dir = files_dir
        csv_file = pd.read_csv(csv_file_dir,sep="\t")
        self.nb_time = nb_time
        self.classes = np.array([i for i, cls_name in enumerate(csv_file["VoxCeleb1 ID"].unique())])
        
    def __len__(self):
        return self.classes.shape[0] 
    
    def __getitem__(self, idx):
        filename = self.files_dir[idx]
        X, sample_rate = torchaudio.load(self.base_dir+filename)
        label = self.classes[idx]
        self._pre_emphasis(X)
        nb_time = X.shape[1]
        if nb_time > self.nb_time:
            start_idx = np.random.randint(low = 0,
                high = nb_time - self.nb_time)
            X = X[:, start_idx:start_idx+self.nb_time]
            print("taille supérieure")
        elif nb_time < self.nb_time:
            nb_dup = int(self.nb_time / nb_time) + 1
            X = np.tile(X, (1, nb_dup))[:, :self.nb_time]
            print("taille inférieure")
        else:
            X = X
            print("taille égale")
        return X, label

    def _pre_emphasis(self, x):
        '''
        Pre-emphasis for single channel input
        '''
        return np.asarray(x[:,1:] - 0.97 * x[:, :-1], dtype=np.float32)

In [47]:
base_dir = 'data/data/wav/'

csv_file_dir = 'data/data/target/vox1_meta.csv'

In [48]:
def get_utt_list(src_dir):
    l_utt = []
    for r, ds, fs in os.walk(src_dir):
        r = r.replace('\\', '/') 
       
        base = '/'.join(r.split('/')[-2:])+'/'
        for f in fs:
            l_utt.append(base+f[:-4]+'.wav')
    
    list_train, list_test=l_utt[2:450],l_utt[451:]
    return list_train, list_test

In [49]:
src_dir = 'data/data/wav/'
list_train, list_test = get_utt_list(src_dir)
print(len(list_train),len(list_test))
print(list_train)


(['id10009/7hpSiT9_gCE/00001.wav', 'id10009/aFttHpeaXaQ/00001.wav', 'id10009/AtavJVP4bCk/00001.wav', 'id10009/AtavJVP4bCk/00002.wav', 'id10009/AtavJVP4bCk/00003.wav', 'id10009/AtavJVP4bCk/00004.wav', 'id10009/AtavJVP4bCk/00005.wav', 'id10009/AtavJVP4bCk/00006.wav', 'id10009/AtavJVP4bCk/00007.wav', 'id10009/AtavJVP4bCk/00008.wav', 'id10009/AtavJVP4bCk/00009.wav', 'id10009/EJ3tyqGqYfw/00001.wav', 'id10009/FOFbkVlz-wQ/00001.wav', 'id10009/FOFbkVlz-wQ/00002.wav', 'id10009/HCGXIgKsozU/00001.wav', 'id10009/JntZkGsH2Cc/00001.wav', 'id10009/JntZkGsH2Cc/00002.wav', 'id10009/JntZkGsH2Cc/00003.wav', 'id10009/JntZkGsH2Cc/00004.wav', 'id10009/JrwqvWr5_VE/00001.wav', 'id10009/JrwqvWr5_VE/00002.wav', 'id10009/JrwqvWr5_VE/00003.wav', 'id10009/JrwqvWr5_VE/00004.wav', 'id10009/JrwqvWr5_VE/00005.wav', 'id10009/JrwqvWr5_VE/00006.wav', 'id10009/JrwqvWr5_VE/00007.wav', 'id10009/JrwqvWr5_VE/00008.wav', 'id10009/JrwqvWr5_VE/00009.wav', 'id10009/qdop2-gjKBQ/00001.wav', 'id10009/qdop2-gjKBQ/00002.wav', 'id10009

In [46]:
dataset_train = RawNetDataset(files_dir=list_train,base_dir=base_dir,csv_file_dir=csv_file_dir)
dataset_test = RawNetDataset(files_dir=list_test,base_dir=base_dir,csv_file_dir=csv_file_dir)

print("Train set size: " + str(len(list_train)))
print("Test set size: " + str(len(list_test)))

data_loader = torch.utils.data.DataLoader(dataset_train, batch_size=50, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size = 50, shuffle = True)

Train set size: 448
Test set size: 95


In [None]:
print(dataset_train.__getitem__(5))
print(dataset_test.__getitem__(5))

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv1d(3,128, 3)
        self.conv2 = nn.Conv1d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))
    