# Setup


## Import

In [None]:
%%capture 
# Install the SoX system libraries (apt output is discarded) and the Python
# packages `sox` and `jsonargparse`. `%%capture` hides the remaining cell output.
!apt-get install libsox-fmt-all libsox-dev sox > /dev/null
!pip install sox
!pip install jsonargparse


In [None]:
%%capture 
import glob
import os
import pathlib
import pickle
import re
import subprocess

import gdown
import librosa
import numpy as np
import soundfile as sf
import torch
import yaml
from scipy.signal import lfilter

## Gdrive

In [None]:
from google.colab import drive

# Single source of truth for the Drive mount point; every Drive path below is
# built from it so the mount location only has to be changed in one place.
PATH_PREFIX = '/content/gdrive'
drive.mount(PATH_PREFIX)

### TO CHANGE ######
# Folder on the mounted Drive that holds the Dementiabank data.
DIR = f"{PATH_PREFIX}/MyDrive/Path/to/Dementiabank/folder"

Mounted at /content/gdrive


## Download models


In [None]:
# downloading the project 

!git clone https://github.com/yistLin/FragmentVC
!pip install -r /FragmentVC/requirements.txt


fatal: destination path 'FragmentVC' already exists and is not an empty directory.
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: '/FragmentVC/requirements.txt'[0m


In [None]:
# Download the three checkpoint files into the current working directory:
# wav2vec_small.pt, fragmentvc.pt and vocoder.pt. The conversion step later
# refers to them by relative path ("./wav2vec_small.pt", "./fragmentvc.pt",
# "./vocoder.pt"), so it must run from this same directory.
!gdown https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small.pt
!gdown https://github.com/yistLin/FragmentVC/releases/download/v1.0/fragmentvc.pt
!gdown https://github.com/yistLin/FragmentVC/releases/download/v1.0/vocoder.pt

Downloading...
From: https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_small.pt
To: /content/wav2vec_small.pt
100% 951M/951M [00:44<00:00, 21.6MB/s]
Downloading...
From: https://github.com/yistLin/FragmentVC/releases/download/v1.0/fragmentvc.pt
To: /content/fragmentvc.pt
100% 192M/192M [00:13<00:00, 14.8MB/s]
Downloading...
From: https://github.com/yistLin/FragmentVC/releases/download/v1.0/vocoder.pt
To: /content/vocoder.pt
100% 20.5M/20.5M [00:00<00:00, 23.2MB/s]


In [None]:
%%capture
# Install the FragmentVC requirements from the copy of the project stored on the
# mounted Drive (the same copy whose convert_batch.py is invoked further down).
!pip install -r /content/gdrive/MyDrive/FragmentVC/requirements.txt

In [None]:
%%capture
# Extra Python packages; pydub is imported by the FLAC-to-WAV conversion cell.
!pip install fairseq
!pip install pydub

# Voice conversion

## Helper functions

In [None]:
def segment_mp3_tensor(audio, duration, spacing = None, tensor = False):
    """Cut an audio file into equally long segments of 16 kHz samples.

    The file is loaded with ``librosa.load(audio, sr=16000)`` and sliced into
    windows of ``duration`` seconds. A trailing remainder shorter than
    ``duration`` is dropped; a window that ends exactly on the last sample is
    kept.

    Args:
        audio: Anything accepted by ``librosa.load`` (typically a file path).
        duration: Length of each segment in seconds (int or float, > 0).
        spacing: Distance in seconds between the starts of two consecutive
            segments. ``None`` means ``spacing == duration``, i.e.
            non-overlapping segments.
        tensor: Unused; kept so existing callers keep working.

    Returns:
        A list of 1-D sample arrays, each ``duration * 16000`` samples long.
        The list is empty when the audio is shorter than one segment.

    Raises:
        ValueError: If ``duration`` or ``spacing`` is not positive (either
            would otherwise never advance through the audio).
    """
    audio_tensor, sr = librosa.load(audio, sr=16000)

    # Slice bounds must be integers, so convert seconds to whole samples once.
    seg_len = int(round(duration * sr))
    hop = seg_len if spacing is None else int(round(spacing * sr))
    if seg_len <= 0 or hop <= 0:
        raise ValueError("duration and spacing must both be positive")

    # Last start index at which a full-length segment still fits.
    last_start = len(audio_tensor) - seg_len
    return [audio_tensor[start:start + seg_len]
            for start in range(0, last_start + 1, hop)]

def merge_on_id(audio_paths):
  """Read every file in ``audio_paths`` and join the samples end to end.

  Each path is read with ``sf.read``; only the sample array is kept (the
  sample rate returned alongside it is ignored). The arrays are concatenated
  in the order the paths are given.

  Args:
    audio_paths: Iterable of audio file paths, in the desired playback order.

  Returns:
    One array holding the samples of all files back to back.
  """
  waveforms = [sf.read(path)[0] for path in audio_paths]
  return np.concatenate(waveforms)


## Converting VCTK files from flac to wav

In [None]:
import io
from pydub import AudioSegment
import glob

# NOTE(review): `dataset_dir` was never assigned anywhere in this notebook, so
# this cell raised NameError on a fresh kernel. It is assumed to be the data
# folder configured as DIR in the Drive cell -- confirm before running.
dataset_dir = DIR

names = glob.glob(f'{dataset_dir}/vctk_4/*.flac')

for flac_path in names:
  # Swap only the file extension; a plain str.replace("flac", "wav") would also
  # rewrite any "flac" that happens to appear in a directory or file name.
  wav_name = os.path.splitext(flac_path)[0] + ".wav"

  # Decode the FLAC and store it as WAV next to the original file.
  flac = AudioSegment.from_file(flac_path, format='flac')
  flac.export(wav_name, format='wav')

  # Reload at 16 kHz and overwrite the WAV with the resampled audio.
  y, sr = librosa.load(wav_name,  sr=16000)
  sf.write(wav_name, y, sr)



## Convert Audio files to VCTK voice

We convert each DementiaBank sample to the voice of a speaker from the VCTK dataset, available [here](https://datashare.ed.ac.uk/handle/10283/3443).

In [None]:
def generate_segments_yaml(all_paths,out_path_prefix, samplerate,
                           target_glob='/content/gdrive/MyDrive/PHD/AuthorshipObfuscation/Adress-2020/audio/vctk/*.wav'):
  """Split source recordings into 5-second segments and write conversion YAMLs.

  For every path in ``all_paths`` the label is the name of the parent folder
  and the ID is the file name up to its first dot. Files whose label is one of
  the VCTK folders are skipped. For each remaining file that has not been
  segmented yet (``<out_path_prefix>/<ID>/output_seg_0.wav`` is missing):

  * the audio is cut with ``segment_mp3_tensor(file_path, duration=5)`` and
    each segment is saved as ``<out_path_prefix>/<ID>/output_seg_<idx>.wav``;
  * ``<out_path_prefix>/store_file_<ID>_x1.yaml`` is written, mapping
    ``<label>_<ID>_<idx>_x1`` to ``{'source': segment path, 'target': [...]}``.

  After the loop, ``<out_path_prefix>/store_file.yaml`` is written with the
  entries of every file processed during this call (it is left untouched when
  nothing new was processed).

  Args:
    all_paths: Iterable of source audio file paths.
    out_path_prefix: Directory receiving the segment folders and YAML files.
    samplerate: Sample rate passed to ``sf.write`` for the segment files.
      NOTE(review): the segments come from ``segment_mp3_tensor``, which loads
      audio at 16 kHz in this notebook, so this should be 16000 -- confirm.
    target_glob: Glob pattern selecting the target-voice WAV files listed
      under ``'target'``. Defaults to the previously hard-coded location.
  """
  skipped_labels = {"vctk", "vctk_2", "vctk_3", "vctk_4"}
  ts_list = sorted(glob.glob(target_glob))
  dict_file_all = dict()
  processed_any = False

  for file_path in all_paths:
    label = os.path.basename(os.path.dirname(file_path))

    # Skip the target-voice folders but keep going with the remaining files
    # (the former `break` silently dropped everything after the first hit).
    if label in skipped_labels:
      continue

    ID = os.path.basename(file_path).split(".")[0]
    out_path = f"{out_path_prefix}/{ID}"
    try:
      os.makedirs(out_path, exist_ok=True)
    except OSError:
      print ("Creation of the directory %s failed" % out_path)
      continue  # nothing can be written for this file

    # Already segmented by a previous run: leave its files and YAML alone.
    if os.path.exists(f'{out_path}/output_seg_0.wav'):
      continue

    dict_file = dict()
    segs = segment_mp3_tensor(file_path, duration = 5)
    for idx, s in enumerate(segs):
      seg_path = f'{out_path}/output_seg_{idx}.wav'
      sf.write(seg_path, s, samplerate)

      key = f"{label}_{ID}_{idx}_x1"
      # Every entry gets its own copy of the target list: objects shared
      # within one yaml.dump call are emitted as anchors/aliases.
      dict_file[key] = {'source': seg_path, 'target': list(ts_list)}
      dict_file_all[key] = {'source': seg_path, 'target': list(ts_list)}

    with open(f'{out_path_prefix}/store_file_{ID}_x1.yaml', 'w+') as file:
      yaml.dump(dict_file, file)
    processed_any = True

  # Written once at the end instead of after every file; the final content is
  # the same and the growing file is no longer rewritten on each iteration.
  if processed_any:
    with open(f'{out_path_prefix}/store_file.yaml', 'w+') as file:
      yaml.dump(dict_file_all, file)


def convert_segments(out_path_prefix, sample_rate,
                     script_path='/content/gdrive/MyDrive/FragmentVC/convert_batch.py'):
  """Run the FragmentVC batch conversion for every per-sample YAML file.

  Each ``<out_path_prefix>/store_file_<ID>_x1.yaml`` is handed to
  ``convert_batch.py`` together with the output directory
  ``<out_path_prefix>/segments_x1``. A sample is skipped when the output for
  its first segment (``*_<ID>_0_x1.wav``) is already present there, whatever
  its label prefix is.

  The checkpoints are passed as ``./wav2vec_small.pt``, ``./vocoder.pt`` and
  ``./fragmentvc.pt``, i.e. relative to the current working directory.

  Args:
    out_path_prefix: Directory holding the ``*_x1.yaml`` files; converted
      audio goes to its ``segments_x1`` sub-directory.
    sample_rate: Value forwarded as ``--sample_rate``.
    script_path: Location of FragmentVC's ``convert_batch.py``. Defaults to
      the previously hard-coded Drive path.

  Raises:
    RuntimeError: If the conversion command exits with a non-zero status; the
      message carries the command, the exit code and the captured output.
  """
  segments_dir = f'{out_path_prefix}/segments_x1'
  os.makedirs(segments_dir, exist_ok=True)

  for yaml_f in sorted(glob.glob(f'{out_path_prefix}/*_x1.yaml')):
    # Recover <ID> from "store_file_<ID>_x1.yaml".
    sample_id = os.path.basename(yaml_f)[:-len('_x1.yaml')]
    if sample_id.startswith('store_file_'):
      sample_id = sample_id[len('store_file_'):]

    # Label-agnostic "already converted" check (the old check always looked
    # for a "cd_" prefix, so other labels were never recognised as done).
    done = glob.glob(f'{glob.escape(segments_dir)}/*_{glob.escape(sample_id)}_0_x1.wav')
    if done:
      continue

    print("fragmentVC for {}".format(yaml_f))
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through unchanged.
    cmd = [
        'python', script_path,
        '-w', './wav2vec_small.pt',
        '--sample_rate', str(sample_rate),
        '-v', './vocoder.pt',
        '-c', './fragmentvc.pt',
        yaml_f,
        segments_dir,
    ]
    try:
      # The command used to be assembled but never executed.
      subprocess.run(cmd, check=True, stdout=subprocess.PIPE,
                     stderr=subprocess.STDOUT, universal_newlines=True)
    except subprocess.CalledProcessError as e:
      raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) from e


def _segment_sort_key(path):
  """Sort key ordering ``<label>_<ID>_<idx>_x1.wav`` files by numeric ``idx``."""
  parts = os.path.basename(path).split("_")
  try:
    return (0, int(parts[2]), path)
  except (IndexError, ValueError):
    # Unexpected name: keep it, after the well-formed ones, in name order.
    return (1, 0, path)


def merge_and_save(out_path_prefix, samplerate):
  """Concatenate the converted segments of each sample into one WAV file.

  Segment files are read from ``<out_path_prefix>/segments_x1/*_x1.wav`` and
  are expected to be named ``<label>_<ID>_<idx>_x1.wav``. Segments sharing an
  ID are merged with ``merge_on_id`` in ascending ``idx`` order and written to
  ``<out_path_prefix>/Full_aug_samples/x1/<label>_<ID>.wav`` (the directory is
  created if needed). Each ID is merged and written exactly once.

  Args:
    out_path_prefix: Directory containing the ``segments_x1`` folder.
    samplerate: Sample rate passed to ``sf.write`` for the merged files.
  """
  out_dir = f'{out_path_prefix}/Full_aug_samples/x1'
  pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
  segments_path = f'{out_path_prefix}/segments_x1'

  # One pass over the directory listing: group segment files by sample ID and
  # remember the label that prefixes each ID.
  labels_dict = dict()
  segments_by_id = dict()
  for seg in sorted(glob.glob(f'{segments_path}/*_x1.wav')):
    parts = os.path.basename(seg).split("_")
    label, id_name = parts[0], parts[1]
    labels_dict[id_name] = label
    segments_by_id.setdefault(id_name, []).append(seg)

  for speaker, id_segs in segments_by_id.items():
    # Numeric ordering matters: plain name order puts segment 10 before 2,
    # which would scramble the reconstructed recording.
    ordered = sorted(id_segs, key=_segment_sort_key)
    full_sample = merge_on_id(ordered)
    sf.write(f'{out_dir}/{labels_dict[speaker]}_{speaker}.wav',full_sample, samplerate)


In [None]:
# Root folder of the dataset on the mounted Drive; every other path in this
# cell is derived from it.
adress_path = "/content/gdrive/MyDrive/PHD/Dementiabank"
samplerate = 16000

# Input folders, one per class (`cd`, `cc`).
dataset_dir_cd = f'{adress_path}/cd'
dataset_dir_cc = f'{adress_path}/cc'

# Output folders for the augmented data of each class.
out_path_prefix_cd = f"{adress_path}/augmented_cd"
out_path_prefix_cc = f"{adress_path}/augmented_cc"

cd_paths = glob.glob(f'{dataset_dir_cd}/*.wav')
cc_paths = glob.glob(f'{dataset_dir_cc}/*.wav')

# Same three-stage pipeline for both classes: segment + write YAML, convert the
# segments, then merge the converted segments back into full recordings.
for input_paths, output_dir in ((cd_paths, out_path_prefix_cd),
                                (cc_paths, out_path_prefix_cc)):
  os.makedirs(output_dir, exist_ok=True)
  generate_segments_yaml(input_paths, output_dir, samplerate)
  convert_segments(output_dir, samplerate)
  merge_and_save(output_dir, samplerate)
