# Fish Diffusion

In [1]:
from IPython.display import HTML, display

# Markup for the "Open In Colab" badge; the link opens this notebook from the
# notebooks-support branch in Google Colab.
COLAB_BADGE_HTML = """
<a target="_blank" href="https://colab.research.google.com/github/LordElf/fish-diffusion/blob/notebooks-support/notebooks/fish-audio_sample.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
"""

# Render the badge as rich HTML output of this cell.
badge = HTML(COLAB_BADGE_HTML)
display(badge)

### Environment Setup




Install Conda

In [None]:
%%bash
# Download the pinned Miniconda installer (Python 3.10 build) and install it
# into /content/env. Written so the cell can be re-run safely.
set -e  # stop at the first failing step instead of running later steps anyway
MINICONDA_INSTALLER_SCRIPT=Miniconda3-py310_23.1.0-1-Linux-x86_64.sh
MINICONDA_PREFIX=/content/env
# -p: do not fail when the directory already exists (plain mkdir errors on re-run)
mkdir -p "$MINICONDA_PREFIX"
# -O: always write to the same file name, so a re-run overwrites the installer
# instead of saving a ".1" copy next to a stale one
wget -O "$MINICONDA_INSTALLER_SCRIPT" "https://repo.continuum.io/miniconda/$MINICONDA_INSTALLER_SCRIPT"
chmod +x "$MINICONDA_INSTALLER_SCRIPT"
# -b: batch (non-interactive) mode, -f: allow an existing prefix, -p: install prefix
"./$MINICONDA_INSTALLER_SCRIPT" -b -f -p "$MINICONDA_PREFIX"

Create conda environment

In [None]:
# Activate the Miniconda install under /content/env, then create a Python 3.10
# environment named fish_diffusion (-y answers the confirmation prompt).
!source /content/env/bin/activate;\
conda create -n fish_diffusion python=3.10 -y

Install pytorch

In [None]:

# Install the PyTorch core dependencies (pytorch, torchvision, torchaudio) with
# the CUDA 11.8 runtime package into the fish_diffusion environment, using the
# pytorch and nvidia conda channels.
!source /content/env/bin/activate;\
conda activate fish_diffusion;\
conda install "pytorch>=2.0.0" "torchvision>=0.15.0" "torchaudio>=2.0.0" pytorch-cuda=11.8 -c pytorch -c nvidia -y
# Alternative kept for reference (pip wheels from the cu118 index); left disabled:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Install dependencies

In [None]:
!git clone https://github.com/LordElf/fish-diffusion
%cd fish-diffusion
!git checkout notebooks-support

In [None]:
!source /content/env/bin/activate;\
conda activate fish_diffusion;\
cat requirements.txt | xargs -n 1 pip install;\
pip install -e .

### Vocoder preparation

In [None]:
# Run the repository's NSF-HiFiGAN download script inside the fish_diffusion
# environment.
# NOTE(review): --agree-license presumably accepts the vocoder license
# non-interactively — confirm against tools/download_nsf_hifigan.py.
!source /content/env/bin/activate;\
conda activate fish_diffusion;\
python tools/download_nsf_hifigan.py --agree-license

### Dataset preparation
```shell
dataset
├───train
│   ├───xxx1-xxx1.wav
│   ├───...
│   ├───Lxx-0xx8.wav
│   └───speaker0 (Subdirectory is also supported)
│       └───xxx1-xxx1.wav
└───valid
    ├───xx2-0xxx2.wav
    ├───...
    └───xxx7-xxx007.wav
```

##### Mount Google Drive or upload your dataset

In [None]:
from google.colab import drive

# Directory under which Google Drive is mounted in the Colab VM.
DRIVE_MOUNT_POINT = '/content/drive/'

# Mount Google Drive at the mount point above.
drive.mount(DRIVE_MOUNT_POINT)

#### Symlink your dataset into the current directory

In [None]:
dataset_path = "/content/drive/MyDrive/test-fish-audio/dataset/"#@param{type:"string"}
!ln -s $dataset_path dataset

#### Extract all data features, such as pitch, text features, mel features, etc.

##### If an error about torchvision occurs, run this cell

In [None]:
# Optional workaround, disabled by default: uncomment all four lines below to
# uninstall torchvision from the fish_diffusion environment and reinstall it
# with pip from the CUDA 11.8 wheel index.
# !source /content/env/bin/activate;\
# conda activate fish_diffusion;\
# pip uninstall torchvision -y;\
# pip install torchvision  --index-url https://download.pytorch.org/whl/cu118

##### Extract features

In [None]:
# Run the repository's feature-extraction script on the ./dataset link, using
# the svc_hubert_soft config, inside the fish_diffusion environment.
# NOTE(review): --clean presumably discards previously extracted features
# before extracting again — confirm against the script.
!source /content/env/bin/activate;\
conda activate fish_diffusion;\
python tools/preprocessing/extract_features.py --config configs/svc_hubert_soft.py --path dataset  --clean

### Baseline training

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Launch mode for the training run; read by the next cell to build the command.
training_options = widgets.RadioButtons(
    options=['single_gpu', 'multi_gpu', 'multi_node'],
    description='Training Options:',
    disabled=False
)
display(training_options)

# Whether to start from a pretrained checkpoint.
pretrained = widgets.ToggleButtons(
    options=['yes', 'no'],
    description='Pretrained:',
    disabled=False
)
display(pretrained)

# Whether to resume an earlier training run.
resume = widgets.ToggleButtons(
    options=['yes', 'no'],
    description='Resume:',
    disabled=False
)
display(resume)

# Checkpoint to resume from; only editable while "Resume" is set to yes.
resume_checkpoint = widgets.Text(
    value='',
    placeholder='Enter the checkpoint file name',
    description='Checkpoint File:',
    disabled=resume.value == 'no'
)
display(resume_checkpoint)

# Pretrained checkpoint; only editable while "Pretrained" is set to yes.
# (Fixed: the original referenced the undefined name `ptrained` here, which
# raised NameError before this widget could be created.)
pretrain_checkpoint = widgets.Text(
    value='',
    placeholder='Enter the pretrain checkpoint file name',
    description='Pretrain Checkpoint:',
    disabled=pretrained.value == 'no'
)
display(pretrain_checkpoint)


def _sync_disabled(toggle, text_box):
    """Grey out ``text_box`` whenever ``toggle`` is switched to 'no'.

    The ``disabled=`` argument above is evaluated only once, when the widget is
    created; this observer keeps the text box in step with later clicks.
    """
    def _on_change(change):
        text_box.disabled = change['new'] == 'no'

    toggle.observe(_on_change, names='value')


_sync_disabled(resume, resume_checkpoint)
_sync_disabled(pretrained, pretrain_checkpoint)

In [None]:
if resume.value == 'yes':
    resume_str = f"--resume {resume_checkpoint.value}"
else:
    resume_str = ""
    
if pretrain_checkpoint.value != '':
    pretrain_str = f"--pretrain {pretrain_checkpoint.value}"
else:
    pretrain_str = ''

if training_options.value == "single_gpu":
    cmd = f"tools/diffusion/train.py --config configs/svc_hubert_soft.py {resume_str} {pretrain_str}"
elif training_options.value == "multi_gpu":
    cmd = f"tools/diffusion/train.py --config configs/svc_hubert_soft.py --gpus 2 {resume_str} {pretrain_str}"
elif training_options.value == "multi_node":
    cmd = f"-m torch.distributed.launch --nnodes 2 --nproc_per_node 2 train.py --config configs/svc_hubert_soft.py {resume_str} {pretrain_str}"

!source /content/env/bin/activate;\
conda activate fish_diffusion;\
python {cmd}