# Fish Diffusion

In [1]:
from IPython.display import HTML, display

# Markup for the "Open In Colab" badge; the link targets the hosted copy of this notebook.
colab_badge_markup = """
<a target="_blank" href="https://colab.research.google.com/github/LordElf/fish-diffusion/blob/notebooks-support/notebooks/fish-audio_sample.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
"""

# Render the badge as rich HTML output of this cell.
display(HTML(colab_badge_markup))

### Environment Setup




Install Conda

In [None]:
%%bash
# Download the Miniconda installer (Python 3.10 build) and install it into /content/env.
# Abort on the first failing step so a failed download never reaches chmod or the installer.
set -euo pipefail

MINICONDA_INSTALLER_SCRIPT=Miniconda3-py310_23.1.0-1-Linux-x86_64.sh
MINICONDA_PREFIX=/content/env

# -p: do not fail when the directory is left over from an earlier run of this cell.
mkdir -p "$MINICONDA_PREFIX"

# -O: always write to the same file name. Without it, a re-run saves "<name>.1"
# and the stale first download is the one that gets executed below.
wget -O "$MINICONDA_INSTALLER_SCRIPT" "https://repo.continuum.io/miniconda/$MINICONDA_INSTALLER_SCRIPT"
chmod +x "$MINICONDA_INSTALLER_SCRIPT"

# Run the installer non-interactively into the prefix (flags unchanged from the original cell).
./"$MINICONDA_INSTALLER_SCRIPT" -b -f -p "$MINICONDA_PREFIX"

Create conda environment

In [None]:
# Create a dedicated Python 3.10 conda environment named "fish_diffusion".
# The steps are chained with && so `conda create` only runs once the
# activate script from /content/env has been sourced successfully.
!source /content/env/bin/activate && \
conda create -n fish_diffusion python=3.10 -y

Install PyTorch

In [None]:

# Install the PyTorch core dependencies (pytorch, torchvision, torchaudio, pytorch-cuda 11.8)
# into the fish_diffusion environment.
# The steps are chained with && so that a failed activation stops the cell
# instead of installing the packages into whatever environment happens to be active.
!source /content/env/bin/activate && \
conda activate fish_diffusion && \
conda install "pytorch>=2.0.0" "torchvision>=0.15.0" "torchaudio>=2.0.0" pytorch-cuda=11.8 -c pytorch -c nvidia -y
# Alternative (disabled): install the CUDA 11.8 wheels with pip instead of conda.
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Install dependencies

In [None]:
# Fetch the project sources.
!git clone https://github.com/LordElf/fish-diffusion
# Enter the clone BEFORE switching branches: `git checkout` must run inside the
# repository, otherwise it fails and the default branch stays checked out.
%cd fish-diffusion
!git checkout notebooks-support
# Install Poetry and then the project's dependencies inside the fish_diffusion environment.
# The steps are chained with && so a failed activation stops the cell instead of
# installing into the wrong Python environment.
!source /content/env/bin/activate && \
conda activate fish_diffusion && \
pip install poetry && \
poetry install

### Vocoder preparation

In [None]:
# Run the project's NSF-HiFiGAN download script inside the fish_diffusion environment.
# --agree-license is passed so the script runs without an interactive prompt
# (presumably a license confirmation — see tools/download_nsf_hifigan.py).
# The steps are chained with && so the script never runs if activation failed.
!source /content/env/bin/activate && \
conda activate fish_diffusion && \
python tools/download_nsf_hifigan.py --agree-license

### Dataset preparation
```shell
dataset
├───train
│   ├───xxx1-xxx1.wav
│   ├───...
│   ├───Lxx-0xx8.wav
│   └───speaker0 (Subdirectory is also supported)
│       └───xxx1-xxx1.wav
└───valid
    ├───xx2-0xxx2.wav
    ├───...
    └───xxx7-xxx007.wav
```

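Mount Google Drive or upload your dataset. The feature-extraction step below reads the dataset from `MyDrive/test-fish-audio/dataset/`; adjust its `--path` argument if your dataset lives elsewhere.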

In [None]:
from google.colab import drive

# Directory under which Google Drive is attached in the Colab filesystem.
drive_mount_point = '/content/drive/'
drive.mount(drive_mount_point)

Extract all data features, such as pitch, text features, mel features, etc.

In [None]:
# Run feature extraction over the dataset stored on the mounted Google Drive,
# using the svc_hubert_soft configuration.
# The --path value is relative to the current working directory (the cloned repository).
# The steps are chained with && so extraction never starts if activation failed.
!source /content/env/bin/activate && \
conda activate fish_diffusion && \
python tools/preprocessing/extract_features.py --config configs/svc_hubert_soft.py --path ../drive/MyDrive/test-fish-audio/dataset/  --clean

### Baseline training

In [None]:
# Start baseline training with the svc_hubert_soft configuration.
# The steps are chained with && so training never starts under the wrong
# Python interpreter when environment activation failed.
!source /content/env/bin/activate && \
conda activate fish_diffusion && \
python tools/diffusion/train.py --config configs/svc_hubert_soft.py