<a href="https://colab.research.google.com/github/graylan0/quantum-machine-learning/blob/main/singing.bark.multi.threaded.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html -U
!pip install git+https://github.com/suno-ai/bark.git
!pip install openai
!pip install scipy

Looking in links: https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
Collecting git+https://github.com/suno-ai/bark.git
  Cloning https://github.com/suno-ai/bark.git to /tmp/pip-req-build-5h8hgc92
  Running command git clone --filter=blob:none --quiet https://github.com/suno-ai/bark.git /tmp/pip-req-build-5h8hgc92
  Resolved https://github.com/suno-ai/bark.git to commit 42d579ddcd2089be7791ba65f22abd99e0ce63a5
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting boto3 (from suno-bark==0.0.1a0)
  Downloading boto3-1.28.47-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting encodec (from suno-bark==0.0.1a0)
  Downloading encodec-0.1.1.tar.gz (3.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [11]:
!pip install scipy
!pip install sounddevice
!pip install fastapi uvicorn

Collecting fastapi
  Downloading fastapi-0.103.1-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn
  Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.5/59.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting starlette<0.28.0,>=0.27.0 (from fastapi)
  Downloading starlette-0.27.0-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, uvicorn, starlette, fastapi
Successfully installed fastapi-0.103.1 h11-0.14.0 starlette-0.27.0 uvicorn-0.23.2


In [10]:
import os

# Bark's README sets SUNO_USE_SMALL_MODELS *before* importing bark, because the
# flag is picked up when the package is loaded. Setting it after the import
# (or after preload_models()) leaves the full-size models in use, so it must
# come first. If bark was already imported in this kernel, restart the kernel
# for the flag to take effect.
os.environ["SUNO_USE_SMALL_MODELS"] = "True"

from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from IPython.display import Audio
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import uuid

# Preload models (downloads/caches the checkpoints so later calls start fast)
preload_models()

def generate_audio_for_sentence(sentence):
    """Render a single text prompt to audio.

    Thin wrapper around Bark's ``generate_audio`` so the call can be handed to
    an executor as a plain one-argument function.

    Args:
        sentence: The text prompt to synthesize.

    Returns:
        Whatever ``generate_audio`` returns for ``sentence``.
    """
    waveform = generate_audio(sentence)
    return waveform

def generate_response(song_elements, num_threads=4):
    """Synthesize a sequence of song elements, save it as a WAV and return a player.

    Each element is a dict. Elements that carry a ``'lyrics'`` or ``'music'``
    key are rendered through ``generate_audio_for_sentence`` (``'lyrics'`` wins
    when both are present); every other element is skipped. An optional
    ``'pause'`` key appends that many seconds of silence after the element.

    Args:
        song_elements: Iterable of element dicts as described above.
        num_threads: Maximum number of worker threads used for generation.

    Returns:
        An ``IPython.display.Audio`` object wrapping the concatenated waveform
        at ``SAMPLE_RATE``.

    Raises:
        ValueError: If no element contains ``'lyrics'`` or ``'music'``, i.e.
            there is nothing to render.
    """
    # Keep only the elements that have text to synthesize, preserving order.
    renderable = [
        element for element in song_elements
        if 'lyrics' in element or 'music' in element
    ]
    if not renderable:
        # Fail early with a clear message instead of letting np.concatenate
        # complain about an empty list further down.
        raise ValueError(
            "song_elements contains no 'lyrics' or 'music' entries to render"
        )

    pieces = []
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Submit everything up front so the workers can run concurrently.
        futures = [
            executor.submit(
                generate_audio_for_sentence,
                element.get('lyrics', element.get('music')),
            )
            for element in renderable
        ]

        # Collect in submission order so the song is assembled in sequence,
        # regardless of which worker finishes first.
        for element, future in zip(renderable, futures):
            pieces.append(future.result())
            if 'pause' in element:
                pieces.append(np.zeros(int(element['pause'] * SAMPLE_RATE)))

    audio = np.concatenate(pieces)

    # Assumes the generated waveform is floating point in roughly [-1, 1], as
    # in Bark's README example — TODO confirm. A bare astype(np.int16) would
    # truncate such samples to (almost) all zeros and write a silent file, so
    # clip and scale to the 16-bit PCM range before casting.
    pcm16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    file_name = str(uuid.uuid4()) + ".wav"
    write_wav(file_name, SAMPLE_RATE, pcm16)
    print(f"Audio generation completed and saved to {file_name}")
    return Audio(audio, rate=SAMPLE_RATE)

# Script for the song, in playback order. Each dict is one element:
#   'lyrics' / 'music' -> text prompt passed verbatim to the generator
#   'pause'            -> seconds of silence appended after the element
# generate_response only renders elements that have a 'lyrics' or 'music' key,
# so the final 'end' element below is currently skipped.
# NOTE(review): "musich" looks like a typo for "music" — confirm before
# changing it, since the prompt text is sent to the model as-is.
song_elements = [
    {'music': "Soft ukulele strums, tempo 120", 'pause': 2},
    {'lyrics': "Love on the island, sun in the sky, feeling so high", 'pause': 1},
    {'lyrics': "The breeze whispers secrets, as we let the time fly", 'pause': 2},
    {'lyrics': "Dance, dance, under the moonlight, let's make this a beautiful night", 'pause': 1},
    {'music': "musich", 'pause': 1},
    {'lyrics': "Waves are crashing, hearts are smashing, yet we're so free", 'pause': 2},
    {'lyrics': "The stars are our spotlight, as we carve our love on a palm tree", 'pause': 1},
    {'music': "music", 'pause': 1},
    {'lyrics': "But we're still here, love's so clear, like the ocean so deep", 'pause': 2},
    {'lyrics': "Dance, dance, under the moonlight, let's make memories to keep", 'pause': 1},
    {'lyrics': "And so we say Aloha, as the sun greets the dawn", 'pause': 2},
    {'end': "Soft ukulele fade-out, tempo slows to 90"}
]

# Generate the whole song with the default thread count; this also writes a
# uniquely named .wav file to the working directory.
audio_output = generate_response(song_elements)
audio_output  # Last expression of the cell, so the notebook renders the returned Audio player



100%|██████████| 691/691 [00:09<00:00, 74.64it/s]
100%|██████████| 35/35 [00:32<00:00,  1.09it/s]
100%|██████████| 686/686 [00:08<00:00, 78.21it/s]
100%|██████████| 35/35 [00:31<00:00,  1.12it/s]
100%|██████████| 684/684 [00:09<00:00, 71.71it/s]
100%|██████████| 35/35 [00:31<00:00,  1.10it/s]
100%|██████████| 602/602 [00:06<00:00, 89.89it/s]
100%|██████████| 31/31 [00:28<00:00,  1.08it/s]
100%|██████████| 524/524 [00:06<00:00, 82.08it/s]
100%|██████████| 27/27 [00:24<00:00,  1.11it/s]
100%|██████████| 680/680 [00:08<00:00, 76.60it/s]
100%|██████████| 35/35 [00:31<00:00,  1.13it/s]
100%|██████████| 641/641 [00:08<00:00, 77.00it/s]
100%|██████████| 33/33 [00:29<00:00,  1.13it/s]
100%|██████████| 150/150 [00:01<00:00, 99.49it/s]
100%|██████████| 8/8 [00:06<00:00,  1.26it/s]
100%|██████████| 590/590 [00:06<00:00, 90.42it/s]
100%|██████████| 30/30 [00:27<00:00,  1.09it/s]
100%|██████████| 196/196 [00:03<00:00, 63.83it/s]
100%|██████████| 10/10 [00:07<00:00,  1.34it/s]
100%|██████████| 711/7

Audio generation completed and saved to ebcb0429-9e40-4d82-a279-4bf3026a5139.wav
