<a href="https://colab.research.google.com/github/graylan0/quantum-machine-learning/blob/main/Suno_BarkAI_TheBlokeLlama2_Emotional_Inference_Enabled_TTS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Bark (the text-to-speech package imported below as `bark`)
# directly from its GitHub repository.
!pip install git+https://github.com/suno-ai/bark.git

In [None]:
# Build llama-cpp-python from source with cuBLAS enabled:
# CMAKE_ARGS turns on LLAMA_CUBLAS and FORCE_CMAKE=1 forces a CMake build.
# NOTE(review): the 0.1.78 pin presumably exists because the model file loaded
# below is in the GGML v3 (.bin) format -- confirm before upgrading.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78

In [None]:
import llama_cpp
from bark import SAMPLE_RATE, generate_audio
import numpy as np
from scipy.io.wavfile import write as write_wav
import uuid

def llama2_generate_text(
    prompt,
    *,
    model_path="llama-2-7b-chat.ggmlv3.q8_0.bin",
    max_tokens=700,
    n_ctx=3900,
    n_gpu_layers=-1,
):
    """Run a single completion with a freshly loaded Llama 2 model.

    The model is loaded on every call and the reference to it is dropped
    before returning.

    Args:
        prompt: Text passed to the model as the completion prompt.
        model_path: Path of the model file to load. Defaults to the
            Llama-2-7B-chat q8_0 file expected in the working directory.
        max_tokens: Upper bound on the number of tokens to generate.
        n_ctx: Context window size passed to ``llama_cpp.Llama``.
        n_gpu_layers: Number of layers to offload to the GPU, passed through
            unchanged. NOTE(review): -1 is presumably "offload everything";
            confirm for the pinned llama-cpp-python version.

    Returns:
        The ``text`` field of the first entry in the response's ``choices``.
    """
    llm = llama_cpp.Llama(
        model_path=model_path,
        n_gpu_layers=n_gpu_layers,
        n_ctx=n_ctx,
    )

    response = llm(prompt, max_tokens=max_tokens)

    # Drop the only reference before returning; the apparent intent is to let
    # the model's memory be reclaimed as early as possible.
    del llm

    return response['choices'][0]['text']

# Text that separates a segment from its emotion label in each output line.
_EMOTION_SEPARATOR = ' - Emotion: '

# Lower-case emotion label -> tag appended to the text handed to Bark.
# NOTE(review): these tags do not appear among the non-speech tokens listed in
# Bark's README -- confirm they actually influence the generated voice.
_EMOTION_TAGS = {
    "happy": "[joyful]",
    "sad": "[sad]",
    "angry": "[angry]",
    "calm": "[calm]",
    "excited": "[excited]",
}
_DEFAULT_EMOTION_TAG = "[neutral]"

# Seconds of silence inserted after every spoken segment.
_SEGMENT_PAUSE_SECONDS = 0.75


def _emotion_to_tag(emotion):
    """Map an emotion label to its tag, ignoring case and stray punctuation."""
    label = emotion.strip().strip(".!,;:'\"").lower()
    return _EMOTION_TAGS.get(label, _DEFAULT_EMOTION_TAG)


def _parse_segment_line(line):
    """Split a 'Segment: <text> - Emotion: <label>' line, or return None."""
    parts = line.split(_EMOTION_SEPARATOR)
    if len(parts) != 2:
        return None
    segment, emotion = parts
    return segment.replace('Segment: ', '').strip(), emotion


def generate_response(message):
    """Turn ``message`` into speech and save it as a WAV file.

    The message is sent to the Llama 2 model, which is asked to split it into
    segments and label each with an emotion. Every output line of the form
    ``Segment: <text> - Emotion: <label>`` is synthesised with Bark, followed
    by a short pause; the pieces are concatenated and written to a randomly
    named ``.wav`` file in the current directory.

    Lines that do not match the expected form, or for which audio generation
    raises ``ValueError``, are reported on stdout and skipped.

    Args:
        message: The text to analyse and speak.

    Returns:
        The name of the WAV file written, or ``None`` when no segment could
        be synthesised (in which case no file is written).
    """
    # The last instruction pins the output layout that the parsing below
    # relies on; without it the model has no reason to emit matching lines.
    prompt = f"""
    1. You are a text-to-speech model called Bark.
    2. Analyze the following text: '{message}'
    3. Infer emotions from the text.
    4. Split the text into meaningful segments.
    5. Generate responses for each segment.
    6. Include emotional tags in the responses.
    7. Re-employ the Llama2 model to double-check the analysis.
    8. Output one line per segment, formatted exactly as: Segment: <segment text> - Emotion: <happy, sad, angry, calm, excited or neutral>
    """

    analysis_lines = llama2_generate_text(prompt).split('\n')

    pause_samples = int(_SEGMENT_PAUSE_SECONDS * SAMPLE_RATE)
    pieces = []
    for line in analysis_lines:
        if not line.strip():
            # Blank lines carry no segment; skip them without reporting.
            continue

        parsed = _parse_segment_line(line)
        if parsed is None:
            print(f"Error processing segment: {line}")
            continue
        segment, emotion = parsed

        try:
            audio_array = np.asarray(
                generate_audio(
                    segment + _emotion_to_tag(emotion),
                    history_prompt="v2/en_speaker_6",
                )
            )
        except ValueError:
            # Best effort: one failing segment should not abort the others.
            print(f"Error processing segment: {line}")
            continue

        pieces.append(audio_array)
        # Match the speech dtype so concatenation does not upcast the audio.
        pieces.append(np.zeros(pause_samples, dtype=audio_array.dtype))

    if not pieces:
        # np.concatenate raises on an empty list; report the situation instead.
        print("No audio generated: the model output contained no usable segments.")
        return None

    audio = np.concatenate(pieces)

    # A random name avoids overwriting the output of earlier calls.
    wav_file_name = str(uuid.uuid4()) + ".wav"
    write_wav(wav_file_name, SAMPLE_RATE, audio)

    print(f"Audio file generated: {wav_file_name}")
    return wav_file_name