# Building Your First Multimodal GenAI App

![Alt text](img/multimodal_app_1.png)

In this notebook, we'll explore a variety of SOTA GenAI models and get a sense of how to stitch them together!

## Load our API keys into the notebook

In [1]:
import getpass

# Prompt for the OpenAI API key (getpass reads it without echoing the input)
openai_api_key = getpass.getpass("Please enter your OpenAI API key: ")

# Prompt for the Replicate API key
replicate_api_key = getpass.getpass("Please enter your Replicate API key: ")
print("Replicate API key captured successfully!")

# Prompt for the Groq API key
groq_api_key = getpass.getpass("Please enter your Groq API key: ")
print("Groq API key captured successfully!")

Replicate API key captured successfully!
Groq API key captured successfully!


## Suno Bark: text to audio

First up, we'll experiment with the [Suno Bark](https://github.com/suno-ai/bark) text-to-audio model:

In [None]:
import replicate

# Authenticated Replicate client; later cells reuse this `client` object
client = replicate.Client(api_token=replicate_api_key)

# Inputs sent to the Bark model: the text to speak plus its generation settings
input_params = dict(
    prompt="Hello, my name is Hugo. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
    text_temp=0.7,
    output_full=False,
    waveform_temp=0.7,
    history_prompt="announcer",
)

# Call the hosted model; any failure is printed rather than raised so the
# notebook keeps running
try:
    output = client.run(
        "suno-ai/bark:b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787",
        input=input_params,
    )
    print(output)
except Exception as err:
    print(f"Error: {err}")

### LLM output --> Suno Bark

But what if we want to pipe the output of an LLM into Bark?

In [None]:
from openai import OpenAI
import os


def get_llm_response(user_input, model="gpt-3.5-turbo-0613"):
    """Send one user message to an OpenAI chat model and return the reply text.

    Args:
        user_input: Text sent as the only ``user`` message of the conversation.
        model: Name of the chat model to query. The default is the snapshot
            this notebook was originally written against.
            NOTE(review): dated snapshots get retired over time -- confirm
            this one is still served, or pass a current model name.

    Returns:
        The ``content`` of the first choice in the chat completion.
    """
    # Authenticates with the notebook-level `openai_api_key` captured via getpass.
    llm_client = OpenAI(api_key=openai_api_key)

    response = llm_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": user_input}],
    )
    return response.choices[0].message.content

In [None]:
# Ask the LLM for some lyrics and show them; the next cell feeds `song` to Bark
song = get_llm_response("a short pirates sea shanty")
print(song)

In [None]:
# Define the input parameters for the model
input_params = {
    "prompt": song,
    "text_temp": 0.7,
    "output_full": False,
    "waveform_temp": 0.7,
    "history_prompt": "announcer",
   # "duration": 30
}

# Run the model using Replicate API
try:
    output = client.run(
        "suno-ai/bark:b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787",
        input=input_params
    )
    print(output)
except Exception as e:
    print(f"Error: {e}")

The audio Bark generated for the shanty above is totally bent and makes no sense, lolz.

## Text to music with Meta MusicGen

In [None]:
# MusicGen request parameters. Named `input_params` (as in the Bark cells)
# instead of `input`, so this cell does not rebind the builtin `input()`.
input_params = {
    "prompt": "Horns and Drums. Edo25 major g melodies that sound triumphant and cinematic. Leading up to a crescendo that resolves in a 9th harmonic",
    "model_version": "stereo-large",
    "output_format": "mp3",
    "normalization_strategy": "peak"
}

# Run the hosted model with the shared Replicate `client` and show the result
output = client.run(
    "meta/musicgen:671ac645ce5e552cc63a54a2bbff63fcf798043055d2dac5fc9e36a837eedcfb",
    input=input_params
)
print(output)
#=> "https://replicate.delivery/pbxt/OeLYIQiltdzMaCex1shlEFy6...

In [None]:
# MusicGen request parameters. Named `input_params` (as in the Bark cells)
# instead of `input`, so this cell does not rebind the builtin `input()`.
input_params = {
    "prompt": "Ancient Trip Hop with Throat Singing",
    "model_version": "stereo-large",
    "output_format": "mp3",
    "normalization_strategy": "peak",
    "duration": 30
}

# Run the hosted model with the shared Replicate `client` and show the result
output = client.run(
    "meta/musicgen:671ac645ce5e552cc63a54a2bbff63fcf798043055d2dac5fc9e36a837eedcfb",
    input=input_params
)
print(output)
#=> "https://replicate.delivery/pbxt/OeLYIQiltdzMaCex1shlEFy6...

## Text to music with Riffusion

In [None]:
# Run Riffusion with two text prompts (`prompt_a`, `prompt_b`) through the
# shared Replicate `client`, then show whatever the model returns
output = client.run(
    "riffusion/riffusion:8cf61ea6c56afd61d8f5b9ffd14d7c216c0a93844ce2d82ac1c9ecc9c7f24e05",
    input=dict(
        alpha=0.5,
        prompt_a="West African Desert Blues",
        prompt_b="Throat Singing",
        denoising=0.75,
        seed_image_id="vibes",
        num_inference_steps=50,
    ),
)
print(output)

___

## Experiment: One prompt to many models

In [None]:
# Single prompt reused by the text-to-image, text-to-audio and text-to-music cells below
message = "The Waffle House is really messing shit up with pancakes and bacon tonight HOLY MOLEY and there's anarchist jazz also!"

### text to image

In [None]:
# Image model request parameters. Named `input_params` (as in the Bark cells)
# instead of `input`, so this cell does not rebind the builtin `input()`.
input_params = {
    "prompt": message
}

# Run the hosted model with the shared Replicate `client` and show the result
output = client.run(
    "fofr/epicrealismxl-lightning-hades:0ca10b1fd361c1c5568720736411eaa89d9684415eb61fd36875b4d3c20f605a",
    input=input_params
)
print(output)
#=> ["https://replicate.delivery/pbxt/ulYZRIyAUDYpOZfl7OjhrKx...

### text to audio

In [None]:
# Define the input parameters for the model
input_params = {
    "prompt": message,
    "text_temp": 0.7,
    "output_full": False,
    "waveform_temp": 0.7,
    "history_prompt": "announcer",
   # "duration": 30
}

# Run the model using Replicate API
try:
    output = client.run(
        "suno-ai/bark:b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787",
        input=input_params
    )
    print(output)
except Exception as e:
    print(f"Error: {e}")

### text to music

In [None]:
# MusicGen request parameters. Named `input_params` (as in the Bark cells)
# instead of `input`, so this cell does not rebind the builtin `input()`.
input_params = {
    "prompt": message,
    "model_version": "stereo-large",
    "output_format": "mp3",
    "normalization_strategy": "peak",
    "duration": 30
}

# Run the hosted model with the shared Replicate `client` and show the result
output = client.run(
    "meta/musicgen:671ac645ce5e552cc63a54a2bbff63fcf798043055d2dac5fc9e36a837eedcfb",
    input=input_params
)
print(output)
#=> "https://replicate.delivery/pbxt/OeLYIQiltdzMaCex1shlEFy6...

## Many models at once

In [None]:
import replicate

def generate_epic_realism(prompt, api_token):
    """Run the epicrealismxl-lightning-hades image model on Replicate.

    Args:
        prompt: Text passed as the model's ``prompt`` input.
        api_token: Replicate API token used to authenticate this call.

    Returns:
        Whatever ``replicate.Client.run`` returns for the model.
    """
    # Authenticate with the token the caller passed in (previously the
    # parameter was ignored in favour of the notebook-level global).
    client = replicate.Client(api_token=api_token)

    return client.run(
        "fofr/epicrealismxl-lightning-hades:0ca10b1fd361c1c5568720736411eaa89d9684415eb61fd36875b4d3c20f605a",
        input={"prompt": prompt},
    )

# Example usage: generate an image for a sample prompt, authenticating with
# the Replicate key captured at the top of the notebook
message = "crazy wild zombie party at the blaring symphony orchestra"
output = generate_epic_realism(message, replicate_api_key)
print(output)


def generate_suno_bark(prompt, api_token, text_temp=0.7, output_full=False, waveform_temp=0.7, history_prompt="announcer"):
    """Run the Suno Bark text-to-audio model on Replicate.

    Args:
        prompt: Text passed as the model's ``prompt`` input.
        api_token: Replicate API token used to authenticate this call.
        text_temp: Forwarded as the model's ``text_temp`` input.
        output_full: Forwarded as the model's ``output_full`` input.
        waveform_temp: Forwarded as the model's ``waveform_temp`` input.
        history_prompt: Forwarded as the model's ``history_prompt`` input
            (for example ``"announcer"`` or ``"zh_speaker_7"``).

    Returns:
        Whatever ``replicate.Client.run`` returns for the model, or ``None``
        if the call raised (the error is printed, not re-raised).
    """
    # Authenticate with the token the caller passed in (previously the
    # parameter was ignored in favour of the notebook-level global).
    client = replicate.Client(api_token=api_token)

    input_params = {
        "prompt": prompt,
        "text_temp": text_temp,
        "output_full": output_full,
        "waveform_temp": waveform_temp,
        # Honour the caller's choice instead of a hard-coded voice preset
        "history_prompt": history_prompt,
    }

    # Best-effort call: report the failure and return None so the notebook
    # keeps running
    try:
        return client.run(
            "suno-ai/bark:b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787",
            input=input_params,
        )
    except Exception as e:
        print(f"Error: {e}")
        return None

# Example usage: have Bark speak the `message` defined earlier in this cell,
# authenticating with the Replicate key captured at the top of the notebook
output = generate_suno_bark(message, replicate_api_key)
print(output)

import replicate

def generate_music_gen(prompt, api_token, duration=30, model_version="stereo-large", output_format="mp3", normalization_strategy="peak"):
    """Run the Meta MusicGen text-to-music model on Replicate.

    Args:
        prompt: Text passed as the model's ``prompt`` input.
        api_token: Replicate API token used to authenticate this call.
        duration: Forwarded as the model's ``duration`` input.
        model_version: Forwarded as the model's ``model_version`` input.
        output_format: Forwarded as the model's ``output_format`` input.
        normalization_strategy: Forwarded as the model's
            ``normalization_strategy`` input.

    Returns:
        Whatever ``replicate.Client.run`` returns for the model.
    """
    # Authenticate with the token the caller passed in (previously the
    # parameter was ignored in favour of the notebook-level global).
    client = replicate.Client(api_token=api_token)

    input_data = {
        "prompt": prompt,
        "model_version": model_version,
        "output_format": output_format,
        "normalization_strategy": normalization_strategy,
        "duration": duration,
    }

    return client.run(
        "meta/musicgen:671ac645ce5e552cc63a54a2bbff63fcf798043055d2dac5fc9e36a837eedcfb",
        input=input_data,
    )

# Example usage: turn the `message` defined earlier in this cell into music,
# authenticating with the Replicate key captured at the top of the notebook
output = generate_music_gen(message, replicate_api_key)
print(output)



In [None]:
def generate_suno_bark(prompt, api_token, text_temp=0.7, output_full=False, waveform_temp=0.7, history_prompt="announcer"):
    """Run the Suno Bark text-to-audio model on Replicate.

    Args:
        prompt: Text passed as the model's ``prompt`` input.
        api_token: Replicate API token used to authenticate this call.
        text_temp: Forwarded as the model's ``text_temp`` input.
        output_full: Forwarded as the model's ``output_full`` input.
        waveform_temp: Forwarded as the model's ``waveform_temp`` input.
        history_prompt: Forwarded as the model's ``history_prompt`` input;
            defaults to ``"announcer"``.

    Returns:
        Whatever ``replicate.Client.run`` returns for the model, or ``None``
        if the call raised (the error is printed, not re-raised).
    """
    # Authenticate with the token the caller passed in (previously the
    # parameter was ignored in favour of the notebook-level global).
    client = replicate.Client(api_token=api_token)

    input_params = {
        "prompt": prompt,
        "text_temp": text_temp,
        "output_full": output_full,
        "waveform_temp": waveform_temp,
        # Honour the caller's choice instead of a hard-coded voice preset
        "history_prompt": history_prompt,
    }

    # Best-effort call: report the failure and return None so the notebook
    # keeps running
    try:
        return client.run(
            "suno-ai/bark:b76242b40d67c76ab6742e987628a2a9ac019e11d56ab96c4e91ce03b79b2787",
            input=input_params,
        )
    except Exception as e:
        print(f"Error: {e}")
        return None

# Example usage: have Bark speak `message`, which must already have been
# defined by an earlier cell; authenticates with the Replicate key captured
# at the top of the notebook
output = generate_suno_bark(message, replicate_api_key)
print(output)

In [None]:
# One prompt, sent to each of the three helper functions in turn
message = "The Waffle House messing it up for real with the pancakes and bacon and punk abstract jazz, yo!"

# Image
epicrealism_output = generate_epic_realism(message, replicate_api_key)
print("Epic Realism Output:", epicrealism_output, sep="\n")

# Music
musicgen_output = generate_music_gen(message, replicate_api_key)
print("Meta MusicGen Output:", musicgen_output, sep="\n")

# Speech
bark_output = generate_suno_bark(message, replicate_api_key)
print("Suno Bark Output:", bark_output, sep="\n")



Now let's define a little LLM utility function to help us out:

In [None]:
def get_llm_response(user_input, model="gpt-3.5-turbo-0613"):
    """Send one user message to an OpenAI chat model and return the reply text.

    Args:
        user_input: Text sent as the only ``user`` message of the conversation.
        model: Name of the chat model to query. The default is the snapshot
            this notebook was originally written against.
            NOTE(review): dated snapshots get retired over time -- confirm
            this one is still served, or pass a current model name.

    Returns:
        The ``content`` of the first choice in the chat completion.
    """
    # Authenticates with the notebook-level `openai_api_key` captured via getpass.
    llm_client = OpenAI(api_key=openai_api_key)

    response = llm_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": user_input}],
    )
    return response.choices[0].message.content

In [None]:
# Ask the LLM for lyrics and show them
message = get_llm_response("short pirates sea shanty about mermaids")
print(message)

# Hand the lyrics to Bark and show what it returns
bark_output = generate_suno_bark(message, replicate_api_key)
print("Suno Bark Output:", bark_output, sep="\n")

### Experiment: text to video

In [None]:
import replicate

message = "The Double RR diner messing it up for real with the pancakes and bacon and punk abstract jazz, yo!"

# Deforum request parameters. Named `input_params` (as in the Bark cells)
# instead of `input`, so this cell does not rebind the builtin `input()`.
input_params = {
    "sampler": "klms",
    "max_frames": 100,
    "animation_prompts": message
}

# Run the hosted model with the shared Replicate `client` and show the result
output = client.run(
    "deforum/deforum_stable_diffusion:e22e77495f2fb83c34d5fae2ad8ab63c0a87b6b573b6208e1535b23b89ea66d6",
    input=input_params
)
print(output)
#=> "https://replicate.delivery/mgxm/873a1cc7-0427-4e8d-ab3c-...

In [None]:
# Short prompt containing bracketed cues ([laughs], [thunder]) for Bark
message = "giddy up!!! [laughs] time for the thunderdome! [thunder]"

bark_output = generate_suno_bark(message, replicate_api_key)
print("Suno Bark Output:", bark_output, sep="\n")

## Groq tests


In [None]:
! pip install groq

In [None]:
import os

from groq import Groq

# Create a Groq client from the key captured at the top of the notebook.
# NOTE(review): this rebinds the notebook-level name `client`, which earlier
# cells bound to the Replicate client. Re-running an earlier `client.run(...)`
# cell after this one will hit the Groq client instead. Giving this client its
# own name (e.g. `groq_client`) would avoid that, but the next cell must be
# renamed in the same change.
client = Groq(
    api_key=groq_api_key)

# Send a single user message to the chosen model
chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Explain the importance of fast language models",
        }
    ],
    model="llama3-8b-8192",
)

# Show the text of the first returned choice
print(chat_completion.choices[0].message.content)

In [None]:
# Same request shape as the previous cell, aimed at a different model;
# relies on `client` being the Groq client created there
chat_completion = client.chat.completions.create(
    model="mixtral-8x7b-32768",
    messages=[{"role": "user", "content": "which model are you"}],
)

print(chat_completion.choices[0].message.content)