# Exercises: audio classification with a pipeline

In [None]:
from datasets import Audio, load_dataset

# Load the "en-AU" training split of PolyAI/minds14 and, in the same
# expression, cast its "audio" column to an Audio feature with a
# sampling rate of 16,000.
minds = load_dataset("PolyAI/minds14", name="en-AU", split="train").cast_column(
    "audio", Audio(sampling_rate=16_000)
)

In [None]:
from transformers import pipeline

# Build an audio-classification pipeline around an explicitly named checkpoint.
classifier = pipeline(
    task="audio-classification", model="anton-l/xtreme_s_xlsr_300m_minds14"
)

In [None]:
# Take the first record of the dataset as the running example.
example = minds[0]

In [None]:
# Run the classifier on the example's audio array. As the last expression in
# the cell, the result is what the notebook displays.
classifier(example["audio"]["array"])

In [None]:
# Look up the "intent_class" feature and keep its int2str method as id2label,
# then apply it to the example's stored intent id so the result can be
# compared with the classifier output.
intent_feature = minds.features["intent_class"]
id2label = intent_feature.int2str
id2label(example["intent_class"])

## Automatic speech recognition with a pipeline

In [None]:
from transformers import pipeline

# Speech-recognition pipeline. No `model=` is passed here, so the checkpoint
# that gets loaded is whatever transformers uses as its default for this task.
asr = pipeline("automatic-speech-recognition")

In [None]:
# Transcribe the audio array of the first record in `minds`.
example = minds[0]
asr(example["audio"]["array"])

In [None]:
# Show the "english_transcription" field stored with the example, for
# comparison with the pipeline output.
example["english_transcription"]

In [None]:
from datasets import Audio, load_dataset

# Rebind `minds` to the "de-DE" training split of PolyAI/minds14, casting its
# "audio" column to an Audio feature with a sampling rate of 16,000 in the
# same expression.
minds = load_dataset("PolyAI/minds14", name="de-DE", split="train").cast_column(
    "audio", Audio(sampling_rate=16_000)
)

In [None]:
# Take the first record of the current `minds` dataset and show its stored
# "transcription" field.
example = minds[0]
example["transcription"]

In [None]:
from transformers import pipeline

# Rebuild `asr` with an explicitly named checkpoint (its name suggests a German
# wav2vec2 model), then transcribe the current example's audio array.
asr = pipeline(
    task="automatic-speech-recognition",
    model="maxidl/wav2vec2-large-xlsr-german",
)
asr(example["audio"]["array"])

## Audio generation with a pipeline

In [None]:
from transformers import pipeline

# Text-to-speech pipeline using the "suno/bark-small" checkpoint.
pipe = pipeline("text-to-speech", model="suno/bark-small")

In [None]:
# Synthesize speech for an English sentence. Later cells read the "audio" and
# "sampling_rate" entries of `output`.
text = "Ladybugs have had important roles in culture and religion, being associated with luck, love, fertility and prophecy. "
output = pipe(text)

In [None]:
# NOTE: this import rebinds the name `Audio`, which earlier cells imported from
# `datasets`. From this cell on, `Audio` refers to the IPython display class.
from IPython.display import Audio

# Display the generated audio, using the pipeline's reported sampling rate as
# the playback rate.
Audio(output["audio"], rate=output["sampling_rate"])

In [None]:
# Feed a French sentence to the same text-to-speech pipeline and display the
# result. `Audio` here is the IPython.display class imported in an earlier cell.
fr_text = "Contrairement à une idée répandue, le nombre de points sur les élytres d'une coccinelle ne correspond pas à son âge, ni en nombre d'années, ni en nombre de mois. "
output = pipe(fr_text)
Audio(output["audio"], rate=output["sampling_rate"])

In [None]:
# Same pipeline with a lyric wrapped in ♪ markers.
# NOTE(review): the ♪ markers presumably prompt the model to sing rather than
# speak — confirm against the Bark model card.
song = "♪ In the jungle, the mighty jungle, the ladybug was seen. ♪ "
output = pipe(song)
Audio(output["audio"], rate=output["sampling_rate"])

In [None]:
# Text-to-audio pipeline using the "facebook/musicgen-small" checkpoint.
# `pipeline` is the transformers function imported in an earlier cell.
music_pipe = pipeline("text-to-audio", model="facebook/musicgen-small")

In [None]:
# Prompt for the music pipeline. This rebinds `text`, which previously held the
# English sentence used for speech synthesis.
text = "90s rock song with electric guitar and heavy drums"

In [None]:
# Extra arguments handed to the pipeline via its `forward_params` keyword.
# NOTE(review): max_new_tokens presumably caps the length of the generated
# audio — confirm against the transformers generation docs.
forward_params = {"max_new_tokens": 512}

output = music_pipe(text, forward_params=forward_params)
# Unlike the speech cells, the "audio" entry is indexed at 0 before playback.
# `Audio` is the IPython.display class imported in an earlier cell.
Audio(output["audio"][0], rate=output["sampling_rate"])