In [None]:
from transformers import AutoProcessor, SeamlessM4TModel
import torchaudio
from IPython.display import Audio

# Load processor and model
processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")


In [2]:
def processAudio(audio_file_path):
  waveform, sample_rate = torchaudio.load(audio_file_path)

  # Ensure the audio is 16 kHz
  if sample_rate != 16000:
      resampler = torchaudio.transforms.Resample(sample_rate, 16000)
      waveform = resampler(waveform)
  return waveform, sample_rate

In [6]:
def generateS2TT(file:str):
# Load audio file
    waveform, sample_rate = processAudio(file)


    # Process audio
    audio_inputs = processor(audios=waveform.numpy(), return_tensors="pt")
    text_array_from_audio = model.generate(**audio_inputs, tgt_lang="eng", generate_speech=False)[0].cpu().numpy().squeeze()

    translated_text_from_audio = processor.decode(text_array_from_audio, skip_special_tokens=True)
    print(translated_text_from_audio)
    return translated_text_from_audio

def generateT2ST(text,src_lng="eng",tgt_lang="tel"):
    text_inputs = processor(text = text, src_lang=src_lng, return_tensors="pt")
    audio_array_from_text = model.generate(**text_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
    sample_rate = model.config.sampling_rate
    return Audio(audio_array_from_text, rate=sample_rate)

def generateT2TT(text,src_lng="eng",tgt_lang="tel"):
    text_inputs = processor(text = text, src_lang=src_lng, return_tensors="pt")
    output_tokens = model.generate(**text_inputs, tgt_lang=tgt_lang, generate_speech=False)
    translated_text_from_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
    return translated_text_from_text


In [4]:
tt=generateS2TT('A:\\personal\\test\\seamless\\namo1.mp3')

It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.


When was Narendra Modi born?


In [5]:
generateT2ST(tt)

In [9]:
generateT2TT("When was Narendra Modi born?")

'నరేంద్ర మోడీ ఎప్పుడు జన్మించారు?'