In [1]:
import librosa
import pandas as pd
import os
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

In [2]:
# Load the pre-trained tokenizer and CTC model from a single checkpoint id so
# the two objects always come from the same checkpoint.
# NOTE(review): newer transformers releases appear to deprecate
# Wav2Vec2Tokenizer for raw-audio input in favour of Wav2Vec2Processor --
# confirm against the installed version before upgrading.
MODEL_NAME = "facebook/wav2vec2-base-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

In [6]:
# Transcribe a numbered run of audio chunks with the wav2vec2 model loaded above.
def transcript(x, y, path_template="../audio/lhl_wef/lhl_wef-%d.flac", sr=16000):
    """Transcribe audio chunks ``x`` through ``y`` (both inclusive).

    Each chunk is read with ``librosa.load``, passed through the module-level
    ``tokenizer`` and ``model``, and decoded greedily (arg-max over the last
    logits dimension followed by ``tokenizer.decode``).

    Parameters
    ----------
    x, y : int
        First and last chunk numbers. An empty dict is returned when
        ``x > y``.
    path_template : str, optional
        ``%``-style template with one integer placeholder that expands to the
        path of chunk ``i``. Defaults to the original hard-coded location.
    sr : int, optional
        Sample rate handed to ``librosa.load`` (default 16000).

    Returns
    -------
    dict[int, str]
        Mapping of chunk number to decoded text, in ascending chunk order.
    """
    transcribe = {}
    for i in range(x, y + 1):
        # Only the decoded text is returned, so per-chunk intermediates are
        # plain locals that can be freed on the next iteration instead of
        # being accumulated in dicts for the whole range.
        speech, _ = librosa.load(path_template % i, sr=sr)
        input_values = tokenizer(speech, return_tensors="pt").input_values
        # Inference only: skip autograd bookkeeping so no graph is attached
        # to the logits.
        with torch.no_grad():
            logits = model(input_values).logits
            predicted_ids = torch.argmax(logits, dim=-1)
        transcribe[i] = tokenizer.decode(predicted_ids[0])
    return transcribe

In [7]:
%%time
# Transcribe the 75 chunks in three batches of 25. Each batch becomes a
# one-column frame indexed by chunk number; the generator is consumed in
# order, so the batches are still processed 1-25, 26-50, 51-75.
transcripts1, transcripts2, transcripts3 = (
    pd.DataFrame.from_dict(transcript(first, last), orient="index").rename(
        columns={0: "Transcribed_Text"}
    )
    for first, last in ((1, 25), (26, 50), (51, 75))
)

CPU times: user 7min 4s, sys: 35.2 s, total: 7min 39s
Wall time: 2min 46s


In [8]:
# Stack the three batch frames row-wise, keeping chunk numbers as the index.
transcripts = pd.concat(
    [transcripts1, transcripts2, transcripts3],
    axis=0,
)

In [9]:
# Sanity check: show (rows, columns) of the combined frame; the three batches
# cover chunks 1-75, so one row per chunk and a single text column is expected.
transcripts.shape

(75, 1)

In [10]:
# Apply "".join to every entry of the text column, then write the result as a
# tab-separated text file without the index column.
lhl_wef = transcripts.loc[:, "Transcribed_Text"].apply("".join)
lhl_wef.to_csv(
    "../transcripts/lhl_wef.txt",
    sep="\t",
    index=False,
)

In [11]:
# Display the transcribed text of every chunk for a quick visual check.
transcripts["Transcribed_Text"].values

array(['I AM VERY HONORED TO SPEAK AT DISCLOSING ADDRESS AND I LEKE TO CONGRATULATE PROFESSOR SCHB YOURSELF AND THE WHOLE BU E F TM FOR PUTTING TOGETHER A SUCCE',
       "SSFUL PROGRAMME IT'S BEEN A YEAR SINCE WE WERE ALL PHYSICALLY GATHERED IN DAVORSE FOR THE FIFTIETH ANNUAL MEETING OVER THE DEBRUI F AT THAT T",
       'IME WE WERE JUST STARTING TO HEAR ABOUT THIS NEW VIRIS AND TRYING TO UNDERSTAND WHAT WAS HAPPENING NONE OF US ANTICIPATED HOW QUICKLY A FULL SCALE PA',
       'NDEMIC WOULD BLOW UP AND DRAMATICALLY CHANGE OUR WORLD THE DESRUPTION TO LIVES AND LIVELIHOODS HAS BEEN MASSIVE AND UNPRECEDENTED',
       'THE VIRUS IS STILL RAGING IN MANY COUNTRIES IN THE DEVELOPED WORL IN THE US AN EUROPE AND ALSO IN THE DEVELOPING WORLD IN AFRICA SOUTH AM',
       'ERICA AND SOUTHASIA THANKFULLY WITH THE BAXINES BECOMING AVAILABLE THERE IS SOME LIGHT AT THE END OF THE TUNNEL IT IS NOW',
       'CRITICAL THAT VAXINES ARE RULLED OUT QUICKLY ACROSS THE WALL BUT EVEN WITH VAXINES THE PANDEMIC I