In [2]:
import whisper 
import tempfile
import os 
import warnings 

In [3]:
from moviepy.editor import VideoFileClip
def get_audio(paths):
    """Extract the audio track of each video into a WAV file in the temp directory.

    Args:
        paths: Iterable of video file paths.

    Returns:
        dict mapping each input video path to the path of the WAV file
        written for it (``<tempdir>/<video name without extension>.wav``).

    Raises:
        ValueError: If a video exposes no audio clip (``.audio`` is ``None``).
    """
    print(f"Video path = {paths}")
    temp_dir = tempfile.gettempdir()
    audio_paths = {}
    for path in paths:
        # splitext removes only the final extension, so "talk.part1.mp4" becomes
        # "talk.part1.wav" instead of colliding with every other "talk.*" input.
        filename = os.path.splitext(os.path.basename(path))[0]
        print(f"Extracting audio from file {os.path.basename(path)}........")
        output_path = os.path.join(temp_dir, f"{filename}.wav")
        video_clip = VideoFileClip(path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise ValueError(f"No audio track found in {path}")
            try:
                audio_clip.write_audiofile(output_path, codec='pcm_s16le', bitrate='16k')
            finally:
                audio_clip.close()
        finally:
            # Always release the video clip, even when extraction fails.
            video_clip.close()
        audio_paths[path] = output_path
    return audio_paths


In [4]:
def write_transcript(audio_path,text_path,transcribe:callable):
    """Transcribe one audio file and write the transcript text to disk.

    Args:
        audio_path: Path of the audio file handed to ``transcribe``.
        text_path: Destination file for the transcript (UTF-8, overwritten).
        transcribe: Callable taking the audio path and returning a mapping
            that contains a ``'text'`` entry.

    Returns:
        The object returned by ``transcribe``.
    """
    # catch_warnings restores the caller's warning filters on exit (also when
    # transcribe raises) instead of permanently rewriting the global filter list.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        result = transcribe(audio_path)
    with open(text_path,'w',encoding='utf-8') as f:
        f.write(result['text'])
    return result

In [5]:
def get_transcript(audio_path:dict,output_text:bool,output_dir:str,transcribe:callable):
    """Transcribe every extracted audio file and save one text file per video.

    Args:
        audio_path: Mapping of video path -> extracted audio path.
        output_text: When true, transcripts are written into ``output_dir``;
            otherwise they go to the system temp directory.
        output_dir: Directory used for transcripts when ``output_text`` is true.
        transcribe: Callable forwarded to ``write_transcript``.

    Returns:
        The result for the last entry processed, or ``None`` when
        ``audio_path`` is empty.
    """
    text_dir = output_dir if output_text else tempfile.gettempdir()
    result = None
    for video_file, audio_file in audio_path.items():
        filename = os.path.splitext(os.path.basename(video_file))[0]
        # Build each path from the fixed directory; reusing the previous file's
        # path as the base would nest later transcripts under "<first>.txt/".
        text_path = os.path.join(text_dir, f"{filename}.txt")
        result = write_transcript(audio_file, text_path, transcribe)
    return result

In [6]:
def initiate_stt(video_path:str,model:str,output_dir:str,srt:bool,verbose:bool):
    """Run the speech-to-text pipeline: extract audio, transcribe, save text.

    Args:
        video_path: A single video path, or an iterable of video paths.
        model: Whisper model name passed to ``whisper.load_model``.
        output_dir: Directory created up front; receives the transcripts when
            ``srt`` is true.
        srt: Forwarded to ``get_transcript`` as its ``output_text`` flag.
        verbose: Forwarded to the model's ``transcribe`` call.

    Returns:
        Whatever ``get_transcript`` returns.
    """
    print(f"Video path = {video_path}")
    os.makedirs(output_dir,exist_ok=True)
    if model.endswith('.en'):
        print(f"{model} is English model")
    # A lone path must be wrapped in a list: iterating a bare string downstream
    # would treat each character as a separate video file.
    if isinstance(video_path, (str, os.PathLike)):
        video_paths = [video_path]
    else:
        video_paths = video_path
    # Keep the loaded model under its own name rather than rebinding `model`.
    stt_model = whisper.load_model(model)
    audio = get_audio(video_paths)

    def transcribe(audio_path):
        return stt_model.transcribe(audio_path,verbose=verbose,task='transcribe')

    transcript = get_transcript(audio,srt,output_dir,transcribe)
    return transcript

In [7]:
# Input videos for the pipeline; paths are relative to the working directory.
video_path = ["nptel.mp4"] 

In [10]:
# Run the speech-to-text pipeline on the input videos with the 'tiny' model,
# writing the transcript text into the 'transcript_text' directory.
transcript = initiate_stt(
    video_path=video_path,
    model='tiny',
    output_dir='transcript_text',
    srt=True,
    verbose=False,
)

Video path = ['nptel.mp4']


Video path = ['nptel.mp4']
Extracting audio from file nptel.mp4........
MoviePy - Writing audio in /tmp/nptel.wav


                                                                        

MoviePy - Done.
Detected language: English


100%|██████████| 107756/107756 [01:32<00:00, 1165.35frames/s]


In [11]:
# Show the transcribed text returned by the pipeline.
transcript['text'] 

" This is the course on electromagnetic theory and any course starts with coulombs law. So, we start with in this lecture, we are going to start with coulombs law, for forces between two charges. Then, we are going to see how this can be confirmed that this is really true experimental verification. Third, then we are going to solve some examples for forces between two charges. And finally, in this lecture, we are going to talk about the force between a point charge and a charge distribution. This is the program for this lecture. And based on what we covered today, I am also going to give you an assignment where you will solve three or four problems employing these concepts. So, what coulombs law says is that if there are two charges, let us take them to point charges right now. So, there is a charge q 1 and another charge q 2 separated by a distance r and for the future, because I am going to develop a notation also. Let us write this as r 1 2 that indicates the distance between charge

In [12]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer 

# Single source of truth for the summarization checkpoint, so the tokenizer
# and the model are always loaded from the same place.
SUMMARIZER_CHECKPOINT = "philschmid/bart-large-cnn-samsum"

tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_CHECKPOINT)


In [13]:
# Tokenize the transcript into PyTorch tensors for the summarizer. Truncation
# caps the input at 512 tokens, so transcript content beyond that is dropped.
inputs = tokenizer(
    transcript['text'],
    max_length=512,
    truncation=True,
    return_tensors='pt',
)

In [14]:
# Unpack the tokenizer output so generate() receives the attention mask along
# with the input ids, rather than having to infer the mask itself.
summary_ids = model.generate(**inputs, num_beams=2, max_length=1024)

In [15]:
# Decode the generated token ids back to text and keep the first sequence.
summary = tokenizer.batch_decode(
    summary_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)[0]

In [16]:
# Show the generated summary.
summary

'In this lecture, we are going to talk about the force between a point charge and a charge distribution. The program for this lecture is based on what we covered today. You will solve three or four problems employing these concepts. The SI units that we work in are 4 pi epsilon 0.'