# Neural net speech recognition and summarization for Videos
Loading libraries and global variables

In [1]:
import subprocess
import moviepy.editor as mp
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import whisper
# https://openai.com/blog/whisper/
# pip install git+https://github.com/openai/whisper.git, not whisper.py unrelated package
from pathlib import Path
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
import os

model_name = 'Falconsai/text_summarization'#'google/pegasus-xsum' #'facebook/bart-large-cnn'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
# https://arxiv.org/pdf/1912.08777.pdf
# https://medium.com/codex/build-an-automatic-abstractive-text-summarizer-in-ten-minutes-f15f07e54bae

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'T5Tokenizer'. 
The class this function is called from is 'PegasusTokenizer'.
You are using a model of type t5 to instantiate a model of type pegasus. This is not supported for all configurations of models and can yield errors.
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at Falconsai/text_summarization and are newly initialized: ['decoder.embed_positions.weight', 'decoder.layer_norm.bias', 'decoder.layer_norm.weight', 'decoder.layers.0.encoder_attn.k_proj.bias', 'decoder.layers.0.encoder_attn.k_proj.weight', 'decoder.layers.0.encoder_attn.out_proj.bias', 'decoder.layers.0.encoder_attn.out_proj.weight', 'decoder.layers.0.encoder_attn.q_proj.bias', 'decoder.layers.0.encoder_attn.q_proj.weight', 'decoder.layers.0.encoder_attn.v_proj

Función para determinar la duración total del video

In [2]:
def get_length(filename):
    result = subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                             "format=duration", "-of",
                             "default=noprint_wrappers=1:nokey=1", filename],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    return float(result.stdout)

Function to generate the video subtitles, separating them into blocks of chunk_size seconds. The function can be limited so that it only generates the subtitles between the start and stop seconds, and can be used as:

-VideoToSRT("video.mp4")

-VideoToSRT("video.mp4", 10)

-VideoToSRT("video.mp4", 10, 52)

-VideoToSRT("video.mp4", 5, 34, 212)

In [3]:
def VideoToSRT(filename, chunk_size=5, start=0, stop=-1):
    num_seconds_video= int(get_length(filename))+1
    print("The video is {} seconds".format(num_seconds_video))
    l=list(range(0,num_seconds_video+1,chunk_size))
    rank=range(len(l)-1)
    if stop>num_seconds_video:
        print("Upper time limit exceeds the duration of the video, it will be considered the end of the video")
    if start>0:
        emp=int(start/chunk_size)
    else:
        emp=0
    if stop>-1:
        term=int(stop/chunk_size)+1
    else:
        term=len(l)-1
    rank=range(emp,term)
    diz={}

    for i in rank:
        ffmpeg_extract_subclip(filename, l[i]-2*(l[i]!=0), l[i+1], targetname="chunks/cut{}.mp4".format(i+1))
        clip = mp.VideoFileClip(r"chunks/cut{}.mp4".format(i+1)) 
        clip.audio.write_audiofile(r"converted/converted{}.wav".format(i+1))
        os.remove(r"chunks/cut{}.mp4".format(i+1))

        model = whisper.load_model("base")
        result = model.transcribe("converted/converted{}.wav".format(i+1))
        os.remove(r"converted/converted{}.wav".format(i+1))
        print(round((i-emp)*100/len(rank),2),"%:")
        print(result["text"])
        diz['chunk{}'.format(i+1)]=result["text"]

        l_chunks=[str(i+1)+'\n00:'+"{:02d}".format(int(chunk_size*i/60))+':'+"{:02d}".format(chunk_size*i%60)+',000 --> '+'00:'+"{:02d}".format(int(chunk_size*(i+1)/60))+':'+"{:02d}".format(chunk_size*(i+1)%60-1)+',999\n'+diz['chunk{}'.format(i+1)] for i in range(min(rank),(len(diz)+min(rank)-1))]
        text='\n\n'.join(l_chunks)

        with open(filename[:-3]+'txt',mode ='w') as file:
          file.write("\n")
          file.write(text)
        
        p = Path(filename[:-3]+'txt')
        p.rename(p.with_suffix('.srt'))
    print("100%")
    print("Finally ready!")

## Get the video script
Function to get the complete video script.

In [4]:
def VideoScript(filename):
    clip = mp.VideoFileClip(filename)
    clip.audio.write_audiofile(filename[:-3]+".wav")
    model = whisper.load_model("base")
    result = model.transcribe(filename[:-3]+".wav")
    os.remove(filename[:-3]+".wav")
    Script=result["text"]
    with open('Script.txt',mode ='w') as file:
          file.write(Script)
    print(Script)
    return(Script)

## Create a summary from the script
Defines the functions necessary to summarize the Script into a maximum of max_len sentences

In [5]:
def AbstractiveSummarization(Script, max_len=5):
    sentences = Script.split(". ")
    textsize=len(sentences)
    paragraph_size=int(textsize/(max_len))
    paragraphs_lims=list(range(0,len(sentences)+1,paragraph_size))
    paragraphs_lims[-1]=textsize
    paragraphs=[]
    main_ideas=[]
    batch = tokenizer.prepare_seq2seq_batch(Script, truncation=True, padding='longest',return_tensors='pt')
    translated = model.generate(**batch)
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    main_ideas.append(tgt_text)
    for i in range(1,len(paragraphs_lims)):
        paragraphs.append('. '.join(sentences[paragraphs_lims[i-1]:paragraphs_lims[i]]))
    for i in range(len(paragraphs)):
        print(min(round((i+1)*100/max_len,2),100),"%")
        batch = tokenizer.prepare_seq2seq_batch(paragraphs[i], truncation=True, padding='longest',return_tensors='pt')
        translated = model.generate(**batch)
        tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
        tgt_text[0] = tgt_text[0].replace('datememe','')
        tgt_text[0] = tgt_text[0].replace('DropCatch','')
        if 'n our series of' in tgt_text[0]:
            Names=re.findall(r"[A-Z][a-z]+,?\s+(?:[A-Z][a-z]*\.?\s*)?[A-Z][a-z]+", tgt_text[0])
            if len(Names)>0:
                presence=True
                for j in range(len(Names)):
                    if Names[j] not in paragraphs[i]:
                        presence=False
                if presence==False:
                    print("Incorrect summary: ", tgt_text[0])
                    print("Original text: ", paragraphs[i])
        main_ideas.append(tgt_text)
    return(''.join(idea[0] for idea in main_ideas))

Running the functions

The following function only creates the video subtitles in an SRT format to test the Whisper library

In [6]:
VideoToSRT("How Clicking a Single Link Can Cost Millions.mp4",5,0,180)

The video is 861 seconds
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted1.wav


                                                        

MoviePy - Done.




0.0 %:
 oh receive the function
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted2.wav


                                                        

MoviePy - Done.




2.7 %:
 Lloyd Fondghérad y Co Work Y Cienaden Mae'sol w街 o'n Ysd Eenig
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted3.wav


                                                        

MoviePy - Done.




5.41 %:
 They explain to me that this organization has suffered a cyber attack, more specifically, a ransomware attack, which is designed, I-9-9-9-9-9-9-9-9-9-9-9-9-9-9-9-9-9.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted4.wav


                                                        

MoviePy - Done.




8.11 %:
 a ransomware attack which is designed to both steal your data and make it unusable.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted5.wav


                                                        

MoviePy - Done.




10.81 %:
—ым lle kwrsof centu ond ynrhyw. Elioaf yn ac этwch reciprocal ac hyn suis, da i tilg i mi familiesfai whannui lineaf Shall wedi racbodycot ond y swydd yn fy fawron â motionol wyradiol am fio febodol «naw
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted6.wav


                                                        

MoviePy - Done.




13.51 %:
 a tozt, a f dop dan a fDIrplau'r d Edgar mis'n dubcat hwnnil. Gwell, it if buckle oedd sior ei ymloai,
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted7.wav


                                                        

MoviePy - Done.




16.22 %:
 nwch gl explosive efo italyth genwn ni fillio chi'n rym efo hwybod. Héch ystfroedd marineu f ovalig.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted8.wav


                                                        

MoviePy - Done.




18.92 %:
 FERMANT TO EUN roles . . . Da cael goblet lawn . , . . . .
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted9.wav


                                                        

MoviePy - Done.




21.62 %:
 Thelia dde rigen o pithyn ar y tur prat faith mwso facilitiesau shzh?
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted10.wav


                                                        

MoviePy - Done.




24.32 %:
 Bethan yng Nghymru ymwya gwif orph ymwch oedd Weimowitz. Ant wedi fy i say, enwch ei fod yn 2017.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted11.wav


                                                        

MoviePy - Done.




27.03 %:
 It seems sort of something similar. And it costs nearly 100 million pounds to recover. This incident…
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted12.wav


                                                        

MoviePy - Done.




29.73 %:
 consortiaeth yn trwyach awin 11%切orau 14 lasu.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted13.wav


                                                        

MoviePy - Done.




32.43 %:
 Friedya. Allanllyllos diethol was hip attacks, nói我在os o'n h paud,miş o'n ond povero enstrwne byd eich中
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted14.wav


                                                        

MoviePy - Done.




35.14 %:
 How this happened? A single individual clicked a link, and a single individual enabled this.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted15.wav


                                                        

MoviePy - Done.




37.84 %:
 and enabled this unknowingly to happen to an organisation. Multiple people were signed off-sec due to strap-a-pap-a-pap-a-pap-a-pap-a-pap.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted16.wav


                                                        

MoviePy - Done.




40.54 %:
 eu fan, yn y Elblodol am rwy de connector, ac yn siiddorol ac ymly 적이ch a werre imagineu admant o bobl o therapies. Dym ministar Dolondol остan獌astaol anoddolol am gerda am am energyยdru delwyd yn y por Billy Gotta. AND AI NY LV
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted17.wav


                                                        

MoviePy - Done.




43.24 %:
 Which means I can work won't go to work the next day. Now, from me cyber Security...
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted18.wav


                                                        

MoviePy - Done.




45.95 %:
 Mae alczynnyder Rowlands yt newgl yn ag Brwleidio ym
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted19.wav


                                                        

MoviePy - Done.




48.65 %:
 Agen gemell ac yn y serbyranner ar Maynui'n 20-21 o'r 1995 hwnyn un ieddiol elant sy'n seidoero.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted20.wav


                                                        

MoviePy - Done.




51.35 %:
 25% of cyber attacks use the human element. Now.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted21.wav


                                                        

MoviePy - Done.




54.05 %:
 hefyd yn precisegu, hyn faffodio sy Tomato, hyn y mat,ltry garlicuan cedig pregnant o da cwnas un, hwnn i, a yngwnn yn go defnt o ni foul o diagonalidnowdd mo nid
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted22.wav


                                                        

MoviePy - Done.




56.76 %:
 actually mean. It means people can be exploited too. There's no lines of code and there's no
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted23.wav


                                                        

MoviePy - Done.




59.46 %:
 Bidio, un cym yn act сценillứ aux fog honohau. Una wne fortunate wneud mi byddwyr sdost magic, weak, fat deud Sarah Hassol dwi'natersid fram jest seg. Un modac eti usaches moordiam pentru mother''s icheithald. Os y cymrydص-Y-<|cy|> Virusis yn ôch i feithaeth ar ôch yn totalent.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted24.wav


                                                        

MoviePy - Done.




62.16 %:
 as far as the media is concerned, maybe teenagers and their bedrooms causing trouble stealing things.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted25.wav


                                                        

MoviePy - Done.




64.86 %:
 or stealing things and learning how to use them. Yet, what people don't see is the m-m-m-m-m-m.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted26.wav


                                                        

MoviePy - Done.




67.57 %:
 y veo nhw gan eiso wunod lwfwb yn wnaeth розwyr y ei gafyru tagoedd. Ili yma i mor anghodol yn cael crefru i magician.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted27.wav


                                                        

MoviePy - Done.




70.27 %:
 Yn cyfutiliau mig Commsod yn phwarty cyw better yn ddiolch y Csyfan cてerto.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted28.wav


                                                        

MoviePy - Done.




72.97 %:
 I'm recently I had an opportunity which presented this thought
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted29.wav


                                                        

MoviePy - Done.




75.68 %:
 ti powder Tymill siaradys Reg Όp wedi trai p contouri di? De roedd iomrhysio noddan,
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted30.wav


                                                        

MoviePy - Done.




78.38 %:
 a wneitedys yn llwn ni bydd yn landen. Mae'w sablead hyw cymru.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted31.wav


                                                        

MoviePy - Done.




81.08 %:
 yn ymwch i'n mynd i'n landen. Mae'n fawr y nesafnwch yn ymwch i'n fawr yng Nghymdol, unrhywch i'n fawr yng Nghymdol, mae'n mynd i'n fawr yng Nghymdol, yng Nghymdol, yng Nghymdol, yng Nghymdol, yng Nghymdol,
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted32.wav


                                                        

MoviePy - Done.




83.78 %:
 It was my job to see if I could get past the security controls and get into the building.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted33.wav


                                                        

MoviePy - Done.




86.49 %:
 and get into the building. And so for me, thinking outside of the box, this building, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted34.wav


                                                        

MoviePy - Done.




89.19 %:
 box. This building has floor to ceiling doors 2477 sq.t. and aless budget for this kind of thing based on where
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted35.wav


                                                        

MoviePy - Done.




91.89 %:
 a fe小 child ar g Champdgoes bod y ddimgnian yn bain i hyще yn elethau.
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted36.wav


                                                        

MoviePy - Done.




94.59 %:
 Wh Guard Goto Ye dau premia?
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted37.wav


                                                        

MoviePy - Done.




97.3 %:
 attacking MYlwyd ddim puzz Underground
100%
Finally ready!


The VideoScript function generates and prints the entire video script

In [7]:
ClickingMillions=VideoScript("How Clicking a Single Link Can Cost Millions.mp4")

MoviePy - Writing audio in How Clicking a Single Link Can Cost Millions..wav


chunk:   0%|          | 0/18981 [00:00<?, ?it/s, now=None]

                                                                        

MoviePy - Done.
 Whanau blaen ei dyna rwy 2021 Bo神eth Ys gaf frustrated a las Johannau Ckur Pm refugees eu nyw ar y Ys nhw y felly Mae confus, yr ysshac fy rya'r cyf古'r magyraeth a barod diwario eu ceithio Mae marowany wedi'r yser Cy getfaforlw'r a– y Fai Defektael Ys nhw at y aut exhaust sy nar dobrze chylu o eich arnu 계 Holdwch'n ni nhw i add i ku ffagDa butter Islandlation, ac ydw'n ringwant arher. Ond'r Intoidade jurdin ailwch att ni fi ac gwi'n ei'n jobsio llyrygu? Ond neu dwaudni holl. Aquel ei phiertraeth ymgσιw Somehow gust o ffur i'n digliwch gyrund reliedwaethir fydd di雫wr d cancelledi i pridleg sot mae ytty totu dde issuingど y dod badly wantedad en gyd i ffór. A ddim caelau ffawr hyn yn ymwyllaedead eiretr am ddiddir yw mewn urfanol ei gwneud darwm! Bdeld wneud traidydd yn ffaf resolvedig. Eu gynfal mae'r hynnyd gen gethkidos�� olai eu sylfa mewn ni-demryduddi. Cosa mewn mi gdrum a un Matru pegu peth i' mwy kalau fi dynaondi gwneud cyn un. Gallag'l gwlu yn cael ar fihing ool

# Summary

In [8]:
AbstractiveSummarization(ClickingMillions, 40)

2.5 %
5.0 %
7.5 %
10.0 %
12.5 %
15.0 %
17.5 %
20.0 %
22.5 %
25.0 %
27.5 %
30.0 %
32.5 %
35.0 %
37.5 %
40.0 %
42.5 %
45.0 %
47.5 %
50.0 %
52.5 %
55.0 %
57.5 %
60.0 %
62.5 %
65.0 %
67.5 %
70.0 %
72.5 %
75.0 %
77.5 %
80.0 %
82.5 %
85.0 %
87.5 %
90.0 %
92.5 %
95.0 %
97.5 %
100.0 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %
100 %


''