# Neural net speech recognition and summarization for Videos
---

Victor Miguel Garcia Sanchez

1 de marzo de 2024

---
Loading libraries and global variables

In [1]:
import subprocess
import moviepy.editor as mp
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import whisper
# https://openai.com/blog/whisper/
# pip install git+https://github.com/openai/whisper.git, not whisper.py unrelated package
from pathlib import Path
from transformers import pipeline
import os
import re

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# https://arxiv.org/pdf/1912.08777.pdf

Función para determinar la duración total del video

In [2]:
def get_length(filename):
    result = subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                             "format=duration", "-of",
                             "default=noprint_wrappers=1:nokey=1", filename],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    return float(result.stdout)

Function to generate the video subtitles, separating them into blocks of chunk_size seconds. The function can be limited so that it only generates the subtitles between the start and stop seconds, and can be used as:

-VideoToSRT("video.mp4")

-VideoToSRT("video.mp4", 10)

-VideoToSRT("video.mp4", 10, 52)

-VideoToSRT("video.mp4", 5, 34, 212)

In [3]:
def VideoToSRT(filename, chunk_size=5, start=0, stop=-1):
    num_seconds_video= int(get_length(filename))+1
    print("The video is {} seconds".format(num_seconds_video))
    l=list(range(0,num_seconds_video+1,chunk_size))
    rank=range(len(l)-1)
    if stop>num_seconds_video:
        print("Upper time limit exceeds the duration of the video, it will be considered the end of the video")
    if start>0:
        emp=int(start/chunk_size)
    else:
        emp=0
    if stop>-1:
        term=int(stop/chunk_size)+1
    else:
        term=len(l)-1
    rank=range(emp,term)
    diz={}

    for i in rank:
        ffmpeg_extract_subclip(filename, l[i]-2*(l[i]!=0), l[i+1], targetname="chunks/cut{}.mp4".format(i+1))
        clip = mp.VideoFileClip(r"chunks/cut{}.mp4".format(i+1)) 
        clip.audio.write_audiofile(r"converted/converted{}.wav".format(i+1))
        os.remove(r"chunks/cut{}.mp4".format(i+1))

        model = whisper.load_model("base")
        result = model.transcribe("converted/converted{}.wav".format(i+1), language="english")
        os.remove(r"converted/converted{}.wav".format(i+1))
        print("Removing converted/converted{}.wav".format(i+1))
        print("------------------------------------------------------------")
        print(round((i-emp)*100/len(rank),2),"%:")
        diz['chunk{}'.format(i+1)]=result["text"]

        l_chunks=[str(i+1)+'\n00:'+"{:02d}".format(int(chunk_size*i/60))+':'+"{:02d}".format(chunk_size*i%60)+',000 --> '+'00:'+"{:02d}".format(int(chunk_size*(i+1)/60))+':'+"{:02d}".format(chunk_size*(i+1)%60-1)+',999\n'+diz['chunk{}'.format(i+1)] for i in range(min(rank),(len(diz)+min(rank)-1))]
        text='\n\n'.join(l_chunks)

        with open(filename[:-3]+'txt',mode ='w') as file:
          file.write("\n")
          file.write(text)
        
        p = Path(filename[:-3]+'txt')
        p.rename(p.with_suffix('.srt'))
    print("100%")
    print("Finally ready!")

## Get the video script
Function to get the complete video script at Script.txt file.

In [4]:
def VideoScript(filename):
    clip = mp.VideoFileClip(filename)
    clip.audio.write_audiofile(filename[:-3]+".wav")
    model = whisper.load_model("base")
    result = model.transcribe(filename[:-3]+".wav", language="english")
    os.remove(filename[:-3]+".wav")
    Script=result["text"]
    with open('Script.txt',mode ='w') as file:
          file.write(Script)
    print(Script)
    return(Script)

## Create a summary from the script
Defines the functions necessary to summarize the Script into a maximum of max_len sentences

Running the functions

The following function only creates the video subtitles in an SRT format to test the Whisper library

In [5]:
VideoToSRT("How Clicking a Single Link Can Cost Millions.mp4",5,0,180)

The video is 861 seconds
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted1.wav


                                                        

MoviePy - Done.




Removing converted/converted1.wav
------------------------------------------------------------
0.0 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted2.wav


                                                        

MoviePy - Done.




Removing converted/converted2.wav
------------------------------------------------------------
2.7 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted3.wav


                                                        

MoviePy - Done.




Removing converted/converted3.wav
------------------------------------------------------------
5.41 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted4.wav


                                                        

MoviePy - Done.




Removing converted/converted4.wav
------------------------------------------------------------
8.11 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted5.wav


                                                        

MoviePy - Done.




Removing converted/converted5.wav
------------------------------------------------------------
10.81 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted6.wav


                                                        

MoviePy - Done.




Removing converted/converted6.wav
------------------------------------------------------------
13.51 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted7.wav


                                                        

MoviePy - Done.




Removing converted/converted7.wav
------------------------------------------------------------
16.22 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted8.wav


                                                        

MoviePy - Done.




Removing converted/converted8.wav
------------------------------------------------------------
18.92 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted9.wav


                                                        

MoviePy - Done.




Removing converted/converted9.wav
------------------------------------------------------------
21.62 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted10.wav


                                                        

MoviePy - Done.




Removing converted/converted10.wav
------------------------------------------------------------
24.32 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted11.wav


                                                        

MoviePy - Done.




Removing converted/converted11.wav
------------------------------------------------------------
27.03 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted12.wav


                                                        

MoviePy - Done.




Removing converted/converted12.wav
------------------------------------------------------------
29.73 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted13.wav


                                                        

MoviePy - Done.




Removing converted/converted13.wav
------------------------------------------------------------
32.43 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted14.wav


                                                        

MoviePy - Done.




Removing converted/converted14.wav
------------------------------------------------------------
35.14 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted15.wav


                                                        

MoviePy - Done.




Removing converted/converted15.wav
------------------------------------------------------------
37.84 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted16.wav


                                                        

MoviePy - Done.




Removing converted/converted16.wav
------------------------------------------------------------
40.54 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted17.wav


                                                        

MoviePy - Done.




Removing converted/converted17.wav
------------------------------------------------------------
43.24 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted18.wav


                                                        

MoviePy - Done.




Removing converted/converted18.wav
------------------------------------------------------------
45.95 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted19.wav


                                                        

MoviePy - Done.




Removing converted/converted19.wav
------------------------------------------------------------
48.65 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted20.wav


                                                        

MoviePy - Done.




Removing converted/converted20.wav
------------------------------------------------------------
51.35 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted21.wav


                                                        

MoviePy - Done.




Removing converted/converted21.wav
------------------------------------------------------------
54.05 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted22.wav


                                                        

MoviePy - Done.




Removing converted/converted22.wav
------------------------------------------------------------
56.76 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted23.wav


                                                        

MoviePy - Done.




Removing converted/converted23.wav
------------------------------------------------------------
59.46 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted24.wav


                                                        

MoviePy - Done.




Removing converted/converted24.wav
------------------------------------------------------------
62.16 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted25.wav


                                                        

MoviePy - Done.




Removing converted/converted25.wav
------------------------------------------------------------
64.86 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted26.wav


                                                        

MoviePy - Done.




Removing converted/converted26.wav
------------------------------------------------------------
67.57 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted27.wav


                                                        

MoviePy - Done.




Removing converted/converted27.wav
------------------------------------------------------------
70.27 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted28.wav


                                                        

MoviePy - Done.




Removing converted/converted28.wav
------------------------------------------------------------
72.97 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted29.wav


                                                        

MoviePy - Done.




Removing converted/converted29.wav
------------------------------------------------------------
75.68 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted30.wav


                                                        

MoviePy - Done.




Removing converted/converted30.wav
------------------------------------------------------------
78.38 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted31.wav


                                                        

MoviePy - Done.




Removing converted/converted31.wav
------------------------------------------------------------
81.08 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted32.wav


                                                        

MoviePy - Done.




Removing converted/converted32.wav
------------------------------------------------------------
83.78 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted33.wav


                                                        

MoviePy - Done.




Removing converted/converted33.wav
------------------------------------------------------------
86.49 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted34.wav


                                                        

MoviePy - Done.




Removing converted/converted34.wav
------------------------------------------------------------
89.19 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted35.wav


                                                        

MoviePy - Done.




Removing converted/converted35.wav
------------------------------------------------------------
91.89 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted36.wav


                                                        

MoviePy - Done.




Removing converted/converted36.wav
------------------------------------------------------------
94.59 %:
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
MoviePy - Writing audio in converted/converted37.wav


                                                        

MoviePy - Done.




Removing converted/converted37.wav
------------------------------------------------------------
97.3 %:
100%
Finally ready!


The VideoScript function generates and prints the entire video script

In [6]:
ClickingMillions=VideoScript("How Clicking a Single Link Can Cost Millions.mp4")

MoviePy - Writing audio in How Clicking a Single Link Can Cost Millions..wav


                                                                        

MoviePy - Done.
 I received a phone call from somebody who needed my help and they explained to me that this organisation suffered a cyber attack, more specifically a ransomware attack which is designed to both steal your data and make it unusable. It replicates itself throughout the business and can drive you down to paper-based controls. And this was an opportunity that I saw where I could influence something positively. And it was my job to investigate what had happened, how it happened and why. And I saw something that I hadn't experienced before firsthand. In 2017, the NHS suffered something similar and it cost nearly £100m to recover. This incident cost around £5m to recover and took 14 months. Yet what I saw was the human impact. How this happened, a single individual clicked a link and a single individual enabled this, unknowingly, to happen to an organisation. Multiple people were signed off sick due to stress and multiple people weren't able to go to work the next day and car

# Summary

In [7]:
def AbstractiveSummarization(Script, max_len=60):
    Sentences=Script.split(". ")
    textsize=len(Sentences)
    if(textsize<=50):
        Summary=summarizer(Script, max_length=max_len+10, min_length=max_len-10, do_sample=False)[0]["summary_text"]
    else:
        chunks=int(textsize/50)+1
        k,m=divmod(textsize,chunks)
        Summary=""
        for i in range(chunks):
            subscript=[]
            subscript.append('. '.join(Sentences[i*k+min(i,m):(i+1)*k+min(i,m)]))
            if i>0:
                Summary+='. '
            try:
                Summary+=summarizer(subscript, max_length=int((max_len+10)/chunks), min_length=int((max_len-10)/chunks), do_sample=False)[0]["summary_text"]
            except:
                #Too long paragraph, splitting again
                Subsentences=subscript[0].split(". ")
                Subk,Subm=divmod(len(Subsentences),2)
                for j in range(2):
                    subsubscript=[]
                    subsubscript.append('. '.join(Subsentences[j*Subk+min(j,Subm):(j+1)*Subk+min(j,Subm)]))
                    Summary+=summarizer(subsubscript, max_length=int((max_len+10)/2*chunks), min_length=int((max_len-10)/2*chunks), do_sample=False)[0]["summary_text"]
    Summary=Summary.replace("\'","'")
    return(Summary)

In [8]:
AbstractiveSummarization(ClickingMillions, 60)

Token indices sequence length is longer than the specified maximum sequence length for this model (1036 > 1024). Running this sequence through the model will result in indexing errors


'In 2017, the NHS suffered something similar and it cost nearly £100m to recover. This incident. Mr Pullin was paid as a cybersecurity expert to evade the controls of this building. He says he. "This is to exploit the human behaviors. And so there\'s some basic things you can do, such as resetting passwords and making sure you\'re not using the same password for your accounts" "I don\'t believe any generation can avoid this anymore. Children are being raised with iPads and older generations are online shopping because of convenience and accessibility to services they may not have had before"If you\'re going on holidays in Mexico, say for your honeymoon, you\'ve saved up all of this money. Have a lovely time. Yet someone you know or an acquaintance or you have public visibility of your arrangements. If someone knows that information and they know the bank you may work with, they could phone you whilst you land and say, we\'ve seen your card be used in this location.'