In [1]:
# Python code to convert video to audio 
import moviepy.editor as mp 
  
# Local input video and output audio paths (edit these for your machine)
VIDEO_PATH = r"D:/Hyper-dimension/video.mp4"
AUDIO_PATH = r"D:/Hyper-dimension/Audio_file.wav"

clip = mp.VideoFileClip(VIDEO_PATH)
try:
    # `clip.audio` is None when the video has no audio track; fail with a
    # clear message instead of an AttributeError on None.
    audio_clip = clip.audio
    if audio_clip is None:
        raise ValueError(f"No audio track found in {VIDEO_PATH}")
    # write_audiofile returns None, so its result is not worth keeping
    audio_clip.write_audiofile(AUDIO_PATH)
finally:
    # Release the ffmpeg reader processes / file handles held by the clip
    clip.close()

chunk:   0%|                                                                | 3/7854 [00:00<06:31, 20.03it/s, now=None]

MoviePy - Writing audio in D:/Hyper-dimension/Audio_file.wav


                                                                                                                       

MoviePy - Done.




In [2]:
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence
import speech_recognition as sr
# Shared recognizer instance; get_large_audio_transcription reads it as a global
r = sr.Recognizer()

In [3]:
def get_large_audio_transcription(path, folder_name="audio-chunks",
                                  min_silence_len=500, silence_offset_db=14,
                                  keep_silence=500):
    """
    Split a large WAV file on silence and run speech recognition
    on each resulting chunk.

    Uses the module-level recognizer `r` and Google's web speech API.

    Parameters:
        path: path of the WAV file to transcribe.
        folder_name: directory the exported chunk files are written to
            (created if missing).
        min_silence_len: minimum silence length, in milliseconds, that
            triggers a split.
        silence_offset_db: how many dB below the clip's average loudness
            (`sound.dBFS`) counts as silence.
        keep_silence: milliseconds of silence kept at the chunk edges.

    Returns:
        The concatenated text of every recognized chunk; each chunk is
        capitalized and terminated with ". ".

    Raises:
        sr.RequestError: propagated from `recognize_google` when the
            service cannot be reached (not handled here).
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split where silence lasts at least `min_silence_len` ms
    chunks = split_on_silence(
        sound,
        # experiment with this value for your target audio file
        min_silence_len=min_silence_len,
        # threshold is relative to the clip's average loudness
        silence_thresh=sound.dBFS - silence_offset_db,
        # keep `keep_silence` ms of silence around each chunk
        keep_silence=keep_silence,
    )
    # create the directory for the audio chunks; no error if it already exists
    os.makedirs(folder_name, exist_ok=True)
    recognized_parts = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the chunk into `folder_name`; export() returns the open
        # file handle, so close it right away
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav").close()
        # load the chunk audio, releasing the file before the network call
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
        # try converting it to text
        try:
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError:
            # the exception carries no message, so name the chunk instead
            print("Error: no speech recognized in", chunk_filename)
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            recognized_parts.append(text)
    # return the text for all chunks detected
    return "".join(recognized_parts)

In [4]:
# Transcribe the extracted WAV; `text` holds the concatenated chunk transcripts
text = get_large_audio_transcription("D:/Hyper-dimension/Audio_file.wav")

audio-chunks\chunk1.wav : Hello guys and welcome back to get fighting. 
audio-chunks\chunk2.wav : In this video you will see. 
audio-chunks\chunk3.wav : How we can convert our audio into text. 
audio-chunks\chunk4.wav : Using. 
audio-chunks\chunk5.wav : Speech recognition library. 
audio-chunks\chunk6.wav : So without wasting any time that's it started. 
audio-chunks\chunk7.wav : Before starting video i want to just tell you that. 
audio-chunks\chunk8.wav : I am using a different mike that is night of mine. 
audio-chunks\chunk9.wav : Your phone's into the audio will not be that loud. 
audio-chunks\chunk10.wav : But you can see from the code. 
audio-chunks\chunk11.wav : And understand it. 
audio-chunks\chunk12.wav : So. 
audio-chunks\chunk13.wav : First of all. 
audio-chunks\chunk14.wav : We need to install. 
audio-chunks\chunk15.wav : This. 
audio-chunks\chunk16.wav : Speech recognition library. 
audio-chunks\chunk17.wav : And. 
audio-chunks\chunk18.wav : A simple compound. 
audio-chun

In [5]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
#from spacy.en import English
from spacy.lang.en import English

In [6]:
# Tokens excluded from word counting: stop words, punctuation, newline
extra_words = [*STOP_WORDS, *punctuation, '\n']
# Load the small English pipeline and parse the transcript
nlp = spacy.load('en_core_web_sm')
docx = nlp(text)

In [7]:
# Raw text of every token in the parsed transcript
all_words = [token.text for token in docx]

# Count lower-cased, purely alphabetic tokens that are not in `extra_words`
Freq_word = {}
for raw_word in all_words:
    lowered = raw_word.lower()
    if lowered in extra_words or not lowered.isalpha():
        continue
    Freq_word[lowered] = Freq_word.get(lowered, 0) + 1

In [8]:
# Display the word -> count mapping
Freq_word

{'hello': 2,
 'guys': 2,
 'welcome': 1,
 'fighting': 1,
 'video': 5,
 'convert': 6,
 'audio': 13,
 'text': 7,
 'speech': 8,
 'recognition': 8,
 'library': 5,
 'wasting': 1,
 'time': 3,
 'started': 1,
 'starting': 1,
 'want': 1,
 'tell': 1,
 'different': 1,
 'mike': 1,
 'night': 1,
 'phone': 1,
 'loud': 1,
 'code': 1,
 'understand': 1,
 'need': 3,
 'install': 4,
 'simple': 1,
 'compound': 1,
 'twin': 1,
 'scroll': 1,
 'pseudo': 1,
 'factory': 1,
 'python': 3,
 'version': 1,
 'simply': 1,
 'pip': 1,
 'noticed': 1,
 'uppercase': 1,
 'installed': 1,
 'earlier': 1,
 'says': 1,
 'requirement': 1,
 'satisfied': 1,
 'let': 4,
 'jump': 1,
 'coding': 1,
 'import': 2,
 'al': 1,
 'asr': 1,
 'notice': 1,
 'format': 1,
 'week': 1,
 'looked': 1,
 'cases': 1,
 'case': 2,
 'sensitive': 1,
 'notation': 1,
 'writing': 1,
 'bb': 1,
 'good': 2,
 'way': 2,
 'initialise': 2,
 'equal': 1,
 'sr': 3,
 'dot': 5,
 'work': 1,
 'recognizer': 3,
 'recognise': 5,
 'essar': 1,
 'microphone': 4,
 'source': 6,
 'means':

In [9]:
# Counts in ascending order; the last four are the highest frequencies
val = sorted(Freq_word.values())
max_freq = val[-4:]

# Words whose count equals one of those top values, in first-seen order.
# NOTE: ties mean more than four words can be selected.
topic_words = [word for word, freq in Freq_word.items() if freq in max_freq]

print("Topic of document given :-")
for topic_word in topic_words:
    print(topic_word, end=" ")

Topic of document given :-
audio text speech recognition 

In [10]:
# Scale every count by the largest one so the most frequent word weighs 1.0.
# The peak is taken from Freq_word itself rather than from `max_freq`, so
# re-running this cell is a no-op (the peak is then 1.0) instead of dividing
# the already-normalized weights a second time.
peak_freq = max(Freq_word.values(), default=1)
for word in Freq_word:
    Freq_word[word] = Freq_word[word] / peak_freq

In [11]:
# Score each sentence as the sum of the weights of its counted words.
# Sentences containing no counted word get no entry at all.
sent_strength = {}
for sent in docx.sents:
    for token in sent:
        token_key = token.text.lower()
        if token_key in Freq_word:
            sent_strength[sent] = sent_strength.get(sent, 0) + Freq_word[token_key]

In [12]:
# Display the sentence -> score mapping
sent_strength

{Hello guys and welcome back to get fighting.: 0.46153846153846156,
 In this video you will see.: 0.38461538461538464,
 How we can convert our audio into text.: 2.0,
 Speech recognition library.: 1.6153846153846154,
 So without wasting any time that's it started.: 0.38461538461538464,
 Before starting video i want to just tell you that.: 0.6153846153846154,
 I am using a different mike that is night of mine.: 0.23076923076923078,
 Your phone's into the audio will not be that loud.: 1.1538461538461537,
 But you can see from the code.: 0.07692307692307693,
 And understand it.: 0.07692307692307693,
 We need to install.: 0.5384615384615385,
 Speech recognition library.: 1.6153846153846154,
 A simple compound.: 0.15384615384615385,
 Twin scroll it is.: 0.15384615384615385,
 Pseudo.: 0.07692307692307693,
 Factory.: 0.07692307692307693,
 Speech recognition.: 1.2307692307692308,
 Now if you are using python to version you can simply do pip install speech recognition.: 2.0,
 But i am using pyth

In [13]:
# Sentence scores from highest to lowest
top_sentences = sorted(sent_strength.values(), reverse=True)
# Keep the top 20% of scores, but never fewer than one when any sentence was
# scored: int(0.2 * n) is 0 for n < 5, which would yield an empty summary.
top20percent_sentence = min(len(top_sentences),
                            max(1, int(0.2 * len(top_sentences))))
top_sent = top_sentences[:top20percent_sentence]

In [14]:
# Keep, in document order, every sentence whose score is among the top scores
summary = [
    sentence
    for sentence, score in sent_strength.items()
    if score in top_sent
]

In [15]:
# Join with a single space so consecutive sentences do not run together
# (printing each with end="" produced "...text.Speech recognition...").
print(" ".join(str(sentence) for sentence in summary))

How we can convert our audio into text.Speech recognition library.Your phone's into the audio will not be that loud.Speech recognition library.Speech recognition.Now if you are using python to version you can simply do pip install speech recognition.Speech recognition.Notice that here we have speech recognition.Writing speech recognition whole every time.Now this will work as a recognizer to recognise our audio so with.You can also use some audio files.Convert into text but in this video will be using.It will listen to the source and save it in audio.So now we have a audio player.Recognise google now we have various options recognised in google ibm excetra but for this one will be using recognise google.We have to pass our audio.Is audio into text.Remember my mentioned train accept now it may happen sometimes that the audio is not clear.it can be an audio file.Convert our audio into.In case if the audio is not identified correctly.This was all in speech recognition.And in this way we c