In [1]:
import speech_recognition as sr 
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence
import moviepy.editor as mp



In [2]:
clip = mp.VideoFileClip(r"work sde.3gpp")

In [3]:
clip.audio.write_audiofile(r"converted.wav")

MoviePy - Writing audio in converted.wav


                                                                        

MoviePy - Done.




In [4]:
# create a speech recognition object
r = sr.Recognizer()

In [5]:
# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path):
    """
    Splitting the large audio file into chunks
    and apply speech recognition on each of these chunks
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)  
    # split audio sound where silence is 700 miliseconds or more and get chunks
    chunks = split_on_silence(sound,
        # experiment with this value for your target audio file
        min_silence_len = 500,
        # adjust this per requirement
        silence_thresh = sound.dBFS-14,
        # keep the silence for 1 second, adjustable as well
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk 
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in
        # the `folder_name` directory.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            # try converting it to text
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                whole_text += text
    # return the text for all chunks detected
    return whole_text

In [6]:
path = "converted.wav"
# print("\nFull text:", get_large_audio_transcription(path))

In [7]:
import socket
socket.getaddrinfo('localhost', 8080)

[(<AddressFamily.AF_INET6: 23>, 0, 0, '', ('::1', 8080, 0, 0)),
 (<AddressFamily.AF_INET: 2>, 0, 0, '', ('127.0.0.1', 8080))]

In [8]:
text = get_large_audio_transcription(path)

audio-chunks\chunk1.wav : Project. 
audio-chunks\chunk2.wav : Ab exchange to kar sakte ho aapke pass dimag hai iski vajah se aapko liya gaya. 
audio-chunks\chunk3.wav : I'm going paid. 
Error: 
audio-chunks\chunk5.wav : What is the work of fiction do while he is at the company. 
audio-chunks\chunk6.wav : How to preparation for the company and that is. 
audio-chunks\chunk7.wav : Ok. 
audio-chunks\chunk8.wav : That's very interesting question the hardware by. 
audio-chunks\chunk9.wav : Starting working in a company. 
audio-chunks\chunk10.wav : If you just join your first company. 
audio-chunks\chunk11.wav : English should be about one or two months of training. 
audio-chunks\chunk12.wav : And in that issue some common concept that all is you should know about it be root to this how to write code and we know how to solve some simple mathematical problems. 
audio-chunks\chunk13.wav : Abhi. 
audio-chunks\chunk14.wav : You like me you have. 
audio-chunks\chunk15.wav : Motor difference in ori

audio-chunks\chunk105.wav : Ham log bahut jyada theory mein pocus karte bachpan se. 
audio-chunks\chunk106.wav : Agri infrastructure and. 
audio-chunks\chunk107.wav : Stock mein problem hai. 
audio-chunks\chunk108.wav : But ek apne attitude mein bhi problem hai. 
Error: 
audio-chunks\chunk110.wav : I remember in my college all other people used to be like. 
audio-chunks\chunk111.wav : Kya ho gaya. 
Error: 
audio-chunks\chunk113.wav : So what ek. 
audio-chunks\chunk114.wav : Ek bandar practical kar raha hai. 
audio-chunks\chunk115.wav : Output device. 
audio-chunks\chunk116.wav : Ab agar assignment diya hai to kyon karenge. 
audio-chunks\chunk117.wav : Kharab ki kya puchta hun student ko assignment lene ke liye. 
audio-chunks\chunk118.wav : How to take responsibility in life you have to take responsibility. 
Error: 
audio-chunks\chunk120.wav : You got the universe actress genuinely. 
audio-chunks\chunk121.wav : And in this way. 
audio-chunks\chunk122.wav : You will start going ahead of.

In [9]:
text

"Project. Ab exchange to kar sakte ho aapke pass dimag hai iski vajah se aapko liya gaya. I'm going paid. What is the work of fiction do while he is at the company. How to preparation for the company and that is. Ok. That's very interesting question the hardware by. Starting working in a company. If you just join your first company. English should be about one or two months of training. And in that issue some common concept that all is you should know about it be root to this how to write code and we know how to solve some simple mathematical problems. Abhi. You like me you have. Motor difference in orissa koi aapko bolata hai ki yah project mein yah chij change kar do. Ab exchange ko kar sakte ho. I'm going paid. But what you want to do it you want to make it flexible. Change karte ho aaj karte ho. To aap. Thoda aage ka soch ke first in nashik pahle aaj karte ho. Yah sari chij hoti hai yah aap. Divide food kar rahe ho. To aapko surah fatiha i would personally recommend ki you focus a 

In [10]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [11]:
stopwords = list(STOP_WORDS)

In [12]:
nlp = spacy.load('en_core_web_sm')

In [13]:
doc = nlp(text)

In [14]:
tokens = [token.text for token in doc]
print(tokens)

['Project', '.', 'Ab', 'exchange', 'to', 'kar', 'sakte', 'ho', 'aapke', 'pass', 'dimag', 'hai', 'iski', 'vajah', 'se', 'aapko', 'liya', 'gaya', '.', 'I', "'m", 'going', 'paid', '.', 'What', 'is', 'the', 'work', 'of', 'fiction', 'do', 'while', 'he', 'is', 'at', 'the', 'company', '.', 'How', 'to', 'preparation', 'for', 'the', 'company', 'and', 'that', 'is', '.', 'Ok', '.', 'That', "'s", 'very', 'interesting', 'question', 'the', 'hardware', 'by', '.', 'Starting', 'working', 'in', 'a', 'company', '.', 'If', 'you', 'just', 'join', 'your', 'first', 'company', '.', 'English', 'should', 'be', 'about', 'one', 'or', 'two', 'months', 'of', 'training', '.', 'And', 'in', 'that', 'issue', 'some', 'common', 'concept', 'that', 'all', 'is', 'you', 'should', 'know', 'about', 'it', 'be', 'root', 'to', 'this', 'how', 'to', 'write', 'code', 'and', 'we', 'know', 'how', 'to', 'solve', 'some', 'simple', 'mathematical', 'problems', '.', 'Abhi', '.', 'You', 'like', 'me', 'you', 'have', '.', 'Motor', 'difference

In [15]:
punctuation = punctuation + '\n'
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'

In [16]:
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1
                
print(word_frequencies)

{'Project': 1, 'Ab': 5, 'exchange': 2, 'kar': 13, 'sakte': 3, 'ho': 10, 'aapke': 1, 'pass': 2, 'dimag': 1, 'hai': 26, 'iski': 1, 'vajah': 1, 'se': 6, 'aapko': 4, 'liya': 1, 'gaya': 4, 'going': 5, 'paid': 2, 'work': 2, 'fiction': 1, 'company': 10, 'preparation': 1, 'Ok': 3, 'interesting': 2, 'question': 4, 'hardware': 1, 'Starting': 1, 'working': 1, 'join': 1, 'English': 1, 'months': 1, 'training': 2, 'issue': 1, 'common': 3, 'concept': 1, 'know': 6, 'root': 1, 'write': 1, 'code': 1, 'solve': 2, 'simple': 1, 'mathematical': 1, 'problems': 1, 'Abhi': 3, 'like': 3, 'Motor': 1, 'difference': 1, 'orissa': 1, 'koi': 2, 'bolata': 1, 'ki': 14, 'yah': 5, 'project': 1, 'mein': 14, 'chij': 4, 'change': 1, 'ko': 6, 'want': 3, 'flexible': 1, 'Change': 1, 'karte': 6, 'aaj': 2, 'aap': 9, 'Thoda': 1, 'aage': 1, 'ka': 1, 'soch': 2, 'ke': 11, 'nashik': 1, 'pahle': 1, 'Yah': 2, 'sari': 1, 'hoti': 5, 'Divide': 1, 'food': 1, 'rahe': 6, 'surah': 1, 'fatiha': 1, 'personally': 1, 'recommend': 1, 'focus': 2, '

In [17]:
max_frequency = max(word_frequencies.values())
max_frequency

26

In [18]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

print(word_frequencies)

{'Project': 0.038461538461538464, 'Ab': 0.19230769230769232, 'exchange': 0.07692307692307693, 'kar': 0.5, 'sakte': 0.11538461538461539, 'ho': 0.38461538461538464, 'aapke': 0.038461538461538464, 'pass': 0.07692307692307693, 'dimag': 0.038461538461538464, 'hai': 1.0, 'iski': 0.038461538461538464, 'vajah': 0.038461538461538464, 'se': 0.23076923076923078, 'aapko': 0.15384615384615385, 'liya': 0.038461538461538464, 'gaya': 0.15384615384615385, 'going': 0.19230769230769232, 'paid': 0.07692307692307693, 'work': 0.07692307692307693, 'fiction': 0.038461538461538464, 'company': 0.38461538461538464, 'preparation': 0.038461538461538464, 'Ok': 0.11538461538461539, 'interesting': 0.07692307692307693, 'question': 0.15384615384615385, 'hardware': 0.038461538461538464, 'Starting': 0.038461538461538464, 'working': 0.038461538461538464, 'join': 0.038461538461538464, 'English': 0.038461538461538464, 'months': 0.038461538461538464, 'training': 0.07692307692307693, 'issue': 0.038461538461538464, 'common': 0

In [19]:
sentence_tokens = [sent for sent in doc.sents]
print(sentence_tokens)

[Project., Ab exchange to kar sakte ho aapke pass dimag hai iski vajah se aapko liya gaya., I'm going paid., What is the work of fiction do while he is at the company., How to preparation for the company and that is., Ok., That's very interesting question the hardware by., Starting working in a company., If you just join your first company., English should be about one or two months of training., And in that issue some common concept that all is you should know about it be root to this how to write code, and we know how to solve some simple mathematical problems., Abhi., You like me you have., Motor difference in orissa koi aapko bolata hai ki yah project mein yah chij change kar do., Ab exchange ko kar sakte ho., I'm going paid., But what you want to do it you want to make it flexible., Change karte ho aaj karte ho., To aap., Thoda aage ka soch ke first in nashik pahle aaj karte ho., Yah sari chij hoti hai yah aap., Divide food kar rahe ho., To aapko surah fatiha i would personally re

In [20]:
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_scores[sent] += word_frequencies[word.text.lower()]
                
sentence_scores

{Project.: 0.038461538461538464,
 Ab exchange to kar sakte ho aapke pass dimag hai iski vajah se aapko liya gaya.: 2.884615384615384,
 I'm going paid.: 0.2692307692307693,
 What is the work of fiction do while he is at the company.: 0.5,
 How to preparation for the company and that is.: 0.42307692307692313,
 Ok.: 0.038461538461538464,
 That's very interesting question the hardware by.: 0.2692307692307693,
 Starting working in a company.: 0.42307692307692313,
 If you just join your first company.: 0.42307692307692313,
 English should be about one or two months of training.: 0.11538461538461539,
 And in that issue some common concept that all is you should know about it be root to this how to write code: 0.5384615384615384,
 and we know how to solve some simple mathematical problems.: 0.423076923076923,
 You like me you have.: 0.11538461538461539,
 Motor difference in orissa koi aapko bolata hai ki yah project mein yah chij change kar do.: 3.5384615384615383,
 Ab exchange ko kar sakte ho

In [21]:
from heapq import nlargest

In [22]:
select_length = int(len(sentence_tokens)*0.3)
select_length

40

In [23]:
summary = nlargest(select_length, sentence_scores, key = sentence_scores.get)
summary

[State the person bacche nahin patna pakistan padh rahe ho interview ke baad kya hota hai login login complicity kya hota hai.,
 Elasticsearch kaise kar raha hoga andar hi andar aur hamen elasticsearch jarurat hai kaun si prani course call kar raha hai.,
 Abhi agar aap ko pata hai ki data bases kya hoti hai.,
 To aap per developer yah bahut hi constrained tarike mat bheja to main jin logon train ki jo 11 sal 12 sal experience wale log hain complexity hoti hai.,
 Open soch mushkil hota hai ki aap andar hi andar uska documentation padh kar khud se samajhte karna.,
 Us time par jo log kiye kar chuke the active test or group discussion vo log jakar mode shortcut rahe the. 4 sal bad ke bad tum padh rahe ho.,
 Motor difference in orissa koi aapko bolata hai ki yah project mein yah chij change kar do.,
 Jo aapko bhi naya technology diya gaya kisi ko nahin pata agar open source to achcha google mein uske bare mein.,
 Then we can understand ki technology book kaun sa problem solve kar raha hai.

In [24]:
final_summary = [word.text for word in summary]
summary = ' '.join(final_summary)

In [25]:
print(text)

Project. Ab exchange to kar sakte ho aapke pass dimag hai iski vajah se aapko liya gaya. I'm going paid. What is the work of fiction do while he is at the company. How to preparation for the company and that is. Ok. That's very interesting question the hardware by. Starting working in a company. If you just join your first company. English should be about one or two months of training. And in that issue some common concept that all is you should know about it be root to this how to write code and we know how to solve some simple mathematical problems. Abhi. You like me you have. Motor difference in orissa koi aapko bolata hai ki yah project mein yah chij change kar do. Ab exchange ko kar sakte ho. I'm going paid. But what you want to do it you want to make it flexible. Change karte ho aaj karte ho. To aap. Thoda aage ka soch ke first in nashik pahle aaj karte ho. Yah sari chij hoti hai yah aap. Divide food kar rahe ho. To aapko surah fatiha i would personally recommend ki you focus a l

In [26]:
print(summary)

State the person bacche nahin patna pakistan padh rahe ho interview ke baad kya hota hai login login complicity kya hota hai. Elasticsearch kaise kar raha hoga andar hi andar aur hamen elasticsearch jarurat hai kaun si prani course call kar raha hai. Abhi agar aap ko pata hai ki data bases kya hoti hai. To aap per developer yah bahut hi constrained tarike mat bheja to main jin logon train ki jo 11 sal 12 sal experience wale log hain complexity hoti hai. Open soch mushkil hota hai ki aap andar hi andar uska documentation padh kar khud se samajhte karna. Us time par jo log kiye kar chuke the active test or group discussion vo log jakar mode shortcut rahe the. 4 sal bad ke bad tum padh rahe ho. Motor difference in orissa koi aapko bolata hai ki yah project mein yah chij change kar do. Jo aapko bhi naya technology diya gaya kisi ko nahin pata agar open source to achcha google mein uske bare mein. Then we can understand ki technology book kaun sa problem solve kar raha hai. Ab exchange to k

In [27]:
# with sr.Microphone() as source:
#     # read the audio data from the default microphone
#     audio_data = r.record(source, duration=5)
#     print("Recognizing...")
#     # convert speech to text
#     text = r.recognize_google(audio_data)
#     print(text)

In [28]:
# text = r.recognize_google(audio_data, language="es-ES")