In [23]:
def transcribe_audio():
    
    from deepspeech import Model
    import numpy as np
    import os
    import wave
    import json
    import pyaudio
    import io


    chunk = 1024  # Record in chunks of 1024 samples
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 1
    fs = 16000  
    seconds = 15

    p = pyaudio.PyAudio()  # Create an interface to PortAudio

    print('Recording for {} seconds'.format(seconds))


    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=fs,
                    frames_per_buffer=chunk,
                    input=True)

    frames = []  # Initialize array to store frames

    # Store data in chunks for 15 seconds
    for i in range(0, int(fs / chunk * seconds)):
        data = stream.read(chunk)
        frames.append(data)

    # Stop and close the stream 
    stream.stop_stream()
    stream.close()
    # Terminate the PortAudio interface
    p.terminate()

    print('Stopped.')

    # Save the recorded data as a WAV file but don't save it in storage
    # Use IO library to store the WAV file in temporary format
    container = io.BytesIO()
    wf = wave.open(container, 'wb')
    wf.setnchannels(channels)
    wf.setsampwidth(p.get_sample_size(sample_format))
    wf.setframerate(fs)
    wf.writeframes(b''.join(frames))
    wf.close()

    # Read the data up to this point
    container.seek(0)
    data_package = container.read()
    

    #load the pre-trained deep speech model and scorer from the same directory (need to download from the DeepSpeech website)
    DEEPSPEECH_MODEL_DIR = 'deepspeech'
    MODEL_FILE_PATH = os.path.join(DEEPSPEECH_MODEL_DIR, 'deepspeech-0.9.3-models.pbmm')
    SCORER_FILE_PATH = os.path.join(DEEPSPEECH_MODEL_DIR, 'deepspeech-0.9.3-models.scorer')
    beam_width = 100
    lm_alpha = 0.93
    lm_beta = 1.18

    model = Model(MODEL_FILE_PATH)
    model.enableExternalScorer(SCORER_FILE_PATH)
    model.setScorerAlphaBeta(lm_alpha, lm_beta)
    model.setBeamWidth(beam_width)
    
    data = np.frombuffer(data_package, np.int16)
    #Perform transcription of audio wave file
    text = model.stt(data)
    return text



In [14]:
def cosine_similarity(text1, text2):
    #Use the sentence transformers like to load a pre-trained BERT model for text-vector embeddings
    from sentence_transformers import SentenceTransformer
    #Cosine similiarity method from sk-learn compares the vector embeddings for semantic similarity 
    from sklearn.metrics.pairwise import cosine_similarity
    model = SentenceTransformer('bert-base-nli-mean-tokens')
    sentence_embeddings = model.encode([text1,text2])
    similiarity=cosine_similarity([sentence_embeddings[0]], sentence_embeddings[1:])
    return similiarity

In [81]:
def word_memorized(df, name):
    from time import sleep
    from datetime import date
    #If cosine_similarity >= 0.80 -> word memorized
    #If cosine_similarity < 0.80 -> you need to revise this word
    name = str(name)
    today = str(date.today())
    memorized_words = []
    not_memorized_words = []
    for i in range(len(df)):
        word = df.iloc[i].word
        definition = df.iloc[i].definition
        try:
            import random
            choice=random.choice([1,2])
            if choice==1:
                answer_given = input('Write the definition for {}: '.format(word.upper()))
            else:
                print('Record the definition for {}:'.format(word.upper()))
                sleep(10)   
                answer_given = transcribe_audio()

            similiarity=cosine_similarity(definition, answer_given)
            similiarity_word = cosine_similarity(word, answer_given)
            if (similiarity>=0.8) or (similiarity_word>=0.8):
                print('Word {} memorized'.format(word.upper()))
                memorized_words.append(word)
            else:
                print('Word {} not memorized yet or properly \ncorrect definition: {}\nyour answer {}:\nscore:{}'.format(word.upper(), definition, answer_given.upper(), similiarity))
                not_memorized_words.append(word)
        except KeyboardInterrupt:
            print ('KeyboardInterrupt exception is caught')
    if len(not_memorized_words)!=0:
        df_not_memorized = df.loc[df.word.isin(not_memorized_words)]
        df_not_memorized.to_csv('{}-{}-not-memorized.csv'.format(name.lower(),today), index=False)
    if len(memorized_words)!=0:
        df_memorized = df.loc[df.word.isin(memorized_words)]
    return not_memorized_words

In [85]:
import pandas as pd
#data = pd.read_csv('words_with_definitions.csv')
data['word'] = data['word'].str.strip()
data['definition'] = data['definition'].str.strip()

In [83]:
word_memorized(data.sample(1),'Arsalan')

Record the definition for FEASIBLE:
Recording for 15 seconds
Stopped.
Word FEASIBLE not memorized yet or properly 
correct definition: possible to do easily or conveniently
your answer :
score:[[0.5208689]]


['feasible']