In [1]:
# pip install --upgrade google-cloud-speech
import os
import io
from google.cloud import speech_v1p1beta1 as speech
from google.cloud.speech_v1p1beta1 import enums
from pydub import AudioSegment
import pandas as pd
import time

In [20]:
# Check audio file properites
# sound_file = AudioSegment.from_file('./Datasets/sample_audio/QUICKTEST/31120-20200501-0034sample21.wav', format="wav")

# print(f'Sample Width: {sound_file.sample_width}')
# print(f'Channel Count: {sound_file.channels}')
# print(f'Duration: {len(sound_file) / 1000}s')
# print(f'Sample Rate: {sound_file.frame_rate}')
# print(f'Sample Rate: {sound_file.dBFS}')

In [7]:
def transcribe(input_path,output_path,output_filename,street_file):
    # define key path (security feature)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/Users/alex/ga-dsi-11/police-scanner-speech-to-text-c09b11750e4e.json'
    
    # Instantiate speech to text model, DataFrame, and time
    client = speech.SpeechClient()
    df = pd.DataFrame()
    total_time_start = time.time()
    
    # List of streets
    street_df = pd.read_csv(street_file)
    street_list = []
    for j in street_df['0']:
        street_list.append(j) 
        
    
    # loop through files in our input path
    for i in os.listdir(input_path):
        if i.endswith('.wav'):
            loop_time = time.time()
            
            # open our audio file
            with io.open(f'{input_path}{i}', 'rb') as audio_file:
                content = audio_file.read()
                audio = speech.types.RecognitionAudio(content=content)
            print(f"File {i} opened")
            
            # model parameters
            config = speech.types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz = 22050,
                language_code = 'en-US',
                audio_channel_count = 1,
                enable_separate_recognition_per_channel = True,
                use_enhanced = True,
                model = 'phone_call',
                speech_contexts = [
                                    {'boost': 20.0}])
            
            # This models equivalent of fit/predict
            response = client.recognize(config, audio)
            
            # build dictionary -> dataframe
#             print(response.results)
            for result in response.results:
                d = {}
                d['transcript'] = result.alternatives[0].transcript
                d['confidence_interval'] = result.alternatives[0].confidence
                d['file_name'] = i
                df = df.append(d, ignore_index=True)
                print(f"File {i} results added to dataframe: {time.time() - loop_time}s")


    
    # export df of transcriptions
    df.to_csv(f'{output_path}{output_filename}', index=False)
    
    print(f"Total time elapsed: {time.time() - total_time_start}s")
    
    #return dataframe incase we would like to use it in local file
    return df

In [11]:
transcription_df = transcribe('./Datasets/sample_audio/test/',   # Indivitual INPUT PATH
                              './Datasets/sample_transcript/', # Indivitual OUTPUT PATH
                              'ALEX_sample_transcript_QUICKTEST.csv', # Indivitual transcription csv name
                              './Datasets/cambridge.csv'
                             )

transcription_df


File Alexsample18-25818-20200501-0413.wav opened
File Alexsample17-25818-20200501-0413.wav opened
File Alexsample13-25818-20200501-0413.wav opened
File Alexsample13-25818-20200501-0413.wav results added to dataframe: 23.451646327972412s
File Alexsample10-25818-20200501-0413.wav opened
File Alexsample10-25818-20200501-0413.wav results added to dataframe: 3.9653687477111816s
File Alexsample14-25818-20200501-0413.wav opened
File Alexsample14-25818-20200501-0413.wav results added to dataframe: 3.368769884109497s
File Alexsample12-25818-20200501-0413.wav opened
File Alexsample12-25818-20200501-0413.wav results added to dataframe: 3.8850531578063965s
File Alexsample16-25818-20200501-0413.wav opened
File Alexsample16-25818-20200501-0413.wav results added to dataframe: 7.202057838439941s
File Alexsample16-25818-20200501-0413.wav results added to dataframe: 7.207916975021362s
File Alexsample15-25818-20200501-0413.wav opened
File Alexsample15-25818-20200501-0413.wav results added to dataframe: 4

'car locations required to be outside in the motor vehicle'