### Setup

In [1]:
import os
import io
import json
import base64
import requests
import concurrent.futures
import time
from IPython.display import display, Markdown, Latex

import shapely

import numpy as np
import pandas as pd

import vertexai
from vertexai.preview.language_models import TextGenerationModel, TextEmbeddingModel, ChatModel
from google.cloud import aiplatform
from google.cloud import storage
from google.cloud import speech_v1p1beta1 as speech  #v1 has detect different speakers, v2 does not yet
from google.cloud import bigquery


In [2]:
project = !gcloud config get-value project
PROJECT_ID = project[0]
PROJECT_ID

'mg-ce-demos'

In [3]:
REGION = 'us-central1'
LOCATION = 'us'

In [4]:
#speech client
stt = speech.SpeechClient()

# gcs client
gcs = storage.Client(project = PROJECT_ID)

# vertex ai clients
vertexai.init(project = PROJECT_ID, location = REGION)
aiplatform.init(project = PROJECT_ID, location = REGION)

# bigquery client
bq = bigquery.Client(project = PROJECT_ID)

### Get audio files
Need to loop through all files in a GCS bucket

In [5]:
example_audio_uri = "speech/brooklyn_bridge.flac"
example_audio_uri2 = "speech/sample-podcasts/GCPEpisode328-DatabaseMigrationService-2min sample.flac"
example_audio_uri3 = "speech/multi.wav"

gcs_bucket = 'cloud-samples-data'
bucket = gcs.bucket(gcs_bucket)
blob = bucket.blob(example_audio_uri2)

audio_bytes = blob.download_as_bytes()

In [6]:
#gcs_uri = "gs://cloud-samples-data/speech/sample-podcasts/GCPEpisode328-DatabaseMigrationService-2min sample.flac"

In [7]:
type(audio_bytes)

bytes

### STT setup - including multiple speakers

In [43]:
diarization_config = speech.SpeakerDiarizationConfig(
    enable_speaker_diarization=True,
    min_speaker_count=2,
    max_speaker_count=8,
)

In [44]:
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
    audio_channel_count=2,
    enable_word_confidence=True,
    enable_automatic_punctuation=True,
    profanity_filter=True,
    language_code="en-US",
    diarization_config=diarization_config,
)

In [45]:
audio = speech.RecognitionAudio(content=audio_bytes)

In [46]:
# Detects speech in the audio file
operation = stt.long_running_recognize(config=config, audio=audio)

print("Waiting for operation to complete...")
response = operation.result(timeout=180)

Waiting for operation to complete...


In [47]:
#response

In [48]:
result = response.results[-1]
words_info = result.alternatives[0].words
#result
#words_info

In [49]:
speaker = 1
text = ''
order = 0
transcript = pd.DataFrame(columns=['file', 'order', 'speaker_tag', 'text'])

for word_info in words_info:
    if word_info.confidence >= 0.9:
        if word_info.speaker_tag==speaker:
            text=text+" "+word_info.word
        else:
            transcript.loc[len(transcript.index)] = [example_audio_uri2, order, str('speaker_'+str(speaker)), text] 
            order += 1
            speaker=word_info.speaker_tag
            text=""+word_info.word

transcript.loc[len(transcript.index)] = [example_audio_uri2, order, str('speaker_'+str(speaker)), text]

In [54]:
transcript['text'].replace('', np.nan, inplace=True)
transcript = transcript.dropna() 
transcript

Unnamed: 0,file,order,speaker_tag,text
1,speech/sample-podcasts/GCPEpisode328-DatabaseM...,1,speaker_2,Hello everyone and welcome to episode number 3...
2,speech/sample-podcasts/GCPEpisode328-DatabaseM...,2,speaker_3,"talked and databases talk and data movement, s..."
3,speech/sample-podcasts/GCPEpisode328-DatabaseM...,3,speaker_2,"So okay, this is the perfect person for the pe..."
4,speech/sample-podcasts/GCPEpisode328-DatabaseM...,4,speaker_3,One of is where does your dad live did a resid...
5,speech/sample-podcasts/GCPEpisode328-DatabaseM...,5,speaker_2,lot packed into this episode and Shahar Dina g...
6,speech/sample-podcasts/GCPEpisode328-DatabaseM...,6,speaker_3,Journeys.
7,speech/sample-podcasts/GCPEpisode328-DatabaseM...,7,speaker_2,We get a little sneak peek of it this episode ...
8,speech/sample-podcasts/GCPEpisode328-DatabaseM...,8,speaker_3,So what's the first thing that you saw that wa...
9,speech/sample-podcasts/GCPEpisode328-DatabaseM...,9,speaker_2,There's a happening on the blog in on the inte...


### Build prompts for summarization

In [55]:
prompt_context = []

for i, row in transcript.iterrows():
    prompt_context.append("{0}".format(row["text"]))

#prompt_context

In [56]:
prompt_context_by_speaker = []

for tag in transcript.speaker_tag.unique():
    prompt_context_temp = []
    for i, row in transcript[transcript.speaker_tag == tag].iterrows():
        prompt_context_temp.append("{0}".format(row["text"]))
    prompt_context_by_speaker.append([tag, prompt_context_temp])

prompt_context_by_speaker

[['speaker_2',
  ["Hello everyone and welcome to episode number 328 of the weekly Google Cloud platform podcast. This is Stephanie walk and today I'm here with gav Weiss",
   'So okay, this is the perfect person for the perfect episode. What are we going to talk about today?',
   'lot packed into this episode and Shahar Dina gave you as well. I feel like we have a great combination of folks here from the product and side to talk about this product. I also feel like we should have a separate episode on just your career',
   "We get a little sneak peek of it this episode but yeah, just waiting What's in store for you to learn about yuna's how that ties into a database migration service. But before we get into that, why we go ahead and cover some of the cool things of the week?",
   "There's a happening on the blog in on the interwebs around Google Cloud. By we just out with a new flexible committed use discount. So, these are spend commitments for predictable, and simple discount. for ex

In [57]:
#print(str.join("\n", prompt_context_by_speaker[1][1]))

### Submit prompts to GenAI APIs

#### Prediction for the entire transcription

In [58]:
textgen_model = vertexai.preview.language_models.TextGenerationModel.from_pretrained('text-bison@001')

In [59]:
preamble = "Summarize the following opinion in 2-3 sentences: "

In [69]:
prompt_1 = preamble + "\n\n" + str.join("\n", prompt_context)

In [70]:
response_full = textgen_model.predict(
                    prompt_1,
                    max_output_tokens=1024,
                    temperature=0.4,
                    top_p=0.8,
                    top_k=40,
                )

In [71]:
display(Markdown(str('**Prompt**')))
display(Markdown(str(prompt_1)))
print('\n')
display(Markdown(str('**Transcription Summary**')))
display(Markdown(str(response_full)))

**Prompt**

Summarize the following opinion in 2-3 sentences: 

Hello everyone and welcome to episode number 328 of the weekly Google Cloud platform podcast. This is Stephanie walk and today I'm here with gav Weiss
talked and databases talk and data movement, same thing.
So okay, this is the perfect person for the perfect episode. What are we going to talk about today?
One of is where does your dad live did a residency? Where can you get the most out of your data or opinion? It's in the cloud. So today we have our friends from Tel Aviv to talk about how to get your dick. Talking database, migration service, it's going to be a great conversation about all things. What you should think about why we care about it, why you should care about it and how we can make it easy.
lot packed into this episode and Shahar Dina gave you as well. I feel like we have a great combination of folks here from the product and side to talk about this product. I also feel like we should have a separate episode on just your career
Journeys.
We get a little sneak peek of it this episode but yeah, just waiting What's in store for you to learn about yuna's how that ties into a database migration service. But before we get into that, why we go ahead and cover some of the cool things of the week?
So what's the first thing that you saw that was super awesome. This week,
There's a happening on the blog in on the interwebs around Google Cloud. By we just out with a new flexible committed use discount. So, these are spend commitments for predictable, and simple discount. for example, you get 28% year,





**Transcription Summary**

if you commit to spending 1 year of compute Engine. The speaker thinks that the cloud is the best place to store data and that Google Cloud Platform's Database Migration Service makes it easy to move data to the cloud.

#### Prediction for the transcription by speaker

In [72]:
preamble = "Summarize the following opinion in 2-3 sentences: "

In [74]:
for prompt in prompt_context_by_speaker:
    prompt_n = preamble + "\n\n" + str.join("\n", prompt[1])
    response_speaker = textgen_model.predict(
                    prompt_n,
                    max_output_tokens=1024,
                    temperature=0.4,
                    top_p=0.8,
                    top_k=40,
                )
    display(Markdown(str('**Prompt - {}**'.format(prompt[0]))))
    display(Markdown(str(prompt_n)))
    print('\n')
    display(Markdown(str('**Transcription Summary**')))
    display(Markdown(str(response_speaker)))
    print('------------------------')



**Prompt - speaker_2**

Summarize the following opinion in 2-3 sentences: 

Hello everyone and welcome to episode number 328 of the weekly Google Cloud platform podcast. This is Stephanie walk and today I'm here with gav Weiss
So okay, this is the perfect person for the perfect episode. What are we going to talk about today?
lot packed into this episode and Shahar Dina gave you as well. I feel like we have a great combination of folks here from the product and side to talk about this product. I also feel like we should have a separate episode on just your career
We get a little sneak peek of it this episode but yeah, just waiting What's in store for you to learn about yuna's how that ties into a database migration service. But before we get into that, why we go ahead and cover some of the cool things of the week?
There's a happening on the blog in on the interwebs around Google Cloud. By we just out with a new flexible committed use discount. So, these are spend commitments for predictable, and simple discount. for example, you get 28% year,





**Transcription Summary**

if you commit to spending 1 year. The author thinks that the new flexible committed use discount is a great way to save money on Google Cloud. It's simple to use and provides a predictable discount.

------------------------


**Prompt - speaker_3**

Summarize the following opinion in 2-3 sentences: 

talked and databases talk and data movement, same thing.
One of is where does your dad live did a residency? Where can you get the most out of your data or opinion? It's in the cloud. So today we have our friends from Tel Aviv to talk about how to get your dick. Talking database, migration service, it's going to be a great conversation about all things. What you should think about why we care about it, why you should care about it and how we can make it easy.
Journeys.
So what's the first thing that you saw that was super awesome. This week,





**Transcription Summary**

I talked about data and data movement. I think the most important thing is to get your data in the cloud. I had a great conversation with my friends from Tel Aviv about database migration service. We talked about why we care about it, why you should care about it and how we can make it easy.

------------------------


### Functions - for future reference

In [None]:
def mp3_to_wav(audio_file_name):
    if audio_file_name.split('.')[1] == 'mp3':    
        sound = AudioSegment.from_mp3(audio_file_name)
        audio_file_name = audio_file_name.split('.')[0] + '.wav'
        sound.export(audio_file_name, format="wav")

In [None]:
def frame_rate_channel(audio_file_name):
    with wave.open(audio_file_name, "rb") as wave_file:
        frame_rate = wave_file.getframerate()
        channels = wave_file.getnchannels()
        return frame_rate,channels

In [None]:
def stereo_to_mono(audio_file_name):
    sound = AudioSegment.from_wav(audio_file_name)
    sound = sound.set_channels(1)
    sound.export(audio_file_name, format="wav")