### Post Call Transcription and Analysis with Azure OpenAI 
This demo notebook shows how to perform analysis on a Swiss German audio interaction between two parties. The audio file is downloaded from an Azure blob store. Example use cases include the analysis of call center customer interactions. We provide the following capabilities:
- sentiment analysis
- general summarization of the call
- summarization of each speaker's remarks with speaker recognition

Before running the cells, create a `.env` file that defines the environment variables `OPENAI_API_KEY` and `OPENAI_API_ENDPOINT` (the Azure OpenAI API key and endpoint) and `SPEECH_API_KEY` (the Speech API key). Some packages need to be installed first via ``%pip install`` (for example python-dotenv, azure-cognitiveservices-speech, openai).

In [None]:
from azureml.core import Workspace, Model, Dataset, Datastore, Experiment, Environment, ScriptRunConfig, RunConfiguration
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.environment import CondaDependencies
import joblib
import os
import pandas as pd
from datetime import datetime
import logging
import requests
from azure.identity import ChainedTokenCredential,ManagedIdentityCredential,DefaultAzureCredential
from azure.storage.filedatalake import DataLakeServiceClient
import azureml.core
from time import sleep
import time
import azure.functions as func
import json 
from azure.storage.blob import BlobServiceClient, ContainerClient, BlobBlock, BlobClient
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv
load_dotenv()
import string
import openai
import sys
import numpy as np
from openai.embeddings_utils import get_embedding, cosine_similarity
import io
from pydub import AudioSegment

# Point the openai client at an Azure-hosted deployment.
openai.api_type = "azure"
openai.api_version = '2023-05-15'

# Credentials and endpoint come from the environment (populated from .env by
# load_dotenv() above); each value is None when its variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')
openai.api_base = os.environ.get('OPENAI_API_ENDPOINT')
SPEECH_KEY = os.environ.get("SPEECH_API_KEY")

print('SDK version:', azureml.core.VERSION)

### Fetch the Audio from Azure Blob Store

In [21]:
def download_file_from_blob(account_url, container_name, blob_name, out_file):
    """Download a single blob and save it as a new local file.

    Authenticates with ``DefaultAzureCredential``, reads the complete blob
    into memory and then writes the bytes to ``out_file``.

    Args:
        account_url: URL of the storage account, e.g.
            ``https://<account>.blob.core.windows.net``.
        container_name: Name of the container that holds the blob.
        blob_name: Name (path) of the blob inside the container.
        out_file: Local path to write to. It is opened in exclusive-creation
            mode, so it must not exist yet.

    Raises:
        FileExistsError: If ``out_file`` already exists (for example when the
            cell is run a second time with the same output name).
    """
    credential = DefaultAzureCredential()
    blob_service_client = BlobServiceClient(account_url, credential=credential)
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

    # The whole blob is held in memory before anything is written to disk.
    blob_bytes = blob_client.download_blob().readall()

    # 'x' refuses to overwrite an existing file; 'b' because the payload is raw bytes.
    with open(out_file, mode='xb') as f:
        f.write(blob_bytes)

In [22]:
# Replace the <...> placeholders with your storage account, container, blob
# path and the desired local file name before running this cell.
download_file_from_blob(
    account_url="https://<name of blob store>.blob.core.windows.net",
    container_name="<name of container>",
    blob_name="<file path>",
    out_file="<name of output file>.wav",
)

### Speech to Text via Cognitive Services

In [3]:
def recognize_speech_from_file(filename, *, language="de-CH", service_region="westeurope"):
    """Transcribe an audio file using Azure Speech continuous recognition.

    Starts continuous recognition on ``filename``, collects the text of every
    ``recognized`` event and blocks until the recognizer signals that the
    session stopped or was canceled.

    Args:
        filename: Path of the audio file to transcribe.
        language: Recognition language passed to the recognizer.
        service_region: Region of the Speech resource that ``SPEECH_KEY``
            belongs to.

    Returns:
        A list of strings, one per ``recognized`` event, in the order the
        events were received.
    """
    # Function-scope import: the notebook's import cell does not load threading.
    import threading

    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=filename)
    # Recognizer that reads its audio from the file instead of a microphone.
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, language=language, audio_config=audio_config
    )

    # Per-call state shared with the callbacks through the closure, so repeated
    # calls do not leak into (or clobber) module-level globals.
    done = threading.Event()
    recognized_text_list = []

    def stop_cb(evt):
        """Signal the waiting loop to finish upon receiving an event `evt`."""
        print('CLOSING on {}'.format(evt))
        done.set()

    def recognize_cb(evt: speechsdk.SpeechRecognitionEventArgs):
        """Store the text carried by a `recognized` event."""
        recognized_text_list.append(evt.result.text)

    # Connect callbacks to the events fired by the speech recognizer.
    speech_recognizer.recognized.connect(recognize_cb)
    speech_recognizer.session_started.connect(lambda evt: print('STT SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('STT SESSION STOPPED {}'.format(evt)))
    # Stop waiting on either a session-stopped or a canceled event, so the loop
    # below does not depend on session_stopped alone.
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    try:
        # Wake up every half second so the wait stays interruptible.
        while not done.wait(timeout=0.5):
            pass
    finally:
        # Always shut recognition down, even if the wait is interrupted.
        speech_recognizer.stop_continuous_recognition()

    return recognized_text_list

In [None]:
# Transcribe the downloaded recording; `text` is the list of recognized
# strings that the prompt cells below join into a single transcript.
text = recognize_speech_from_file(filename="<name of output file>.wav")
print(text)

### Generate Responses via Azure OpenAI

#### Create a Prompt for Sentiment Analysis

In [None]:
# Name of the Azure OpenAI deployment that serves the completion request.
deployment_name = "text-davinci-003-demo-we"

# Instruction followed by the transcript segments joined into one string.
prompt_sentiment = f"Detect whether the sentiment of the customer is positive or negative. Just say positive or negative.\n\n{' '.join(text)}"

# A one-word answer is expected, hence the small token budget.
sentiment_response = openai.Completion.create(
    engine=deployment_name,
    prompt=prompt_sentiment,
    temperature=0,
    max_tokens=30,
)
# Take the first choice and trim surrounding spaces and newlines.
result = sentiment_response["choices"][0]["text"].strip(" \n")
print(result)

#### Create a Prompt for Summarization

In [None]:
# Name of the Azure OpenAI deployment that serves the completion request.
deployment_name = "text-davinci-003-demo-we"

# Instruction followed by the transcript segments joined into one string.
prompt_summary = f"Summarize the contents of the customer call.\n\n{' '.join(text)}"

summary_response = openai.Completion.create(
    engine=deployment_name,
    prompt=prompt_summary,
    temperature=0,
    max_tokens=3000,
)
# Take the first choice and trim surrounding spaces and newlines.
result = summary_response["choices"][0]["text"].strip(" \n")
print(result)

#### Create a Prompt for Speaker Recognition

In [None]:
# Name of the Azure OpenAI deployment that serves the completion request.
deployment_name = "text-davinci-003-demo-we"

# Ask the model to separate the two speakers and summarize each one in German.
prompt_test = f"Split the conversation by the two speakers and summarize in german what each speaker said.\n\n{' '.join(text)}"

speaker_response = openai.Completion.create(
    engine=deployment_name,
    prompt=prompt_test,
    temperature=0,
    max_tokens=3000,
)
# Take the first choice and trim surrounding spaces and newlines.
result = speaker_response["choices"][0]["text"].strip(" \n")
print(result)