In [5]:
import requests, time, os, json, re
import json
import pandas as pd
from presidio_analyzer import AnalyzerEngine, PatternRecognizer, Pattern
from presidio_anonymizer import AnonymizerEngine
from openai import OpenAI
import warnings
warnings.filterwarnings('ignore')

Transcribe

In [13]:
def iniialize_payload(language = 'en'):
    """Build the static pieces of a Voicegain offline-transcription request.

    Args:
        language: Language code placed in the ASR ``languages`` setting.

    Returns:
        A tuple ``(data_url, headers, audio_type, asr_body, host)``:
        the file-upload URL, the HTTP headers, the content type used for
        uploads, the transcription request body (its data-store ``uuid`` is
        a placeholder the caller must overwrite) and the API base URL.

    Raises:
        NameError: if the global ``JWT`` has not been defined before the call.
    """
    # voicegain
    host = "https://api.voicegain.ai/v1"
    data_url = "{}/data/file".format(host)

    # JWT is read from the notebook's global namespace; it is not defined in
    # this function. NOTE(review): presumably it already contains any required
    # scheme prefix (e.g. "Bearer ...") - confirm against the API docs.
    headers = {"Authorization": JWT}

    # NOTE(review): uploads are always labelled audio/wav, even though the
    # recordings listed later in this notebook are .mp3 files - confirm.
    audio_type = "audio/wav"

    asr_body = {
        "sessions": [{
            "asyncMode": "OFF-LINE",
            "poll": {
                "persist": 120000
            },
            "content": {
                "incremental": ["progress"],
                "full" : ["words","transcript"]
            }
        }],
        "audio":{
            "source": {
                "dataStore": {
                    # Placeholder; replaced with the uploaded object's id.
                    "uuid": "<data-object-UUID>"
                }
            }
        },
        "settings": {
            "asr": {
                "acousticModelNonRealTime" : "whisper",
                "languages" : [language],
            }
        }
    }
    return data_url, headers, audio_type, asr_body, host

In [11]:
def helper(asr_body,headers,fname,host):
    """Submit an async transcription job, poll it, and fetch the transcript.

    Args:
        asr_body: JSON-serialisable request body for the transcribe endpoint.
        headers: HTTP headers sent with every request.
        fname: Not used by this function.
        host: API base URL.

    Returns:
        The decoded JSON of the ``json-mc`` transcript once the poll response
        reports a final result, or the string ``" "`` once more than five
        polls have reported the ``QUEUED`` phase.
    """
    submit_url = "{}/asr/transcribe/async".format(host)
    asr_response = requests.post(submit_url, json=asr_body, headers=headers).json()
    print(asr_response)

    session = asr_response["sessions"][0]
    session_id = session["sessionId"]
    polling_url = session["poll"]["url"]

    polls_done = 0
    queued_polls = 0
    finished = False
    while not finished:
        # Poll quickly for the first five attempts, then back off.
        time.sleep(0.3 if polls_done < 5 else 4)

        poll_response = requests.get(polling_url + "?full=false", headers=headers).json()
        phase = poll_response["progress"]["phase"]
        finished = poll_response["result"]["final"]
        print(f"Phase: {phase} Final: {finished}", flush=True)

        if phase == "QUEUED":
            queued_polls += 1

        # The queue check runs before the final check, as in the original flow.
        if queued_polls > 5:
            return " "

        polls_done += 1

    txt_url = f"{host}/asr/transcribe/{session_id}/transcript?format=json-mc"
    print(f"Retrieving transcript using url: {txt_url}", flush=True)
    return requests.get(txt_url, headers=headers).json()

In [8]:
def process_one_file(audio_fname,df):
    """Upload one audio file, transcribe it, and append its rows to ``df``.

    Args:
        audio_fname: Path of the audio file to upload.
        df: DataFrame of previously collected transcript rows.

    Returns:
        ``(txt_response, df)``: the raw value returned by ``helper`` and a new
        DataFrame consisting of ``df`` followed by this file's rows sorted by
        ``file`` and ``start``.

    Raises:
        RuntimeError: if the upload request fails, its response is not JSON,
            or the service reports ``BAD_REQUEST``.
    """
    data_url, headers, audio_type, asr_body, host = iniialize_payload('en')

    # Base filename, stored in the 'file' column.
    filename = os.path.basename(audio_fname)

    path, fname = os.path.split(audio_fname)
    print("Processing {}/{}".format(path, fname), flush=True)

    # Metadata describing the uploaded object.
    data_body = {
        "name": re.sub("[^A-Za-z0-9]+", "-", fname),
        "description": audio_fname,
        "contentType": audio_type,
        "tags": ["test"]
    }

    # The context manager guarantees the file handle is closed after the upload.
    with open(audio_fname, 'rb') as audio_file:
        multipart_form_data = {
            'file': (audio_fname, audio_file, audio_type),
            'objectdata': (None, json.dumps(data_body), "application/json")
        }
        data_response_raw = None
        try:
            data_response_raw = requests.post(data_url, files=multipart_form_data, headers=headers)
            data_response = data_response_raw.json()
        except Exception as e:
            print(str(data_response_raw))
            # Raise instead of exit(): exit() would shut down the notebook
            # kernel and discard the underlying error.
            raise RuntimeError("upload request failed for {}".format(audio_fname)) from e

    print("data response: {}".format(data_response), flush=True)
    if data_response.get("status") == "BAD_REQUEST":
        print("error uploading file {}".format(audio_fname), flush=True)
        raise RuntimeError("error uploading file {}".format(audio_fname))
    object_id = data_response["objectId"]

    # Point the ASR request at the uploaded object.
    asr_body["audio"]["source"]["dataStore"]["uuid"] = object_id
    # Kept as the list from the request body (e.g. ['en']), as before.
    language = asr_body['settings']['asr']['languages']

    txt_response = helper(asr_body,headers,fname,host)

    # helper() returns the string " " when the job stays queued; only a list
    # of transcript sections can be turned into rows.
    if isinstance(txt_response, list):
        sections = txt_response
    else:
        print("no transcript sections returned for {}".format(audio_fname), flush=True)
        sections = []

    columns = ['file', 'utterance', 'confidence', 'start', 'duration', 'spk', 'language']
    records = []
    for item in sections:
        words = item['words']
        records.append({
            'file': filename,
            'utterance': ' '.join(word['utterance'] for word in words),
            # Confidence of the first word only, as before; None if no words.
            'confidence': words[0]['confidence'] if words else None,
            'start': item['start'],
            'duration': item['duration'],
            'spk': item['spk'],
            'language': language,
        })
    # Build the frame once instead of growing it row by row.
    dataset = pd.DataFrame(records, columns=columns)

    sorted_df = dataset.sort_values(by=['file', 'start'])
    display(sorted_df)

    # ignore_index=True renumbers the combined rows.
    df = pd.concat([df, sorted_df], ignore_index=True)

    return txt_response,df

In [9]:
## MAIN ##
# Recursively gather the path of every file below the recordings directory.
input_path='C:/Users/niles/Desktop/Genpact Project/Call_Recordings'
list_of_files = []

for root, dirs, files in os.walk(input_path):
    list_of_files.extend(os.path.join(root, file) for file in files)

# Echo the collected paths, one per line.
for name in list_of_files:
    print(name)

C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-52-11.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-52-46.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-53-45.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-55-16.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-56-36.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-59-31.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-14-59-4.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-15-0-11.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-15-1-36.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-15-2-20.mp3
C:/Users/niles/Desktop/Genpact Project/Call_Recordings\ttsmaker-file-2024-3-2-15-2-46.mp3
C:/U

In [14]:
detailed_transcription_df = pd.DataFrame(columns=['file', 'utterance', 'confidence', 'start', 'duration', 'spk', 'language'])

# Slice of list_of_files transcribed in this run: indices
# BATCH_START .. BATCH_END - 1 (currently the first five files).
BATCH_START = 0
BATCH_END = 5

start_time = time.time()

for i, name in enumerate(list_of_files[BATCH_START:BATCH_END], start=BATCH_START):
    print(i)  # index of the file within list_of_files
    txt_response, detailed_transcription_df = process_one_file(name, detailed_transcription_df)

end_time = time.time()

duration = end_time - start_time

print(f"Duration: {duration} seconds")
print("THE END", flush=True)

0
Processing C:/Users/niles/Desktop/Genpact Project/Call_Recordings/ttsmaker-file-2024-3-2-14-52-11.mp3
data response: {'name': 'ttsmaker-file-2024-3-2-14-52-11-mp3', 'accountId': '9550477e-75c5-4264-a5f6-bf5b358de449', 'contextId': '4b8f32ab-6aa9-48a3-a208-99d83ab4187e', 'contentType': 'audio/wav', 'fileName': '0b7f2c41-f645-4aa4-8d09-e9365faa14ec', 'description': 'C:/Users/niles/Desktop/Genpact Project/Call_Recordings\\ttsmaker-file-2024-3-2-14-52-11.mp3', 'dateCreated': '2024-04-02T17:47:46.004Z', 'dateModified': '2024-04-02T17:47:46.004Z', 'tags': ['test'], 'transcoded': False, 'longPersist': False, 'encryption': 'standard', 'objectId': '0b7f2c41-f645-4aa4-8d09-e9365faa14ec', 'sosRef': {'bucket': 'aoto.internal.fqrkvhvl.voicegain.ai', 'hash': '0b7f2c41-f645-4aa4-8d09-e9365faa14ec'}}
{'sessions': [{'sessionId': 'O-2-0luio9mvu1abgm3m68b7ia9yv1vv', 'sessionUrl': 'https://api.voicegain.ai/v1/asr/transcribe/O-2-0luio9mvu1abgm3m68b7ia9yv1vv', 'asyncMode': 'OFF-LINE', 'poll': {'afterlife'

Unnamed: 0,file,utterance,confidence,start,duration,spk,language
0,ttsmaker-file-2024-3-2-14-52-11.mp3,"Agent, thank you for calling Amazon customer s...",0.890625,40,21060,1,[en]
1,ttsmaker-file-2024-3-2-14-52-11.mp3,Let's look into this. Can you confirm your del...,0.997559,22060,15780,1,[en]
2,ttsmaker-file-2024-3-2-14-52-11.mp3,"I'll initiate an investigation, and you'll rec...",0.99585,38820,15220,1,[en]
3,ttsmaker-file-2024-3-2-14-52-11.mp3,I'll prioritize this for you and keep you post...,0.962646,54740,14260,1,[en]


1
Processing C:/Users/niles/Desktop/Genpact Project/Call_Recordings/ttsmaker-file-2024-3-2-14-52-46.mp3
data response: {'name': 'ttsmaker-file-2024-3-2-14-52-46-mp3', 'accountId': '9550477e-75c5-4264-a5f6-bf5b358de449', 'contextId': '4b8f32ab-6aa9-48a3-a208-99d83ab4187e', 'contentType': 'audio/wav', 'fileName': '454c46eb-398d-40db-ae0a-276e29ee6cf4', 'description': 'C:/Users/niles/Desktop/Genpact Project/Call_Recordings\\ttsmaker-file-2024-3-2-14-52-46.mp3', 'dateCreated': '2024-04-02T17:52:17.771Z', 'dateModified': '2024-04-02T17:52:17.771Z', 'tags': ['test'], 'transcoded': False, 'longPersist': False, 'encryption': 'standard', 'objectId': '454c46eb-398d-40db-ae0a-276e29ee6cf4', 'sosRef': {'bucket': 'aoto.internal.fqrkvhvl.voicegain.ai', 'hash': '454c46eb-398d-40db-ae0a-276e29ee6cf4'}}
{'sessions': [{'sessionId': 'O-1-0luiofgop02e6mfft88ar6f8rh9u', 'sessionUrl': 'https://api.voicegain.ai/v1/asr/transcribe/O-1-0luiofgop02e6mfft88ar6f8rh9u', 'asyncMode': 'OFF-LINE', 'poll': {'afterlife'

Unnamed: 0,file,utterance,confidence,start,duration,spk,language
0,ttsmaker-file-2024-3-2-14-52-46.mp3,"Agent, thank you for calling Amazon customer s...",0.766113,20,21900,1,[en]
1,ttsmaker-file-2024-3-2-14-52-46.mp3,"Agent, great. If the problem continues, please...",0.980957,22200,10920,1,[en]


2
Processing C:/Users/niles/Desktop/Genpact Project/Call_Recordings/ttsmaker-file-2024-3-2-14-53-45.mp3
data response: {'name': 'ttsmaker-file-2024-3-2-14-53-45-mp3', 'accountId': '9550477e-75c5-4264-a5f6-bf5b358de449', 'contextId': '4b8f32ab-6aa9-48a3-a208-99d83ab4187e', 'contentType': 'audio/wav', 'fileName': 'fadb2af0-0b21-4b97-a3d4-be9328d492e6', 'description': 'C:/Users/niles/Desktop/Genpact Project/Call_Recordings\\ttsmaker-file-2024-3-2-14-53-45.mp3', 'dateCreated': '2024-04-02T17:52:29.883Z', 'dateModified': '2024-04-02T17:52:29.883Z', 'tags': ['test'], 'transcoded': False, 'longPersist': False, 'encryption': 'standard', 'objectId': 'fadb2af0-0b21-4b97-a3d4-be9328d492e6', 'sosRef': {'bucket': 'aoto.internal.fqrkvhvl.voicegain.ai', 'hash': 'fadb2af0-0b21-4b97-a3d4-be9328d492e6'}}
{'sessions': [{'sessionId': 'O-3-0luiofpum1obn0c7ey9u34rr7g7j', 'sessionUrl': 'https://api.voicegain.ai/v1/asr/transcribe/O-3-0luiofpum1obn0c7ey9u34rr7g7j', 'asyncMode': 'OFF-LINE', 'poll': {'afterlife'

Unnamed: 0,file,utterance,confidence,start,duration,spk,language
0,ttsmaker-file-2024-3-2-14-53-45.mp3,"Agent, thank you for calling Amazon customer s...",0.902344,80,18800,1,[en]
1,ttsmaker-file-2024-3-2-14-53-45.mp3,Can you confirm the last four digits of the ca...,0.998047,19880,17140,1,[en]
2,ttsmaker-file-2024-3-2-14-53-45.mp3,"Additionally, I've added a $10 credit for the ...",0.996582,37920,19000,1,[en]
3,ttsmaker-file-2024-3-2-14-53-45.mp3,"If you have further questions, feel free to co...",0.972656,58020,4580,1,[en]


3
Processing C:/Users/niles/Desktop/Genpact Project/Call_Recordings/ttsmaker-file-2024-3-2-14-55-16.mp3
data response: {'name': 'ttsmaker-file-2024-3-2-14-55-16-mp3', 'accountId': '9550477e-75c5-4264-a5f6-bf5b358de449', 'contextId': '4b8f32ab-6aa9-48a3-a208-99d83ab4187e', 'contentType': 'audio/wav', 'fileName': 'fcc05d7e-90f2-469d-bc5a-a3ca5e2204c2', 'description': 'C:/Users/niles/Desktop/Genpact Project/Call_Recordings\\ttsmaker-file-2024-3-2-14-55-16.mp3', 'dateCreated': '2024-04-02T17:52:42.173Z', 'dateModified': '2024-04-02T17:52:42.173Z', 'tags': ['test'], 'transcoded': False, 'longPersist': False, 'encryption': 'standard', 'objectId': 'fcc05d7e-90f2-469d-bc5a-a3ca5e2204c2', 'sosRef': {'bucket': 'aoto.internal.fqrkvhvl.voicegain.ai', 'hash': 'fcc05d7e-90f2-469d-bc5a-a3ca5e2204c2'}}
{'sessions': [{'sessionId': 'O-2-0luiofzhe03q5firkw4kyn4gcysu', 'sessionUrl': 'https://api.voicegain.ai/v1/asr/transcribe/O-2-0luiofzhe03q5firkw4kyn4gcysu', 'asyncMode': 'OFF-LINE', 'poll': {'afterlife'

Unnamed: 0,file,utterance,confidence,start,duration,spk,language
0,ttsmaker-file-2024-3-2-14-55-16.mp3,"Agent, thank you for calling Amazon customer s...",0.688477,20,21960,1,[en]
1,ttsmaker-file-2024-3-2-14-55-16.mp3,Is there anything else you'd like assistance w...,0.993164,22080,17000,1,[en]


4
Processing C:/Users/niles/Desktop/Genpact Project/Call_Recordings/ttsmaker-file-2024-3-2-14-56-36.mp3
data response: {'name': 'ttsmaker-file-2024-3-2-14-56-36-mp3', 'accountId': '9550477e-75c5-4264-a5f6-bf5b358de449', 'contextId': '4b8f32ab-6aa9-48a3-a208-99d83ab4187e', 'contentType': 'audio/wav', 'fileName': 'bcf70082-608f-483c-9341-eb9eb71f1d11', 'description': 'C:/Users/niles/Desktop/Genpact Project/Call_Recordings\\ttsmaker-file-2024-3-2-14-56-36.mp3', 'dateCreated': '2024-04-02T17:52:55.508Z', 'dateModified': '2024-04-02T17:52:55.508Z', 'tags': ['test'], 'transcoded': False, 'longPersist': False, 'encryption': 'standard', 'objectId': 'bcf70082-608f-483c-9341-eb9eb71f1d11', 'sosRef': {'bucket': 'aoto.internal.fqrkvhvl.voicegain.ai', 'hash': 'bcf70082-608f-483c-9341-eb9eb71f1d11'}}
{'sessions': [{'sessionId': 'O-2-0luiog9u00skhyxzn7ozj9bgbf0k', 'sessionUrl': 'https://api.voicegain.ai/v1/asr/transcribe/O-2-0luiog9u00skhyxzn7ozj9bgbf0k', 'asyncMode': 'OFF-LINE', 'poll': {'afterlife'

Unnamed: 0,file,utterance,confidence,start,duration,spk,language
0,ttsmaker-file-2024-3-2-14-56-36.mp3,"Agent, thank you for calling Amazon customer s...",0.60791,40,20840,1,[en]
1,ttsmaker-file-2024-3-2-14-56-36.mp3,"Customer, it's missing the power cord and the ...",0.96582,21120,20800,1,[en]
2,ttsmaker-file-2024-3-2-14-56-36.mp3,Have a good day.,0.987793,42080,660,1,[en]


Duration: 324.0118806362152 seconds
THE END


In [15]:
def clean_df(detailed_transcription_df):
    """Collapse per-utterance rows into one dialogue string per file.

    Args:
        detailed_transcription_df: DataFrame with at least a ``file`` column
            and an ``utterance`` (or already renamed ``Dialogue``) column of
            strings. The frame is not modified.

    Returns:
        DataFrame with columns ``file``, ``Dialogue`` (utterances of the file
        joined with single spaces) and ``new`` (``Dialogue`` with speaker
        labels rewritten as ``Agent:`` / ``Customer:``, each label preceded by
        a newline, and a newline inserted after every ``.`` and ``?``).
    """
    # Work on a renamed copy rather than mutating the caller's frame.
    renamed = detailed_transcription_df.rename(columns={'utterance': 'Dialogue'})
    interaction_df = renamed.groupby('file')['Dialogue'].agg(' '.join).reset_index()

    # Substring substitutions, applied in order. Series.replace would only
    # match whole cell values, so each step must go through the .str accessor;
    # regex=False keeps '.' and '?' literal.
    substitutions = [
        ('Agent, ', 'Agent: '),
        ('Customer, ', 'Customer: '),
        ('Agent:', '\nAgent:'),
        ('Customer:', '\nCustomer:'),
        ('.', '.\n'),
        ('?', '?\n'),
    ]
    formatted = interaction_df['Dialogue']
    for old, new in substitutions:
        formatted = formatted.str.replace(old, new, regex=False)
    interaction_df['new'] = formatted
    return interaction_df

Unnamed: 0,file,Dialogue,new
0,ttsmaker-file-2024-3-2-14-52-11.mp3,"Agent, thank you for calling Amazon customer s...",Agent: thank you for calling Amazon customer s...
1,ttsmaker-file-2024-3-2-14-52-46.mp3,"Agent, thank you for calling Amazon customer s...",Agent: thank you for calling Amazon customer s...
2,ttsmaker-file-2024-3-2-14-53-45.mp3,"Agent, thank you for calling Amazon customer s...",Agent: thank you for calling Amazon customer s...
3,ttsmaker-file-2024-3-2-14-55-16.mp3,"Agent, thank you for calling Amazon customer s...",Agent: thank you for calling Amazon customer s...
4,ttsmaker-file-2024-3-2-14-56-36.mp3,"Agent, thank you for calling Amazon customer s...",Agent: thank you for calling Amazon customer s...


PII Removal

In [50]:
def _build_pii_engines():
    """Create the Presidio analyzer (with custom recognizers) and anonymizer."""
    analyzer = AnalyzerEngine()

    # Stand-alone 6-digit numbers are treated as order numbers.
    regex_order_number = r"(\b\d{6}\b)"
    order_number = Pattern(name="order number", regex=regex_order_number, score=0.4)
    order_number_recognizer = PatternRecognizer(supported_entity="ORDER",
                                                patterns=[order_number],
                                                context=["order","order number","order id","id"])
    analyzer.registry.add_recognizer(order_number_recognizer)

    # Stand-alone 4-digit numbers are treated as card digits.
    regex_credit_card = r"(\b\d{4}\b)"
    credit_card = Pattern(name="cred card", regex=regex_credit_card, score=0.4)
    credit_card_recognizer = PatternRecognizer(supported_entity="CARD",
                                               patterns=[credit_card],
                                               context=["card", "credit"])
    analyzer.registry.add_recognizer(credit_card_recognizer)

    return analyzer, AnonymizerEngine()


def pii(text):
    """Return ``text`` with detected PII replaced by the anonymizer's output.

    Entities requested from the analyzer: PHONE_NUMBER, PERSON, EMAIL_ADDRESS,
    LOCATION plus the custom ORDER (6-digit) and CARD (4-digit) patterns.

    Args:
        text: The text to mask.

    Returns:
        The anonymized text. Empty or non-string input is returned unchanged.
    """
    if not isinstance(text, str) or not text:
        return text

    # Build the engines once and reuse them: this function is applied to every
    # row of a DataFrame, and re-creating the engines per call is wasted work.
    engines = getattr(pii, "_engines", None)
    if engines is None:
        engines = _build_pii_engines()
        pii._engines = engines
    analyzer, anonymizer = engines

    results = analyzer.analyze(text=text,
                               entities=["PHONE_NUMBER","PERSON","EMAIL_ADDRESS","LOCATION","ORDER","CARD"],
                               language='en')

    result = anonymizer.anonymize(
        text=text,
        analyzer_results=results
    )

    return result.text


Loading the Masked Transcriptions in DB

Pipeline

In [None]:
# Transcription -> cleaning -> PII-masking stage of the pipeline.
# (A dangling bare `def` was removed here; it made the cell a SyntaxError.)
# TODO: fetch audio from S3
# TODO: transcribe
interaction_df = clean_df(detailed_transcription_df)
interaction_df['Masked_Dialogue'] = interaction_df['Dialogue'].apply(pii)
# TODO: load the masked transcriptions into the DB

Finding insights using OpenAI's GPT-3.5 Turbo model (using the API and prompt engineering)

In [55]:
# SECURITY: the API key is read from the OPENAI_API_KEY environment variable so
# that no secret is stored in the notebook. A key that was previously
# hard-coded here must be treated as compromised and revoked.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [56]:
def get_completion_from_messages(system_prompt, user_prompt, interaction, model="gpt-3.5-turbo", temperature=0.4):
    """Send one system + user exchange to the chat API and return the reply text.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Leading part of the user message.
        interaction: Value appended to the user message after ``Interaction: ``.
        model: Model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The message content of the first choice in the API response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt + f"Interaction: {interaction}"},
    ]

    # Uses the module-level `client`.
    reply = client.chat.completions.create(
        messages=conversation,
        model=model,
        temperature=temperature,
    )

    first_choice = reply.choices[0]
    return first_choice.message.content

In [57]:
def get_prompt():
    """Return the ``(system_prompt, user_prompt)`` pair for interaction analysis.

    The system prompt lists the insights to extract and the JSON output
    format; the user prompt asks for a response in English.
    """
    analysis_instructions = """Analyze the provided Amazon agent-customer interaction and extract the following insights:
    1) Issue Category: Identify the main issue or concern raised by the customer, such as "Refund Status", "Product Return", "App Issue", etc.
    2) Detailed Issue: The issue in detail, like if delivery issue, then how many days it was delayed etc.
    3) Problem Resolution: Determine if the problem was successfully resolved during the interaction. Classify as "Resolved" or "Not Resolved".
    4) Customer Tone: Assess the tone of the customer's messages. Classify as "Positive", "Negative", "Neutral", or "Mixed".
    5) Agent Tone: Evaluate the tone of the agent's responses. Classify as "Professional", "Friendly", "Helpful", or "Neutral".
    6) Agent Introduction: Check if the agent introduced themselves at the beginning of the interaction. Classify as "Yes" or "No".
    7) Empathy Level: Gauge the level of empathy displayed by the agent towards the customer's issue. Classify as "High", "Moderate", or "Low".
    8) Clarity of Communication: Assess how clearly the agent communicates instructions or solutions to the customer. Classify as "Clear", "Somewhat Clear", or "Unclear".
    9) Customer Engagement: Determine the level of engagement exhibited by the customer throughout the interaction. Classify as "High", "Moderate", or "Low".
    10) Agent Knowledge: Evaluate the agent's knowledge about the products or services related to the customer's issue. Classify as "High", "Moderate", or "Low".
    11) Follow-up Actions: Identify any follow-up actions promised by the agent to resolve the customer's issue.
    12) Problem Complexity: Assess the complexity of the problem faced by the customer. Classify as "High", "Moderate", or "Low".
    13) Customer Patience: Evaluate the patience exhibited by the customer during the interaction. Classify as "High", "Moderate", or "Low".
    14) Agent Proactiveness: Determine if the agent proactively offers additional assistance or solutions beyond the immediate issue.
    15) Customer Feedback: Capture any feedback provided by the customer regarding their overall experience with the support interaction.
    16) Agent Supportiveness: Evaluate how supportive the agent was in addressing the customer's concerns. Classify as "High", "Moderate", or "Low".
    17) Customer Knowledge: Assess the customer's understanding of the problem and ability to provide relevant details.
    18) Overall Interaction Rating: Provide an overall rating for the interaction based on the combined performance of the agent and the customer satisfaction.

    Output format:
    {
        "issue": [
            {
                "issue_category": "string",
                "detailed_issue": "string",
                "problem_resolution": "Resolved / Not Resolved"
            }
        ],
        "agent_introduction": true // or false,
        "customer_tone": "e.g., Positive",
        "agent_tone": "e.g., Professional",
        "customer_satisfaction": "Satisfied / Not Satisfied",
        "empathy_level": "High / Moderate / Low",
        "clarity_of_communication": "Clear / Somewhat Clear / Unclear",
        "customer_engagement": "High / Moderate / Low",
        "agent_knowledge": "High / Moderate / Low",
        "follow_up_actions": "string or null",
        "problem_complexity": "High / Moderate / Low",
        "customer_patience": "High / Moderate / Low",
        "agent_proactiveness": "Yes / No",
        "customer_feedback": "string or null",
        "agent_supportiveness": "High / Moderate / Low",
        "customer_knowledge": "High / Moderate / Low",
        "overall_interaction_rating": "Rating out of 5 or 10"
    }
    """
    language_directive = """Please respond in English: 'en'."""

    return analysis_instructions, language_directive

In [21]:
def parse_json(response):
    """Parse the model's JSON reply into a fixed 19-element tuple.

    Text surrounding the outermost ``{...}`` (for example a markdown code
    fence) is ignored before decoding.

    Args:
        response: The raw reply text.

    Returns:
        ``(issue_category_list, detailed_issue_list, problem_resolution_list,
        agent_introduction, customer_tone, agent_tone, customer_satisfaction,
        empathy_level, clarity_of_communication, customer_engagement,
        agent_knowledge, follow_up_actions, problem_complexity,
        customer_patience, agent_proactiveness, customer_feedback,
        agent_supportiveness, customer_knowledge, overall_interaction_rating)``.
        The first three are lists with one entry per issue; any key missing
        from the reply yields ``''``.

    Raises:
        json.JSONDecodeError: if no valid JSON object can be decoded.
    """
    payload = response.strip()
    # Keep only the outermost JSON object so fences/prose around it are ignored.
    start, end = payload.find('{'), payload.rfind('}')
    if start != -1 and end > start:
        payload = payload[start:end + 1]
    response_json = json.loads(payload)

    # 'issue' is expected to be a list of objects; tolerate null or one object.
    issues = response_json.get('issue') or []
    if isinstance(issues, dict):
        issues = [issues]

    issue_category_list = [entry.get('issue_category', '') for entry in issues]
    detailed_issue_list = [entry.get('detailed_issue', '') for entry in issues]
    problem_resolution_list = [entry.get('problem_resolution', '') for entry in issues]

    # Scalar fields in the order of the returned tuple. All are read with the
    # same missing-key default so an incomplete reply does not raise KeyError.
    scalar_keys = (
        'agent_introduction', 'customer_tone', 'agent_tone', 'customer_satisfaction',
        'empathy_level', 'clarity_of_communication', 'customer_engagement',
        'agent_knowledge', 'follow_up_actions', 'problem_complexity',
        'customer_patience', 'agent_proactiveness', 'customer_feedback',
        'agent_supportiveness', 'customer_knowledge', 'overall_interaction_rating',
    )
    scalars = tuple(response_json.get(key, '') for key in scalar_keys)

    return (issue_category_list, detailed_issue_list, problem_resolution_list) + scalars

In [103]:
def interaction_insight_for_one(temp_df, review):
    """Analyse one interaction and append its insights as a new row.

    Args:
        temp_df: DataFrame of insight rows collected so far.
        review: The interaction text sent to the model.

    Returns:
        A new DataFrame: ``temp_df`` followed by one row for this interaction.
        The three ``*_List`` columns each hold a list with one entry per
        issue; the remaining columns hold the scalar values from the reply.
    """
    system_prompt, user_prompt = get_prompt()
    gpt_response = get_completion_from_messages(system_prompt, user_prompt, review)
    print(gpt_response)

    parsed = parse_json(gpt_response)
    issue_category_list = parsed[0]
    print(issue_category_list)

    # Column names in the same order as the tuple returned by parse_json.
    column_names = (
        'Issue_Category_List', 'Detailed_Issue_List', 'Problem_Resolution_List',
        'Agent_Introduction', 'Customer_Tone', 'Agent_Tone', 'Customer_Satisfaction',
        'Empathy_Level', 'Clarity_of_Communication', 'Customer_Engagement',
        'Agent_Knowledge', 'Follow_Up_Actions', 'Problem_Complexity',
        'Customer_Patience', 'Agent_Proactiveness', 'Customer_Feedback',
        'Agent_Supportiveness', 'Customer_Knowledge', 'Overall_Interaction_Rating',
    )
    new_row = dict(zip(column_names, parsed))

    # A single-record frame stores each list as one cell. Building the frame
    # from a dict of columns with an unwrapped issue-category list failed
    # whenever the reply contained zero or several issues.
    new_df = pd.DataFrame([new_row])
    result_df = pd.concat([temp_df, new_df], ignore_index=True)
    return result_df

In [None]:
def interaction_insights_for_all(interaction_df, max_interactions=3):
    """Collect model insights for the masked dialogues in ``interaction_df``.

    Args:
        interaction_df: DataFrame holding the masked dialogue text in a
            ``Masked_Dialogues`` or ``Masked_Dialogue`` column.
        max_interactions: Maximum number of dialogues to analyse; ``None``
            processes every row. Defaults to 3.

    Returns:
        DataFrame with one insight row per analysed dialogue.

    Raises:
        KeyError: if neither masked-dialogue column is present.
    """
    interaction_insights_df = pd.DataFrame(columns=['Issue_Category_List', 'Detailed_Issue_List', 'Problem_Resolution_List',
                                            'Agent_Introduction', 'Customer_Tone', 'Agent_Tone', 'Customer_Satisfaction',
                                            'Empathy_Level', 'Clarity_of_Communication', 'Customer_Engagement', 'Agent_Knowledge',
                                            'Follow_Up_Actions', 'Problem_Complexity', 'Customer_Patience', 'Agent_Proactiveness',
                                            'Customer_Feedback', 'Agent_Supportiveness', 'Customer_Knowledge',
                                            'Overall_Interaction_Rating'])

    # The notebook's pipeline cell writes 'Masked_Dialogue' (singular) while
    # this function read 'Masked_Dialogues'; accept either spelling.
    if 'Masked_Dialogues' in interaction_df.columns:
        text_column = 'Masked_Dialogues'
    else:
        text_column = 'Masked_Dialogue'

    for i, interaction in enumerate(interaction_df[text_column], start=1):
        interaction_insights_df = interaction_insight_for_one(interaction_insights_df, interaction)
        print(i)

        # Each dialogue costs one API call, so stop once the cap is reached.
        if max_interactions is not None and i >= max_interactions:
            break
    display(interaction_insights_df)
    return interaction_insights_df

Store insights in S3 Buckets

Pipeline

In [None]:
def pipeline(interaction_df):
    """Run the insight-extraction stage on ``interaction_df``.

    Args:
        interaction_df: DataFrame of masked dialogues, passed unchanged to
            ``interaction_insights_for_all``.

    Returns:
        The insights DataFrame returned by ``interaction_insights_for_all``.
    """
    interaction_insights_df = interaction_insights_for_all(interaction_df)
    # TODO: store df in S3
    return interaction_insights_df