## Initial Library Import

In [1]:
import pandas as pd
import numpy as np
import os
import glob
import re

from time import time 
from collections import defaultdict

from LeIA import SentimentIntensityAnalyzer

## Data Import

Import chat file from user input:

In [2]:
# Read the chat file path from the user. Surrounding whitespace and quotes
# (common when a path is pasted from a file manager) are removed so that
# open() receives a clean path.
file = input("Please input the target file path:").strip().strip('"\'')
# import_data expects a list of paths, so wrap the single path.
files = [file]

Please input the target file path:test_chat.txt


Function that parses the chat files and builds the chats and messages dataframes:

In [3]:
def _parse_message_header(line):
    """Return ``(sender, text)`` if ``line`` opens a new message, else ``None``.

    A line opens a message when its second space-separated token contains
    ``--Atendente:`` or ``--Cliente:``. ``sender`` is ``'a'`` or ``'c'`` and
    ``text`` is everything after the first occurrence of the tag, stripped.
    """
    tokens = line.split(' ')
    if len(tokens) < 2:
        return None
    header_parts = tokens[1].split('--')
    if len(header_parts) < 2:
        return None

    tag = header_parts[1]
    if tag == 'Atendente:':
        sender = 'a'
    elif tag == 'Cliente:':
        sender = 'c'
    else:
        return None

    # maxsplit=1 keeps the whole message even when the tag text shows up
    # again inside the message body.
    return sender, line.split(tag, 1)[1].strip()


def import_data(chat_files_list):
    """Parse chat text files into a chats dataframe and a messages dataframe.

    Parameters
    ----------
    chat_files_list : iterable of str
        Paths of UTF-8 text files. Each file may hold one or more chats; a chat
        ends at the line ``_(Não é preciso responder essa mensagem.)_`` or at
        the end of the file.

    Returns
    -------
    dict
        ``{'chats': DataFrame, 'messages': DataFrame}``.
        ``chats`` has the columns ``id``, ``num_client_messages`` and
        ``num_service_messages``. ``messages`` has ``id``, ``chat_id``,
        ``order_in_chat`` (1-based position among the client messages of the
        chat, or ``'NA'`` for attendant messages), ``text`` and ``source``
        (``'C'`` for client, ``'A'`` for attendant).
    """
    end_of_chat = '_(Não é preciso responder essa mensagem.)_'

    # Column-oriented buffers that become the two dataframes.
    chats_dict = {'id': [], 'num_client_messages': [], 'num_service_messages': []}
    messages_dict = {'id': [], 'chat_id': [], 'order_in_chat': [], 'text': [], 'source': []}

    # Global counters: message ids and chat ids keep growing across files.
    message_id = 0
    chat_id = 1
    message_order = 0
    num_client_messages = 0
    num_service_messages = 0

    for chat in chat_files_list:
        with open(chat, 'r', encoding='utf-8') as chat_file:
            for line in chat_file:
                stripped = line.strip()

                # Skip empty lines
                if stripped == '':
                    continue

                # End of chat: store its counters and start a new chat
                if stripped == end_of_chat:
                    chats_dict['id'].append(chat_id)
                    chats_dict['num_client_messages'].append(num_client_messages)
                    chats_dict['num_service_messages'].append(num_service_messages)
                    chat_id += 1
                    num_client_messages = 0
                    num_service_messages = 0
                    message_order = 0
                    continue

                parsed = _parse_message_header(line)

                if parsed is None:
                    # Continuation of the previous message. Lines that appear
                    # before any message exists have nothing to attach to and
                    # are ignored instead of raising IndexError.
                    if messages_dict['text']:
                        previous = messages_dict['text'][-1].strip()
                        messages_dict['text'][-1] = previous + ' ' + stripped
                    continue

                sender, message_text = parsed
                message_id += 1
                messages_dict['id'].append(message_id)
                messages_dict['chat_id'].append(chat_id)

                if sender == 'c':
                    # Only client messages are ordered inside the chat.
                    message_order += 1
                    messages_dict['order_in_chat'].append(message_order)
                    num_client_messages += 1
                    messages_dict['source'].append('C')
                else:
                    messages_dict['order_in_chat'].append('NA')
                    num_service_messages += 1
                    messages_dict['source'].append('A')

                messages_dict['text'].append(message_text)

        # A file that ends without the end-of-chat line still holds a chat:
        # register it so every chat_id used in messages exists in chats.
        if num_client_messages or num_service_messages:
            chats_dict['id'].append(chat_id)
            chats_dict['num_client_messages'].append(num_client_messages)
            chats_dict['num_service_messages'].append(num_service_messages)
            chat_id += 1
            num_client_messages = 0
            num_service_messages = 0
            message_order = 0

    chats_df = pd.DataFrame(data=chats_dict)
    messages_df = pd.DataFrame(data=messages_dict)
    return {'chats': chats_df, 'messages': messages_df}


In [4]:
# Parse the txt chat file(s) into pandas dataframes: import_data returns a dict
# with a 'chats' dataframe and a 'messages' dataframe, unpacked here.
dataframes = import_data(files)
test_chat_df, test_messages_df = dataframes['chats'], dataframes['messages']

## Data Processing

Remove the attendant messages so that only client messages are analyzed:

In [5]:
# Cleanup function
def messages_cleanup(messages_df):
    """Return only the client messages (``source == 'C'``), re-indexed from 0.

    The input dataframe is left untouched; a new dataframe is returned.
    """
    is_client = messages_df['source'] == 'C'
    return messages_df.loc[is_client].reset_index(drop=True)

In [6]:
# Keep only the rows whose source is 'C' (client); the index is reset by messages_cleanup.
cl_test_messages_df = messages_cleanup(test_messages_df)

## Model Application

Apply LeIA model:

In [7]:
# Function to normalize LeIA compounds:
def extract_leia_sentiment(compound):
    """Convert a compound score into a normalized score and a discrete label.

    Returns a dict with:
      * ``'new_score'``: ``(compound + 1) / 2``
      * ``'label'``: ``0`` when compound is exactly 0, ``2`` above 0.2,
        ``1`` in (0, 0.2], ``-1`` in [-0.2, 0) and ``-2`` otherwise.
    """
    rescaled = (compound + 1) / 2

    if compound == 0:
        bucket = 0
    elif compound > 0.2:
        bucket = 2
    elif compound > 0:
        bucket = 1
    elif compound >= -0.2:
        bucket = -1
    else:
        bucket = -2

    return {'label': bucket, 'new_score': rescaled}

In [8]:
# LeIA Method Function
def chat_classification(messages_df):
    """Classify every message in ``messages_df`` with the LeIA analyzer.

    The ``text`` column is scored with ``polarity_scores(...)['compound']``;
    that score is turned into ``classification_score`` and
    ``classification_label`` through ``extract_leia_sentiment``. The returned
    dataframe is a new object without the ``score`` and ``source`` columns.
    """
    leia = SentimentIntensityAnalyzer()

    def compound_of(text):
        # Compound polarity score for a single message
        return leia.polarity_scores(text)['compound']

    # Raw compound score per message
    scored_df = messages_df.assign(score=messages_df['text'].apply(compound_of))

    # Normalized score and discrete label derived from the compound score
    normalized = scored_df['score'].apply(lambda value: extract_leia_sentiment(value)['new_score'])
    labels = scored_df['score'].apply(lambda value: extract_leia_sentiment(value)['label'])
    labeled_df = scored_df.assign(
        classification_score=normalized,
        classification_label=labels,
    )

    # Drop the columns that are no longer needed
    return labeled_df.drop(columns=['score', 'source'])

In [9]:
# Add classification_score and classification_label to each client message.
classified_messages_df = chat_classification(cl_test_messages_df)

## Application of Chat Sentiment Calculation Method

Calculate message weights:

In [10]:
# Function to calculate individual message weight:
def calculate_weight(order:int, n_messages:int):
    """Return the quadratic weight of the message at position ``order``.

    The weight is ``order**2`` divided by the sum of the squares of
    ``1..n_messages``, so the weights of all positions of a chat add up to 1
    and later messages weigh more than earlier ones.

    Parameters
    ----------
    order : int
        1-based position of the message in the chat.
    n_messages : int
        Number of messages being weighted.

    Raises
    ------
    ValueError
        If ``n_messages`` is smaller than 1.
    """
    if n_messages < 1:
        raise ValueError('There should be at least one message to be analyzed')

    # Closed form of 1**2 + 2**2 + ... + n**2; the product is always divisible
    # by 6, so integer division is exact.
    den = n_messages * (n_messages + 1) * (2 * n_messages + 1) // 6

    return (order**2) / den

In [11]:
# Function to generate a dataframe with weighted messages:
def generate_weighted_df(df):
    """Return a copy of ``df`` with an extra ``message_weight`` column.

    Each row's weight is ``calculate_weight(order_in_chat, number_of_rows)``.
    """
    total_rows = df.shape[0]

    def weight_of(row):
        # Weight of one message given its position and the total row count
        return calculate_weight(row['order_in_chat'], total_rows)

    weights = df.apply(weight_of, axis=1)
    return df.assign(message_weight=weights)

In [12]:
# Add the message_weight column to the classified client messages.
weighted_df = generate_weighted_df(classified_messages_df)

Calculate whole chat sentiment:

In [13]:
# Function to calculate chat sentiment based on 
# message weights and classification score

def calculate_chat_sentiment_coef(df):
    """Return the weighted mean of ``classification_label`` for a chat.

    Each row contributes ``classification_label * message_weight``; the total
    is divided by the sum of ``message_weight``.
    """
    pairs = [
        (record['classification_label'], record['message_weight'])
        for _, record in df.iterrows()
    ]
    weighted_total = sum(label * weight for label, weight in pairs)
    weight_total = sum(weight for _, weight in pairs)
    return weighted_total / weight_total

In [14]:
# Weighted mean of the message labels, using message_weight as the weight.
coefficient = calculate_chat_sentiment_coef(weighted_df)
print(f"The satisfaction coefficient of the provided chat is: {coefficient}.")

The satisfaction coefficient of the provided chat is: 0.6016042780748664.


In [15]:
# Function to generate the satisfaction label of the chat:
def generate_sentiment_label(coef:float):
    """Translate a satisfaction coefficient into a human-readable label.

    Parameters
    ----------
    coef : float
        Satisfaction coefficient, expected to lie in ``[-2, 2]``.

    Returns
    -------
    str
        ``'Insatisfeito'`` for ``coef <= -1``, ``'Levemente Insatisfeito'`` for
        ``-1 < coef < -0.2``, ``'Neutro'`` for ``-0.2 <= coef <= 0.2``,
        ``'Levemente Satisfeito'`` for ``0.2 < coef < 1`` and ``'Satisfeito'``
        for ``coef >= 1``. An error message is returned when ``coef`` is
        outside ``[-2, 2]`` or is NaN.
    """
    # Written as a negated range check so that NaN, which fails every
    # comparison, is reported as an error instead of falling through to
    # 'Satisfeito'.
    if not (-2 <= coef <= 2):
        return 'Houve um erro, por favor entre em contato com o suporte da ChatGuru'

    if coef <= -1:
        return 'Insatisfeito'
    if coef < -0.2:
        return 'Levemente Insatisfeito'
    if coef <= 0.2:
        return 'Neutro'
    if coef < 1:
        return 'Levemente Satisfeito'
    return 'Satisfeito'

In [16]:
# Map the numeric coefficient to its satisfaction label.
sat_label = generate_sentiment_label(coefficient)
print(f'The satisfaction label for the calculated coefficient is "{sat_label}".' )

The satisfaction label for the calculated coefficient is "Levemente Satisfeito".


## Report Creation

### Report Library Import

In [17]:
import reportlab
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Table, TableStyle, Spacer, ListFlowable, PageBreak
from  reportlab.platypus.tableofcontents import TableOfContents
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.styles import ParagraphStyle as PS

Format the chat so that it can be inserted into the report as the object of analysis:

In [18]:
# Function to format .txt chat and return it as a list:
def _parse_report_line(line):
    """Return ``(prefix, text)`` if ``line`` opens a new message, else ``None``.

    A line opens a message when its second space-separated token contains
    ``--Atendente:`` or ``--Cliente:`` (matched case-insensitively).
    ``prefix`` is ``'[Atendente] '`` or ``'[Cliente] '`` and ``text`` is
    everything after the first occurrence of the tag, stripped.
    """
    tokens = line.split(' ')
    if len(tokens) < 2:
        return None
    header_parts = tokens[1].split('--')
    if len(header_parts) < 2:
        return None

    tag = header_parts[1].lower()
    if tag == 'atendente:':
        prefix = '[Atendente] '
    elif tag == 'cliente:':
        prefix = '[Cliente] '
    else:
        return None

    # The tag is detected case-insensitively, so the text must be extracted
    # case-insensitively too. maxsplit=1 keeps the full message when the tag
    # text appears again inside it.
    pieces = re.split(re.escape(tag), line, maxsplit=1, flags=re.IGNORECASE)
    if len(pieces) < 2:
        return None
    return prefix, pieces[1].strip()


def format_chat(chat_path):
    """Read the first chat of a text file and return it as a list of strings.

    Each element is one message prefixed with ``'[Atendente] '`` or
    ``'[Cliente] '``. Lines that do not open a new message are appended to the
    previous message, separated by a single space. Lines found before the first
    message are ignored. Reading stops at the line
    ``_(Não é preciso responder essa mensagem.)_``.

    Parameters
    ----------
    chat_path : str
        Path of a UTF-8 encoded chat text file.

    Returns
    -------
    list of str
        The formatted messages, in file order.
    """
    end_of_chat = '_(Não é preciso responder essa mensagem.)_'
    chat_text = []

    with open(chat_path, 'r', encoding='utf-8') as chat_file:
        for line in chat_file:
            stripped = line.strip()

            # Skip empty lines
            if stripped == '':
                continue

            # End of chat
            if stripped == end_of_chat:
                break

            parsed = _parse_report_line(line)
            if parsed is not None:
                prefix, message_text = parsed
                chat_text.append(prefix + message_text)
            elif chat_text:
                # Continuation of the previous message: join with a space so
                # the words on both sides are not glued together.
                chat_text[-1] = chat_text[-1] + ' ' + stripped

    return chat_text

In [19]:
# Build the formatted chat: a list of message strings read from the input file.
formated_chat = format_chat(file)

Create report (MVP version):

In [20]:
def create_report(report_name:str, formated_chat:list, sentiment_coef:float):
    """Build the PDF sentiment-analysis report for one chat.

    Parameters
    ----------
    report_name : str
        Path of the PDF file to create.
    formated_chat : list of str
        Chat messages, one string per message, shown in the report in order.
    sentiment_coef : float
        Satisfaction coefficient of the chat; shown rounded to 3 decimals and
        translated into a label with ``generate_sentiment_label``.

    Returns
    -------
    SimpleDocTemplate
        The document object, after it has been built (written to disk).
    """
    # Paragraph parses its text as XML-like markup, so user-written chat text
    # containing '<', '>' or '&' (e.g. "<3") must be escaped before rendering.
    from xml.sax.saxutils import escape

    # Create document
    doc = SimpleDocTemplate(report_name, pagesize=letter)

    # Report element list
    elements = []
    # Styles Class Instance
    styles = getSampleStyleSheet()
    # Creates a style for centered text
    centered_style = PS(name='CenteredStyle', parent=styles['Heading3'], alignment=1)

    def add_paragraph(text, style, space_after=None):
        # Append a paragraph and, optionally, a vertical spacer after it.
        elements.append(Paragraph(text, style))
        if space_after is not None:
            elements.append(Spacer(1, space_after))

    # TITLE
    add_paragraph('Relatório de Análise de Sentimento do Chat', styles['Title'], 20)

    # SECTION - "Method for Sentiment Analysis"
    add_paragraph('Método para Análise de Sentimento', styles['Heading1'])

    # SUBSECTION - "Method Description"
    add_paragraph('Descrição do Método', styles['Heading2'], 10)

    # TEXT - "method description"
    method_introduction = (
        'O método para a obtenção da estimativa do sentimento de um cliente durante '
        'uma interação com o atendimento consiste em :'
    )
    add_paragraph(method_introduction, styles['Normal'], 10)

    method_list = [
        'Análise do sentimento de todas as mensagens dos clientes',
        'Cálculo do peso de cada mensagem',
        'Cálculo da média do sentimento do chat completo',
        'Interpretação do resultado da análise'
    ]

    method_steps = ListFlowable(
        [Paragraph(item, styles['Normal']) for item in method_list],
        bulletType='bullet',
        leftIndent=20,
    )
    elements.append(method_steps)
    elements.append(Spacer(1, 10))

    entry_1 = (
        'O primeiro passo consiste na aplicação do modelo de aprendizado de máquina treinado para a '
        'classificação do sentimento do cliente em cada uma das mensagens enviadas para o atendente, gerando assim '
        'um nível estimado de satisfação do cliente que varia entre "Satisfeito", "Levemente Satisfeito", "Neutro"'
        ', "Levemente Insatisfeito" ou "Insatisfeito".'
    )
    add_paragraph(entry_1, styles['Normal'], 10)

    entry_2 = (
        'O que se segue é a transformação das classificações dos sentimentos individuais '
        'expressos em cada uma das mensagens em pesos matemáticos que compõem o sentimento do cliente '
        'durante todo o atendimento. Esses pesos são definidos seguindo-se a metodologia formulada internamente'
        ' pelo time de Inteligência Artificial da ChatGuru.'
    )
    add_paragraph(entry_2, styles['Normal'], 10)

    entry_3 = (
        'Usando-se parâmetros obtidos do chat completo e do modelo de IA da ChatGuru, é calculado um '
        'coeficiente numérico de satisfação do atendimento completo.'
    )
    add_paragraph(entry_3, styles['Normal'], 10)

    entry_4 = (
        'Por fim, esse coeficiente de satisfação é interpretado em termos não-matemáticos para ser '
        'apreciado pelo contratante do serviço.'
    )
    add_paragraph(entry_4, styles['Normal'], 20)

    # SECTION - "Sentiment Analysis"
    add_paragraph('Análise de Sentimento', styles['Heading1'])

    # SUBSECTION - "Chat Presentation"
    add_paragraph('Apresentação do Chat', styles['Heading2'], 10)

    # TEXT - "chat content" (escaped: this is free text typed by chat users)
    for line in formated_chat:
        add_paragraph(escape(line), styles['Normal'], 5)

    # SUBSECTION - "Analysis result"
    add_paragraph('Resultados da Análise', styles['Heading2'], 10)

    # TEXT - "analysis result intro"
    analysis_result = (
        'Ao se aplicar o método já descrito neste relatório, o coeficiente de satisfação do usuário na '
        'conversa apresentada como objeto de análise foi de:'
    )
    add_paragraph(analysis_result, styles['Normal'], 8)

    # TEXT - "sentiment coefficient"
    str_coef = str(round(sentiment_coef, 3))
    add_paragraph(f"coeficiente de satisfação = {str_coef}", centered_style, 10)

    # TEXT - "result interpretation"
    sentiment_label = generate_sentiment_label(sentiment_coef)
    interpretation = "Dado o coeficiente de satisfação apresentado, podemos estimar que o cliente se sentiu:"
    add_paragraph(interpretation, styles['Normal'])
    add_paragraph(sentiment_label, centered_style, 10)

    # Build the report (writes the PDF file)
    doc.build(elements)

    return doc

In [21]:
import os

# Name the PDF after the chat file's base name only, so that an input path that
# contains directories still yields a valid file name in the working directory.
chat_stem = os.path.splitext(os.path.basename(file))[0]
report_name = f"sentiment_analysis_report-{chat_stem}.pdf"

try:
    chat_sentiment_report = create_report(report_name, formated_chat, coefficient)
except Exception as error:
    # Show what went wrong instead of hiding the cause of the failure.
    print(f'Error on report creation: {error!r}')
else:
    print(f'The report was created at {os.getcwd()}')

The report was created at C:\Users\higor\OneDrive\Área de Trabalho\Higor\ChatGuru\chatguru-ai-challenge
