## Imports and Setup


In [1]:
import functions
import openai

from connection import openai_key

In [7]:
# Authenticate the OpenAI client with the key imported from the local `connection` module.
openai.api_key = openai_key

## Chat Import


In [62]:
# Chat to analyse. The ID is hard-coded for this run; the commented-out line
# below is the interactive alternative that prompts for it instead.
# chat_id = input('Please enter the chat ID: ')
chat_id = "5ef75e9235d9a3cae54c1973"
print(f"Analisando chat: {chat_id}")

5ef75e9235d9a3cae54c1973


In [63]:
# Retrieve the messages of the selected chat:
messages = functions.get_chat_messages(chat_id)


# Create the messages dataframe:
messages_df = functions.import_data(messages)

In [64]:
# Report the number of rows in messages_df (shown as the cell's output value).
print("Number of messages in chat:")
messages_df.shape[0]

Number of messages in chat:


628

## Pre-processing


In [66]:
from collections import defaultdict
import pandas as pd
from typing import Dict


def split_dataframe(df: pd.DataFrame, n_parts: int) -> Dict[int, pd.DataFrame]:
    """
    Split a chat dataframe into ``n_parts`` consecutive, near-equal slices.

    Rows keep their original order. When the row count is not a multiple of
    ``n_parts``, the extra rows are handed out one each to the first slices,
    so slice sizes never differ by more than one row.

    Input:
        df: Pandas DataFrame to split.
        n_parts: Number of slices to produce; must be >= 1.

    Output:
        Dictionary mapping the slice position (0 .. n_parts - 1) to the
        corresponding slice of ``df``. Trailing slices are empty when
        ``n_parts`` exceeds the number of rows.

    Raises:
        ValueError: if ``n_parts`` is smaller than 1.
    """
    if n_parts < 1:
        raise ValueError(f"n_parts must be a positive integer, got {n_parts}")

    n_rows_per_part, n_rows_leftover = divmod(len(df), n_parts)

    # A factory-less defaultdict behaves like a plain dict; it is kept so
    # callers receive the same mapping type as before.
    dataframe_dict = defaultdict()
    start = 0
    for i in range(n_parts):
        # The first `n_rows_leftover` slices absorb one extra row each, which
        # keeps the largest slice as small as possible.
        end = start + n_rows_per_part + (1 if i < n_rows_leftover else 0)
        dataframe_dict[i] = df.iloc[start:end]
        start = end
    return dataframe_dict

In [68]:
import pandas as pd
import functions
from typing import List, Dict


def generate_gpt_message_input(
    messages_df: pd.DataFrame, input_type: str = "full"
) -> List[Dict]:
    """
    Build the message list sent to the chat-completion API for one chat.

    The list starts with a system message holding the classification
    instructions, followed by one user message holding the formatted chat.

    Input:
        messages_df: Pandas DataFrame, the chat history (or one part of it).
        input_type: Which formatter from `functions` produces the chat text:
            "full" uses `format_chat_to_gpt`, "client" uses
            `format_client_messages_to_gpt`.

    Output:
        List of two dictionaries, each with "role" and "content" keys.

    Raises:
        ValueError: if `input_type` is neither "full" nor "client".
    """
    # Reject unknown modes up front: otherwise the result would contain only
    # the system prompt and the model would be asked to classify nothing.
    if input_type not in ("client", "full"):
        raise ValueError(
            f"input_type must be 'full' or 'client', got {input_type!r}"
        )

    # The literal is continued with backslashes, so any whitespace at the
    # start of a continuation line is part of the prompt text.
    initial_prompt = 'Eu vou fornecer uma conversa entre um cliente(C) e um atendente(A) e você deve me retornar o sentimento do cliente em,\
 no máximo 2 palavras. Por exemplo: "Muito Satisfeito" ou " Insatisfeito".\n \
\nOs sentimentos possíveis do cliente, e consequentemente as opções de classificação de sentimento, são: "Muito Satisfeito", "Satisfeito", "Neutro", "Insatisfeito", "Muito Insatisfeito".'

    if input_type == "client":
        formatter = functions.format_client_messages_to_gpt
    else:
        formatter = functions.format_chat_to_gpt

    # Both formatters are indexed with "string" to obtain the chat text.
    return [
        {"role": "system", "content": initial_prompt},
        {"role": "user", "content": formatter(messages_df)["string"]},
    ]

In [70]:
def get_single_chat_gpt_sentiment(gpt_formatted_messages):
    """
    Ask the chat-completion model for the sentiment of a single chat.

    Input:
        gpt_formatted_messages: message list passed as `messages` to the API.

    Output:
        The content of the message in the first choice of the API response.
    """
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": gpt_formatted_messages,
        "temperature": 0,
    }
    completion = openai.ChatCompletion.create(**request_kwargs)

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

In [71]:
def get_chat_split(
    msg_df: pd.DataFrame, max_tokens: int = 3000
) -> Dict[int, pd.DataFrame]:
    """
    Split a chat into the fewest parts whose prompts each fit the token budget.

    The chat is split into 1, 2, 3, ... parts until the largest part, once
    formatted as API input, has at most `max_tokens` tokens. The search stops
    at one row per part; if even that split is over budget, the finest split
    is returned and a warning is emitted instead of looping forever.

    Input:
        msg_df: Pandas DataFrame, the chat history.
        max_tokens: Largest token count allowed for a single part's prompt.

    Output:
        Dictionary mapping part position to the dataframe slice of that part,
        as produced by `split_dataframe`.
    """
    import warnings

    def _largest_part_tokens(parts) -> int:
        # Token count of the most expensive part, measured on the exact
        # message list that would be sent to the API for it.
        return max(
            functions.num_tokens_from_messages(generate_gpt_message_input(part_df))
            for part_df in parts.values()
        )

    chat_parts = 1
    dataframe_dict = split_dataframe(msg_df, chat_parts)
    n_tokens = _largest_part_tokens(dataframe_dict)

    # Splitting finer than one row per part cannot reduce the largest part.
    max_parts = len(msg_df)
    while n_tokens > max_tokens and chat_parts < max_parts:
        chat_parts += 1
        dataframe_dict = split_dataframe(msg_df, chat_parts)
        n_tokens = _largest_part_tokens(dataframe_dict)

    if n_tokens > max_tokens:
        warnings.warn(
            f"Largest chat part still has {n_tokens} tokens "
            f"(limit {max_tokens}) after splitting into {chat_parts} part(s)",
            RuntimeWarning,
            stacklevel=2,
        )

    return dataframe_dict

In [72]:
def sentiment_to_score(sentiment: str):
    """
    Convert a sentiment label into a numeric score.

    Matching ignores letter case, repeated inner whitespace, and surrounding
    whitespace, quotes or sentence punctuation, so replies such as
    'Muito Satisfeito.' or ' "insatisfeito"' are scored like the bare label.

    Input:
        sentiment: the label text to score.

    Output:
        2 for "Muito Satisfeito", 1 for "Satisfeito", 0 for "Neutro",
        -1 for "Insatisfeito", -2 for "Muito Insatisfeito"; 0 for anything
        that is not one of these labels (including non-string input).
    """
    score_by_label = {
        "muito satisfeito": 2,
        "satisfeito": 1,
        "neutro": 0,
        "insatisfeito": -1,
        "muito insatisfeito": -2,
    }

    if not isinstance(sentiment, str):
        return 0

    # Trim decoration around the label, then collapse case and inner spacing
    # so the lookup key matches the lowercase table above.
    trimmed = sentiment.strip(" \t\r\n\"'.!")
    label = " ".join(trimmed.casefold().split())
    return score_by_label.get(label, 0)

In [104]:
import pandas as pd


def get_gpt_sentiment(messages_df: pd.DataFrame):
    """
    Score the sentiment of a chat, part by part, and average the result.

    Input:
        messages_df: Pandas DataFrame, the chat history.

    Output:
        Dictionary with two entries:
        "average" - mean of the sentiment scores over all chat parts;
        "sentiment_dict" - mapping from each part's position to its score.
    """
    parts_by_order = get_chat_split(messages_df)
    print(f"The chat was divided into {len(parts_by_order)} part(s)")

    scores_by_part = defaultdict(lambda: None)
    for order, chat_df in parts_by_order.items():
        prompt_messages = generate_gpt_message_input(chat_df)
        label = get_single_chat_gpt_sentiment(prompt_messages)
        scores_by_part[order] = sentiment_to_score(label)

    average_score = sum(scores_by_part.values()) / len(parts_by_order)

    return {"average": average_score, "sentiment_dict": scores_by_part}

In [91]:
# Run the sentiment pipeline on the loaded chat: split it into parts, score each part, average the scores.
gpt_return = get_gpt_sentiment(messages_df)

The chat was divided into 5 part(s)


In [105]:
# Display the average score and the per-part scores held in `gpt_return`.
gpt_return

{'average': 1.4,
 'sentiment_dict': defaultdict(None, {0: 2, 1: 1, 2: 1, 3: 1, 4: 2})}