# Review Conversations

## Import Libraries

In [5]:
import pandas as pd
import glob
from datetime import datetime

## Get Latest Augmented Data

In [7]:
# Load the augmented intent examples; the 'utf-8-sig' codec drops a leading BOM if the file has one.
aug_intent_df = pd.read_csv(
    'data/augmented_data/augmented_intents.csv',
    encoding='utf-8-sig',
)

In [8]:
# Preview the first five rows of the augmented intent data.
aug_intent_df.head(n=5)

Unnamed: 0,text,intent
0,hello,greeting
1,hi,greeting
2,howdy,greeting
3,greetings,greeting
4,welcome,greeting


## Get Conversation Log Data

In [9]:
# Directory holding the per-session conversation log CSVs.
path = r'data/conversation_logs'
# glob returns matches in arbitrary, filesystem-dependent order. Sort them so the
# concatenation order, and therefore which duplicate row a later keep='first'
# de-duplication retains, is the same on every run and machine.
all_files = sorted(glob.glob(path + "/*.csv"))
# Accumulator for the frames read from each log file.
li = []

In [10]:
# Read every log file into its own frame (first row is the header, no index column).
# li is rebound rather than appended to, so re-running this cell does not
# accumulate a second copy of every file.
li = [
    pd.read_csv(filename, index_col=None, header=0)
    for filename in all_files
]

In [11]:
# Stack the per-file frames row-wise and renumber the rows from 0 so the index is unique across files.
conv_log_df = pd.concat(objs=li, ignore_index=True)

In [22]:
# Preview the first five rows of the conversation log frame.
# NOTE(review): the saved execution count for this cell (22) is later than the
# Preprocess cells', so the stored output shows the renamed text/intent columns,
# not the raw log schema. Restart and run all to refresh it.
conv_log_df.head(n=5)

Unnamed: 0,text,intent
1,this is my first conversation,hru.q
3,ya ya ya,hobbies.q
8,hello hello,greeting
9,how is your day?,hru.q
10,let's keep this simple,encouragement.s


## Preprocess

In [13]:
# Drop the listed columns from the conversation log frame.
# errors='ignore' skips labels that are already absent, so re-running this cell
# (which rebinds conv_log_df) no longer raises KeyError on the second run.
# NOTE(review): a misspelled label would now be skipped silently as well.
conv_log_df = conv_log_df.drop(
    columns=[
        'Unnamed: 0',
        'user_session',
        'time',
        'character',
        'bot_response',
        'predicted_intent_prob',
        'is_toxic',
        'is_toxic_prob',
        'toxic',
        'retrained_label',
    ],
    errors='ignore',
)

In [15]:
# Collapse repeated user inputs, keeping the first occurrence of each (the pandas default).
conv_log_df = conv_log_df.drop_duplicates(subset=['user_input'])

In [17]:
# Keep only the inputs whose text does not already appear in the augmented intent data.
already_known = conv_log_df['user_input'].isin(aug_intent_df['text'])
conv_log_df = conv_log_df.loc[~already_known]

In [21]:
# Rename the log columns to the text/intent names used by the augmented intent data.
# Reassign instead of using inplace=True: the frame being renamed came out of a
# boolean filter, and rebinding gives a fresh object rather than mutating that
# filtered result in place.
conv_log_df = conv_log_df.rename(
    columns={'user_input': 'text', 'predicted_intent': 'intent'}
)

In [18]:
# Capture the current local time once and render it as day-month-year(hour-minute-second),
# e.g. 05-Mar-2021(14-30-59), for use in the export filename.
datetime_obj = datetime.now()
timestamp_str = format(datetime_obj, '%d-%b-%Y(%H-%M-%S)')

In [23]:
# Write the filtered conversations to a timestamped CSV, leaving the index out of the file.
export_path = f'data/reviewed_conversations/reviewed_conversation_{timestamp_str}.csv'
conv_log_df.to_csv(export_path, index=False)