# Customer Intention Analysis

## 1/ Import libraries

In [86]:
import pandas as pd
import numpy as np
import csv

In [87]:
from sqlalchemy import Column, String, Integer, ForeignKey, DateTime, func, Boolean, MetaData, Table, Float
from sqlalchemy.dialects.mysql import TINYINT
from sqlalchemy.orm import relationship, backref
from sqlalchemy.ext.declarative import declarative_base

In [88]:
import requests
import json

In [89]:
from tqdm import tqdm

## 2/ Create an engine

In [90]:
from sqlalchemy import create_engine
engine = create_engine('mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention', echo=True) 

## 3/ Design and map SQL Alchemy's table with TablePlus relational database's table

In [91]:
Base = declarative_base() 
metadata = MetaData(bind=engine) 

class Conversation_Intention(Base):
    __tablename__ = Table('conversation_intention', Base.metadata,
                    autoload=True, autoload_with=engine) # metadata goes from database 
    # Database (TablePlus) will regularize PK, Python won't dp so (primary_key=True) since this is for mapping tables only. 
    # If Python is used for creating tables, we will need ID as a PK so 'primary_key=True' will be included.
    id = Column(Integer, primary_key=True) 
    conversation_id = Column(Integer)
    reference_id = Column(Integer)
    intention = Column(String())
    score = Column(Float) # must have data type, Integer doesn't need to have Integer(8)
    
class Conversation_Entities(Base):
    __tablename__ = Table('conversation_entities', Base.metadata,
                    autoload=True, autoload_with=engine) # metadata goes from database 
    # Database (TablePlus) will regularize PK, Python won't dp so (primary_key=True) since this is for mapping tables only. 
    # If Python is used for creating tables, we will need ID as a PK so 'primary_key=True' will be included.
    id = Column(Integer, primary_key=True) 
    conversation_id = Column(Integer)
    conversation_entity = Column(Integer)
    conversation_entity_score = Column(Float)
    conversation_entity_string = Column(String()) # must have data type, Integer doesn't need to have Integer(8)
    
# Mapping classes with tables in TablePlus's databases
# Should not create tables by Python but TablePlus
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine) 
session = Session() # object

2021-10-13 01:05:59,757 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'sql_mode'
2021-10-13 01:05:59,758 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-10-13 01:05:59,774 INFO sqlalchemy.engine.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2021-10-13 01:05:59,775 INFO sqlalchemy.engine.Engine [generated in 0.00065s] ()
2021-10-13 01:05:59,779 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2021-10-13 01:05:59,779 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-10-13 01:05:59,786 INFO sqlalchemy.engine.Engine SHOW CREATE TABLE `conversation_intention`
2021-10-13 01:05:59,787 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-10-13 01:05:59,803 INFO sqlalchemy.engine.Engine SHOW CREATE TABLE `conversation_entities`
2021-10-13 01:05:59,803 INFO sqlalchemy.engine.Engine [raw sql] ()


In [93]:
df_Conversation = pd.read_csv("C:\Programming\CustomerIntention\src\data\Conversation.csv", encoding = 'utf-8') 

In [None]:
df_Conversation.head()

In [None]:
df_Conversation = df_Conversation.set_index('Message_ID')

In [None]:
df_Conversation.head()

In [100]:
df_customer_filtered_Conversation = pd.read_csv("C:\Programming\CustomerIntention\src\data\customer_filtered_Conversation.csv", encoding = 'utf-8') 

In [None]:
df_customer_filtered_Conversation.head()

In [None]:
df_customer_filtered_Conversation = df_customer_filtered_Conversation.set_index('Message_ID')

In [None]:
df_customer_filtered_Conversation.head()

##  4/ Prepare to insert all rows from Conversation to CSV files

In [102]:
# object_length = df_customer_filtered_Conversation.shape[0]
# object_ids = df_customer_filtered_Conversation['Message_ID'].values[:object_length] 
# object_messages = df_customer_filtered_Conversation['Message'].values[:object_length] 

### Test with the first 19 rows (move on)

In [None]:
object_length = 19
object_ids = df_customer_filtered_Conversation['ID'].values[:object_length]
object_messages = df_customer_filtered_Conversation['Message'].values[:object_length]

### Create ID for Intention Table

In [104]:
intention_table_id = list(range(object_length))
#print(intention_table_id)

### Create ID for Entities Table

In [105]:
entities_table_id = list(range(object_length))
#print(entities_table_id)

### Create 6 new lists to containt values of responses

In [106]:
conversation_intention_conversation_id = []

conversation_entity = []
conversation_entity_score = []
conversation_entity_word = []

conversation_intention_label = []
conversation_intention_score = []


### Test with 2 requests (move on)

In [65]:
import requests
import json

url = 'http://nni.cot.ai:19721/query'
temp = [{
    "reference_id": 0,
    "text": "Tôi nặng 85 kg dùng thuốc giảm cân được không?"
},
{
    "reference_id": 1,
    "text": "Tôi bị tim dùng thuốc giảm cân được không?"
}]
response = requests.post(url, json = temp)

    

In [66]:
response.json()

[{'reference_id': 0,
  'intent': [{'label': 'CCTT-tình trạng bản thân', 'score': 0.683584451675415},
   {'label': 'Hỏi-TTSP-bệnh', 'score': 0.2502438724040985},
   {'label': 'Hỏi-cách sử dụng', 'score': 0.006138267926871777}],
  'entities': [{'entity': 'cân nặng',
    'score': 0.002189846243709326,
    'word': '85 kg'}]},
 {'reference_id': 1,
  'intent': [{'label': 'Hỏi-TTSP-bệnh', 'score': 0.9590045213699341},
   {'label': 'Hỏi-cách sử dụng', 'score': 0.0028975072782486677},
   {'label': 'Hỏi-đảm bảo về sản phẩm', 'score': 0.0021935892291367054}],
  'entities': [{'entity': 'bệnh lý',
    'score': 0.001567621249705553,
    'word': 'tim'}]}]

In [67]:
for item in response.json(): # loop through each result in the my_object batch of 1000 rows
    #print(results[i])
    conversation_intention_conversation_id.append(item['reference_id'])

    if item['entities'] == []:
        conversation_entity.append("")
        conversation_entity_score.append("")
        conversation_entity_word.append("")
    else: # getting the first entity only
        conversation_entity.append(item['entities'][0]['entity'])
        conversation_entity_score.append(item['entities'][0]['score'])
        conversation_entity_word.append(item['entities'][0]['word'])
        
        # Append the reference_id of the chat line, the customer's most possible intention (intent), the customer's intention highest score
    conversation_intention_label.append(item['intent'][0]['label'])
    conversation_intention_score.append(item['intent'][0]['score'])           

In [None]:
print(conversation_intention_conversation_id)

In [None]:
print(conversation_entity)

In [None]:
print(conversation_entity_score)

In [None]:
print(conversation_entity_word)

In [None]:
print(conversation_intention_label)

In [None]:
print(conversation_intention_score)

In [None]:
# 1st reponse
response.json()[0]

In [None]:
# ref_id
response.json()[0]['reference_id']

In [None]:
# intent
response.json()[0]['intent']

In [None]:
# intent 0 with the highest score
response.json()[0]['intent'][0]

In [None]:
# intent 0 with the highest score - label
response.json()[0]['intent'][0]['label']

In [None]:
# intent 0 with the highest score - label
response.json()[0]['intent'][0]['score']

In [None]:
# List type
type(response.json())

In [None]:
# Dict type
type(response.json()[0])

## 5/ Generate 500 responses per batch from API

I stopped at 1% as it would be at least 10 hours to plug all responses of intentions and entities to these lists. This is just a demo.

In [None]:
import requests
import json

url = 'http://nni.cot.ai:19721/query'

my_object = []
for i, (id, m) in tqdm(enumerate(zip(object_ids, object_messages)), total = len(object_ids)): 
    temp = {
        "reference_id": int(id),
        "text": m
    }
    my_object.append(temp)
    if (i+1) % 500 == 0 or i == len(object_ids) - 1: # 2 cases: divisible or last iteration 
        response = requests.post(url, json = my_object)
        my_object = []        
    for item in response.json(): # loop through each result in the my_object batch of 1000 rows
        #print(item)
        conversation_intention_conversation_id.append(item['reference_id'])

        if item['entities'] == []:
            conversation_entity.append("")
            conversation_entity_score.append("")
            conversation_entity_word.append("")
        else:
            conversation_entity.append(item['entities'][0]['entity'])
            conversation_entity_score.append(item['entities'][0]['score'])
            conversation_entity_word.append(item['entities'][0]['word'])

            # Append the reference_id of the chat line, the customer's most possible intention (intent), the customer's intention highest score
        conversation_intention_label.append(item['intent'][0]['label'])
        conversation_intention_score.append(item['intent'][0]['score'])       

  1%|▊                                                                       | 17980/1643453 [08:03<6:06:44, 73.87it/s]

### Print out all 6 lists for testing the 19 rows (move on) 

In [None]:
print(conversation_intention_conversation_id)

print(conversation_entity)
print(conversation_entity_score) 
print(conversation_entity_word) 

print(conversation_intention_label) 
print(conversation_intention_score) 

## 6/ Create 2 lists of IDs for the 2 new CSV files

### 6.1/ Create ID for Entities Table:

In [None]:
# Create ID for Entities Table:
entities_table_id = list(range(object_length))
print(entities_table_id)

### 6.2/ Create ID for Intention Table:

In [None]:
# Create ID for Intention Table:
intention_table_id = list(range(object_length))
print(intention_table_id)

The 6 new lists contain Entity, Entity Score, Entity Word, Conversation ID, Intention Label, Intention Score of the Customer Intention table

In [46]:
# print(results[0])
# print(results[0]['entities'])
# print(results[1]['entities'])
# print(results[1]['entities'][0]['entity'])
# print(results[1]['entities'][0]['score'])
# print(results[1]['entities'][0]['word'])

{'reference_id': 0, 'intent': [{'label': 'chào hỏi', 'score': 0.8976910710334778}, {'label': 'Chat tự động', 'score': 0.009650635533034801}, {'label': 'Phàn nàn-giao thiếu', 'score': 0.007310293149203062}], 'entities': []}
[]
[{'entity': 'tên', 'score': 0.0023483354598283768, 'word': 'Nha Kim'}]
tên
0.0023483354598283768
Nha Kim


In [47]:
# conversation_intention_conversation_id = []

# conversation_entity = []
# conversation_entity_score = []
# conversation_entity_word = []

# conversation_intention_label = []
# conversation_intention_score = []

# for i in range(object_length): # loop through each result
#     conversation_intention_conversation_id.append(results[i]['reference_id'])
    
#     if results[i]['entities'] == []:
#         conversation_entity.append("")
#         conversation_entity_score.append("")
#         conversation_entity_word.append("")
#     else:
#         conversation_entity.append(results[i]['entities'][0]['entity'])
#         conversation_entity_score.append(results[i]['entities'][0]['score'])
#         conversation_entity_word.append(results[i]['entities'][0]['word'])
        
#     # Append the reference_id of the chat line, the customer's most possible intention (intent), the customer's intention highest score
#     conversation_intention_label.append(results[i]['intent'][0]['label'])
#     conversation_intention_score.append(results[i]['intent'][0]['score'])

## 7/ Plug all 4 lists into the Customer Intention Data Frame

In [None]:
df_Conversation_Entities = pd.DataFrame(np.column_stack([entities_table_id, conversation_intention_conversation_id, conversation_entity, conversation_entity_score, conversation_entity_word]), 
                               columns=['ID','Conversation_ID', 'Conversation_Entity', 'Conversation_Entity_Score', 'Conversation_Entity_Word'])

In [None]:
df_Conversation_Entities.head(19)

In [None]:
df_Conversation_Intention = pd.DataFrame(np.column_stack([intention_table_id, conversation_intention_conversation_id, conversation_intention_label, conversation_intention_score]), 
                               columns=['ID','Conversation_ID', 'Intention_Label', 'Intention_Score'])

In [None]:
df_Conversation_Intention.head(19)

## 9/ Save the df_Customer_Intention for Visualization in Tableau

In [54]:
df_Conversation_Intention.to_csv('C:\Programming\CustomerIntention\src\data\Conversation_Intention.csv', encoding='utf-8')

In [55]:
df_Conversation_Entities.to_csv('C:\Programming\CustomerIntention\src\data\Conversation_Entities.csv', encoding='utf-8')