In [1]:
import dialogflow_v2 as dialogflow
import os

credentials_file = 'devbot-qludto-c49ed3f01f08.json'  
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file

from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_file(credentials_file)

In [3]:
def create_intent(project_id, display_name, training_phrases_parts, message_texts):
    """Create an intent of the given intent type."""
    #import dialogflow_v2 as dialogflow
    intents_client = dialogflow.IntentsClient(credentials=credentials)

    parent = intents_client.project_agent_path(project_id)
    training_phrases = []
    for training_phrases_part in training_phrases_parts:
        part = dialogflow.types.Intent.TrainingPhrase.Part(
            text=training_phrases_part)
        # Here we create a new training phrase for each provided part.
        training_phrase = dialogflow.types.Intent.TrainingPhrase(parts=[part])
        training_phrases.append(training_phrase)

    text = dialogflow.types.Intent.Message.Text(text=message_texts)
    message = dialogflow.types.Intent.Message(text=text)

    intent = dialogflow.types.Intent(
        display_name=display_name,
        training_phrases=training_phrases,
        messages=[message])

    response = intents_client.create_intent(parent, intent)

    print('Intent created: {}'.format(response))

In [4]:
project_id = 'newagent-fpxjnq'
language_code = ''
display_name = list(['delete', 'transfer', 'alarm'])
training_phrases_parts = list([['delete the row please.', 'delete the column please.'],
                            ['transfer 50 euro please', 'i want to make transfer', 'transfer money'],
                            ['set alarm', 'set clock at 6', 'remind me at 9pm']])
message_texts = None #'delete the value'

#credentials_file = 'NewAgent-676cf2e40a3f.json'

In [52]:
#create_intent(project_id, display_name, training_phrases_parts, message_texts)

# create intents.

In [5]:
intents_client = dialogflow.IntentsClient()
parent = intents_client.project_agent_path(project_id)

for i in range(len(display_name)):
    training_phrases = []
    for training_phrases_part in training_phrases_parts[i]:
        part = dialogflow.types.Intent.TrainingPhrase.Part(
            text=training_phrases_part)
        # Here we create a new training phrase for each provided part.
        training_phrase = dialogflow.types.Intent.TrainingPhrase(parts=[part])
        training_phrases.append(training_phrase)

    text = dialogflow.types.Intent.Message.Text(text=message_texts)
    message = dialogflow.types.Intent.Message(text=text)

    intent = dialogflow.types.Intent(
        display_name=display_name[i],
        training_phrases=training_phrases,
        messages=[message])

    response = intents_client.create_intent(parent, intent)

    print('Intent created: {}'.format(response))

Intent created: name: "projects/newagent-fpxjnq/agent/intents/c2a01474-1e27-41a8-8709-861ad05dec7c"
display_name: "delete"
priority: 500000
messages {
  text {
  }
}

Intent created: name: "projects/newagent-fpxjnq/agent/intents/c2687222-2a85-44ae-a192-0b5297863033"
display_name: "transfer"
priority: 500000
messages {
  text {
  }
}

Intent created: name: "projects/newagent-fpxjnq/agent/intents/e4de3247-0e1e-42ea-8d3e-6a97e5da205f"
display_name: "alarm"
priority: 500000
messages {
  text {
  }
}



# training the agent model.

In [6]:
client = dialogflow.AgentsClient()

parent = client.project_path(project_id)

response = client.train_agent(parent)


# get the intent ID.

In [7]:
name = display_name[2]

intents_client = dialogflow.IntentsClient()
parent = intents_client.project_agent_path(project_id)

intents = intents_client.list_intents(parent)

intent_names = [
    intent.name for intent in intents
    if intent.display_name == name]

intent_ids = [
    intent_name.split('/')[-1] for intent_name
    in intent_names]

intent_ids

['e4de3247-0e1e-42ea-8d3e-6a97e5da205f']

# detect the intent.

In [9]:
project_id = 'devbot-qludto'

DIALOGFLOW_PROJECT_ID = project_id
DIALOGFLOW_LANGUAGE_CODE = 'en'
SESSION_ID = '1'


#text_to_be_analyzed = "transfer money"
text_to_be_analyzed = "set alarm"


session_client = dialogflow.SessionsClient()
session = session_client.session_path(DIALOGFLOW_PROJECT_ID, SESSION_ID)

text_input = dialogflow.types.TextInput(text=text_to_be_analyzed, language_code=DIALOGFLOW_LANGUAGE_CODE)
query_input = dialogflow.types.QueryInput(text=text_input)

try:
    response = session_client.detect_intent(session=session, query_input=query_input)
except InvalidArgument:
    raise

print("Query text:", response.query_result.query_text)
print("Detected intent:", response.query_result.intent.display_name)
print("Detected intent confidence:", response.query_result.intent_detection_confidence)
#print("Fulfillment text:", response.query_result.fulfillment_text)


Query text: set alarm
Detected intent: alarm
Detected intent confidence: 1.0


# delete all intents.

In [5]:
project_id = 'devbot-qludto'

client = dialogflow.IntentsClient()

parent = client.project_agent_path(project_id)

intents = client.list_intents(parent)
for i in intents:
    print(i.display_name)

print('finished!')

finished!


In [8]:
#test_data.dropna(axis=0, how='any', inplace=True) 

client = dialogflow.IntentsClient()

parent = client.project_agent_path(project_id)

intents = client.list_intents(parent)

response = client.batch_delete_intents(parent, intents)

In [9]:
intents = client.list_intents(parent)
for i in intents:
    print(i.display_name)

# Evaluation

In [1]:
import time
import pandas as pd
import numpy as np

def detect_intent(project_id, text_to_be_analyzed):
    DIALOGFLOW_PROJECT_ID = project_id
    DIALOGFLOW_LANGUAGE_CODE = 'en'
    SESSION_ID = '1'

    #text_to_be_analyzed = "transfer money"
    text_to_be_analyzed = text_to_be_analyzed #"set alarm"


    session_client = dialogflow.SessionsClient()
    session = session_client.session_path(DIALOGFLOW_PROJECT_ID, SESSION_ID)

    text_input = dialogflow.types.TextInput(text=text_to_be_analyzed, language_code=DIALOGFLOW_LANGUAGE_CODE)
    query_input = dialogflow.types.QueryInput(text=text_input)

    try:
        response = session_client.detect_intent(session=session, query_input=query_input)
        return response.query_result.intent.display_name
    
    except: #InvalidArgument:
        pass #raise
        
    '''
    print("Query text:", response.query_result.query_text)
    print("Detected intent:", response.query_result.intent.display_name)
    print("Detected intent confidence:", response.query_result.intent_detection_confidence)
    #print("Fulfillment text:", response.query_result.fulfillment_text)
    '''
    
    


def eval_intent(project_id, test_data, write_file):  # input test data shoulb be in format .csv
    
    for i in range(len(test_data)):
        if i % 150 == 1:
            time.sleep(65)
            print('Evaluating intent need to sleep for 65s!')

        pred_intent = detect_intent(project_id, test_data.loc[i, 'Phrase'])
        test_data.loc[i, 'Pred_intent'] = pred_intent
        
    test_data.dropna(axis=0, how='any', inplace=True)  
    test_data.to_csv('./dialogflow_result/test_data_result.csv')
    print('Evaluation is done!')
    
    
    intent_set = list(set(list(test_data.Intent.unique()) + list(test_data.Pred_intent.unique())))
    con_mat =  np.zeros([len(intent_set), len(intent_set)])  # True * Prediction

    for i in range(len(test_data)):
        con_mat[intent_set.index(test_data.loc[i, 'Intent']), intent_set.index(test_data.loc[i, 'Pred_intent'])] += 1
        np.save(write_file+'/con_mat.npy', con_mat)
    print('confusion matrix is done!')

    
    TP,TN,FP,FN,accuracy,precision,recall,f1 = 0,0,0,0,0,0,0,0
    precision_sum, recall_sum, f1_sum = 0,0,0
    d = 0
    for i in range(len(con_mat)-1):
        TP = con_mat[i][i]
        FN = (np.sum(con_mat[i, :i]) + np.sum(con_mat[i, i+1:]))
        FP = (np.sum(con_mat[:i, i]) + np.sum(con_mat[i+1:, i]))
        TN = (np.sum(con_mat) - TP - FN - FP)

        if  (TP + FP) != 0 and  (TP + FN) != 0:
            d += 1
            accuracy += TP / (TP + TN + FP + FN)

            precision = TP / (TP + FP)
            precision_sum += precision

            recall = TP / (TP + FN)
            recall_sum += recall

            f1 = (2 * precision * recall / (precision + recall))
            f1_sum += f1
    print(accuracy,precision,recall,f1)

    TP = con_mat[-1][-1]
    FN = np.sum(con_mat[len(con_mat)-1, :len(con_mat)-1])
    FP = np.sum(con_mat[:len(con_mat)-1, len(con_mat)-1])
    TN = (np.sum(con_mat) - TP - FN - FP)

    if  (TP + FP) != 0 and  (TP + FN) != 0:
        d += 1
        accuracy += TP / (TP + TN + FP + FN)

        precision = TP / (TP + FP)
        precision_sum += precision

        recall = TP / (TP + FN)
        recall_sum += recall

        f1 = (2 * precision * recall / (precision + recall))
        f1_sum += f1

    print(accuracy, precision_sum/d, recall_sum/d, f1_sum/d)

    overall_accuracy, overall_precision, overall_f1 = accuracy, precision_sum/d, f1_sum/d
    
    return overall_accuracy, overall_precision, overall_f1


In [3]:
project_id = 'newagent-fpxjnq'
df_test = pd.read_csv('./data/df_test.csv', index_col=0)
eval_metrics = []
sub_file_name = 'name'
write_file = './dialogflow_result/sub_datasets/'

In [4]:
# rasa NLU models evaluation part.
overall_accuracy, overall_precision, overall_f1_score = eval_intent(project_id, df_test, write_file)
eval_metrics.append([sub_file_name, overall_accuracy, overall_precision, overall_f1_score])

result = pd.DataFrame(eval_metrics, columns=['model_name', 'overall_accuracy', 'overall_precision', 'overall_f1_score'])
result.to_csv('./dialogflow_result/dialogflow_eval_result.csv')   # have to remove all previous result.csv at first
print('The agent is evaluated successfully!')

Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating intent need to sleep for 65s!
Evaluating inten

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/xinsun/anaconda3/envs/dev/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
  File "<ipython-input-4-8d342b6dd97a>", line 2, in <module>
    overall_accuracy, overall_precision, overall_f1_score = eval_intent(project_id, df_test, write_file)
  File "<ipython-input-2-ac239fe20823>", line 44, in eval_intent
    pred_intent = detect_intent(project_id, test_data.loc[i, 'Phrase'])
  File "<ipython-input-2-ac239fe20823>", line 14, in detect_intent
    session_client = dialogflow.SessionsClient()
  File "/Users/xinsun/anaconda3/envs/dev/lib/python3.7/site-packages/dialogflow_v2/gapic/sessions_client.py", line 187, in __init__
  File "/Users/xinsun/anaconda3/envs/dev/lib/python3.7/site-packages/dialogflow_v2/gapic/transports/sessions_grpc_transport.py", line 69, in __init__
  File "/Users/xinsun/anaconda3/envs/dev/lib/python3.7/site-packages/dialogflow_v2/gapic/transports/sessions_grpc_transport.py", line 99,

OSError: [Errno 24] Too many open files: 'newagent-fpxjnq-08c3138e5a20.json'

In [None]:
print('finish!')

In [20]:
test_data = pd.read_csv('./dialogflow_result/test_data_result.csv', index_col=0)

test_data.dropna(axis=0, how='any', inplace=True)
test_data = test_data.reset_index(drop=True)

intent_set = list(set(list(test_data.Intent.unique()) + list(test_data.Pred_intent.unique())))

con_mat =  np.zeros([len(intent_set), len(intent_set)])  # True * Prediction
print(len(intent_set))
print(intent_set) 

for i in range(len(test_data)):
    #print(test_data.loc[i, 'Intent'])
    #print(test_data.loc[i, 'Pred_intent'])
    try:
        test_data.loc[i, 'Intent']
        test_data.loc[i, 'Pred_intent']
    except:print(i)
    #con_mat[intent_set.index(test_data.loc[i, 'Intent'])][intent_set.index(test_data.loc[i, 'Pred_intent'])] += 1



151
['roll_dice', 'make_call', 'tire_change', 'damaged_card', 'food_last', 'current_location', 'routing', 'play_music', 'timezone', 'date', 'plug_type', 'what_can_i_ask_you', 'cancel', 'shopping_list_update', 'income', 'pto_used', 'distance', 'how_old_are_you', 'confirm_reservation', 'traffic', 'recipe', 'lost_luggage', 'restaurant_reservation', 'uber', 'pay_bill', 'goodbye', 'book_flight', 'calculator', 'yes', 'text', 'redeem_rewards', 'international_visa', 'vaccines', 'credit_limit_change', 'insurance_change', 'mpg', 'change_accent', 'calendar_update', 'calendar', 'timer', 'carry_on', 'ingredients_list', 'payday', 'alarm', 'schedule_meeting', 'account_blocked', 'meeting_schedule', 'spending_history', 'transfer', 'restaurant_reviews', 'report_fraud', 'car_rental', 'flip_coin', 'pin_change', 'insurance', 'find_phone', 'next_holiday', 'gas', 'who_do_you_work_for', 'how_busy', 'restaurant_suggestion', 'cancel_reservation', 'oos', 'pto_request', 'repeat', 'change_volume', 'nutrition_info'

In [21]:
test_data.loc[332]

Phrase         speak in french please
Intent                change_language
Pred_intent           change_language
Name: 332, dtype: object

In [88]:
TP,TN,FP,FN,accuracy,precision,recall,f1 = 0,0,0,0,0,0,0,0
precision_sum, recall_sum, f1_sum = 0,0,0

d = 0
for i in range(len(con_mat)-1):
    TP = con_mat[i][i]
    FN = (np.sum(con_mat[i, :i]) + np.sum(con_mat[i, i+1:]))
    FP = (np.sum(con_mat[:i, i]) + np.sum(con_mat[i+1:, i]))
    TN = (np.sum(con_mat) - TP - FN - FP)
    #(np.sum(con_mat[:i, :i]) + np.sum(con_mat[i+1:, i+1:]) + np.sum(con_mat[:i, i+1:]) + np.sum(con_mat[i+1:, :i]) - TP - TN - FP)
    if  (TP + FP) != 0 and  (TP + FN) != 0:
        d += 1
        accuracy += TP / (TP + TN + FP + FN)
        
        precision = TP / (TP + FP)
        precision_sum += precision
        
        recall = TP / (TP + FN)
        recall_sum += recall
        
        f1 = (2 * precision * recall / (precision + recall))
        f1_sum += f1
        
print(accuracy,precision,recall,f1)


TP = con_mat[-1][-1]
FN = np.sum(con_mat[len(con_mat)-1, :len(con_mat)-1])
FP = np.sum(con_mat[:len(con_mat)-1, len(con_mat)-1])
TN = (np.sum(con_mat) - TP - FN - FP)
#print(TP,FN,FP,TN)

if  (TP + FP) != 0 and  (TP + FN) != 0:
    d += 1
    accuracy += TP / (TP + TN + FP + FN)
    
    precision = TP / (TP + FP)
    precision_sum += precision

    recall = TP / (TP + FN)
    recall_sum += recall

    f1 = (2 * precision * recall / (precision + recall))
    f1_sum += f1
    
print(accuracy, precision_sum/d, recall_sum/d, f1_sum/d)

print(d)

'''
for i in range(len(con_mat)-1):
    TP += con_mat[i][i]
    FN += (np.sum(con_mat[i, :i]) + np.sum(con_mat[i, i+1:]))
    FP += (np.sum(con_mat[:i, i]) + np.sum(con_mat[i+1:, i]))
print(TP,FN,FP)


TP += con_mat[-1][-1]
FN += np.sum(con_mat[len(con_mat)-1, :len(con_mat)-1])
FP += np.sum(con_mat[:len(con_mat)-1, len(con_mat)-1])
print(TP,FN,FP)

accuracy = TP / np.sum(con_mat)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * precision * recall / (precision + recall)
print(accuracy,precision,recall,f1,np.sum(con_mat))
'''

0.8060000000000005 0.8571428571428571 0.8 0.8275862068965518
0.8360000000000005 0.9550415457856802 0.8392156862745095 0.8862033722288207
34


'\nfor i in range(len(con_mat)-1):\n    TP += con_mat[i][i]\n    FN += (np.sum(con_mat[i, :i]) + np.sum(con_mat[i, i+1:]))\n    FP += (np.sum(con_mat[:i, i]) + np.sum(con_mat[i+1:, i]))\nprint(TP,FN,FP)\n\n\nTP += con_mat[-1][-1]\nFN += np.sum(con_mat[len(con_mat)-1, :len(con_mat)-1])\nFP += np.sum(con_mat[:len(con_mat)-1, len(con_mat)-1])\nprint(TP,FN,FP)\n\naccuracy = TP / np.sum(con_mat)\nprecision = TP / (TP + FP)\nrecall = TP / (TP + FN)\nf1 = 2 * precision * recall / (precision + recall)\nprint(accuracy,precision,recall,f1,np.sum(con_mat))\n'

In [75]:
print('accuracy: ', accuracy)
print('precision: ', precision)
print('f1: ', f1)

accuracy:  0.836
precision:  0.836
f1:  0.836


In [112]:
test_data = './data/df_test.csv'
ids = np.random.randint(0,5400,3000)
df_test = pd.read_csv(test_data, index_col=0).loc[ids].reset_index(drop=True)


In [113]:
len(ids)
max(ids)
min(ids)

2

In [114]:
df_test

Unnamed: 0,Phrase,Intent
0,tell me what the gas mileage is on my car,mpg
1,i'd like to know the last time my car got look...,last_maintenance
2,can i put in a pto request for september 1st t...,pto_request
3,did carrots get on my shopping list,shopping_list
4,will i get charged for using my credit card in...,international_fees
...,...,...
2995,date please,date
2996,when will i received a replacement card,replacement_card_duration
2997,tell ann and scott where i am,share_location
2998,can you please disconnect from my phone,sync_device


In [5]:
s = 'sdf'
s[::1]

'sdf'

# Loop run

In [1]:
import dialogflow_NLU

import dialogflow_v2 as dialogflow
import os

credentials_file = 'newagent-fpxjnq-08c3138e5a20.json'  
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file


In [2]:
project_id = 'newagent-fpxjnq'
language_code = ''
display_name = list(['delete', 'transfer', 'alarm'])
training_phrases_parts = list([['delete the row please.', 'delete the column please.'],
                            ['transfer 50 euro please', 'i want to make transfer', 'transfer money'],
                            ['set alarm', 'set clock at 6', 'remind me at 9pm']])

In [3]:
dialogflow_NLU.create_intent(project_id, display_name, training_phrases_parts)

In [6]:
dialogflow_NLU.train_agent(project_id)

In [7]:
text_to_be_analyzed = ['transfer money', 'set alarm']

for text in text_to_be_analyzed:
    dialogflow_NLU.detect_intent(project_id, text)

Query text: transfer money
Detected intent: transfer
Detected intent confidence: 1.0
Query text: set alarm
Detected intent: alarm
Detected intent confidence: 1.0


In [25]:
dialogflow_NLU.delete_all_intents(project_id)

Intent before delete:  0
--------------------
Intent after delete:  0


In [26]:
import pandas as pd

data_file = '/users/xinsun/Downloads/oos-eval-master/data/data_full.json'
df = pd.read_json(data_file, typ='series')

df_oos_train = pd.DataFrame(df['oos_train'], columns=['Phrase', 'Intent'])
df_oos_test = pd.DataFrame(df['oos_test'], columns=['Phrase', 'Intent'])
df_oos_val = pd.DataFrame(df['oos_val'], columns=['Phrase', 'Intent'])
df_train = pd.DataFrame(df['train'], columns=['Phrase', 'Intent'])
df_test = pd.DataFrame(df['test'], columns=['Phrase', 'Intent'])
df_val = pd.DataFrame(df['val'], columns=['Phrase', 'Intent'])

In [27]:
intents_set = list(df_train['Intent'].unique())

In [0]:
df_train.head(2)

In [28]:
phrases_set = [list(df_train[df_train.Intent==intent].Phrase.values) for intent in intents_set]

In [29]:
intents_set[0]

'translate'

In [30]:
phrases_set[0][:2]

['what expression would i use to say i love you if i were an italian',
 "can you tell me how to say 'i do not speak much spanish', in spanish"]

In [14]:
project_id = 'newagent-fpxjnq'
language_code = ''
display_name = intents_set[:30]
training_phrases_parts = phrases_set[:30]

In [15]:
dialogflow_NLU.delete_all_intents(project_id)

Intent before delete:  0
--------------------
Intent after delete:  0


In [16]:
dialogflow_NLU.create_intent(project_id, display_name, training_phrases_parts)

dialogflow_NLU.train_agent(project_id)

In [22]:
text_to_be_analyzed = ['transfer money', 'how can i say in english', 'set the alarm']

for i in range(len(text_to_be_analyzed)):
    dialogflow_NLU.detect_intent(project_id, text_to_be_analyzed[i])


Query text: transfer money
Detected intent: transfer
Detected intent confidence: 0.8908178806304932
Query text: how can i say in english
Detected intent: translate
Detected intent confidence: 0.8272097110748291
Query text: set the alarm
Detected intent: 
Detected intent confidence: 0.0


In [23]:
dialogflow_NLU.delete_all_intents(project_id)

Intent before delete:  50
--------------------
Intent after delete:  0


# Get the sub-dataset

In [26]:
import pandas as pd

data_file = './data/data_full.json'

# get the test dataset
df = pd.read_json(data_file, typ='series')
df_test = pd.DataFrame(df['test'], columns=['Phrase', 'Intent'])
df_oov_test = pd.DataFrame(df['oos_test'], columns=['Phrase', 'Intent'])
df_test = pd.concat([df_test, df_oov_test]).reset_index(drop=True)

write_test_file = './data/df_test.csv'

df_test[['Phrase', 'Intent']].to_csv(write_test_file) 

In [None]:
test = pd.read_csv(write_test_file, index_col=0)
len(test.Intent.unique())
test

In [None]:
a

In [146]:
import pandas as pd

d1 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_1.csv'
d2 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_2.csv'
d3 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_3.csv'
d4 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_4.csv'
d5 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_5.csv'
d6 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_6.csv'
d7 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_7.csv'
d8 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_8.csv'

d9 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_9.csv'
d10 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_10.csv'
d11 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_11.csv'
d12 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_12.csv'
d13 = './dialogflow_result/cl_cluster/dialogflow_eval_result_cl_13.csv'

model_set_cl = eval(open('./dialogflow_result/cl_cluster/model_set_cl.txt', 'r').readlines()[0])

model_set_cl_2 = eval(open('./dialogflow_result/cl_cluster/model_set_cl_2.txt', 'r').readlines()[0])
print(len(model_set_cl))
print(len(model_set_cl_2))

# get the test dataset
df1 = pd.read_csv(d1, index_col=0)
df2 = pd.read_csv(d2, index_col=0)
df3 = pd.read_csv(d3, index_col=0)
df4 = pd.read_csv(d4, index_col=0)
df5 = pd.read_csv(d5, index_col=0)
df6 = pd.read_csv(d6, index_col=0)
df7 = pd.read_csv(d7, index_col=0)
df8 = pd.read_csv(d8, index_col=0)

df9 = pd.read_csv(d9, index_col=0)
df10 = pd.read_csv(d10, index_col=0)
df11 = pd.read_csv(d11, index_col=0)
df12 = pd.read_csv(d12, index_col=0)
df13 = pd.read_csv(d13, index_col=0)


df_cl = pd.concat([df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13]).reset_index(drop=True)
print(df_cl.shape)

31
91
(44, 10)


In [147]:
df_cl.head(2)

Unnamed: 0,model_name,dim1_rm,dim2_sent_len,dim3_sent_num,dim4_pattern,dim5_SDPs,dim6_keywords,overall_accuracy,overall_precision,overall_f1_score
0,False-False-False-question-True-False,False,False,False,question,True,False,0.488743,0.619705,
1,False-False-False-question-False-True,False,False,False,question,False,True,0.568227,0.665787,


In [148]:
d0 = './dialogflow_result/gcp/dialogflow_eval_result_0.csv'
d1 = './dialogflow_result/gcp/dialogflow_eval_result_1.csv'
d2 = './dialogflow_result/gcp/dialogflow_eval_result_2.csv'
d3 = './dialogflow_result/gcp/dialogflow_eval_result_3.csv'
d4 = './dialogflow_result/gcp/dialogflow_eval_result_4.csv'
d5 = './dialogflow_result/gcp/dialogflow_eval_result_5.csv'
d6 = './dialogflow_result/gcp/dialogflow_eval_result_6.csv'
d7 = './dialogflow_result/gcp/dialogflow_eval_result_7.csv'
d8 = './dialogflow_result/gcp/dialogflow_eval_result_8.csv'
d9 = './dialogflow_result/gcp/dialogflow_eval_result_9.csv'
d10 = './dialogflow_result/gcp/dialogflow_eval_result_10.csv'
d11 = './dialogflow_result/gcp/dialogflow_eval_result_11.csv'
d12 = './dialogflow_result/gcp/dialogflow_eval_result_12.csv'
d13 = './dialogflow_result/gcp/dialogflow_eval_result_13.csv'

model_set_gcp = eval(open('./dialogflow_result/gcp/model_set.txt', 'r').readlines()[0])
print(len(model_set_gcp))


# get the test dataset
df0 = pd.read_csv(d0, index_col=0)
df1 = pd.read_csv(d1, index_col=0)
df2 = pd.read_csv(d2, index_col=0)
df3 = pd.read_csv(d3, index_col=0)
df4 = pd.read_csv(d4, index_col=0)
df5 = pd.read_csv(d5, index_col=0)
df6 = pd.read_csv(d6, index_col=0)
df7 = pd.read_csv(d7, index_col=0)
df8 = pd.read_csv(d8, index_col=0)
df9 = pd.read_csv(d9, index_col=0)
df10 = pd.read_csv(d10, index_col=0)
df11 = pd.read_csv(d11, index_col=0)
df12 = pd.read_csv(d12, index_col=0)
df13 = pd.read_csv(d13, index_col=0)


df_gcp = pd.concat([df0, df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13]).reset_index(drop=True)
print(df_gcp.shape)

60
(50, 10)


In [149]:
model_set_plan = model_set_gcp
for i in model_set_cl_2:
    if i not in model_set_plan:
        model_set_plan.append(i)

print(len(model_set_plan))

model_set_plan

90


[[True, False, False, False, False, True],
 [True, False, False, False, False, False],
 [True, False, False, 'statement', True, False],
 [True, False, False, 'statement', False, True],
 [True, False, False, 'statement', False, False],
 [True, False, False, 'question', True, False],
 [True, False, False, 'question', False, True],
 [True, False, False, 'question', False, False],
 [True, False, 15, False, True, False],
 [True, False, False, False, True, False],
 [True, False, 15, False, False, True],
 [True, False, 15, False, False, False],
 [True, False, 15, 'statement', True, False],
 [True, False, 15, 'statement', False, True],
 [True, False, 15, 'statement', False, False],
 [True, False, 15, 'question', True, False],
 [True, False, 15, 'question', False, True],
 [True, False, 15, 'question', False, False],
 [True, False, 50, False, True, False],
 [True, False, 50, False, False, True],
 [True, False, 50, False, False, False],
 [True, False, 50, 'statement', True, False],
 [True, False,

In [128]:
'''df_gcp['model']=None
for i in range(len(df_gcp)):
    df_gcp.loc[i, 'model'] = str(df_gcp.loc[i,'dim1_rm'])+', '+str(df_gcp.loc[i,'dim2_sent_len'])+', '+str(df_gcp.loc[i,'dim3_sent_num'])+', '+str(df_gcp.loc[i,'dim4_pattern'])+', '+str(df_gcp.loc[i,'dim5_SDPs'])+', '+str(df_gcp.loc[i,'dim6_keywords'])

model_gcp_done=[]
for i in df_gcp['model'].values:
    model_gcp_done.append(i)'''


'''df_cl['model']=None
for i in range(len(df_cl)):
    df_cl.loc[i, 'model'] = df_cl.loc[i,'model_name'].replace('-', ', ')
model_cl_done = list(df_cl['model'].values)'''


"df_cl['model']=None\nfor i in range(len(df_cl)):\n    df_cl.loc[i, 'model'] = df_cl.loc[i,'model_name'].replace('-', ', ')\nmodel_cl_done = list(df_cl['model'].values)"

In [129]:
'''f_gcp = open('./dialogflow_result/gcp/model_gcp_done.txt', 'w+')
f_gcp.write(str(model_gcp_done))
f_gcp.close()'''

'''f_cl = open('./dialogflow_result/cl_cluster/model_cl_done.txt', 'w+')
f_cl.write(str(model_cl_done))
f_cl.close()'''

"f_cl = open('./dialogflow_result/cl_cluster/model_cl_done.txt', 'w+')\nf_cl.write(str(model_cl_done))\nf_cl.close()"

In [133]:
model_gcp_done = eval(open('./dialogflow_result/gcp/model_gcp_done.txt', 'r').readlines()[0])

model_cl_done = eval(open('./dialogflow_result/cl_cluster/model_cl_done.txt', 'r').readlines()[0])


In [137]:
'''
model_set_done = model_gcp_done
for i in model_cl_done:
    if i not in model_set_done:
        model_set_done.append(i)
'''

In [139]:
'''len(model_set_done)

f_done = open('./dialogflow_result/model_set_done.txt', 'w+')
f_done.write(str(model_set_done))
f_done.close()

f_plan = open('./dialogflow_result/model_set_plan.txt', 'w+')
f_plan.write(str(model_set_plan))
f_plan.close()'''

In [154]:
df_result = pd.concat([df_gcp, df_cl])

df_result_nodup = df_result.drop_duplicates('model_name', keep='first', inplace=False).reset_index(drop=True)

In [8]:
df_result_nodup.to_csv('/Users/xinsun/Dev_env/Bot.Dataset.final/Dialogflow/dialogflow_result/dialogflow_result_final.csv')