In [26]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import manners_db_experiment_utils
from tqdm import tqdm
import os
import csv
import utils
from transformers import T5Tokenizer, T5ForConditionalGeneration

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
# Answer labels the models are scored over; they also become CSV column names
# in the result files written by the inference cells.
answer_choices = ['A', 'B', 'C', 'D', 'E']

# Robot actions evaluated for every stamp (one prompt per stamp/action pair).
robot_actions_list = [
    'Vacuum cleaning',
    'Mopping the floor',
    'Carry warm food',
    'Carry cold food',
    'Carry drinks',
    'Carry small objects (plates, toys)',
    'Carry big objects (tables, chairs)',
    'Cleaning (Picking up stuff) / Starting conversation',
]

# `df` is the table the prompts are built from; the file name is spelled as it
# is on disk (presumably the un-normalized data -- TODO confirm).
df = pd.read_csv('./data/not_normallized.csv')
test_set_df = pd.read_csv('./data/data_test.csv')

# Distinct 'Stamp' values of the test set, in ascending order.
stamp_list = sorted(set(test_set_df['Stamp'].to_list()))

In [28]:
manners_t5_results_csv_path = './results/manners_db_t5_results.csv'

# The results CSV doubles as a cache: inference only runs when the file is absent.
if not os.path.exists(manners_t5_results_csv_path):
    # Create the output directory *before* the expensive model load, so a missing
    # folder cannot fail the cell after the weights have already been loaded.
    os.makedirs(os.path.dirname(manners_t5_results_csv_path), exist_ok=True)

    # Load model (max_memory is the per-device budget handed to from_pretrained
    # for devices 0-3).
    max_memory = {0: "20GIB", 1: "20GIB", 2: "20GIB", 3: "20GIB"}
    t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl", device_map="auto", max_memory=max_memory)
    t5_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xxl")

    # Columns: identifying fields, one probability per answer choice, then their sum.
    header = ['stamp_id', 'prompt', 'action'] + answer_choices + ['sum of prob']

    # Write to a side file and rename it only after the loop finished. If the run
    # is interrupted, no truncated CSV is left at the final path, so the
    # `os.path.exists` guard above cannot mistake a partial run for a finished one.
    partial_csv_path = manners_t5_results_csv_path + '.partial'

    # newline='' is required by the csv module (prompts may contain newlines);
    # utf-8 matches the default encoding pd.read_csv uses when reading the file back.
    with open(partial_csv_path, 'w', newline='', encoding='utf-8') as manners_t5_results_file:
        writer = csv.writer(manners_t5_results_file)
        writer.writerow(header)

        for stamp_idx in tqdm(stamp_list):
            # First row of `df` carrying this stamp supplies the prompt fields.
            row = df[df['Stamp'] == stamp_idx].iloc[0]
            for action in robot_actions_list:
                template = manners_db_experiment_utils.create_prompt_t5(row, action)
                # get_probs_t5 takes a batch of prompts; we send one and take its result.
                answer_probs = utils.get_probs_t5([template], answer_choices, t5_model, t5_tokenizer)[0]
                sum_of_probs = sum(answer_probs)
                result_row = [stamp_idx, template, action] + answer_probs + [sum_of_probs]
                writer.writerow(result_row)

    # Publish the complete file under its final name.
    os.replace(partial_csv_path, manners_t5_results_csv_path)

In [29]:
manners_davinci_results_csv_path = './results/manners_db_davinci_results.csv'

# The results CSV doubles as a cache: the model is only queried when the file is absent.
if not os.path.exists(manners_davinci_results_csv_path):
    # Make sure the output directory exists before any model query is issued.
    os.makedirs(os.path.dirname(manners_davinci_results_csv_path), exist_ok=True)

    # Columns: identifying fields, one probability per answer choice, then their sum.
    header = ['stamp_id', 'prompt', 'action'] + answer_choices + ['sum of prob']

    # Write to a side file and rename it only after the loop finished. If the run
    # is interrupted, no truncated CSV is left at the final path, so the
    # `os.path.exists` guard above cannot mistake a partial run for a finished one.
    partial_csv_path = manners_davinci_results_csv_path + '.partial'

    # newline='' is required by the csv module (prompts may contain newlines);
    # utf-8 matches the default encoding pd.read_csv uses when reading the file back.
    with open(partial_csv_path, 'w', newline='', encoding='utf-8') as manners_davinci_results_file:
        writer = csv.writer(manners_davinci_results_file)
        writer.writerow(header)

        for stamp_idx in tqdm(stamp_list):
            # First row of `df` carrying this stamp supplies the prompt fields.
            row = df[df['Stamp'] == stamp_idx].iloc[0]
            for action in robot_actions_list:
                template = manners_db_experiment_utils.create_prompt_davinci(row, action)
                answer_probs = utils.get_probs_davinci(template, answer_choices)
                sum_of_probs = sum(answer_probs)
                result_row = [stamp_idx, template, action] + answer_probs + [sum_of_probs]
                writer.writerow(result_row)

    # Publish the complete file under its final name.
    os.replace(partial_csv_path, manners_davinci_results_csv_path)

In [30]:
# Score the davinci answer probabilities against the human test-set ratings.
# Both tables are re-read from disk so this cell does not depend on the
# inference cell having run in the current kernel session.
human_results_df = pd.read_csv('./data/data_test.csv')
davinci_results_df = pd.read_csv(manners_davinci_results_csv_path)

# Kept as the cell's last expression; the report (RMSE per action) appears in the cell output.
manners_db_experiment_utils.analyze_result(davinci_results_df, human_results_df)

RMSE RESULTS:

Action Vacuum cleaning:
Arrow: RMSE 0.8896833242822784
Circle: RMSE 1.06752904250357
Overall: RMSE 0.971952893252713
------------------------------------
Action Mopping the floor:
Arrow: RMSE 0.8303646404036941
Circle: RMSE 1.1010504711846392
Overall: RMSE 0.9589266842780769
------------------------------------
Action Carry warm food:
Arrow: RMSE 0.6552871676125358
Circle: RMSE 0.5990757917011625
Overall: RMSE 0.6311712178087133
------------------------------------
Action Carry cold food:
Arrow: RMSE 0.5975874412789809
Circle: RMSE 0.8898529214216014
Overall: RMSE 0.7405341569172668
------------------------------------
Action Carry drinks:
Arrow: RMSE 0.6133566633575033
Circle: RMSE 0.44370554585547123
Overall: RMSE 0.5452526121511978
------------------------------------
Action Carry small objects (plates, toys):
Arrow: RMSE 0.6389389429086068
Circle: RMSE 0.5482293278096858
Overall: RMSE 0.6007166045079696
------------------------------------
Action Carry big objects (t

In [31]:
# Score the T5 answer probabilities against the human test-set ratings.
# Both tables are re-read from disk so this cell does not depend on the
# inference cell having run in the current kernel session.
human_results_df = pd.read_csv('./data/data_test.csv')
t5_results_df = pd.read_csv(manners_t5_results_csv_path)

# Kept as the cell's last expression; the report (RMSE per action) appears in the cell output.
manners_db_experiment_utils.analyze_result(t5_results_df, human_results_df)

RMSE RESULTS:

Action Vacuum cleaning:
Arrow: RMSE 1.06420951528231
Circle: RMSE 0.9255738860700994
Overall: RMSE 1.0055673761248956
------------------------------------
Action Mopping the floor:
Arrow: RMSE 0.8929403958978613
Circle: RMSE 0.815586870400181
Overall: RMSE 0.8597626901332248
------------------------------------
Action Carry warm food:
Arrow: RMSE 0.6740623092988973
Circle: RMSE 0.7193649429539387
Overall: RMSE 0.6943597075898177
------------------------------------
Action Carry cold food:
Arrow: RMSE 0.6860830998419635
Circle: RMSE 0.7127685278503182
Overall: RMSE 0.697950399259879
------------------------------------
Action Carry drinks:
Arrow: RMSE 0.6430649250823601
Circle: RMSE 0.6369034627281756
Overall: RMSE 0.6403611855627444
------------------------------------
Action Carry small objects (plates, toys):
Arrow: RMSE 0.5722820332859196
Circle: RMSE 0.4805930379903932
Overall: RMSE 0.5338824031008821
------------------------------------
Action Carry big objects (tab