In [1]:
%load_ext autoreload
%autoreload 2
import trust_transfer_experiment_utils
import pandas as pd
import csv
import os
from tqdm import tqdm
import sys
sys.path.append('../')
import llm_utils
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Human-readable task descriptions per domain, looked up by the integer task
# IDs stored in the survey CSVs (task_descr[dom][task_id]).
# Index 0 is a blank placeholder -- presumably because task IDs in the data
# are 1-based; verify against the CSV columns.
task_descr = dict(
    household=[
        ' ',
        # Pick-and-place tasks
        'Pick and place a glass',
        'Pick and place a plastic can',
        'Pick and place a lemon',
        'Pick and place a plastic bottle',
        'Pick and place an apple',
        'Pick and place a plastic cup',
        # Navigation tasks
        'Navigate while avoiding moving people',
        'Navigate to the main room door',
        'Navigate while following a person',
        'Navigate to the dining table',
        'Navigate while avoiding obstacles',
        'Navigate to the living room',
    ],
    driving=[
        ' ',
        # Parking tasks
        'Parking backwards cars and people around, misaligned',
        'Parking backwards empty lot, misaligned',
        'Parking backwards cars and people around, aligned',
        'Parking forwards empty lot, aligned',
        'Parking forwards cars and people around, misaligned',
        'Parking forwards empty lot, misaligned',
        # Road-navigation tasks
        'Navigating lane merge with other moving vehicles',
        'Navigating lane merge on a clear road',
        'Navigating traffic-circle with other moving vehicles',
        'Navigating traffic-circle on a clear road',
        'Navigating T-junction with other moving vehicles',
        'Navigating T-junction on a clear road',
    ],
)

In [2]:
# Locations of the cleaned survey data, one CSV per domain.
driving_csv_path = './data/trust_transfer_driving_cleaned.csv'
household_csv_path = './data/trust_transfer_household_cleaned.csv'

# These frames are both the source of the prompts (inference cells) and the
# ground truth passed to analyze_results (results cells).
trust_transfer_driving_df = pd.read_csv(driving_csv_path)
trust_transfer_household_df = pd.read_csv(household_csv_path)

# Experiment grid: every domain is evaluated with every prompt structure.
# The results cells weight domains by position, so keep 'household' first.
dom_list = ['household', 'driving']
prompt_structure_list = ['base', 'altered']

# Candidate answers scored by the models: the strings '1' through '7'.
answer_choices = list(map(str, range(1, 8)))

In [3]:
# Query Flan-T5-XXL for every (domain, prompt structure) pair and cache the
# answer-choice scores as a CSV under ./results. A pair whose results file
# already exists is skipped, so re-running this cell only fills in what is
# missing. The model is loaded lazily, and only once, the first time a pair
# actually needs to be computed.
model_loaded = False
for dom in dom_list:
    for prompt_structure in prompt_structure_list:
        post_obs_trust_csv_path = f'./results/trust_transfer_t5_post_obs_{dom}_{prompt_structure}.csv'
        if not os.path.exists(post_obs_trust_csv_path):
            # Load model
            if not model_loaded:
                max_memory = {0: "20GIB", 1: "20GIB", 2: "20GIB", 3: "20GIB"}
                t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl", device_map="auto", max_memory=max_memory)
                t5_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xxl")
                model_loaded = True

            # Pick the survey data and prompt builder for this configuration.
            if dom == 'household':
                df = trust_transfer_household_df
                if prompt_structure == 'base':
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_household_t5
                else:
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_household_t5_altered
            else:
                df = trust_transfer_driving_df
                if prompt_structure == 'base':
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_driving_t5
                else:
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_driving_t5_altered

            # Fixed metadata columns followed by one column per answer choice.
            header = ['id', 'test_task_descr_1', 'test_task_descr_2', 'test_task_descr_3', 'obs_task_descr_1', 'obs_task_descr_2', 'obs_tasks_perf_1', 'obs_tasks_perf_2', 'post_obs_task_descr', 'prompt']
            header += answer_choices

            # Write to a temporary file and promote it only after every row
            # has been written. If inference fails midway, no file is left at
            # the final path, so the existence check above cannot mistake a
            # truncated CSV for a finished run.
            os.makedirs(os.path.dirname(post_obs_trust_csv_path), exist_ok=True)
            tmp_csv_path = post_obs_trust_csv_path + '.tmp'
            # newline='' is required by the csv module so that it controls
            # line endings itself; the context manager closes the file even
            # when an exception is raised.
            with open(tmp_csv_path, 'w', newline='') as post_obs_trust_csv_file:
                writer = csv.writer(post_obs_trust_csv_file)
                writer.writerow(header)

                for row_idx, row in tqdm(df.iterrows(), total=len(df)):
                    # Three test tasks (C, D, E) with their pre-observation ratings.
                    test_tasks_id = [int(row['C_ID']), int(row['D_ID']), int(row['E_ID'])]
                    test_tasks_descr = [task_descr[dom][test_task_id] for test_task_id in test_tasks_id]
                    pre_trust = [int(row['C1_rating']), int(row['D1_rating']), int(row['E1_rating'])]
                    # Two observed tasks (A, B); both get the same outcome flag, read from B_SF.
                    obs_tasks_id = [int(row['A_ID']), int(row['B_ID'])]
                    obs_tasks_perf = [True, True] if int(row['B_SF']) else [False, False]
                    obs_tasks_descr = [task_descr[dom][obs_task_id] for obs_task_id in obs_tasks_id]

                    # One prompt, and one output row, per test task.
                    for idx in range(3):
                        template = prompt_creation_fn(test_tasks_descr, pre_trust, obs_tasks_descr, obs_tasks_perf, test_tasks_descr[idx])
                        # A single-prompt batch is passed in, so take its only result.
                        probs = llm_utils.get_probs_t5([template], answer_choices, t5_model, t5_tokenizer)[0]
                        writer.writerow([row_idx] + test_tasks_descr + obs_tasks_descr + obs_tasks_perf + [test_tasks_descr[idx], template] + probs)
            os.replace(tmp_csv_path, post_obs_trust_csv_path)

In [4]:
# Query the Davinci model for every (domain, prompt structure) pair and cache
# the answer-choice scores as a CSV under ./results. A pair whose results
# file already exists is skipped, so re-running this cell only fills in what
# is missing.
for dom in dom_list:
    for prompt_structure in prompt_structure_list:
        post_obs_trust_csv_path = f'./results/trust_transfer_davinci_post_obs_{dom}_{prompt_structure}.csv'
        if not os.path.exists(post_obs_trust_csv_path):
            # Pick the survey data and prompt builder for this configuration.
            # Every branch must assign the function object itself: it is
            # called with five arguments further down.
            if dom == 'household':
                df = trust_transfer_household_df
                if prompt_structure == 'base':
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_household_davinci
                else:
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_household_davinci_altered
            else:
                df = trust_transfer_driving_df
                if prompt_structure == 'base':
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_driving_davinci
                else:
                    prompt_creation_fn = trust_transfer_experiment_utils.create_template_driving_davinci_altered

            # Fixed metadata columns followed by one column per answer choice.
            header = ['id', 'test_task_descr_1', 'test_task_descr_2', 'test_task_descr_3', 'obs_task_descr_1', 'obs_task_descr_2', 'obs_tasks_perf_1', 'obs_tasks_perf_2', 'post_obs_task_descr', 'prompt']
            header += answer_choices

            # Write to a temporary file and promote it only after every row
            # has been written. If a request fails midway, no file is left at
            # the final path, so the existence check above cannot mistake a
            # truncated CSV for a finished run.
            os.makedirs(os.path.dirname(post_obs_trust_csv_path), exist_ok=True)
            tmp_csv_path = post_obs_trust_csv_path + '.tmp'
            # newline='' is required by the csv module so that it controls
            # line endings itself; the context manager closes the file even
            # when an exception is raised.
            with open(tmp_csv_path, 'w', newline='') as post_obs_trust_csv_file:
                writer = csv.writer(post_obs_trust_csv_file)
                writer.writerow(header)

                for row_idx, row in tqdm(df.iterrows(), total=len(df)):
                    # Three test tasks (C, D, E) with their pre-observation ratings.
                    test_tasks_id = [int(row['C_ID']), int(row['D_ID']), int(row['E_ID'])]
                    test_tasks_descr = [task_descr[dom][test_task_id] for test_task_id in test_tasks_id]
                    pre_trust = [int(row['C1_rating']), int(row['D1_rating']), int(row['E1_rating'])]
                    # Two observed tasks (A, B); both get the same outcome flag, read from B_SF.
                    obs_tasks_id = [int(row['A_ID']), int(row['B_ID'])]
                    obs_tasks_perf = [True, True] if int(row['B_SF']) else [False, False]
                    obs_tasks_descr = [task_descr[dom][obs_task_id] for obs_task_id in obs_tasks_id]

                    # One prompt, and one output row, per test task.
                    for idx in range(3):
                        template = prompt_creation_fn(test_tasks_descr, pre_trust, obs_tasks_descr, obs_tasks_perf, test_tasks_descr[idx])
                        probs = llm_utils.get_probs_davinci(template, answer_choices)
                        writer.writerow([row_idx] + test_tasks_descr + obs_tasks_descr + obs_tasks_perf + [test_tasks_descr[idx], template] + probs)
            os.replace(tmp_csv_path, post_obs_trust_csv_path)

In [5]:
# Score the cached Flan-T5 outputs against the survey data and print one
# report section per prompt structure.
print('T5 RESULTS:\n')
for prompt_structure in prompt_structure_list:
    mae_list, cwm_list = [], []
    for dom in dom_list:
        post_obs_trust_csv_path = f'./results/trust_transfer_t5_post_obs_{dom}_{prompt_structure}.csv'
        llm_post_trust_df = pd.read_csv(post_obs_trust_csv_path)
        # Ground truth is the cleaned survey frame of the matching domain.
        gt_df = trust_transfer_household_df if dom == 'household' else trust_transfer_driving_df
        mae, cwm = trust_transfer_experiment_utils.analyze_results(llm_post_trust_df, gt_df)
        print(f"Domain {dom} Prompt Structure {prompt_structure} MAE {mae}, CwM {cwm}")
        mae_list.append(mae)
        cwm_list.append(cwm)
    # Weighted average across the two domains. Index 0/1 follow dom_list
    # order (household, driving); 96 and 93 are presumably the per-domain
    # sample counts (189 in total) -- TODO confirm against the data.
    print(f"Overall MAE {(mae_list[0] * 96 + mae_list[1] * 93) / 189}, CwM {(cwm_list[0] * 96 + cwm_list[1] * 93) / 189}")
    print('------------------------------')

T5 RESULTS:

Post same as Pre 21
Domain household Prompt Structure base MAE 0.21115117215210447, CwM 0.7604166666666666
Post same as Pre 23
Domain driving Prompt Structure base MAE 0.21062865374388773, CwM 0.7204301075268817
Overall MAE 0.2108940599194899, CwM 0.7407407407407407
------------------------------
Post same as Pre 2
Domain household Prompt Structure altered MAE 0.16952754448010476, CwM 0.8020833333333334
Post same as Pre 3
Domain driving Prompt Structure altered MAE 0.15976733711975816, CwM 0.7956989247311828
Overall MAE 0.1647249027631088, CwM 0.798941798941799
------------------------------


In [6]:
# Score the cached Davinci outputs against the survey data and print one
# report section per prompt structure.
print('Davinci RESULTS:\n')
for prompt_structure in prompt_structure_list:
    mae_list, cwm_list = [], []
    for dom in dom_list:
        post_obs_trust_csv_path = f'./results/trust_transfer_davinci_post_obs_{dom}_{prompt_structure}.csv'
        llm_post_trust_df = pd.read_csv(post_obs_trust_csv_path)
        # Ground truth is the cleaned survey frame of the matching domain.
        gt_df = trust_transfer_household_df if dom == 'household' else trust_transfer_driving_df
        mae, cwm = trust_transfer_experiment_utils.analyze_results(llm_post_trust_df, gt_df)
        print(f"Domain {dom} Prompt Structure {prompt_structure} MAE {mae}, CwM {cwm}")
        mae_list.append(mae)
        cwm_list.append(cwm)
    # Weighted average across the two domains. Index 0/1 follow dom_list
    # order (household, driving); 96 and 93 are presumably the per-domain
    # sample counts (189 in total) -- TODO confirm against the data.
    print(f"Overall MAE {(mae_list[0] * 96 + mae_list[1] * 93) / 189}, CwM {(cwm_list[0] * 96 + cwm_list[1] * 93) / 189}")
    print('------------------------------')

Davinci RESULTS:

Post same as Pre 0
Domain household Prompt Structure base MAE 0.1621312900357, CwM 0.8958333333333334
Post same as Pre 0
Domain driving Prompt Structure base MAE 0.15420239514865774, CwM 0.8064516129032258
Overall MAE 0.1582297703293776, CwM 0.8518518518518519
------------------------------
Post same as Pre 1
Domain household Prompt Structure altered MAE 0.16303994685915713, CwM 0.875
Post same as Pre 0
Domain driving Prompt Structure altered MAE 0.15503605938460954, CwM 0.8064516129032258
Overall MAE 0.15910152603834798, CwM 0.8412698412698413
------------------------------
