In [1]:
import pickle
import os
import numpy as np
from lm_nav import landmark_extraction

## Building dataset

In [2]:
# Routes used to build `dataset_large`: (integer id, navigation instruction).
# NOTE(review): the integer is never read in this notebook — presumably the
# route's start node in the environment graph; confirm against the caller.
all_routes = [
    (5, "Go straight toward the white building. Continue straight passing by a white truck until you reach a stop sign."),
    (5, "After passing a white building, take right next to a white truck. Then take left and go towards a square with a large tree. Go further, until you find a stop sign."),
    (173, "Start going around a building with a red-black wall and pass by a fire hydrant. Take a right and enter a grove. Continue straight and take a right, when you see a manhole cover. Go forward and left, and look for a trailer."),
    (108, "Take a right next to a stop sign. Look for a glass building, after passing by a white car."),
    (247, "Follow the road and take the right, you should see a blue semi-truck. Behind the truck, take a right next to an orange traffic cone. Go towards a blue dumpster and take left. Look for a picnic bench."),
    (70, "Go towards a white trailer. Then take left and go to the traffic lights. Take left again, and look for a traffic cone."),
    (215, "Go straight, passing by a stop sign and a  manhole cover. Next, you will see a disabled Parking spot and a red building."),
    (103, "First, you need to find a stop sign. Then take left and right and continue until you reach a square with a tree. Continue first straight, then right, until you find a white truck. The final destination is a white building."),
    (103, "Go to a stop sign. Continue straight, passing by a white truck. The final destination is a white building."),
    (211, "Go straight, until you find a glass building. Drive to a white car nearby. Drive to a stop sign.")
]

# Ground-truth ordered landmarks, one list per entry of `all_routes` (same order).
# Kept as a plain list literal: evaluating a constant string with `eval` adds
# nothing and hides the data from linters and diffs.
landmarks_cache = [
    ["a white building", "a white truck", "a stop sign"],
    ["a white building", "a white truck", "a square with a large tree", "a stop sign"],
    ["a building with a red-black wall", "a fire hydrant", "a grove", "a manhole cover", "a trailer"],
    ["a stop sign", "a white car", "a glass building"],
    ["a blue semi-truck", "an orange traffic cone", "a blue dumpster", "a picnic bench"],
    ["a white trailer", "traffic lights", "a traffic cone"],
    ["a stop sign", "a manhole cover", "a disabled Parking spot", "a red building"],
    ["a stop sign", "a square with a tree", "a white truck", "a white building"],
    ["a stop sign", "a white truck", "a white building"],
    ["a glass building", "a white car", "a stop sign"],
]

# `zip` silently truncates to the shorter input, so guard against a mismatch.
assert len(landmarks_cache) == len(all_routes), "every route needs a ground-truth landmark list"

# Each dataset item is (instruction text, ground-truth landmark list).
dataset_large = [(instruction, landmarks) for (_, instruction), landmarks in zip(all_routes, landmarks_cache)]

In [3]:
# Routes used to build `dataset_small`: (integer id, navigation instruction).
# This deliberately rebinds `all_routes` / `landmarks_cache`; the values built
# from their previous contents are not affected by the rebinding.
# NOTE(review): the integer is never read in this notebook — presumably the
# route's start node in the environment graph; confirm against the caller.
all_routes = [
    (180, "Go straight towards a stop sign, take left and go until you reach a traffic cone. Take another left and then right going towards a blue box. From there take left and look for a baby stroller."),
    (215, "Go towards the blue box, take right and left until you reach a traffic cone. Take left and pass by a semi-truck until you find a big log."),
    (63, "Start at a traffic cone. Go towards a cardboard box and a parking lot. Continue driving, take a right, and pass by a picnic table. Take left and look for a stop sign."),
    (160, "Take first right towards a picnic table. Next, go to a square with a large tree, and take the left to another picnic table. Keep going until you reach a parking lot."),
    (61, "Go straight and take right next to a traffic cone. Go straight until you reach a parking lot. Take left, go through a lawn and look for a blue box."),
    (219, "Pass by a blue box and look for a big log. Take right and keep going straight, passing by a traffic cone. Take a right and finish at the parking lot."),
    (186, "Look for a traffic cone, take left and go straight until you find a square with a tree. Turn right, pass by a cardboard box and go to a picnic table."),
    (75, "Go straight pass a picnic table until you reach a street. Take right, pass by an orange trailer and take next right at an intersection. Next, take a right next to a traffic cone, take the next left, and pass by a baby stroller. Go straight and you will reach a parking lot."),
    (194, "Take a left when you see a traffic cone. Go straight passing by a semi-track and take left after you see a big log. Drive to a blue box and continue straight until you find a cardboard box next to a parking lot."),
    (133, "Take right at a traffic cone, and go straight until you reach a square with a big tree. Take right next and go straight until you find a baby stroller. Take left and right and look for an intersection."),
]

# Ground-truth ordered landmarks, one list per entry of `all_routes` (same order).
# Kept as a plain list literal: evaluating a constant string with `eval` adds
# nothing and hides the data from linters and diffs.
# "a semi-track" (sic) mirrors the wording of the corresponding instruction.
landmarks_cache = [
    ["a stop sign", "a traffic cone", "a blue box", "a baby stroller"],
    ["a blue box", "a traffic cone", "a semi-truck", "a big log"],
    ["a traffic cone", "a cardboard box", "a parking lot", "a picnic table", "a stop sign"],
    ["a picnic table", "a square with a large tree", "another picnic table", "a parking lot"],
    ["a traffic cone", "a parking lot", "a lawn", "a blue box"],
    ["a blue box", "a big log", "a traffic cone", "a parking lot"],
    ["a traffic cone", "a square with a tree", "a cardboard box", "a picnic table"],
    ["a picnic table", "a street", "an orange trailer", "an intersection", "a traffic cone", "a baby stroller", "a parking lot"],
    ["a traffic cone", "a semi-track", "a big log", "a blue box", "a cardboard box", "a parking lot"],
    ["a traffic cone", "a square with a big tree", "a baby stroller", "an intersection"],
]

# `zip` silently truncates to the shorter input, so guard against a mismatch.
assert len(landmarks_cache) == len(all_routes), "every route needs a ground-truth landmark list"

# Each dataset item is (instruction text, ground-truth landmark list).
dataset_small = [(instruction, landmarks) for (_, instruction), landmarks in zip(all_routes, landmarks_cache)]

In [4]:
# Full evaluation set: every item of `dataset_large` followed by every item of `dataset_small`.
dataset = [*dataset_large, *dataset_small]

## Metric: longest common subsequence

In [5]:
def remove_article(string):
    """Return ``string`` without the standalone words "a", "an" and "the".

    The input is split on whitespace, words equal to one of the three
    lower-case articles are dropped (the comparison is case-sensitive), and
    the remaining words are joined back together with single spaces.
    """
    articles = ("a", "an", "the")
    kept_words = []
    for word in string.split():
        if word in articles:
            continue
        kept_words.append(word)
    return " ".join(kept_words)

In [6]:
def longest_common_subsequence(sequence, gt_sequence):
    """Score predicted landmarks against the ground truth with a normalized LCS.

    Every landmark in both lists is first passed through ``remove_article``;
    the resulting strings are compared for exact equality. The score is the
    length of the longest common subsequence of the two lists divided by the
    number of ground-truth landmarks.

    Args:
        sequence: predicted landmark strings, in the order they were produced.
        gt_sequence: ground-truth landmark strings, in order.

    Returns:
        A float between 0 and 1; 0.0 when ``sequence`` is empty or shares no
        landmark with ``gt_sequence``.

    Raises:
        ValueError: if ``gt_sequence`` is empty (the score is undefined).
    """
    predicted = [remove_article(s) for s in sequence]
    expected = [remove_article(s) for s in gt_sequence]
    if not expected:
        raise ValueError("gt_sequence must contain at least one landmark")

    # Standard LCS dynamic programme kept in a single rolling row:
    # after processing predicted[:i], row[j] == LCS(predicted[:i], expected[:j]).
    # Unlike an index-lookup approach this stays correct when the ground truth
    # contains the same landmark more than once, and it needs no special case
    # for an empty prediction (the row simply stays all zeros).
    row = [0] * (len(expected) + 1)
    for pred_item in predicted:
        diagonal = 0  # value of row[j - 1] from the previous iteration of the outer loop
        for j, gt_item in enumerate(expected, start=1):
            above = row[j]
            if pred_item == gt_item:
                row[j] = diagonal + 1
            else:
                row[j] = max(above, row[j - 1])
            diagonal = above
    return row[-1] / len(expected)

## Evaluation of different models
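To run the experiments, you need to set up API keys for OpenAI and GooseAI, either by setting the `OPENAI_API_KEY` and `GOOSE_API_KEY` environment variables on the command line before starting the notebook, or by uncommenting the next cell and filling in your keys.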

In [None]:
# os.environ["OPENAI_API_KEY"] = "sk-[real api key here]"
# os.environ["GOOSE_API_KEY"] = "sk-[real api key here]"

In [46]:
# Landmarks from `text_to_landmarks_spacy` for every instruction, followed by
# the mean normalized-LCS score against the ground-truth landmark lists.
spacy_answers = [
    landmark_extraction.text_to_landmarks_spacy(instruction)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(spacy_answers, dataset)
])

0.8775000000000001

## Simple prompt

> Look for a library, after taking a right turn next to a statue.
>
> Landmarks:
> 1. a statue
> 2. a library
>
> Look for a statue. Then look for a library. Then go towards a pink house.
>
> Landmarks:
> 1. a statue
> 2. a library
> 3. a pink house
> 
> {Description}
>
> Landmarks:
> 1. 


In [12]:
# Landmarks from `text_to_landmarks_gpt3` with the simple prompt, followed by
# the mean normalized-LCS score against the ground-truth landmark lists.
gpt3_answers_simple = [
    landmark_extraction.text_to_landmarks_gpt3(instruction, simple_prompt=True)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(gpt3_answers_simple, dataset)
])

1.0

In [11]:
# Landmarks from `text_to_landmarks_goose_ai` ("gpt-j-6b", simple prompt),
# followed by the mean normalized-LCS score against the ground truth.
gpt_j_6b_answers_simple = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "gpt-j-6b", simple_prompt=True)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(gpt_j_6b_answers_simple, dataset)
])

0.804404761904762

In [17]:
# Landmarks from `text_to_landmarks_goose_ai` ("fairseq-13b", simple prompt),
# followed by the mean normalized-LCS score against the ground truth.
fairseq_13_answers_simple = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "fairseq-13b", simple_prompt=True)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(fairseq_13_answers_simple, dataset)
])

0.7615476190476189

In [14]:
# Landmarks from `text_to_landmarks_goose_ai` ("gpt-neo-20b", simple prompt),
# followed by the mean normalized-LCS score against the ground truth.
gpt_neo_20b_answers_simple = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "gpt-neo-20b", simple_prompt=True)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(gpt_neo_20b_answers_simple, dataset)
])

0.723452380952381

In [15]:
# Landmarks from `text_to_landmarks_goose_ai` ("fairseq-1-3b", simple prompt),
# followed by the mean normalized-LCS score against the ground truth.
fairseq_1_3_answers_simple = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "fairseq-1-3b", simple_prompt=True)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(fairseq_1_3_answers_simple, dataset)
])

0.5242857142857144

## More complicated prompt

> Take right next to an old white building. Look for a fire station, which you will see after passing by a school.
> Ordered landmarks:
> 1. an old white building
> 2. a school
> 3. a fire station
>
> Go straight for two blocks. Take right at a roundabout, before it you will pass a big, blue tree.
> Ordered landmarks:
> 1. a big, blue tree
> 2. a roundabout
>
> Look for a library, after taking a right turn next to a statue.
> Ordered landmarks:
> 1. a statue
> 2. a library
>
> {Description}
>
> Ordered landmarks:
> 1. 


In [26]:
# Check how well GPT-3 does: landmarks from `text_to_landmarks_gpt3` called
# without `simple_prompt`, followed by the mean normalized-LCS score against
# the ground-truth landmark lists.
gpt3_answers = [
    landmark_extraction.text_to_landmarks_gpt3(instruction)
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(gpt3_answers, dataset)
])

0.9857142857142858

In [30]:
# Landmarks from `text_to_landmarks_goose_ai` ("gpt-j-6b", called without
# `simple_prompt`), followed by the mean normalized-LCS score.
gpt_j_6b_answers = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "gpt-j-6b")
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(gpt_j_6b_answers, dataset)
])

0.5313095238095238

In [47]:
# Landmarks from `text_to_landmarks_goose_ai` ("fairseq-13b", called without
# `simple_prompt`), followed by the mean normalized-LCS score.
fairseq_13_answers = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "fairseq-13b")
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(fairseq_13_answers, dataset)
])

0.7998809523809524

In [56]:
# Landmarks from `text_to_landmarks_goose_ai` ("gpt-neo-20b", called without
# `simple_prompt`), followed by the mean normalized-LCS score.
gpt_neo_20b_answers = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "gpt-neo-20b")
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(gpt_neo_20b_answers, dataset)
])

0.573095238095238

In [57]:
# Landmarks from `text_to_landmarks_goose_ai` ("fairseq-1-3b", called without
# `simple_prompt`), followed by the mean normalized-LCS score.
fairseq_1_3_answers = [
    landmark_extraction.text_to_landmarks_goose_ai(instruction, "fairseq-1-3b")
    for instruction, _ in dataset
]
np.mean([
    longest_common_subsequence(predicted, expected)
    for predicted, (_, expected) in zip(fairseq_1_3_answers, dataset)
])

0.48261904761904767