In [None]:
import json
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# PARITY
Baseline: MLP

```
0.946
delta_0.2 0.945
delta_0.25 0.9453333333333334
delta_0.45 0.9506666666666667
delta_0.65 0.9466666666666667
delta_0.85 0.9426666666666667
```

In [None]:
# Load the PARITY corpora. Each file is parsed line by line, one JSON document
# per line. The `with` blocks close every file handle deterministically instead
# of leaving it open until garbage collection.
with open("datasets/PARITY/corpus_parity_id_4k_new.json", "r", encoding="utf-8") as fh:
    corpus_id = [json.loads(line) for line in fh]
with open("datasets/PARITY/corpus_parity_id_4k_new_test.json", "r", encoding="utf-8") as fh:
    corpus_id_test = [json.loads(line) for line in fh]
with open("datasets/PARITY/corpus_parity_ood_4k_test_new_with_negs.json", "r", encoding="utf-8") as fh:
    corpus_od_test_w_negs = [json.loads(line) for line in fh]

def two_he(s, max_len=60):
    """Encode a string of digit characters as a fixed-length list of ints.

    Position ``i`` of the result holds ``int(s[i])``; every position past the
    end of ``s`` holds the padding value ``-1``.

    Args:
        s: Iterable of single characters, each convertible with ``int``.
        max_len: Length of the returned list.

    Returns:
        A list of ``max_len`` ints.

    Raises:
        IndexError: If ``s`` has more than ``max_len`` characters.
        ValueError: If a character cannot be converted with ``int``.
    """
    encoded = [-1] * max_len
    # Write in place (rather than concatenating) so that an over-long input
    # fails with IndexError instead of silently producing a longer vector.
    for position, digit in enumerate(s):
        encoded[position] = int(digit)
    return encoded

# Training split: one encoded vector and one integer label per corpus record.
X = [two_he(example["Entry"]) for example in corpus_id]
labels = [int(example["Label"]) for example in corpus_id]

# MLP baseline with a fixed seed; fit() returns the fitted estimator itself.
clf = MLPClassifier(random_state=13213).fit(X, labels)

# Held-out split read from the "*_id_*_test" file.
X_test = [two_he(example["Entry"]) for example in corpus_id_test]
test_labels = [int(example["Label"]) for example in corpus_id_test]

print(clf.score(X_test, test_labels))

# The first record of the OOD file maps a split name to its list of examples;
# report one accuracy per split. Note that `test_labels` is rebound here.
deltas = corpus_od_test_w_negs[0].keys()
for delta in deltas:
    test_set = [two_he(example["Entry"]) for example in corpus_od_test_w_negs[0][delta]]
    test_labels = [int(example["Label"]) for example in corpus_od_test_w_negs[0][delta]]
    print(delta, clf.score(test_set, test_labels))

# Pattern Matching

Baseline: K-Neighbours

```
0.8675
delta_0.2 0.8486666666666667
delta_0.25 0.8376666666666667
delta_0.45 0.811
delta_0.65 0.7473333333333333
delta_0.85 0.69
```

In [None]:
# Load the Pattern Matching corpora. Each file is parsed line by line, one JSON
# document per line. The `with` blocks close every file handle deterministically
# instead of leaving it open until garbage collection.
with open("datasets/Pattern_Matching/corpus_pattern_matching_id_4k_new.json", "r", encoding="utf-8") as fh:
    corpus_id = [json.loads(line) for line in fh]
with open("datasets/Pattern_Matching/corpus_pattern_matching_id_4k_new_test.json", "r", encoding="utf-8") as fh:
    corpus_id_test = [json.loads(line) for line in fh]
with open("datasets/Pattern_Matching/corpus_pattern_matching_ood_4k_new_test.json", "r", encoding="utf-8") as fh:
    corpus_od_test_w_negs = [json.loads(line) for line in fh]

def two_he(s, max_len=2750):
    """Encode a string over the alphabet a/b/c/d as a fixed-length int list.

    Characters map to ``a -> 0, b -> 1, c -> 2, d -> 3``; every position past
    the end of ``s`` holds the padding value ``-1``.

    Args:
        s: Iterable of single characters drawn from ``"abcd"``.
        max_len: Length of the returned list.

    Returns:
        A list of ``max_len`` ints.

    Raises:
        IndexError: If ``s`` has more than ``max_len`` characters.
        KeyError: If a character is not one of ``a``, ``b``, ``c``, ``d``.
    """
    symbol_ids = {"a": 0, "b": 1, "c": 2, "d": 3}
    encoded = [-1] * max_len
    # Write in place so an over-long input raises IndexError rather than
    # yielding a vector of the wrong length.
    for position, symbol in enumerate(s):
        encoded[position] = symbol_ids[symbol]
    return encoded

# Training split: one encoded vector and one integer label per corpus record.
X = [two_he(example["Entry"]) for example in corpus_id]
labels = [int(example["Label"]) for example in corpus_id]

# k-nearest-neighbours baseline with scikit-learn's default settings;
# fit() returns the fitted estimator itself.
clf = KNeighborsClassifier().fit(X, labels)

# Held-out split read from the "*_id_*_test" file.
X_test = [two_he(example["Entry"]) for example in corpus_id_test]
test_labels = [int(example["Label"]) for example in corpus_id_test]

print(clf.score(X_test, test_labels))

# The first record of the OOD file maps a split name to its list of examples;
# report one accuracy per split. Note that `test_labels` is rebound here.
deltas = corpus_od_test_w_negs[0].keys()
for delta in deltas:
    test_set = [two_he(example["Entry"]) for example in corpus_od_test_w_negs[0][delta]]
    test_labels = [int(example["Label"]) for example in corpus_od_test_w_negs[0][delta]]
    print(delta, clf.score(test_set, test_labels))

0.8675
delta_0.2 0.8486666666666667
delta_0.25 0.8376666666666667
delta_0.45 0.811
delta_0.65 0.7473333333333333
delta_0.85 0.69


# Vending Machine (Verification)
Baseline: Decision Tree

```
0.8415
delta_0.2 0.8216666666666667
delta_0.25 0.8266666666666667
delta_0.45 0.7936666666666666
delta_0.65 0.759
delta_0.85 0.699
```


In [None]:
# Load the Vending Machine corpora. Each file is parsed line by line, one JSON
# document per line. The `with` blocks close every file handle deterministically
# instead of leaving it open until garbage collection.
with open("datasets/Vending_Machine/corpus_vending_machine_id_4k.json", "r", encoding="utf-8") as fh:
    corpus_id = [json.loads(line) for line in fh]
with open("datasets/Vending_Machine/corpus_vending_machine_id_4k_test.json", "r", encoding="utf-8") as fh:
    corpus_id_test = [json.loads(line) for line in fh]
with open("datasets/Vending_Machine/corpus_vending_machine_ood_4k_test.json", "r", encoding="utf-8") as fh:
    corpus_od_test_w_negs = [json.loads(line) for line in fh]

def two_he(s, max_len=500):
    """Encode a comma-separated token string as a fixed-length list of ints.

    Slot 0 is always ``0``. Starting at slot 1, each token fills two
    consecutive slots:

    * a token starting with ``+`` becomes ``(1, int(token[1:]))``;
    * any other token that is *not* the last one becomes ``(-1, code)`` with
      ``biscuit -> 20``, ``coffee -> 15``, ``soda -> 25``;
    * a last token that does not start with ``+`` becomes
      ``(-1000, int(token))``.

    Unused slots keep the padding value ``-100``.

    Args:
        s: Comma-separated tokens, without surrounding whitespace.
        max_len: Length of the returned list (at least 1 element is produced).

    Returns:
        A list of ints.

    Raises:
        IndexError: If a token is empty or the encoding exceeds ``max_len``.
        KeyError: If a non-final, non-``+`` token is not a known item name.
        ValueError: If a numeric part cannot be parsed by ``int``.
    """
    map_str_to = {"biscuit": 20, "coffee": 15, "soda": 25, "+": 1, "-": -1, "empty": -1000}
    vec = [0] + [-100] * (max_len - 1)

    # Split once: the original re-split the whole string on every iteration
    # just to find the last index, which made the loop quadratic.
    items = s.split(",")
    last_index = len(items) - 1

    idx = 1
    for i, item in enumerate(items):
        if item[0] == '+':
            vec[idx] = map_str_to['+']
            vec[idx + 1] = int(item[1:])
            idx += 2
        elif i == last_index:
            # Trailing plain number: tagged with the "empty" marker.
            vec[idx] = map_str_to["empty"]
            vec[idx + 1] = int(item)
        else:
            vec[idx] = map_str_to["-"]
            vec[idx + 1] = map_str_to[item]
            idx += 2
    return vec

# Training split: one encoded vector and one integer label per corpus record.
X = [two_he(example["Entry"]) for example in corpus_id]
labels = [int(example["Label"]) for example in corpus_id]

# Decision-tree baseline with a fixed seed; fit() returns the fitted estimator.
clf = DecisionTreeClassifier(random_state=13213).fit(X, labels)

# Held-out split read from the "*_id_*_test" file.
X_test = [two_he(example["Entry"]) for example in corpus_id_test]
test_labels = [int(example["Label"]) for example in corpus_id_test]

print(clf.score(X_test, test_labels))

# The first record of the OOD file maps a split name to its list of examples;
# report one accuracy per split. Note that `test_labels` is rebound here.
deltas = corpus_od_test_w_negs[0].keys()
for delta in deltas:
    test_set = [two_he(example["Entry"]) for example in corpus_od_test_w_negs[0][delta]]
    test_labels = [int(example["Label"]) for example in corpus_od_test_w_negs[0][delta]]
    print(delta, clf.score(test_set, test_labels))

0.8415
delta_0.2 0.8216666666666667
delta_0.25 0.8266666666666667
delta_0.45 0.7936666666666666
delta_0.65 0.759
delta_0.85 0.699


# Stack
Baseline: k-Neighbours

```
0.7175
delta_0.2 0.6663333333333333
delta_0.45 0.6176666666666667
delta_0.65 0.59
delta_0.85 0.5863333333333334
```

In [None]:
# Load the Stack corpora. Each file is parsed line by line, one JSON document
# per line. The `with` blocks close every file handle deterministically instead
# of leaving it open until garbage collection.
with open("datasets/Stack/corpus_stack_id_4k.json", "r", encoding="utf-8") as fh:
    corpus_id = [json.loads(line) for line in fh]
with open("datasets/Stack/corpus_stack_id_4k_test.json", "r", encoding="utf-8") as fh:
    corpus_id_test = [json.loads(line) for line in fh]
with open("datasets/Stack/corpus_stack_ood_4k_test.json", "r", encoding="utf-8") as fh:
    corpus_od_test_w_negs = [json.loads(line) for line in fh]

import numpy as np

def two_he(s, max_len=3000):
    """Encode a multi-line string as a fixed-length list of ints.

    All commas are stripped from ``s`` and the result is split on newlines.
    Each line is rewritten by substring replacement, converted character by
    character to ints, and followed by the separator value ``100``. The
    concatenated codes are written into a list of ``max_len`` slots that is
    pre-filled with the padding value ``-100``.

    Args:
        s: Input text; lines are separated by ``"\\n"``.
        max_len: Length of the returned list.

    Returns:
        A list of ``max_len`` ints.

    Raises:
        IndexError: If a line is empty (``t[0]``) or the encoding needs more
            than ``max_len`` slots.
        ValueError: If a character left after the replacements is not a digit.
    """
    # Substring -> replacement, applied in insertion order. "stop" is deleted
    # (replaced by the empty string); the other words become single digits.
    map_str_to = {"0": 0, "1": 1, "push": 2, "pop": 3, "stop": "", "empty": 4}
    # Per-character overrides applied after the replacements above: the digit
    # produced by "empty" ("4") becomes -100, the same value as the padding, and
    # the digit produced by "pop" ("3") becomes -1. The string "-100" is turned
    # into an int by the final int() pass below.
    deck = {"4": "-100", "3": -1}
    vec = [-100 for _ in range(max_len)]
    _s = s.replace(",", "")
    new_s = []
    for __s in _s.split("\n"):
        t = __s
        if t[0] == "[":
            # NOTE(review): eval() executes whatever the dataset line contains,
            # in this function's scope. Commas were already removed above, so
            # the line is evaluated without them. Consider ast.literal_eval if
            # the data is not fully trusted -- confirm against the corpus format.
            t = ",".join(eval(t))
        for k, v in map_str_to.items():
            t = t.replace(k, str(v))
        new_s += [int(c) if c not in deck else deck[c] for c in t]
        # End-of-line separator.
        new_s.append(100)
    for i, c in enumerate(new_s):
        vec[i] = c
    # Normalise every slot to int (deck can inject the string "-100").
    _vec = [int(c) for c in vec]
    return _vec

# Training split: one encoded vector and one integer label per corpus record.
X = [two_he(example["Entry"]) for example in corpus_id]
labels = [int(example["Label"]) for example in corpus_id]

# k-nearest-neighbours baseline with scikit-learn's default settings. No seed
# is passed: KNeighborsClassifier does not take a random_state argument.
# fit() returns the fitted estimator itself.
clf = KNeighborsClassifier().fit(X, labels)

# Held-out split read from the "*_id_*_test" file.
X_test = [two_he(example["Entry"]) for example in corpus_id_test]
test_labels = [int(example["Label"]) for example in corpus_id_test]

print(clf.score(X_test, test_labels))

# The first record of the OOD file maps a split name to its list of examples;
# report one accuracy per split. Note that `test_labels` is rebound here.
deltas = corpus_od_test_w_negs[0].keys()
for delta in deltas:
    test_set = [two_he(example["Entry"]) for example in corpus_od_test_w_negs[0][delta]]
    test_labels = [int(example["Label"]) for example in corpus_od_test_w_negs[0][delta]]
    print(delta, clf.score(test_set, test_labels))

0.7175
delta_0.2 0.6663333333333333
delta_0.45 0.6176666666666667
delta_0.65 0.59
delta_0.85 0.5863333333333334


# Reversal
Baseline: k-Neighbours

```
0.741
delta_0.2 0.67
delta_0.45 0.644
delta_0.65 0.6116666666666667
delta_0.85 0.5586666666666666
```

In [None]:
# Load the Reversal corpora. Each file is parsed line by line, one JSON document
# per line. The `with` blocks close every file handle deterministically instead
# of leaving it open until garbage collection.
with open("datasets/Reversal/corpus_reversal_id_4k.json", "r", encoding="utf-8") as fh:
    corpus_id = [json.loads(line) for line in fh]
with open("datasets/Reversal/corpus_reversal_id_4k_test.json", "r", encoding="utf-8") as fh:
    corpus_id_test = [json.loads(line) for line in fh]
with open("datasets/Reversal/corpus_reversal_ood_4k_test.json", "r", encoding="utf-8") as fh:
    corpus_od_test_w_negs = [json.loads(line) for line in fh]

# Substring -> digit used by two_he. "chtte" and "ltintprk" share the code 4.
map_str_to = {"#": 0, "%": 2, "chtte": 4, "gfx": 3, "ltintprk": 4, "¯\\_(ツ)_/¯": 5}


def two_he(s, max_len=4000, reverse=False):
    """Encode a symbol string as a fixed-length list of ints.

    Every key of ``map_str_to`` found in ``s`` is replaced by its digit, then
    the resulting characters are written left to right into a list pre-filled
    with the padding value ``-100``. A ``"0"`` character (produced by ``"#"``)
    stores no value: it leaves a gap of 21 padding slots before the next
    character is written.

    Args:
        s: Input string.
        max_len: Length of the returned list.
        reverse: If true, return the encoded list in reverse order.

    Returns:
        A list of ``max_len`` ints.

    Raises:
        IndexError: If the encoding (gaps included) needs more than
            ``max_len`` slots.
        ValueError: If a character left after replacement is not a digit.
    """
    encoded = [-100] * max_len

    text = s
    for token, code in map_str_to.items():
        text = text.replace(token, str(code))

    cursor = 0
    for ch in text:
        if ch == "0":
            # The writes store the padding value again; they are kept so that
            # running past max_len still raises IndexError.
            encoded[cursor] = -100
            for _ in range(20):
                cursor += 1
                encoded[cursor] = -100
        else:
            encoded[cursor] = int(ch)
        cursor += 1

    return encoded[::-1] if reverse else encoded

# Training split, augmented: every record contributes its encoded vector and
# the reversed version of that vector, both with the same label.
X = [two_he(example["Entry"]) for example in corpus_id]
X.extend(two_he(example["Entry"], reverse=True) for example in corpus_id)
labels = [int(example["Label"]) for example in corpus_id] * 2

# k-nearest-neighbours baseline with scikit-learn's default settings;
# fit() returns the fitted estimator itself.
clf = KNeighborsClassifier().fit(X, labels)

# Held-out split read from the "*_id_*_test" file (no reversal applied).
X_test = [two_he(example["Entry"]) for example in corpus_id_test]
test_labels = [int(example["Label"]) for example in corpus_id_test]

print(clf.score(X_test, test_labels))

# The first record of the OOD file maps a split name to its list of examples;
# report one accuracy per split. `X_test` and `test_labels` are rebound here.
deltas = corpus_od_test_w_negs[0].keys()
for delta in deltas:
    X_test = [two_he(example["Entry"]) for example in corpus_od_test_w_negs[0][delta]]
    test_labels = [int(example["Label"]) for example in corpus_od_test_w_negs[0][delta]]
    print(delta, clf.score(X_test, test_labels))

0.741
delta_0.2 0.67
delta_0.45 0.644
delta_0.65 0.6116666666666667
delta_0.85 0.5586666666666666
