In [6]:
import itertools
len(list(itertools.combinations(list(range(0,2)), 3)))

0

In [11]:
list(itertools.combinations(list(range(0,2)), 4))

[]

In [13]:
import numpy as np

In [26]:
np.random.randint(0, 2, 3)

array([1, 1, 0])

In [32]:
# wth itertools

list(itertools.product(*[list(range(0,2))]*4))

[(0, 0, 0, 0),
 (0, 0, 0, 1),
 (0, 0, 1, 0),
 (0, 0, 1, 1),
 (0, 1, 0, 0),
 (0, 1, 0, 1),
 (0, 1, 1, 0),
 (0, 1, 1, 1),
 (1, 0, 0, 0),
 (1, 0, 0, 1),
 (1, 0, 1, 0),
 (1, 0, 1, 1),
 (1, 1, 0, 0),
 (1, 1, 0, 1),
 (1, 1, 1, 0),
 (1, 1, 1, 1)]

In [37]:
rule_0 = lambda x: x[0] == 1 
rule_1 = lambda x: x[1] == 1
rule_2 = lambda x: x[2] == 1
rule_3 = lambda x: x[3] == 1

In [34]:
# get examples satisfying an iterable of rules
def get_examples(rules):
    return [x for x in itertools.product(*[list(range(0,2))]*4) if all([rule(x) for rule in rules])]

In [35]:
get_examples([rule_0, rule_1, rule_2])

[(1, 1, 1, 0), (1, 1, 1, 1)]

In [38]:
get_examples([rule_3])

[(0, 0, 0, 1),
 (0, 0, 1, 1),
 (0, 1, 0, 1),
 (0, 1, 1, 1),
 (1, 0, 0, 1),
 (1, 0, 1, 1),
 (1, 1, 0, 1),
 (1, 1, 1, 1)]

In [None]:
# neither the positives nor the negatives distribution should distinguish between the hidden rules

In [None]:
# In training:

# positives distribution is things satisfying rule 1
# negatives distribution is things not satisfying rule 1

# coincidentally, the positives distribution is also things satisfying rule 2
# negatives distribution is things not satisfying rule 2

# testing dataset:

# things satisfying rule 1 only. 
# If the model gets poor performance, it's learned something other than rule 1

In [39]:
# train

def get_examples(rules):
    return [x for x in itertools.product(*[list(range(0,2))]*4) if all([rule(x) for rule in rules])]

train = get_examples([rule_1, lambda x: not rule_2(x)])

In [40]:
train

[(0, 1, 0, 0), (0, 1, 0, 1), (1, 1, 0, 0), (1, 1, 0, 1)]

In [41]:
len(train)

4

In [42]:
test = get_examples([rule_1, rule_2])

In [43]:
test

[(0, 1, 1, 0), (0, 1, 1, 1), (1, 1, 1, 0), (1, 1, 1, 1)]

In [44]:
len(test)

4

In [45]:
set(train).intersection(set(test))

set()

In [46]:
# train needs to be ambiguous about rule 2

In [48]:
# train is (rule 1 and rule 2) or (not rule 1 and not rule 2)
train = get_examples([rule_1, rule_2]) + get_examples([lambda x: not rule_1(x), lambda x: not rule_2(x)])

# test is the full dataset complement train, so it's (rule 1 and not rule 2) or (not rule 1 and rule 2)
test = get_examples([rule_1, lambda x: not rule_2(x)]) + get_examples([lambda x: not rule_1(x), rule_2])

In [53]:
train

[(0, 1, 1, 0),
 (0, 1, 1, 1),
 (1, 1, 1, 0),
 (1, 1, 1, 1),
 (0, 0, 0, 0),
 (0, 0, 0, 1),
 (1, 0, 0, 0),
 (1, 0, 0, 1)]

In [49]:
train

[(0, 1, 1, 0),
 (0, 1, 1, 1),
 (1, 1, 1, 0),
 (1, 1, 1, 1),
 (0, 0, 0, 0),
 (0, 0, 0, 1),
 (1, 0, 0, 0),
 (1, 0, 0, 1)]

In [50]:
test

[(0, 1, 0, 0),
 (0, 1, 0, 1),
 (1, 1, 0, 0),
 (1, 1, 0, 1),
 (0, 0, 1, 0),
 (0, 0, 1, 1),
 (1, 0, 1, 0),
 (1, 0, 1, 1)]

In [52]:
from sklearn.linear_model import LogisticRegression 

In [None]:
clf = LogisticRegression(random_state=0).fit(train, [1]*len(train))

In [54]:
import pandas as pd

In [67]:
data = pd.DataFrame(
    data = list(itertools.product(*[list(range(0,2))]*4)),
    columns=['x1', 'x2', 'x3', 'x4'])

In [68]:
train = data.loc[data.apply(lambda x: (rule_1(x) and rule_2(x)) or (not rule_1(x) and not rule_2(x)), axis=1)]
test = data.loc[data.apply(lambda x: (rule_1(x) and not rule_2(x)) or (not rule_1(x) and rule_2(x)), axis=1)]

In [69]:
train["label"] = train.x2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train["label"] = train.x2


In [87]:
print(train.to_markdown(index=False))

|   x1 |   x2 |   x3 |   x4 |   label |
|-----:|-----:|-----:|-----:|--------:|
|    0 |    0 |    0 |    0 |       0 |
|    0 |    0 |    0 |    1 |       0 |
|    0 |    1 |    1 |    0 |       1 |
|    0 |    1 |    1 |    1 |       1 |
|    1 |    0 |    0 |    0 |       0 |
|    1 |    0 |    0 |    1 |       0 |
|    1 |    1 |    1 |    0 |       1 |
|    1 |    1 |    1 |    1 |       1 |


In [71]:
test["label"] = test.x2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test["label"] = test.x2


In [72]:
test

Unnamed: 0,x1,x2,x3,x4,label
2,0,0,1,0,0
3,0,0,1,1,0
4,0,1,0,0,1
5,0,1,0,1,1
10,1,0,1,0,0
11,1,0,1,1,0
12,1,1,0,0,1
13,1,1,0,1,1


In [73]:
# train

clf = LogisticRegression(random_state=0).fit(train[['x1', 'x2', 'x3', 'x4']], train.label)

In [74]:
# eval on train

clf.score(train[['x1', 'x2', 'x3', 'x4']], train.label)

1.0

In [75]:
# eval on test

clf.score(test[['x1', 'x2', 'x3', 'x4']], test.label)

0.5

In [76]:
# get the test predictions

label_test = clf.predict(test[['x1', 'x2', 'x3', 'x4']])

In [78]:
label_test == test.x2

2      True
3      True
4     False
5     False
10     True
11     True
12    False
13    False
Name: x2, dtype: bool

In [79]:
label_test == test.x3

2     False
3     False
4      True
5      True
10    False
11    False
12     True
13     True
Name: x3, dtype: bool

In [83]:
# this performance indicates that the logistic regression model has learned (x2 and x3), which is also correct.

In [84]:
# So we need to come up with a task where models can learn a rule but not the complete rule.

In [82]:
label_test

array([0, 0, 0, 0, 0, 0, 0, 0])

In [80]:
test

Unnamed: 0,x1,x2,x3,x4,label
2,0,0,1,0,0
3,0,0,1,1,0
4,0,1,0,0,1
5,0,1,0,1,1
10,1,0,1,0,0
11,1,0,1,1,0
12,1,1,0,0,1
13,1,1,0,1,1


In [81]:
train

Unnamed: 0,x1,x2,x3,x4,label
0,0,0,0,0,0
1,0,0,0,1,0
6,0,1,1,0,1
7,0,1,1,1,1
8,1,0,0,0,0
9,1,0,0,1,0
14,1,1,1,0,1
15,1,1,1,1,1


In [6]:
%load_ext autoreload
%autoreload 2

from owain_app.catalog import Catalog

cat = Catalog()
cat.load_task(rule_names=["rule0","rule1"],num_rules=4,notation_type="string_notation")

[32m2023-11-25 11:42:20.849[0m | [1mINFO    [0m | [36mowain_app.catalog[0m:[36mload_task[0m:[36m64[0m - [1mDataset loaded from /Users/gabe/notes/Career/job_applications/Astra_Fellowship/owain_app/data/tasks/n=4/string_notation/rule0_and_rule1[0m


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


{'train': [['0011', False],
  ['1100', True],
  ['0001', False],
  ['1110', True],
  ['1101', True],
  ['0010', False],
  ['1111', True]],
 'val': [['0000', False]],
 'test': [['0111', False],
  ['1011', False],
  ['1001', False],
  ['1010', False],
  ['0101', False],
  ['0100', False],
  ['0110', False],
  ['1000', False]]}

In [7]:
# from owain_app.schemas import BinaryString, Label
from typing import List, Tuple

EXAMPLE_TEMPlATE = "input: {input}; label: {label}"
INSTRUCTIONS = "Classify the unlabeled example from the labeled examples. Respond with a single binary digit indicating the label."

def make_prompt(train_examples: List[Tuple], test_example: str, example_template: str = EXAMPLE_TEMPlATE, instructions: str = INSTRUCTIONS):
    example_content = [example_template.format(input=x, label=y) for x,y in train_examples]
    example_content = "\n".join(example_content)
    test_content = example_template.format(input=test_example, label="")

    return instructions + "\n\nLabeled Examples:\n```\n" + example_content + "\n```\n\nUnlabeled Example:\n```\n" + test_content
print(make_prompt([("0000", "0"), ("0001", "1")], "0010"))


Classify the unlabeled example from the labeled examples. Respond with a single binary digit indicating the label.

Labeled Examples:
```
input: 0000; label: 0
input: 0001; label: 1
```

Unlabeled Example:
```
input: 0010; label: 


In [117]:
pd.json_normalize(data["prompt"])

Unnamed: 0,prompt,train_examples,test_example,rule_names,split,label
0,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",100,"[rule0, rule1]",test,
1,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",110,"[rule0, rule1]",test,
2,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1001,"[rule0, rule1]",test,
3,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1010,"[rule0, rule1]",test,
4,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",111,"[rule0, rule1]",test,
5,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",101,"[rule0, rule1]",test,
6,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1011,"[rule0, rule1]",test,
7,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",0,"[rule0, rule1]",val,False
8,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1000,"[rule0, rule1]",test,


In [8]:
from owain_app.catalog import Catalog
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import jsonlines

catalog = Catalog()


def get_feature_columns(predictions_df):
    columns = [
        f"x{i+1}" for i in range(0, len(predictions_df["test_example"].values[0]))
    ]
    return columns


def normalize_predictions(data):
    responses = pd.json_normalize(data["response"])
    choices = pd.json_normalize(responses["choices"].apply(lambda x: x[0]))
    predictions = (
        choices["message.content"].map({"True": "1", "False": "0"}).rename("prediction")
    )
    return predictions


def normalize_prompt(data):
    return pd.json_normalize(data["prompt"])


def get_train_correlations(predictions_df):
    "A sanity check to see which features are correlated with the label in the training set"
    train_df = pd.DataFrame(predictions_df["train_examples"][0])
    train_df_ = pd.DataFrame(
        train_df[0].apply(lambda x: list(x)).values.tolist(),
        columns=["x1", "x2", "x3", "x4"],
    )
    train_df_["label"] = train_df[1].map({True: "1", False: "0"})
    train_correlations = train_df_.corr()["label"].round(2).sort_values(ascending=False)

    return train_correlations

def compute_val_accuracy(predictions_df):
    val_accuracy_n = len(predictions_df[predictions_df.split == "val"])

    val_accuracy_score = accuracy_score(
        predictions_df[predictions_df.split == "val"]["label"],
        predictions_df[predictions_df.split == "val"]["message.content"],
    )
    return val_accuracy_score, val_accuracy_n

def compute_accuracy_and_learned_rules():
    # read the data
    data_path = catalog.data_path / "model_responses_icl/n=4/rule0_and_rule1/all.jsonl"
    with jsonlines.open(data_path) as reader:
        data = [obj for obj in reader]

    # convert to dataframe
    data = pd.DataFrame(data, columns=["input", "response", "prompt"])

    predictions = normalize_predictions(data)
    prompt = normalize_prompt(data)

    predictions_df = pd.concat([prompt, predictions], axis=1)
    predictions_df["label"] = predictions_df["label"].map(
        {True: "1", False: "0", None: np.nan}
    )
    
    train_correlations = get_train_correlations(predictions_df)

    features = pd.DataFrame(
        predictions_df["test_example"].apply(lambda x: list(x)).values.tolist(),
        columns=columns,
    )

    columns = get_feature_columns(predictions_df)

    predictions_df_cat = pd.concat([features, predictions_df["message.content"]], axis=1)

    # get correlations between features and predictions
    feature_correlations = predictions_df_cat.corr()["prediction"].round(2).sort_values(ascending=False)

    
    val_accuracy_score, val_accuracy_n = compute_val_accuracy(predictions_df)

    return {
        "val_accuracy_score": val_accuracy_score,
        "val_accuracy_n": val_accuracy_n,
        "feature_correlations": feature_correlations,
        "train_correlations": train_correlations,
    }

x1       1.00
x2       1.00
label    1.00
x3      -0.17
x4      -0.17
Name: label, dtype: float64

In [13]:
val_accuracy_score

1.0

In [14]:
predictions_df

Unnamed: 0,prompt,train_examples,test_example,rule_names,split,label,message.content
0,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",100,"[rule0, rule1]",test,,0
1,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",110,"[rule0, rule1]",test,,0
2,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1001,"[rule0, rule1]",test,,1
3,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1010,"[rule0, rule1]",test,,0
4,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",111,"[rule0, rule1]",test,,1
5,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",101,"[rule0, rule1]",test,,0
6,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1011,"[rule0, rule1]",test,,1
7,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",0,"[rule0, rule1]",val,0.0,0
8,Classify the unlabeled example from the labele...,"[[0011, False], [1100, True], [0001, False], [...",1000,"[rule0, rule1]",test,,0
