In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to source files on disk are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append('..')

import torch
import copy
import numpy as np
import matplotlib.pyplot as plt
from src import models, data
from src.metrics import recall
from src.attributelens.attributelens import Attribute_Lens
import src.attributelens.utils as lens_utils
from src.operators import JacobianIclMeanEstimator
import plotly.graph_objects as go


In [None]:
# Load the model wrapper once; every later cell reuses `mt`.
device = "cuda:0"
mt = models.load_model("gptj", device=device)

# Quick sanity report on how the model was loaded.
print(
    f"dtype: {mt.model.dtype}",
    f"device: {mt.model.device}",
    f"memory: {mt.model.get_memory_footprint()}",
    sep=", ",
)

In [None]:
# Baseline view: apply the attribute lens to the prompt with no relation
# operator, then plot the result and export the figure.
lens = Attribute_Lens(mt=mt, top_k=10)
att_info = lens.apply_attribute_lens(
    relation_operator=None,
    prompt=" Bill Bradley was a",
)
print("prediction:", att_info["nextwords"][-1])

p = lens_utils.visualize_attribute_lens(
    att_info,
    must_have_layers=[],
    layer_skip=3,
)
p.write_image("bill_bradley_lens.pdf")

# Leave the figure as the last expression so the notebook renders it.
p

In [None]:
# Load all relations and list their names, one per line.
dataset = data.load_dataset()
print("\n".join(relation.name for relation in dataset))

In [None]:
# Pick the "plays pro sport" relation out of the loaded dataset.
datums = [rel for rel in dataset if rel.name == "plays pro sport"][0]
print(len(datums.samples))

# Draw 5 distinct sample indices; the fixed seed keeps the draw repeatable.
np.random.seed(4)
indices = np.random.choice(len(datums.samples), size=5, replace=False)
samples = [datums.samples[idx] for idx in indices]

# Rebuild the relation from a deep copy of its fields, swapping in only the
# selected samples.
training_samples = data.Relation(
    **{**copy.deepcopy(vars(datums)), "samples": samples}
)
print(training_samples.samples)

mean_estimator = JacobianIclMeanEstimator(
    mt=mt,
    h_layer=15,
    # so that the bias doesn't knock out the prediction too much in the
    # direction of training examples
    bias_scale_factor=0.5,
)

# Calling the estimator on the selected samples yields the relation operator
# consumed by the lens cell that follows.
operator = mean_estimator(training_samples)

In [None]:
# Same prompt as the baseline, now read through the relation operator built
# from the "plays pro sport" samples.
lens = Attribute_Lens(mt=mt, top_k=10)
att_info = lens.apply_attribute_lens(
    relation_operator=operator,
    prompt=" Bill Bradley was a",
)
print("prediction:", att_info["nextwords"][-1])

p = lens_utils.visualize_attribute_lens(
    att_info,
    colorscale="greens",
    must_have_layers=[],
    layer_skip=3,
)
p.write_image("bill_bradley_sport.pdf")

# Leave the figure as the last expression so the notebook renders it.
p

In [None]:
# Build a second relation operator, this time from the
# "person went to university" relation.
datums = [d for d in dataset if d.name == "person went to university"][0]
mean_estimator = JacobianIclMeanEstimator(
    mt=mt,
    h_layer=15,
    # so that the bias doesn't knock out the prediction too much in the
    # direction of training examples
    bias_scale_factor=0.5,
)

# Draw 5 distinct sample indices; the fixed seed keeps the draw repeatable.
np.random.seed(8)
indices = np.random.choice(range(len(datums.samples)), 5, replace=False)

samples = [datums.samples[i] for i in indices]

# Rebuild the relation from a deep copy of its fields, swapping in only the
# selected samples.
training_samples = copy.deepcopy(datums.__dict__)
training_samples["samples"] = samples
training_samples = data.Relation(**training_samples)

# Print the chosen examples. A bare expression in the middle of a cell is
# evaluated and discarded, so it would display nothing here.
print(training_samples.samples)
operator2 = mean_estimator(training_samples)


In [None]:
# Same prompt again, read through the relation operator built from the
# "person went to university" samples.
lens = Attribute_Lens(mt=mt, top_k=10)
att_info = lens.apply_attribute_lens(
    relation_operator=operator2,
    prompt=" Bill Bradley was a",
)
print("prediction:", att_info["nextwords"][-1])

p = lens_utils.visualize_attribute_lens(
    att_info,
    colorscale="oranges",
    must_have_layers=[],
    layer_skip=3,
)
p.write_image("bill_bradley_school.pdf")

# Leave the figure as the last expression so the notebook renders it.
p