In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# `userdata` (Colab's secret store accessor) is imported here and used by a later cell
# to read API tokens.
from google.colab import drive, userdata
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Work from the root of the mounted Drive so the clone is created there.
%cd /content/drive/MyDrive/
# Clone the project repository into ./preference-tracing.
# NOTE(review): re-running this cell when the directory already exists makes `git clone`
# report an error instead of updating the checkout -- confirm whether that is acceptable.
!git clone https://github.com/jiseshen/preference-tracing.git


/content/drive/MyDrive
Cloning into 'preference-tracing'...
remote: Enumerating objects: 127, done.[K
remote: Counting objects: 100% (127/127), done.[K
remote: Compressing objects: 100% (78/78), done.[K
remote: Total 127 (delta 60), reused 112 (delta 45), pack-reused 0 (from 0)[K
Receiving objects: 100% (127/127), 1.65 MiB | 4.20 MiB/s, done.
Resolving deltas: 100% (60/60), done.


In [None]:
# Enter the cloned repository; later cells open "profiles_dataset.json" and "log/..."
# via relative paths, which resolve against this directory.
%cd /content/drive/MyDrive/preference-tracing/

/content/drive/MyDrive/preference-tracing


In [None]:
import os

# Read each Colab secret exactly once and reuse the value, rather than querying the
# secret store again for every place the token is needed.
openai_api_key = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get("HF_TOKEN")

# Expose the credentials through environment variables for code that reads them from there.
os.environ["OPENAI_API_KEY"] = openai_api_key
# The Hugging Face token is exported under both variable names.
os.environ["HF_TOKEN"] = hf_token
os.environ["HUGGINGFACE_HUB_TOKEN"] = hf_token

# Imported only after the environment variables are set, preserving the original order.
import huggingface_hub
# Also register the token through the hub client's explicit login call.
huggingface_hub.login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
from preference_tracing import PreferenceTracing
import json
from preference_prompt import *
from tqdm import tqdm

In [None]:
# --- Experiment configuration (previously inline magic values) ---
EXPERIMENT_MODES = ["Trace", "CoT", "Hybrid"]  # one full run per mode
PROFILE_DATASET_PATH = "profiles_dataset.json"
RECORD_DIR = "log"
USER_RANGE = slice(10, 30)   # subset of users from the dataset that is evaluated
MAX_BEHAVIOR_STEPS = 50      # each user's behavior list is truncated to this many entries

# Creating the output directory does not depend on the mode, so do it once up front.
os.makedirs(RECORD_DIR, exist_ok=True)

for mode in EXPERIMENT_MODES:

    # A fresh tracer per mode; all modes share the same prompts and model settings.
    preference_tracing = PreferenceTracing(
        online_prompt=online_prompt,
        hypothesis_prompt=hypothesis_prompt,
        rejuvenate_prompt=rejuvenate_prompt,
        summary_prompt=summary_prompt,
        CoT_prompt=CoT_prompt,
        mode=mode,
        base_model="gpt-4o-mini",
        temperature=0.2,
        N=5,
        uncertainty_threshold=0.45,
    )

    # Reloaded for every mode on purpose: the loop below modifies the user records
    # in place, so each mode starts again from the data as stored on disk.
    with open(PROFILE_DATASET_PATH, "r") as f:
        data = json.load(f)

    # Per-user results for this mode, in the order the users are processed.
    reward_record = []
    uncertainty_record = []
    token_record = []

    for user in tqdm(data[USER_RANGE], desc=mode):
        user["behavior"] = user["behavior"][:MAX_BEHAVIOR_STEPS]
        preference_tracing.trace(user)

        # Collect the values the tracer exposes after processing this user.
        reward_record.append(preference_tracing.reward)
        uncertainty_record.append(preference_tracing.avg_uncertainty)
        token_record.append([preference_tracing.prompt_tokens, preference_tracing.completion_tokens])

        # Rewrite the record file after every user so that results gathered so far
        # survive an interruption. Consequently, an aborted run leaves a file that
        # covers only the users completed before the failure.
        with open(f"{RECORD_DIR}/{mode}_record.json", "w") as record_file:
            json.dump({
                "reward": reward_record,
                "uncertainty": uncertainty_record,
                "token": token_record,
            }, record_file)

 25%|██▌       | 5/20 [57:50<2:53:30, 694.05s/it]


IndexError: index 5 is out of bounds for axis 0 with size 5

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def _load_record(mode):
    """Return the parsed JSON record stored at ``log/<mode>_record.json``."""
    with open(f"log/{mode}_record.json", "r") as f:
        return json.load(f)


def _reward_matrix(record):
    """Stack the per-user reward lists of ``record`` into one array (users first).

    When every user has the same number of entries the array is built exactly as
    ``np.array(record["reward"])``. When the lengths differ, shorter lists are
    right-padded with NaN up to the longest one so that a per-timestep average can
    still be taken over the users that actually reached that timestep.
    The padded path assumes each per-user entry is a flat list of numbers.
    """
    rewards = record["reward"]
    lengths = {len(user_rewards) for user_rewards in rewards}
    if len(lengths) <= 1:
        return np.array(rewards)
    padded = np.full((len(rewards), max(lengths)), np.nan)
    for row, user_rewards in enumerate(rewards):
        padded[row, :len(user_rewards)] = user_rewards
    return padded


# Records written by the experiment cell, one file per mode.
Trace_record = _load_record("Trace")
CoT_record = _load_record("CoT")
Hybrid_record = _load_record("Hybrid")

cot_reward = _reward_matrix(CoT_record)
hybrid_reward = _reward_matrix(Hybrid_record)
trace_reward = _reward_matrix(Trace_record)

fig, ax = plt.subplots(figsize=(8, 5), dpi=150)

# Average over users (axis 0) to get one curve per mode; nanmean ignores the NaN
# padding of shorter trajectories and equals the plain mean when there is none.
ax.plot(np.nanmean(trace_reward, axis=0), label='Trace', linewidth=2, color='#1f77b4')
ax.plot(np.nanmean(cot_reward, axis=0), label='CoT', linewidth=2, color='#ff7f0e')
ax.plot(np.nanmean(hybrid_reward, axis=0), label='Hybrid', linewidth=2, color='#2ca02c')

ax.set_xlabel('Timestep', fontsize=12)
ax.set_ylabel('Reward', fontsize=12)
ax.set_title('Reward throughout Trajectory', fontsize=14)
ax.legend(fontsize=10)
ax.grid(True, linestyle='--', alpha=0.5)
fig.tight_layout()

plt.show()

In [None]:
def _mean_tokens_per_step(record, column):
    """Average token count per trajectory step for one mode.

    Args:
        record: Parsed record with a ``"token"`` list (one ``[prompt, completion]``
            entry per user) and a ``"reward"`` list (one sequence per user).
        column: 0 for prompt tokens, 1 for completion tokens.

    Returns:
        The mean over all users of that user's token count divided by the length
        of that user's reward sequence.
    """
    tokens = np.array([entry[column] for entry in record["token"]])
    steps = np.array([len(user_rewards) for user_rewards in record["reward"]], dtype=np.float32)
    # Pair each user's tokens with that user's own step count. A broadcast axis is
    # only added when the token entries are themselves sequences; adding it to a flat
    # array of per-user totals would divide every total by every user's step count.
    if tokens.ndim > 1:
        steps = steps.reshape((-1,) + (1,) * (tokens.ndim - 1))
    return (tokens / steps).mean()


cot_prompt_token = _mean_tokens_per_step(CoT_record, 0)
cot_response_token = _mean_tokens_per_step(CoT_record, 1)
hybrid_prompt_token = _mean_tokens_per_step(Hybrid_record, 0)
hybrid_response_token = _mean_tokens_per_step(Hybrid_record, 1)
trace_prompt_token = _mean_tokens_per_step(Trace_record, 0)
trace_response_token = _mean_tokens_per_step(Trace_record, 1)

categories = ["Trace", "CoT", "Hybrid"]

# Grouped bar chart: one group per mode, prompt and completion bars side by side.
x = np.arange(len(categories))
bar_width = 0.35
fig, ax = plt.subplots(figsize=(10, 6), dpi=150)

rects1 = ax.bar(x - bar_width/2, [trace_prompt_token, cot_prompt_token, hybrid_prompt_token], bar_width, label='Prompt', color='#1f77b4')
rects2 = ax.bar(x + bar_width/2, [trace_response_token, cot_response_token, hybrid_response_token], bar_width, label='Completion', color='#ff7f0e')

ax.set_ylabel("Token Usage")
ax.set_title("Token Usage of Different Settings")
ax.set_xticks(x)
ax.set_xticklabels(categories)

ax.legend()

plt.tight_layout()
plt.show()