In [4]:
from typing import List

def load_prompts_from_file(filepath: str) -> List[str]:
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        raise FileNotFoundError(f"File not found: {filepath}")
    except OSError as e:
        raise OSError(f"Error reading file {filepath}: {e}")


In [8]:
load_prompts_from_file("/data1/wuyinjun/semantic_cache_dataset/dataset/descriptions_train.txt")

["The Travel Book: A Journey Through Every Country in the World (Lonely Planet) Review A reference guide to the world, with a bite-sized breakdown on each of the world's countries, Lonely Planet's The Travel Book is a bookshelf essential for gourmet travellers and the just-published 2016 edition is no exception. Slick photography and pithy summaries of what to see and do (as well as eat and drink) makes it an ideal starting point for trip-planning. Did you know that Angola has, historically, been one of the world's biggest coffee producers, or that if you drink tea around a Tuareg campfire in Algeria you'll need to wait for it to be poured three times? You will. * Olive Magazine * The hottest coffee-table travel tome. * The Scotsman *",
 'OXO Good Grips Easy-Release Strawberry Huller and Tomato Corer 11111900 Features: -Press button to release leaves and hull. -No wasted fruit. Product Type: -Slicer and cutter. Finish: -Black. Base Material: -Stainless steel. Handle Material: -Plastic.

In [5]:
from RL4COTrainer import ResumeFriendlyREINFORCE
from embedding_model import EmbeddingModel
from MaxSimGenerator import MaxSimGenerator
from MaxSimEnv import MaxSimEnv
from AdaptedPointerNetworkPolicy import AdaptedPointerNetworkPolicy
import torch, json

ckpt_path = "/data2/ali/checkpoints_words/epoch=4-step=270.ckpt"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Rebuild the minimal objects required by the checkpointed module
embedding_model = EmbeddingModel(device=device)

dummy_pairs = [
  {"sentence_1": "hello world", "sentence_2": "hi there", "correct": 0}
]

gen = MaxSimGenerator(pairs=dummy_pairs, max_len=512, embedding_model=embedding_model, seed=123)
env = MaxSimEnv(generator=gen, max_segments=8, embedding_model=embedding_model, device=device)
policy = AdaptedPointerNetworkPolicy(env, embedding_dim=768, hidden_dim=768, max_segments=8)

model = ResumeFriendlyREINFORCE.load_from_checkpoint(
    ckpt_path,
    env=env,
    policy=policy,
    baseline="rollout",
    train_data_size=2560,
    val_data_size=2560,
    batch_size=24,
    dataloader_num_workers=0,
    optimizer_kwargs={"lr": 1e-4},
    strict=False,
).to(device).eval()

[INFO] Loading model BAAI/bge-base-en-v1.5 from https://hf-mirror.com
[DEVICE] EmbeddingModel moved to cuda:0
[GEN] bs=1 device=cuda:0 lm_device=cuda:0 max_len=512


/data1/conda_envs/RLSemanticCaching/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['baseline.baseline.policy.V', 'baseline.baseline.policy.decoder_start_input', 'baseline.baseline.policy.env.score_weights_raw', 'baseline.baseline.policy.encoder_layers.0.mha_ab.in_proj_weight', 'baseline.baseline.policy.encoder_layers.0.mha_ab.in_proj_bias', 'baseline.baseline.policy.encoder_layers.0.mha_ab.out_proj.weight', 'baseline.baseline.policy.encoder_layers.0.mha_ab.out_proj.bias', 'baseline.baseline.policy.encoder_layers.0.mha_ba.in_proj_weight', 'baseline.baseline.policy.encoder_layers.0.mha_ba.in_proj_bias', 'baseline.baseline.policy.encoder_layers.0.mha_ba.out_proj.weight', 'baseline.baseline.policy.encoder_layers.0.mha_ba.out_proj.bias', 'baseline.baseline.policy.encoder_layers.0.norm_ab.weight', 'baseline.baseline.policy.encoder_layers.0.norm_ab.bias', 'baseline.baseline.policy.encoder_layers.0.norm

TypeError: ResumeFriendlyREINFORCE.setup() missing 1 required positional argument: 'stage'

In [6]:
import argparse
import json
from collections import Counter
from typing import Any, Dict, List, Optional, Tuple

def load_json(path: str) -> Any:
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

data = load_json("/data1/wuyinjun/semantic_cache_dataset/dataset/semantic_prompt_cache_benchmark2500.json")

In [7]:
data

[{'id': 'ecommerce_16845',
  'task': 'Product classification',
  'output_format': "Answer with 'Books', 'Electronics', 'Household', or 'Clothing & Accessories'",
  'sentence': 'INOVERA (LABEL) Double Layer Soap Dish with Suction Cup(Multicolour) Description   Soap Dish Holder with its Super Strong Suction makes it an elegant kitchen and bathroom item to have in your household. Its design serves a dual purpose - put soap on the soap dish and hang towels, napkins and other toiletries on the plastic hook. It is very easy to install without the need for drills or glue. Simply follow the directions on the package cover and apply it on bathroom, kitchen, washroom walls with ease. Suction cup may be used on ceramic tiles, glass surfaces, refrigerators .  Product Features   @ Super Strong Suction Attaches to Ceramic Tiles, Glass, Refrigerators .  @ Put Soap on the Soap Dish and Hand Towels, Napkins, Toiletries on the Plastic Hook. @ Very Easy To Install Without Drills or Glue. Follow The Direc