In [None]:
!pip install bitsandbytes
!pip install -q datasets loralib sentencepiece
!pip install -q git+https://github.com/zphang/transformers@c3dc391
!pip install -q git+https://github.com/huggingface/peft.git

In [None]:
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

# Hub identifiers: the base LLaMA checkpoint and the LoRA adapter applied to it.
BASE_MODEL_ID = "decapoda-research/llama-7b-hf"
LORA_ADAPTER_ID = "tloen/alpaca-lora-7b"

tokenizer = LLaMATokenizer.from_pretrained(BASE_MODEL_ID)

# The base model is loaded with load_in_8bit=True and device_map="auto",
# then wrapped with the adapter weights.
model = PeftModel.from_pretrained(
    LLaMAForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        load_in_8bit=True,
        device_map="auto",
    ),
    LORA_ADAPTER_ID,
)

In [None]:
def generate_prompt(instruction, input=None):
    """Build the text prompt that is sent to the model.

    Args:
        instruction: Text placed under the ``### Instruction:`` heading.
        input: Optional extra context. When truthy it is placed under an
            ``### Input:`` heading and the preamble mentions the paired
            input; when falsy (``None``, ``""``) that section is omitted.

    Returns:
        The prompt string, which always ends with the ``### Response:``
        heading.
    """
    if input:
        sections = [
            "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.",
            f"### Instruction:\n{instruction}",
            f"### Input:\n{input}",
        ]
    else:
        sections = [
            "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
            f"### Instruction:\n{instruction}",
        ]
    sections.append("### Response:")
    # Consecutive sections are separated by exactly one blank line.
    return "\n\n".join(sections)

In [None]:
# Decoding settings passed to model.generate() by evaluate().
# NOTE(review): temperature and top_p are sampling parameters, and do_sample
# is not set here — confirm whether they have any effect next to num_beams=4.
generation_config = GenerationConfig(temperature=0.1, top_p=0.75, num_beams=4)

def evaluate(instruction, input=None):
    """Generate the model's response to an instruction.

    Args:
        instruction: Task text forwarded to ``generate_prompt``.
        input: Optional extra context forwarded to ``generate_prompt``.

    Returns:
        The decoded text that follows the first ``### Response:`` marker
        (up to the next marker, if the decoded output contains another one),
        with surrounding whitespace stripped.
    """
    prompt = generate_prompt(instruction, input)
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].cuda()
    # Per-step scores were requested before but never read, so only the
    # generated token sequences are asked for now.
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        max_new_tokens=256,
    )
    # Only the first returned sequence is used (the previous loop returned
    # on its first iteration as well).
    decoded = tokenizer.decode(generation_output.sequences[0])
    # The decoded text is split on the marker and the piece right after its
    # first occurrence is taken as the response.
    return decoded.split("### Response:")[1].strip()

In [None]:
# Sample description used to try the query-generation prompt on one title.
des = (
    "Before he was Sherlock’s rival, Moriarty fought against the unfair class caste system in London "
    "by making sure corrupt nobility got their comeuppance. But even the best-intentioned plans can "
    "spin out of control—will Moriarty’s dream of a more just and equal world turn him into a hero…or a monster?"
)
# Ask the model for five candidate queries describing this title.
allq = evaluate(
    "give me 5 natural language query which don't be too specific "
    "and not mention about manga name and character that will return "
    f"a manga with the following description :\n{des}"
)

In [None]:
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive, forcing a remount if it is already mounted.
drive.mount("/content/drive", force_remount=True)

In [None]:
# Load the working table from the mounted Drive folder.
df = pd.read_csv(
    "/content/drive/MyDrive/aib/df.csv",
)

In [None]:
# Spot check: display the "q" value stored for row label 399.
df.loc[399, "q"]

In [None]:
# Generate one search query per title for a fixed slice of row labels and
# store it in the "q" column. Rows whose title contains one of the tags below
# are skipped. The last tag has no closing parenthesis, exactly as in the
# original check.
SKIPPED_TITLE_TAGS = ("(Light Novel)", "(Novel)", "(Webcomic)", "(Doujinshi")

for i in range(5100, 5200):
    des = df.loc[i, "description"]
    name_s = df.loc[i, "title"]
    # Empty CSV cells are read back as float NaN. A non-string title would make
    # the substring test below raise TypeError, and a non-string description
    # would reach the model as the literal text "nan", so such rows are skipped.
    if not isinstance(name_s, str) or not isinstance(des, str):
        print(i, name_s, "skipped (missing title or description).")
        continue
    if any(tag in name_s for tag in SKIPPED_TITLE_TAGS):
        continue
    allq = evaluate(f"give me 1 natural language query which don't be too specific and not mention about manga name and character that will return a manga with the following description :\n{des}")
    df.loc[i, "q"] = allq
    print(i, name_s, "done.")

In [None]:
# Write the table back to the Drive file it was loaded from, without the index column.
df.to_csv(
    "/content/drive/MyDrive/aib/df.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# Spot check: display the "q" value next to the title for row label 610.
df.loc[610, "q"], df.loc[610, "title"]