# Try sending answers in batches directly to the LLM

In [None]:
import os
import pandas as pd

from transformers import pipeline
import torch

In [None]:
# Root names (no directory, no ".csv") of the cleaned reflection-question
# response files for modules M0 through M5.  Only the M0 file name uses the
# plural "Questions"; M1-M5 use the singular "Question".
froots = [
    f"INCLU1x_IF_Responses_-_ALL_RUNS_041924_M{module}_IF_Reflection_{noun}_cleaned"
    for module, noun in enumerate(["Questions"] + ["Question"] * 5)
]

In [None]:
# adapted from latent-scope 
class transformers_chat_provider():
    """Minimal chat wrapper around a Hugging Face ``text-generation`` pipeline.

    Attributes (all set in ``__init__``):
        name: model identifier handed to ``pipeline``.
        params: caller-supplied settings; stored but not read by this class.
        pipe: the text-generation pipeline built for ``name``.
        encoder: the pipeline's tokenizer, exposed so callers can count tokens.
    """

    def __init__(self, name, params):
        """Build the pipeline for model ``name`` in float16 with automatic device placement."""
        self.name = name
        self.params = params
        self.pipe = pipeline("text-generation", model=self.name, torch_dtype=torch.float16, device_map="auto")
        self.encoder = self.pipe.tokenizer

    def chat(self, messages, max_new_tokens=24):
        """Generate a reply to ``messages`` and return only the newly generated text.

        Args:
            messages: list of ``{"role": ..., "content": ...}`` dicts, rendered
                with the tokenizer's chat template.
            max_new_tokens: maximum number of tokens to generate.

        Returns:
            The model's reply with surrounding whitespace stripped.
        """
        prompt = self.pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        outputs = self.pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True)#, temperature=0.5, top_p=0.95) #top_k=50, 
        generated_text = outputs[0]["generated_text"]
        print("GENERATED TEXT", generated_text)

        # The pipeline output is expected to be the prompt followed by the
        # completion, so slice the prompt off.  Splitting on "<|assistant|>" and
        # taking element [1] returned an earlier assistant turn when `messages`
        # held chat history, and raised IndexError for chat templates that do
        # not use that marker.
        if generated_text.startswith(prompt):
            reply = generated_text[len(prompt):]
        else:
            # Fallback: keep whatever follows the last assistant marker.  When
            # the marker is absent, rpartition leaves the whole text in `reply`.
            _, _, reply = generated_text.rpartition("<|assistant|>")
        return reply.strip()
    
def get_LLM_input_max_index(model, input_list, max_tokens = 1048, start_index = 0, system_text = '', extra_tokens_per_item = 6):
    """Return the index of the last item that fits within a token budget.

    Starting at ``start_index``, items are added one by one; each costs its
    encoded length plus ``extra_tokens_per_item``.  The encoded length of
    ``system_text`` is charged up front.

    Args:
        model: object exposing ``encoder.encode(text)`` returning a sized sequence.
        input_list: the text items to be sent to the LLM.
        max_tokens: total token budget for ``system_text`` plus the items.
        start_index: index of the first item to include.
        system_text: fixed text whose tokens are counted before any item.
        extra_tokens_per_item: per-item allowance for tokens added around each item.

    Returns:
        The inclusive index of the last item that fits, i.e. the items to send
        are ``input_list[start_index:result + 1]``.  The result is
        ``start_index - 1`` when not even the first item fits, and the last
        index of ``input_list`` (``-1`` for an empty list) when the budget is
        never exceeded.
    """
    n_tokens = len(model.encoder.encode(system_text))
    index = -1  # keeps the final return defined when input_list is empty
    for index, item in enumerate(input_list):
        if index < start_index:
            continue
        # The item at start_index itself must be counted; skipping it
        # underestimated the prompt size by one whole item.
        n_tokens += len(model.encoder.encode(item)) + extra_tokens_per_item
        if n_tokens > max_tokens:
            return index - 1

    return index

In [None]:
# Text placed before the batch of items: tells the model what the list looks like.
instructions_before = (
    "Below is a list of items each starting with [item]. "
    "I will want you to summarize this list."
)
# Text placed after the batch of items: the actual summarization request.
instructions_after = (
    "That was the last item in the list.  "
    "Now summarize these items with a new list of 10 or less unique themes.  "
    "Do not repeat any item from above verbatim in your themes.  "
    "Each theme should be only one short sentence.  "
    "Only return the short one-sentence themes."
)

In [None]:
# Work with the first file root only; its CSV lives in ../../data/.
froot = froots[0]
df = pd.read_csv(f"../../data/{froot}.csv")
# Bare expression as the last line of the cell so the frame is displayed.
df

In [None]:
# Total token budget (prompt + generated tokens) and the share of it that is
# reserved for the model's answer.
max_tokens = 2048
max_new_tokens = 500

model = transformers_chat_provider('TinyLlama/TinyLlama-1.1B-Chat-v1.0', {"max_tokens": max_tokens})
# NOTE(review): ofile is defined here but nothing in this cell writes to it.
ofile = os.path.join('tables', froot + '_themes_directly_from_tinyllama.txt')

# model = transformers_chat_provider('HuggingFaceH4/zephyr-7b-beta', {"max_tokens": max_tokens})
# ofile = os.path.join('tables', froot + '_label_summary_zephyr_MC.txt')

responses = df['student_responses'].to_list()
start_index = 0
# Find how many responses fit in the prompt.  The two instruction strings are
# part of the prompt as well, so they are charged against the budget.
end_index = get_LLM_input_max_index(
    model,
    responses,
    start_index = start_index,
    max_tokens = max_tokens - max_new_tokens,
    system_text = instructions_before + instructions_after,
)
print(end_index)

# define the message to the LLM
# get_LLM_input_max_index returns the index of the last item that fits
# (inclusive), so the slice has to end at end_index + 1.
input_list = ''.join('[item] ' + item + '\n' for item in responses[start_index:end_index + 1])
messages=[
    {"role":"system", "content":instructions_before},
    {"role":"user", "content": input_list},
    {"role":"system", "content":instructions_after}
]

# run the model
# Generate at most the number of tokens that was reserved when sizing the
# prompt; asking for more than that could overrun the total budget.
response = model.chat(messages, max_new_tokens = max_new_tokens)


In [None]:
# Total number of responses in the loaded file; compare with the printed
# end_index to see how much of the file fit into a single prompt.
len(responses)

In [None]:
# estimate the number of tokens in each file from its word count (tokens ~= words * 4/3)
# from OpenAI: https://platform.openai.com/tokenizer "A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words)."
# the largest GPT model has 128K tokens context limit (so it won't be able to ingest everything)
for f in froots:
    # Use a separate name so the `df` loaded for `froot` is not overwritten.
    df_f = pd.read_csv("../../data/" + f + ".csv")
    # split() with no argument splits on any run of whitespace, so repeated
    # spaces do not produce empty "words" and newlines also separate words.
    all_answers = df_f['student_responses'].str.cat(sep=' ').split()
    print(f, len(all_answers)*4/3.)