In [None]:
from tqdm import tqdm
import pandas as pd, numpy as np
import os, time, pickle

# Switch to the project root so the relative 'models/' and 'personas/' paths used below resolve.
# NOTE(review): hard-coded absolute path — presumably the work dir of a Jupyter container image;
# adjust (or make configurable) when running elsewhere.
os.chdir('/home/jovyan/work/')

In [None]:
import warnings
# Silence two warning categories for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): presumably hides pandas' fragmentation warning triggered by adding
# result columns one at a time further down — confirm.
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [None]:
from llama_cpp import Llama
import outlines
import gc

In [None]:
def get_prediction(prompt, generator, verbose = False):
    """Run ``generator`` on a single prompt and hand back its answer.

    Args:
        prompt: Prompt passed unchanged to ``generator``.
        generator: Callable invoked as ``generator(prompt)``.
        verbose: When equal to ``True``, print the prompt followed by the answer.

    Returns:
        Whatever ``generator(prompt)`` returns.
    """
    prediction = generator(prompt)
    if verbose == True:
        # Echo the input first, then the model output.
        for shown in (prompt, prediction):
            print(shown)
    return prediction

def load_llm(model_, choices):
    """Load a GGUF model and wrap it in a generator restricted to ``choices``.

    Args:
        model_: ``(name, filename)`` pair; only ``model_[1]`` is used, resolved
            under ``models/`` by ``load_model``.
        choices: List of strings the generator is allowed to answer with.

    Returns:
        The object produced by ``outlines.generate.choice``; it is called as
        ``generator(prompt)`` elsewhere in this notebook.
    """
    # Reuse load_model so the llama.cpp settings live in exactly one place
    # instead of being duplicated here.
    llm = load_model(model_)
    model = outlines.models.LlamaCpp(llm)
    return outlines.generate.choice(model, choices)

def load_model(model_):
    """Instantiate the llama.cpp model stored at ``models/<model_[1]>``.

    Args:
        model_: Indexable whose second element is the model file name.

    Returns:
        The constructed ``Llama`` instance.
    """
    llama_settings = dict(
        model_path = 'models/'+model_[1],
        # Per the llama-cpp-python API reference, -1 offloads all layers to the GPU.
        n_gpu_layers = -1,
        logits_all = False,
        verbose = False,
        # temp = 0
    )
    return Llama(**llama_settings)

def annotate_data(prompts, generator):
    """Annotate each prompt in order and return the answers as a list.

    Args:
        prompts: Iterable of prompts.
        generator: Callable forwarded to ``get_prediction`` for every prompt.

    Returns:
        List with one answer per prompt, in input order.
    """
    return [get_prediction(single_prompt, generator) for single_prompt in prompts]

In [None]:
# Model handle: (short name, GGUF file name). Despite its name, qwen2_dir is a
# file name; the loaders prepend 'models/' to it.
qwen2_name, qwen2_dir = 'qwen2', 'Qwen2-7B-Instruct-F16.gguf'
qwen2_ = (qwen2_name, qwen2_dir)

# Closed answer sets for the constrained generators.
choices_binary = ['TRUE', 'FALSE']
choices_scale = [str(level) for level in range(1, 6)]

In [None]:
# Load the model once; this TRUE/FALSE-constrained generator is shared by run_params
# and by the bracket annotation loop further down.
generator_binary = load_llm(qwen2_, choices_binary)

In [None]:
# qwen_dir = 'ggml-qwen1.6-32-model-Q4_K_M.gguf'
# qwen_name = 'qwen1.6'
# qwen_ = (qwen_name, qwen_dir)

# choices_binary = ['TRUE', 'FALSE']
# choices_scale = ['1', '2', '3', '4', '5']

In [None]:
# generator_binary = load_llm(qwen_, choices_binary)

In [None]:
def prompt_qwen_persona_binary(persona, post):
    """Build the TRUE/FALSE toxicity prompt that asks the model to adopt ``persona``.

    Args:
        persona: Persona description inserted into the instruction.
        post: Text of the post to annotate.

    Returns:
        The full prompt string, ending in ``Response:``.
    """
    instruction = (
        "Your task is to annotate the post shown below for toxicity, "
        f"taking on the perspective of the following persona: {persona}. "
        "Respond with 'TRUE' if this post seems toxic/hateful/disrespectful to you "
        "and 'FALSE' if this post does not seem toxic/hateful/disrespectful to you. "
        "Respond with 'TRUE' or 'FALSE' only, "
        "do not add any other explanations or descriptions."
    )
    return f"{instruction}\nPost: {post}\nResponse:"

def prompt_qwen_nopersona_binary(post):
    """Build the TRUE/FALSE toxicity prompt without any persona.

    Args:
        post: Text of the post to annotate.

    Returns:
        The full prompt string, ending in ``Response:``.
    """
    instruction = (
        "Your task is to annotate the post shown below for toxicity. "
        "Respond with 'TRUE' if this post seems toxic/hateful/disrespectful to you "
        "and 'FALSE' if this post does not seem toxic/hateful/disrespectful to you. "
        "Respond with 'TRUE' or 'FALSE' only, "
        "do not add any other explanations or descriptions."
    )
    return f"{instruction}\nPost: {post}\nResponse:"


def prompt_qwen_persona_scale(persona, post):
    """Build the 1-to-5 toxicity rating prompt that asks the model to adopt ``persona``.

    Args:
        persona: Persona description inserted into the instruction.
        post: Text of the post to annotate.

    Returns:
        The full prompt string, ending in ``Response:``.
    """
    instruction = (
        "Your task is to annotate the post shown below for toxicity, "
        f"taking on the perspective of the following persona: {persona}. "
        "Answer the following question on a scale from 1 to 5, "
        "where 1 stands for 'Not at all' and 5 stands for 'Very much so'. "
        "How toxic/hateful/disrespectful or offensive does this post seem to you? "
        "Respond with an integer from 1 to 5 only, "
        "do not add any other explanations or descriptions."
    )
    return f"{instruction}\nPost: {post}\nResponse:"

def prompt_qwen_nopersona_scale(post):
    """Build the 1-to-5 toxicity rating prompt without any persona.

    Args:
        post: Text of the post to annotate.

    Returns:
        The full prompt string, ending in ``Response:``.
    """
    instruction = (
        "Your task is to annotate the post shown below for toxicity. "
        "Answer the following question on a scale from 1 to 5, "
        "where 1 stands for 'Not at all' and 5 stands for 'Very much so'. "
        "How toxic/hateful/disrespectful or offensive does this post seem to you? "
        "Respond with an integer from 1 to 5 only, "
        "do not add any other explanations or descriptions."
    )
    return f"{instruction}\nPost: {post}\nResponse:"

### 1000 persona sample annotations on large dataset

In [None]:
# One entry per annotation configuration. The batch loop below passes each entry's
# values to run_llm() and overwrites 'sample_personas' with the current batch.
run_params = {
    'personal_you': {
        'sample_personas': None,  # placeholder; set per batch by the loop
        'n': None,  # None selects the persona branch of run_llm
        'prompt_func': prompt_qwen_persona_binary,
        'generator': generator_binary,
        'out_dir': 'qwendata_llm_1_2',
        'out_file': '1000_persona_you'
    },
    'unpersonal_you': {
        'sample_personas': None,  # placeholder; set per batch by the loop
        # Any non-None value selects the persona-free branch of run_llm; the number
        # itself is not read there (the run count comes from len(sample_personas)).
        'n': 50,
        'prompt_func': prompt_qwen_nopersona_binary,
        'generator': generator_binary,
        'out_dir': 'qwendata_llm_1_2',
        'out_file': '1000_nopersona_you'
    }
}

In [None]:
def run_llm(pd_, pd_restart, sample_personas, n, prompt_func, generator, out_dir, out_file):
    """Annotate every post once per row of ``sample_personas`` and pickle the result.

    Two modes, selected by ``n``:

    * ``n is None`` (persona run): ``prompt_func(persona, text)`` is called for each
      persona and the answers are stored in a ``persona_<personaId>`` column.
    * otherwise (persona-free run): ``prompt_func(text)`` is used and one
      ``run_<k>`` column is added per row of ``sample_personas``, numbered after the
      highest ``run_<k>`` column already present. Only ``len(sample_personas)``
      matters in this mode; the numeric value of ``n`` is not used.

    Args:
        pd_: DataFrame with a ``text`` column; used when there is nothing to resume.
        pd_restart: Previously saved results to extend. Any non-DataFrame value
            (e.g. ``None``) means "start from ``pd_``".
        sample_personas: DataFrame with ``persona`` and ``personaId`` columns.
        n: Mode flag, see above.
        prompt_func: Prompt builder matching the selected mode.
        generator: Callable handed to ``annotate_data``.
        out_dir: Sub-directory of ``personas/`` that receives the output.
        out_file: Output file name without the ``.pkl`` suffix.

    Returns:
        The extended results DataFrame, which is also written to
        ``personas/<out_dir>/<out_file>.pkl``.
    """
    base = pd_restart if isinstance(pd_restart, pd.DataFrame) else pd_
    pd_results = base.copy()
    texts = list(pd_results['text'])

    if n is None:  # persona run
        persona_rows = zip(sample_personas['persona'].values, sample_personas['personaId'].values)
        for persona, persona_ix in persona_rows:
            list_prompts = [prompt_func(persona, text) for text in texts]
            pd_results[f'persona_{persona_ix}'] = annotate_data(list_prompts, generator)
    else:  # persona-free run
        all_prompts = [prompt_func(text) for text in texts]
        # Continue numbering after the highest existing run_<k> column. Scanning the
        # column names avoids depending on how many columns the input frame has.
        run_ids = []
        for col in pd_results.columns:
            prefix, _, suffix = str(col).partition('_')
            if prefix == 'run' and suffix.isdigit():
                run_ids.append(int(suffix))
        adder = max(run_ids, default=0)
        for i in range(len(sample_personas)):
            pd_results[f'run_{i+1+adder}'] = annotate_data(all_prompts, generator)

    # makedirs also creates a missing 'personas' parent and tolerates an existing dir.
    target_dir = os.path.join('personas', out_dir)
    os.makedirs(target_dir, exist_ok=True)
    pd_results.to_pickle(os.path.join(target_dir, out_file+'.pkl'))

    return pd_results

In [None]:
# Posts to annotate; run_llm reads the 'text' column of this frame.
pd_data = pd.read_pickle('personas/data_ext/lscale_majVote.pkl')
# Persona sample; the batch loop slices it row-wise and run_llm reads its
# 'persona' and 'personaId' columns.
pd_personas = pd.read_pickle('personas/data_ext/sample_1000_personas.pkl')

In [None]:
n_batches = 100
batch_size = 10
start_time = time.time()

for i in range(n_batches):
    for k,v in run_params.items():
        
        if i == 0:
            pd_restart = None
        else:
            pd_restart = pd.read_pickle(os.path.join('personas',v['out_dir'],v['out_file']+'.pkl'))
            
        v['sample_personas'] = pd_personas.iloc[(i*batch_size):(i+1)*batch_size,:]
        
        %time pd_results = run_llm(pd_data, pd_restart, v['sample_personas'], v['n'], v['prompt_func'], v['generator'], v['out_dir'], v['out_file'])
        with open('personas/'+v['out_dir']+'/monitor.txt', 'a') as f:
            f.write(f'done: {batch_size*(i+1)}, to-do: {1000-batch_size*(i+1)}, time elapsed: {np.round((time.time()-start_time)/(60*60),4)}h, eta: {(np.round((time.time()-start_time)/(60*60),4)/(batch_size*(i+1)))*(1000-batch_size*(i+1))}h\n')

### annotations for performance bracket personas

In [None]:
# Same file as loaded for the batch runs above — presumably reloaded so this
# section can be run without executing the previous one.
pd_data = pd.read_pickle('personas/data_ext/lscale_majVote.pkl')

In [None]:
# NOTE(review): rebinds pd_personas, which held the persona sample used by the batch
# loop above; re-running that loop after this cell would slice this frame instead.
pd_personas = pd.read_pickle('personas/data_ext/pd_personas_cleaned.pkl')

pd_brackets = pd.read_pickle('personas/qwen_study_1_2/performance_brackets.pkl')
# Cast the merge key to int — presumably to match the dtype of pd_personas['personaId'].
pd_brackets = pd_brackets.astype({'personaId': int})

In [None]:
# Attach the pd_personas columns to each bracket row (the annotation loop below reads
# 'persona'); the left join keeps every bracket row.
# NOTE(review): this overwrites pd_brackets, so re-running the cell merges a second
# time — reload pd_brackets first.
pd_brackets = pd_brackets.merge(pd_personas, on='personaId', how='left')

In [None]:
# Show the first two rows of the merged frame as a quick visual check.
pd_brackets.head(2)

In [None]:
# Layout the indexing below relies on: pd_brackets holds N_BRACKETS consecutive
# groups of PERSONAS_PER_BRACKET rows each.
N_BRACKETS = 3
PERSONAS_PER_BRACKET = 30
RUNS_PER_PERSONA = 30
n_personas_total = N_BRACKETS * PERSONAS_PER_BRACKET

bracket_results = {}
start_time = time.time()

for i in range(PERSONAS_PER_BRACKET):
    for m in range(N_BRACKETS):
        # Visit the i-th persona of every bracket in turn (rows i, i+30, i+60 with the
        # defaults), selected by position.
        row_pos = i + PERSONAS_PER_BRACKET*m
        pid = pd_brackets['personaId'].iloc[row_pos]
        persona = pd_brackets['persona'].iloc[row_pos]

        prompts = [prompt_qwen_persona_binary(persona, post) for post in pd_data['text']]
        annotations = pd_data.copy()
        # Repeat the identical prompts; each repetition gets its own run_<r> column.
        for r in range(RUNS_PER_PERSONA):
            annotations[f'run_{r}'] = annotate_data(prompts, generator_binary)
        bracket_results[pid] = annotations

        # Checkpoint after every persona so a crash loses at most one persona's work.
        with open('personas/qwen_study_1_2/performance_brackets_results.pkl', 'wb') as f:
            pickle.dump(bracket_results, f)

        # Progress is counted in finished personas. The ETA extrapolates linearly and
        # uses the same count in numerator and denominator.
        personas_done = i*N_BRACKETS + m + 1
        personas_todo = n_personas_total - personas_done
        elapsed_h = np.round((time.time()-start_time)/(60*60),4)
        eta_h = (elapsed_h/personas_done)*personas_todo
        with open('personas/qwen_study_1_2/bracket_monitor.txt', 'a') as f:
            f.write(f'done: {personas_done}, to-do: {personas_todo}, time elapsed: {elapsed_h}h, eta: {eta_h}h\n')