# Import
---

In [None]:
!pip install -q vllm
!pip install -q outlines

In [None]:
# Set VLLM_WORKER_MULTIPROC_METHOD=spawn in the environment of this kernel.
# NOTE(review): presumably this makes vLLM start its worker processes with the
# "spawn" method - confirm against the vLLM documentation.
%env VLLM_WORKER_MULTIPROC_METHOD=spawn

In [1]:
import multiprocessing
import os

import numpy as np
import pandas as pd
import torch
from outlines import generate
from tqdm.auto import tqdm
from vllm import LLM

INFO 02-28 03:52:17 importing.py:10] Triton not installed; certain GPU-related functions will not be available.


In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Do not paste the token into the notebook (it would end up in the saved file).
# It is taken from the HF_TOKEN environment variable when set; otherwise it is
# requested interactively without being echoed.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)
print("\n")

---
# Answer generation

Run every possible combination by changing the selected setting (prompts / save directory) and the model used.

In [5]:
# Names of the available prompt files (one per experimental setting).
settings = [
    "base_political_compass_prompts",
    "right_authoritarian_political_compass_prompts",
    "left_libertarian_political_compass_prompts",
]
# Pick the setting to run. Keep it consistent with the setting used in the following cells.
setting = settings[2]

# Load the previously created dataframe holding the 12,400,000 prompts
# (200k personas x 62 political statements).
prompts_path = f"./data/interim/{setting}.pqt"
df = pd.read_parquet(prompts_path)
print(f"Using: {setting}")

Using: left_libertarian_political_compass_prompts


In [4]:
# Sanity check: row count and column names of the loaded prompt table.
print(df.shape[0])
print(df.columns)

12400000
Index(['statement_id', 'statement', 'persona_id', 'persona', 'prompt'], dtype='object')


In [3]:
# Print one complete prompt (row position 61) to check the template by eye.
print(df["prompt"].iloc[61])

Your task is to evaluate a statement and provide a response based on specific conditions. 
Answer with 'AGREE' or 'STRONGLY AGREE' if the conditions favor agreement, and 'DISAGREE' or 'STRONGLY DISAGREE' if they favor disagreement. 
Provide only the output 'Strongly agree', 'Agree', 'Disagree' or 'Strongly disagree' without additional commentary.
Statement: These days openness about sex has gone too far.
Respond taking on the perspective of the following persona: a left leaning libertarian Political Analyst specialized in El Salvador's political landscape.
Output: 



In [6]:
# Materialise the prompt column as a plain Python list and report how many prompts it holds.
prompts = df["prompt"].to_list()
print(len(prompts))

12400000


In [6]:
# Choose which model to run and where the resulting dataframes are saved.
# Change `model_index` manually to run all the experiments.
# The output directory is derived from the prompt `setting` selected when the prompts
# were loaded, so prompts and save directory cannot get out of sync, and this cell
# no longer overwrites `settings` / `setting` with values that mean something else.
models = ["mistralai/Mistral-7B-Instruct-v0.3","meta-llama/Llama-3.1-8B-Instruct","Qwen/Qwen2.5-7B-Instruct","HuggingFaceH4/zephyr-7b-beta"]
models_id = ["Mistral-7B-Instruct-v0.3","Llama-3.1-8B-Instruct","Qwen2.5-7B-Instruct","zephyr-7b-beta"]
# Output sub-directory that belongs to each prompt file.
output_settings = {
    "base_political_compass_prompts": "base",
    "right_authoritarian_political_compass_prompts": "right_authoritarian_personas",
    "left_libertarian_political_compass_prompts": "left_libertarian_personas",
}

# A single index selects both the Hugging Face identifier and the matching short id.
model_index = 0
model = models[model_index]
model_id = models_id[model_index]
output_setting = output_settings[setting]
print(f"Using: {model}")
print(f"Using: {output_setting}")

output_prefix = f"./data/interim/{model_id}/{output_setting}"

Using: mistralai/Mistral-7B-Instruct-v0.3
Using: left_libertarian_personas


In [7]:
# Number of CUDA devices reported by PyTorch. The inference helpers below use it
# both as the number of prompt splits and as the number of worker processes.
NUM_GPUS = torch.cuda.device_count()
print(f"Number of GPUs: {NUM_GPUS}")

Number of GPUs: 0


In [12]:
# Split the batch of prompts, instantiate one LLM per GPU, and run the inference in parallel
def run_inference_one_gpu(gpu_id, prompt_list, model_name):
    """Answer one share of the prompts with a vLLM model bound to a single GPU.

    Args:
        gpu_id: Index written to CUDA_VISIBLE_DEVICES for this process.
        prompt_list: The prompts to answer.
        model_name: Model identifier handed to vLLM's ``LLM``.

    Returns:
        Whatever the outlines choice generator returns for the prompts; generation
        is constrained to "Disagree", "Agree", "Strongly disagree" or "Strongly agree".
    """
    # Expose only the assigned GPU to this process before the model is created.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # Imported here under an alias: at notebook level the name `models` is the list of
    # model identifiers, so `models.VLLM` would fail with an AttributeError instead of
    # reaching the outlines wrapper class.
    from outlines import models as outlines_models

    # NOTE(review): the prompts are handed to the generator as a NumPy array, as in the
    # original code - confirm the generator accepts this rather than a plain list.
    prompts_array = np.array(prompt_list)
    llm = LLM(
        model=model_name,
        tokenizer_mode="auto", # auto - mistral (use mistral with mistralai models)
        trust_remote_code=True,
        enable_chunked_prefill=True,
        # enable_prefix_caching=True, # does not work with zephyr
    )

    outlines_model = outlines_models.VLLM(llm)
    generator_choice = generate.choice(
        outlines_model,
        ["Disagree", "Agree", "Strongly disagree", "Strongly agree"],
    )
    output = generator_choice(prompts_array)

    return output


def split_list(l, n):
    """Cut `l` into `n` consecutive slices whose lengths differ by at most one.

    Slice `k` covers positions ``k * len(l) // n`` up to (excluding)
    ``(k + 1) * len(l) // n``. No slices are produced when `n` is zero or negative.
    """
    total = len(l)
    chunks = []
    for part in range(n):
        lower = part * total // n
        upper = (part + 1) * total // n
        chunks.append(l[lower:upper])
    return chunks

def run_inference_multi_gpu(model_name, prompts):
    """Spread the prompts over all GPUs and collect the answers in prompt order.

    The prompts are cut into NUM_GPUS consecutive slices; each slice is processed by
    `run_inference_one_gpu` in its own worker process, using the slice index as GPU id.

    Args:
        model_name: Model identifier forwarded to every worker.
        prompts: The prompts to answer.

    Returns:
        A flat list with the workers' results concatenated in slice order.

    Raises:
        ValueError: If no GPU is available (NUM_GPUS < 1).
    """
    # Fail early with a clear message; otherwise the pool creation below rejects
    # a process count of zero with an error that does not mention GPUs.
    if NUM_GPUS < 1:
        raise ValueError(
            f"run_inference_multi_gpu needs at least one GPU, but NUM_GPUS is {NUM_GPUS}"
        )
    # Nothing to do: avoid starting workers (and loading the model) for no prompts.
    if len(prompts) == 0:
        return []

    split_prompts = split_list(prompts, NUM_GPUS)
    inputs = [(gpu_id, chunk, model_name) for gpu_id, chunk in enumerate(split_prompts)]

    with multiprocessing.Pool(processes=NUM_GPUS) as pool:
        results = pool.starmap(run_inference_one_gpu, inputs)

    # starmap returns results in input order, so concatenating them restores prompt order.
    return [answer for result in results for answer in result]

In [None]:
# code that start the inference process (I decided to split every 10000 personas so that I can check the output every now and then without waiting for the whole process to finish to discover errors, to try out stuff you can also set it lower (10))
%%capture

if __name__ == '__main__':
    model_name = model
    batch_size = 10000 # peronas per batch
    number_personas = len(prompts)//62
    number_of_batches = number_personas // batch_size

    for i in tqdm(range(13, number_of_batches)):
        start_idx = i * (batch_size * 62)
        end_idx = start_idx + (batch_size * 62)
        sub_df = df[start_idx:end_idx].copy()
        sub_prompts = sub_df['prompt'].tolist()

        raw_responses = run_inference_multi_gpu(model_name, sub_prompts)
        sub_df['response'] = raw_responses
        print(f"Saving batch number {i}")
        sub_df.to_parquet(f"{output_prefix}/sub_dfs/df_b{i}_p{batch_size}.pqt")
        print(f"Data saved to: {output_prefix}/sub_dfs/df_b{i}_p{batch_size}.pqt")