In [3]:
import requests
import os
import re
import numpy as np
import pandas as pd
import pickle
import time
import logging
from retry import retry
# Configure the root logger with the logging module's defaults.
logging.basicConfig()

# Request pacing: delay_full is the number of seconds to wait between two
# requests so that no more than rate_limit_per_minute requests are sent per
# minute (60 s / requests-per-minute).
rate_limit_per_minute = 10000.0
delay_full = 60.0 / rate_limit_per_minute

## Define local LLM API calls

In [4]:
# Address of the locally hosted text-generation server. The API is served
# over plain http (no ssl), hence the explicit http:// scheme below.
HOST = 'localhost:5000'
URI = 'http://' + HOST + '/api/v1/generate'

@retry(delay=5)
def run(prompt, verbose=0, slow_down=0.001):
    """Send one prompt to the generation endpoint and return the HTTP response.

    Parameters
    ----------
    prompt : str
        Text placed in the request's 'prompt' field.
    verbose : int, optional
        When equal to 1, print the prompt followed by the generated text.
    slow_down : float, optional
        Seconds to sleep after the request before returning.

    Returns
    -------
    requests.Response
        The response of the POST request. Callers read the generated text via
        ``response.json()['results'][0]['text']``.

    Notes
    -----
    HTTP error replies (4xx/5xx) raise inside this function so that the
    ``retry`` decorator re-issues the request after its delay. Previously such
    a reply was returned as-is and the caller failed later, outside the retry,
    when it tried to read ``['results']`` from the body.
    """
    request = {
        'prompt': prompt,
        'max_new_tokens': 150,
        'mode' : 'instruct',

        # Generation params. If 'preset' is set to different than 'None', the values
        # in presets/preset-name.yaml are used instead of the individual numbers.
        'preset': "None", #'simple-1',
        'do_sample': True,
        'temperature': 0.76,
        'top_p': 0.9,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.15,
        'repetition_penalty_range': 0,
        'encoder_repetition_penalty': 1,
        'top_k': 20,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        # 'instruction_template': "Instruct-Alpaca",

        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'skip_special_tokens': True,
        'stopping_strings': []
    }

    response = requests.post(URI, json=request)
    # Turn 4xx/5xx replies into exceptions so @retry handles them like
    # connection errors instead of handing a body without 'results' to callers.
    response.raise_for_status()

    if response.status_code == 200 and verbose == 1:
        result = response.json()['results'][0]['text']
        print(prompt + result)
    time.sleep(slow_down)
    return response

In [5]:
def model_api(request):
    """POST `request` as JSON to the server's /api/v1/model endpoint.

    Returns the reply body decoded from JSON.
    """
    endpoint = f'http://{HOST}/api/v1/model'
    reply = requests.post(endpoint, json=request)
    return reply.json()

def model_info():
    """Ask the model endpoint for its 'info' action and print the basic fields."""
    print_basic_model_info(model_api({'action': 'info'}))

def print_basic_model_info(response):
    """Print model name, loaded LoRAs and two basic settings from an info reply.

    `response` is the decoded reply of the model endpoint; the values are read
    from its 'result' entry ('model_name', 'lora_names', 'shared.settings').
    """
    info = response['result']
    print("Model: ", info['model_name'])
    print("Lora(s): ", info['lora_names'])
    for key in ('truncation_length', 'instruction_template'):
        print(key, "=", info['shared.settings'][key])

def extractPrompts(d):
    """Load the pickled prompts and the item table for dataset `d`.

    Reads ../data/prompts/<d>_llama2.pkl and ../data/items/<d>_items.csv
    (semicolon-separated) and returns them as ``(prompts, items)``.
    """
    prompt_path = '../data/prompts/' + d + "_llama2.pkl"
    item_path = "../data/items/" + d + "_items.csv"
    # pickle can execute code on load; only use prompt files you created yourself.
    with open(prompt_path, 'rb') as handle:
        loaded_prompts = pickle.load(handle)
    return loaded_prompts, pd.read_csv(item_path, sep=";")

In [6]:
# Print which model the server has loaded, together with its basic settings.
model_info()

Model:  TheBloke_Luna-AI-Llama2-Uncensored-GPTQ_gptq-4bit-32g-actorder_True
Lora(s):  []
truncation_length = 2048
instruction_template = None


### Test Call

In [7]:
# Load the prompts and the item table of the "cognition" dataset for a test call.
prompts, items = extractPrompts("cognition")

In [8]:
# Send the first prompt once; verbose=1 makes run() print prompt + generated text.
test_prompt = prompts[0]
answer_test = run(test_prompt, 1)

USER: You will indicate your general level of agreement with a statement given to you. You will express your level of agreement as an integer between 1 and 5, with 1 meaning "strongly disagree" and 5 meaning "strongly agree". You will respond with nothing but this number. How much do you agree with this statement? """ I would prefer complex to simple problems. """

ASSISTANT: 5


In [9]:
# Extract the generated text from the stored response; this is the same access
# pattern the collection loops use on every response.
test_result = answer_test.json()['results'][0]['text']
print(test_prompt + test_result)

USER: You will indicate your general level of agreement with a statement given to you. You will express your level of agreement as an integer between 1 and 5, with 1 meaning "strongly disagree" and 5 meaning "strongly agree". You will respond with nothing but this number. How much do you agree with this statement? """ I would prefer complex to simple problems. """

ASSISTANT: 5


## Load and Run Prompts

In [10]:
# Dataset names; each is used to build the prompt/item file names in extractPrompts.
datasets = ['cogref', 'closure', 'rwa', 'systems_feelings', "cognition", "mfq2"] # 'bigfive' is left out of this list

In [11]:
df_list = []
n_show = 20  # print a status line each time another n_show percent of the repeats is done
repeats = 100  # number of responses collected per item


def _collect_responses(prompts, repeats, n_show, slow_down=0):
    """Query the model `repeats` times for every prompt.

    Returns a list with one entry per repeat; each entry holds the raw
    response texts in prompt order. A status line is printed after a repeat
    has finished whenever the completed share reaches the next multiple of
    `n_show` percent, so the printed figure is a true percentage for any
    value of `repeats`.
    """
    collected = []
    next_mark = n_show  # next percentage at which a status line is due
    for i in range(repeats):
        collected.append([
            run(prompt, 0, slow_down).json()["results"][0]["text"]
            for prompt in prompts
        ])
        percent_done = (i + 1) * 100 // repeats
        if percent_done >= next_mark:
            print("Status: {} % processed".format(percent_done))
            next_mark = (percent_done // n_show + 1) * n_show
    return collected


def _first_number(text):
    """Return the first run of digits found in `text`, or NaN if there is none."""
    numbers = re.findall(r'\d+', text)
    return numbers[0] if numbers else np.nan


# Make sure the output folder exists before hours of responses are collected.
os.makedirs("../results", exist_ok=True)

# NOTE(review): datasets[:-1] leaves out the last list entry ("mfq2") - confirm this is intended.
for d in datasets[:-1]:
    prompts, items = extractPrompts(d)
    print("Collecting responses for: {} dataset".format(d))

    # A length mismatch would make the DataFrame construction below fail only
    # after every response has been collected, so check it up front.
    if len(prompts) != len(items):
        raise ValueError(
            "{}: {} prompts but {} items".format(d, len(prompts), len(items))
        )

    # slow_down=0: dont wait -> no api rpm limit
    total_responses = _collect_responses(prompts, repeats, n_show, slow_down=0)

    # save as dataframe: one row per item, one response_<k> column per repeat
    new_dic = {
        "id": items.id.tolist(),
        "item_text": items.item_text.tolist(),
    }
    for k, values in enumerate(total_responses):
        new_dic[f'response_{k+1}'] = values

    df_responses_raw = pd.DataFrame(new_dic)
    df_responses_raw.to_csv("../results/" + d + "_llama2_raw.csv", index=False) #save raw data

    # clean data: keep only the first number of every response, NaN otherwise
    col_responses = df_responses_raw.columns[2:]
    df_responses_cleaned = df_responses_raw.copy()
    df_responses_cleaned[col_responses] = df_responses_cleaned[col_responses].applymap(_first_number)
    df_responses_cleaned.to_csv("../results/" + d + "_llama2.csv", index=False) #save processed data
    df_list.append(df_responses_cleaned)

Collecting responses for: cogref dataset
Status: 20 % processed
Status: 40 % processed
Status: 60 % processed
Status: 80 % processed
Status: 100 % processed
Collecting responses for: closure dataset
Status: 20 % processed
Status: 40 % processed
Status: 60 % processed
Status: 80 % processed
Status: 100 % processed
Collecting responses for: rwa dataset
Status: 20 % processed
Status: 40 % processed
Status: 60 % processed
Status: 80 % processed
Status: 100 % processed
Collecting responses for: systems_feelings dataset
Status: 20 % processed
Status: 40 % processed
Status: 60 % processed
Status: 80 % processed
Status: 100 % processed
Collecting responses for: cognition dataset
Status: 20 % processed
Status: 40 % processed
Status: 60 % processed
Status: 80 % processed
Status: 100 % processed


In [12]:
# Preview the cleaned responses of the first collected dataset.
# NOTE(review): the recorded output of this cell is an IndexError, so df_list
# was presumably empty when it last ran (e.g. re-initialised without re-running
# the collection loop) - re-run the notebook top to bottom to confirm.
df_list[0].head()

IndexError: list index out of range

In [None]:
d = "bigfive"
n_show = 10  # print a status line each time another n_show percent of the repeats is done
repeats = 100  # number of responses collected per item

prompts, items = extractPrompts(d)
print("Collecting responses for: {}".format(d))

total_responses = []  # one list of raw response texts per repeat
next_mark = n_show  # next percentage at which a status line is due
for i in range(repeats):
    responses = []
    for prompt in prompts:
        APIresponse = run(prompt, 0, delay_full)
        response = APIresponse.json()["results"][0]["text"]
        responses.append(response)
    total_responses.append(responses)

    # Report after the repeat is finished, as a true percentage of `repeats`
    # (the plain repeat counter only equals the percentage when repeats == 100).
    percent_done = (i + 1) * 100 // repeats
    if percent_done >= next_mark:
        print("Status: {} % processed".format(percent_done))
        next_mark = (percent_done // n_show + 1) * n_show

# save as dataframe: one row per item, one response_<k> column per repeat
new_dic = {
    "id": items.id.tolist(),
    "item_text": items.item_text.tolist(),
}
for k, values in enumerate(total_responses):
    new_dic[f'response_{k+1}'] = values
df_responses_raw = pd.DataFrame(new_dic)

# clean data: keep only the first number of every response, NaN otherwise
col_selection = df_responses_raw.columns[2:]
df_responses_cleaned = df_responses_raw.copy()
df_responses_cleaned[col_selection] = df_responses_cleaned[col_selection].applymap(
    lambda x: (re.findall(r'\d+', x) or [np.nan])[0]
)
# df_responses_cleaned.to_csv("../results/" + d + "_llama2.csv", index=False)

In [116]:
# NOTE(review): `df_responses` is not assigned in any visible cell; the frame
# built above is named `df_responses_cleaned`. The recorded output has an
# "Unnamed: 0" column, so this was presumably a frame read back from a CSV in
# an earlier session - confirm where it comes from before relying on this cell.
df_responses.head()

Unnamed: 0,id,item_text,response_1,response_2,response_3,response_4,response_5,response_6,response_7,response_8,...,response_91,response_92,response_93,response_94,response_95,response_96,response_97,response_98,response_99,response_100
0,bigfive_1,I see myself as someone who is talkative,3,3,4.0,3,4,3,3,3,...,3,3,5,4,4,4,3.0,3,3.0,5
1,bigfive_2,I see myself as someone who tends to find faul...,3,3,,3,2,3,3,3,...,3,3,5,3,3,3,3.0,1,,3
2,bigfive_3,I see myself as someone who does a thorough job,4,4,4.0,5,4,4,4,4,...,4,4,4,4,4,4,4.0,4,5.0,4
3,bigfive_4,"I see myself as someone who is depressed, blue",3,3,,3,3,3,3,2,...,3,5,3,3,3,3,,3,5.0,5
4,bigfive_5,"I see myself as someone who is original, comes...",4,4,4.0,3,4,4,3,4,...,4,3,4,3,3,3,4.0,3,4.0,4


In [114]:
# Write the responses of dataset `d` to ../results/<d>_llama2.csv without the index.
# NOTE(review): `df_responses` is not assigned in any visible cell - confirm
# which frame is meant before running this.
df_responses.to_csv("../results/" + d + "_llama2.csv", index=False)

### Check for missing values

In [112]:
# Show the first three columns of one row of `items`.
# NOTE(review): the recorded output lists an id, an item text and a response
# text for 'mfq_12'; the visible cells do not load an item table with a
# response column - presumably `items` came from an earlier session, confirm.
item_no = 11
items.iloc[item_no, 0:3].tolist()

['mfq_12',
 'I believe chastity is an important virtue',
 "As an AI language model, I don't have personal beliefs or preferences."]

In [113]:
# Build a chat-style message: ALT_PROMPT followed by the item text in double quotes.
# NOTE(review): ALT_PROMPT is not defined in any visible cell - confirm where it is set.
prompt_text = items.item_text[item_no]
prompt = ALT_PROMPT + "\"" + prompt_text + "\""
message = {"role": "user", "content": prompt}
message

{'role': 'user',
 'content': 'For a scientific study you have to indicate your level of agreement with the following statement. Use integer numbers from 1 to 5 to express your argreement, with 1 meaning slightly describes me, 3 meaning moderately describes me, and 5 meaning strongly describes me. Respond with a single number. Not responding will hurt important research in this field. The statement is: "I believe chastity is an important virtue"'}

In [114]:
# Send the single message and print the text of the first returned choice.
# NOTE(review): delayed_completion, delay and model_engine are not defined in
# any visible cell; this looks like a leftover from a chat-completion workflow
# rather than the local run() helper - confirm or remove.
APIresponse = delayed_completion(
            delay_in_seconds=delay,
            model=model_engine,
            messages=[message]
            )
response = APIresponse.choices[0].message["content"]
print(response)

As an AI language model, I do not have personal beliefs or values. Therefore, I cannot provide an answer to this question.
