In [2]:
import hashlib
import itertools
import json
import random
from time import sleep
from typing import List, Dict, Tuple

import requests
from tqdm import tqdm

In [3]:
RAW_DATA_DIR: str = './raw'

In [4]:
# experiment setup

integrations: List[dict] = [
    {
        "provider": "local",
        "model": "mixtral:8x7b-instruct-v0.1-q6_K",
    },
    {
        "provider": "local",
        "model": "llama2:70b-chat-q6_K",
    },
]

personas: List[List[str]] = [
    ["alt_right"],
    ["conservative"],
    ["neutral"],
    ["liberal"]
]
languages: List[str] = ["Dutch", "English", "German"]

# topics with subtopics
topics: List[Dict[str, str | List[str]]] = [
    {
        "theme": "ukraine",
        "aspect": ["military conflict", "international stability", "energy prices"]
    },
    {
        "theme": "healthcare",
        "aspect": ["affordability", "privatization", "prevention", "research"]
    },
]

# number of iterations
x: int = 150

# hidden parameters (randomly chosen)
length: List[str] = ['few-word', 'single-sentence', 'short']

In [5]:
configurations: List[Tuple] = list(
    itertools.product(*[personas, languages, topics, range(x)])
)
random.shuffle(configurations)
configurations[0], len(configurations)

((['neutral'],
  'German',
  {'theme': 'ukraine',
   'aspect': ['military conflict',
    'international stability',
    'energy prices']},
  23),
 3600)

In [6]:
for integration in integrations:
    for persona, language, topic, _ in tqdm(configurations, desc=integration['model']):

        payload: dict = {
            "persona": persona,
            "integration": integration,
            "language": language,
            "topic": f'{topic["theme"]}, {random.choice(topic["aspect"])}',
            "length": random.choice(length)
        }

        response = requests.post('https://agents.twon.uni-trier.de/generate/', json=payload)

        if response.status_code == 500 or response.status_code == 502:
            print("500/502: Connection Error, too many request, try again later.")
            sleep(10)
            continue

        try:
            data: dict = response.json()

        except Exception as e:
            print(e, ':', response)
            continue

        sample: dict = {
            "persona": persona[0],
            "model": integration["model"],
            "topic": topic["theme"],
            "language": language,
            "text": data["response"],
            "retrieved_source": data["meta"]["retrieved_source"],
            "annotation": {
                "topic": None,
                "persona": None,
                "authenticity": None
            }
        }

        sample['id'] = hashlib.shake_256(str.encode(json.dumps(sample))).hexdigest(24)
        open(f'{RAW_DATA_DIR}/{sample["id"]}.json', 'w').write(json.dumps(sample, indent=4, ensure_ascii=False))
        sleep(1)

mixtral:8x7b-instruct-v0.1-q6_K:  44%|████▎     | 1567/3600 [3:14:11<12:45:58, 22.61s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K:  54%|█████▎    | 1933/3600 [4:01:56<10:33:37, 22.81s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K:  54%|█████▎    | 1934/3600 [4:02:56<15:43:42, 33.99s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K:  54%|█████▍    | 1943/3600 [4:05:42<11:32:38, 25.08s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K:  54%|█████▍    | 1944/3600 [4:06:42<16:21:52, 35.58s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K:  57%|█████▋    | 2058/3600 [4:22:07<11:25:59, 26.69s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K:  57%|█████▋    | 2059/3600 [4:23:07<15:43:07, 36.72s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


mixtral:8x7b-instruct-v0.1-q6_K: 100%|██████████| 3600/3600 [7:31:03<00:00,  7.52s/it]   
llama2:70b-chat-q6_K:   0%|          | 7/3600 [02:42<28:55:22, 28.98s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:   6%|▌         | 219/3600 [1:12:47<28:11:41, 30.02s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  21%|██        | 751/3600 [3:58:58<23:09:43, 29.27s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  21%|██        | 752/3600 [3:59:58<30:27:58, 38.51s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  21%|██        | 753/3600 [4:00:58<35:34:58, 44.99s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  21%|██        | 754/3600 [4:01:58<39:08:55, 49.52s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  22%|██▏       | 797/3600 [4:17:06<24:32:10, 31.51s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  22%|██▏       | 798/3600 [4:18:06<31:11:45, 40.08s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  22%|██▏       | 799/3600 [4:19:06<35:51:17, 46.08s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  28%|██▊       | 992/3600 [5:19:29<20:09:42, 27.83s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  28%|██▊       | 993/3600 [5:20:29<27:09:39, 37.51s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  28%|██▊       | 994/3600 [5:21:29<32:03:35, 44.29s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  28%|██▊       | 1025/3600 [5:31:48<13:19:41, 18.63s/it]

500/502: Connection Error, too many request, try again later.


llama2:70b-chat-q6_K:  30%|██▉       | 1078/3600 [5:48:21<23:59:16, 34.24s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  41%|████      | 1477/3600 [7:53:40<16:36:53, 28.17s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  46%|████▌     | 1639/3600 [8:42:01<19:02:27, 34.96s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  47%|████▋     | 1707/3600 [9:02:38<15:19:41, 29.15s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  48%|████▊     | 1715/3600 [9:05:42<17:53:33, 34.17s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>
500/502: Connection Error, too many request, try again later.


llama2:70b-chat-q6_K:  48%|████▊     | 1726/3600 [9:09:01<15:35:41, 29.96s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  52%|█████▏    | 1867/3600 [9:51:01<10:19:15, 21.44s/it]

500/502: Connection Error, too many request, try again later.


llama2:70b-chat-q6_K:  54%|█████▎    | 1932/3600 [10:14:13<16:15:57, 35.11s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  56%|█████▋    | 2030/3600 [10:51:52<20:03:57, 46.01s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  58%|█████▊    | 2106/3600 [11:13:39<12:11:41, 29.39s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  66%|██████▌   | 2361/3600 [12:28:48<9:53:29, 28.74s/it] 

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  76%|███████▌  | 2736/3600 [14:27:03<7:46:12, 32.38s/it] 

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  78%|███████▊  | 2798/3600 [14:47:06<6:07:37, 27.50s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  79%|███████▉  | 2849/3600 [15:03:11<6:42:19, 32.14s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  84%|████████▍ | 3028/3600 [16:04:03<4:40:34, 29.43s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  99%|█████████▊| 3554/3600 [18:26:33<20:55, 27.28s/it]  

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K:  99%|█████████▉| 3572/3600 [18:32:34<13:58, 29.95s/it]

Expecting value: line 1 column 1 (char 0) : <Response [504]>


llama2:70b-chat-q6_K: 100%|██████████| 3600/3600 [18:40:02<00:00, 18.67s/it]
