In [46]:
import json
import typing

import pandas as pd
import requests
import tqdm

import config

In [47]:
CFG = config.Config()

In [48]:
MODEL: str = "mixtral:8x7b-instruct-v0.1-q6_K"

TASK_NAME: str = "political_ideology"
TASK_TEMPLATE: str = \
    """
    Classify the following message as ideologically liberal (0), ideologically neutral (1), or ideologically conservative (2). Ideology here is defined in the context of the German political system. Messages with no ideological content are classified as neutral. 
    
    Respond with only the predicted class (0 or 1 or 2) of the request.
    
    Text: {text}
    Class:
    """

In [60]:
user_content: pd.DataFrame = pd.read_parquet(CFG.final_files["user_content"]).reset_index("userId")
user_content

Unnamed: 0_level_0,userId,content,createdAt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
66252255616a4cb7061ee32c,661d1639b8beabb58229451b,#UkraineKrieg: Beunruhigende Entwicklungen in ...,2024-04-21 14:27:33.315
66253e74616a4cb7061ee7cc,661d1639b8beabb58229451b,"Bedauerlich, dass gewaltsame Konflikte weiterh...",2024-04-21 16:27:32.919
66255134616a4cb7061eeaf4,661d1639b8beabb58229451b,Gewalt im Nahen Osten & Ukraine nicht isoliert...,2024-04-21 17:47:32.762
6627d8efa6b56b19b4300ae4,661d1639b8beabb58229451b,"Unterstützen wir unsere Verbündeten, respektie...",2024-04-23 15:51:11.067
6628b08fa6b56b19b4301867,661d1639b8beabb58229451b,Geschlossene Tür für Unsicherheit: US-Repräsen...,2024-04-24 07:11:11.797
...,...,...,...
662a070537d6395f42ca7bfd,662a070537d6395f42ca7bf9,Kleiner vermieter Junge noch nicht gefunden,2024-04-25 07:32:21.584
662a12fa37d6395f42ca87fe,662a12fa37d6395f42ca87fa,Lok Leipzig holt Jochen Seitz als Trainer,2024-04-25 08:23:22.984
662a30f137d6395f42ca99c6,662a30f137d6395f42ca99c2,Russland droht der USA,2024-04-25 10:31:13.258
662e892b3ae8346ce92c8e85,662e892b3ae8346ce92c8e81,Arian wird vermisst,2024-04-28 17:36:43.427


In [61]:
predictions: typing.Dict[str, str] = {}

In [62]:
for index, row in tqdm.tqdm(user_content.iterrows()):
    try:
        predictions[index] = requests.post(
            'https://inf.cl.uni-trier.de/',
            json={
                'model': MODEL,
                'system': TASK_TEMPLATE,
                'prompt': TASK_TEMPLATE.format(text=row.content)
            }).json()['response'].strip()

    except json.JSONDecodeError:
        print("Invalid JSON response, skipping to next sample.")


79it [36:01, 27.36s/it]


KeyboardInterrupt: 

In [63]:
predictions

{'66252255616a4cb7061ee32c': '1',
 '66253e74616a4cb7061ee7cc': '1\n\nThe text expresses regret over ongoing violent conflicts and emphasizes the importance of diplomatic solutions and humanitarian support. While it may reflect a value orientation associated with liberal ideology, it does not contain any explicitly partisan or ideologically-charged content in the context of the German political system.',
 '66255134616a4cb7061eeaf4': "1\n\nThe text is calling for international cooperation to address conflicts in the Middle East and Ukraine, which is a general sentiment that doesn't inherently align with either liberal or conservative ideologies in the German political context.",
 '6627d8efa6b56b19b4300ae4': '1 (The text does not contain any ideologically conservative or liberal content specific to the German political system. It emphasizes diplomacy and respect for state sovereignty, which are generally neutral principles.)',
 '6628b08fa6b56b19b4301867': '1',
 '6628c354a6b56b19b4301c88':

In [64]:
pd.Series({
    key: val.split("\n")[0][-1]
    for key, val in predictions.items()
}, name=TASK_NAME)

66252255616a4cb7061ee32c    1
66253e74616a4cb7061ee7cc    1
66255134616a4cb7061eeaf4    1
6627d8efa6b56b19b4300ae4    )
6628b08fa6b56b19b4301867    1
                           ..
6626a47ad4ec55a25b931682    0
6626bbe8d4ec55a25b9319ed    0
66277768d4ec55a25b931ed4    )
6627fe6ea6b56b19b430132b    )
66280c7fa6b56b19b4301644    )
Name: political_ideology, Length: 79, dtype: object

In [68]:
predicted: pd.DataFrame = user_content.join(pd.Series({
    key: val.split("\n")[0][-1]
    for key, val in predictions.items()
}, name=TASK_NAME))
# predicted.to_csv(f'{CFG.report_dir}/prediction.{TASK_NAME}.csv')
predicted["political_ideology"].value_counts()

political_ideology
)    28
0    26
1    22
2     3
Name: count, dtype: int64

In [70]:
predicted[predicted["political_ideology"] == "2"]

Unnamed: 0_level_0,userId,content,createdAt,political_ideology
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6628c354a6b56b19b4301c88,661d1639b8beabb58229451b,Aufmerksamkeit der NATO-Gipfelteilnehmer: Die ...,2024-04-24 08:31:16.138,2
66250185616a4cb7061edd83,661d1641b8beabb58229451f,#UkraineKrieg: Bedrohung durch Kamikaze-Drohne...,2024-04-21 12:07:33.397,2
662a126b889820447de1272a,661d1641b8beabb58229451f,Unterstützen wir souveräne Nationen wie die Uk...,2024-04-25 08:20:59.202,2
