This notebook compares annotations using different GLLMs with codebooks based prompts of Boukes 2024, Jaidka 2022 and Naab 2025 on their respective datasets

In [1]:
import requests
import os
from dotenv import load_dotenv
load_dotenv("sjoerdAzure.env")  # Load environment variables from .env file
import time

import typing

from sklearn.metrics import cohen_kappa_score, classification_report
import krippendorff
import yaml

import pandas as pd

import config
import src
import tqdm
import json
import numpy as np
import logging

#import cltrier_lib as lib
import pyreadstat
import yaml
pd.set_option('display.max_colwidth', 100) 
#set up helper variables and functions:
CFG = config.Config()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\sstolwi\Github\llmdiv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\sstolwi\Github\llmdiv\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\sstolwi\Github\llmdiv\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File "c:\Users\sstolwi\Git

In [2]:
#load data:

# Naab2025 data
naab = pd.read_parquet(f'{CFG.report_dir}/Naab2025.parquet')
# Jaida2024 data
jaidka = pd.read_parquet('data/jaidka2022/TwitterDeliberativePolitics2.parquet')
# Boukes
boukes = pd.read_parquet('data/publicsphere/publicsphere.cardiff_prompt_classify_anon.parquet')
boukesT = pd.read_csv('data/publicsphere/full_data.csv') # this includes the comments
#the Boukes2024 data is a subset of this, select YT part of Boukes in line with Boukes2024:
boukesTYT = boukesT[boukesT['Platform'] == 1]
#MH_clemm 2024
MHclemm = pd.read_csv('data/MH_BClemm_data/Ideo_Val_GPT_USA.csv') #uses same prompt as Boukes, but different data (and here we use German, but could ask for English and (?) Spanish (?))

In [274]:
MHclemm

Unnamed: 0,text,label,GPT1,GPT2,GPT_Reconciled
0,"ItÂ’s so great to see our small businesses and entrepreneurs are creating jobs for folks in WI, ...",2,2,2,2
1,Thank you Romania Dukes &amp; Mothers Fighting for Justice for putting together a wonderful toy ...,1,1,1,1
2,"My statement on today's UN Security Council resolution, condemning Israel for its settlement act...",0,1,1,1
3,RT @SharedHope: @RepHartzler @RepChrisSmith &amp; @RepAnnWagner speak on the passage of HR 2200 ...,1,1,1,1
4,Starting first tele-townhall now to answer your questions about the issues important to you. To ...,1,1,1,1
...,...,...,...,...,...
630,Great news for our nation's #TransCommunity: Pentagon to let transgender individuals serve openl...,0,0,0,0
631,"With trepidation I enter the #RTCADinner, wherein lurks the Snarkstress, @Kamenta!",1,1,1,1
632,Dropped by @WVUfootball practice today and met Boone county native and Defensive Coordinator Ton...,1,1,1,1
633,"The American people are tired of top-down mandates from DC, extreme executive overreach, excessi...",2,2,2,2


list the variables we want to use:
**rationality** - prompt: 'rationality_simple2', 'rationality_jaidka',        
  manual coding: "Justification" (Jaidka), RATIONALITY_DUMMY
**incivility** - prompt: 'incivility_simple2', 'incivility_jaidka',  civility_jaidka         
  manual coding: INCIVILITY_DUMMY, Incivility_tot ('Uncivil_abuse', 'Empathy_Respect'), Uncivil_abuse, "Empathy_Respect" (jaidka)
**interactivity** - prompt: 'interactivity_acknowledgement_simple', interactivity_acknowledgement_jaidka       
  manual coding: INTERACTIVITY_DUMMY, Reciprocity (Jaidka)
**diversity/ideology** - prompt: 'political_ideology_US', 'political_ideology' (german)  -> no ideology in Jaidka
  manual coding: LIBERAL_DUMMY, CONSERVATIVE_DUMMY
**political_dum** - prompt: 'political_post', political_post_jaidka 
  manual coding: HAS_OPINION_DUMMY

In [None]:
#model variants:
# llama31_8b
# llama31_70b
# gpt4o
# gpt4Turbo

#optional:
# gpt4 (OPenAI, microsoft)
# llama33_70b (Meta)
# Gemma3:22b (US, google) (based on Gemini 2)
# "id":"deepseek-r1:70b","name":"DeepSeek-R1 (china)
# qwen2.5:70b (china)
# mistral-large:123b","name":"Mistral" (europe)


list the annotations we have available per dataset:
**Jaidka**: 'rationality_simple2_Llama8b_dum', 'rationality_simple2_gpt4o_dum','rationality_jaidka_Llama8b_dum','rationality_jaidka_gpt4o_dum', 'civility_jaidka_gpt4o_dum', incivility_simple2_gpt4o_dum, incivility_jaidka_gpt4o_dum, reciprocity_jaidka_gpt4o_dum, interactivity_acknowledgement_simple_gpt4o_dum, political_post_jaidka_gpt4o_dum, political_post_gpt4o_dum
**Boukes**: 
*rationality*: rationality_simple2_dum (+ rationality_simple_dum, rationality_combine_dum, rationality_combine_exactexample_dum, rationality_prompt_dum (aggregation of indicator prompt scores)), rationality_simple2_gpt4o_system_dum, rationality_simple2_gpt4T_system_dum, rationality_simple2_small_dum, 
*incivility*: incivility_simple2_dum (+ incivility_simple_dum, incivility_combine_dum), incivility_prompt_dum (aggregation of indicator prompt scores), incivility_simple2_gpt4o_system_dum, incivility_simple2_gpt4T_system_dum, incivility_simple2_small_dum 
*interactivity*: interactivity_acknowledgement_simple_dum (+ interactivity_acknowledgement_simple2_dum), interactivity_acknowledgement_simple_gpt4o_system_dum, interactivity_acknowledgement_simple_gpt4T_system_dum, interactivity_acknowledgement_simple_small_dum (+interactivity_acknowledgement_simple_small2_dum)
*diversity*: political_liberal_US_dum, political_conservative_US_dum, political_liberal_US_gpt4o_system_dum, political_liberal_US_gpt4T_system_dum, political_conservative_US_gpt4o_system_dum, political_conservative_US_gpt4T_system_dum, political_liberal_US_small_dum, political_conservative_US_small_dum
*political_dum*: political_opinion_US_dum, political_opinion_US_gpt4o_system_dum, political_opinion_US_gpt4T_system_dum, political_opinion_US_small_dum (either liberal/conservative; Boukes)

#to be made prompts:
#Naab:
 Argumente/ v75 - rationality in combination with quellenbelege?
(Operationalisierung erstellt in Anlehnung an Ziegele & Quiring, 2015)
Argumente sind Aussagen, die dazu dienen sollen, Behauptungen zu begründen oder zu widerlegen. Es wird kodiert, ob ein Nutzerkommentar Argumente verwendet, um eine oder mehrere geäußerte(n) Meinung(en) zu begründen.
Von Interesse ist hier, ob die Nutzer ihre eigenen Aussagen mit Argumenten unterstützen oder ob diese unbegründet bleiben. Der Leser muss sich bei zusammenhängenden Aussagen im Kommentar fragen, ob die Frage nach dem „warum“ beantwortet wird. Anders gesagt: Gut erkennbar ist ein Argument, wenn man es problemlos mit einem Kausalsatz (weil...) an eine Behauptung anfügen kann oder wenn man eine begründende „Wenn-Dann-Beziehung“ zwischen den Sätzen herstellen kann. Eine Aneinanderreihung von Aussagen zählt nicht als Argumentation.
! Achtung: Es geht hier nicht um die Qualität der Argumente! Es wird nur kodiert, ob Begründungen für Aussagen/Behauptungen/Positionen angeführt werden (s. Beispiele)!
0
Keine Argumente
Im Kommentar werden keine Argumente verwendet. Behauptungen werden nicht begründet.
Der Abtreibungsparagraph muss abgeschafft werden.
Alles ist wichtiger als Fußball.
Schade, dass die FDPs in Hessen nochmal geschafft hat aber für Schwarz-Gelb reicht‘s trotzdem nicht mehr. Dafür ist neben rot-rot-grün jetzt auch ‘ne klassische Ampel möglich. (Fragen nach dem „Warum“ bleiben unbeantwortet, Sätze lassen sich nicht mit „weil“ verknüpfen)
1
Mind. ein Argument
Im Kommentar wird mindestens ein Argument, u.U. auch mehrere Argumente, formuliert.
Deutschland hätte Snowden schon lange Asyl anbieten müssen: Schließlich ist er ein politisch Verfolgter, dessen Gesundheit auf dem Spiel steht, wenn er von seinen wildgewordenen Landsleuten aufgegriffen werden sollte.
In erster Linie pflichte ich der GDL bei, (denn) sie demonstrieren wie es geht. In zweiter Linie möchte ich den Arbeitgebern das Armutszeugnis ausstellen, (denn) ich hab zum ersten Mal das Gefühl dass einer das richtige macht.
62
99
Nicht eindeutig zuzuordnen.


Quellenbelege im Kommentar/ v76
Es wird kodiert, ob im Kommentar nachprüfbare Belege für Fakten, Tatsachenbehauptungen, Argumente oder Meinungen genannt werden. Dies passiert, indem auf Quellen verwiesen wird. Ein Argument, eine Prognose oder eine Werthaltung alleine sind kein Beleg. Der Beleg macht eine Aussage prinzipiell für jeden intersubjektiv nachprüfbar. Ein Verweis auf Quellen ist ausreichend, die Quellen müssen nicht (können aber) verlinkt sein. Werden zwar Fakten genannt, diese sind jedoch nicht belegt und nur mit größerem Aufwand intersubjektiv prüfbar, zählt dies nicht als Beleg.
0
Kein Beleg genannt
Ich glaube, dass auch der Asylmissbrauch ein Problem ist. Aber sowas ist auch schwer zu erkennen.
Assad wird weiter Chemiewaffen einsetzen, er weiß, er kann sich auf seine russischen und chinesischen Freunde verlassen. (Spekulation und Zukunftsprognosen sind keine Fakten)
Alle außer Syrien und Iran haben die Amis im Sack und wenn das Problemchen gelöst wurde, dann wird wohl Russland spüren wer Herr im Hause ist und das ist das eigentliche Ziel der USA.
In Frankreich gilt ein Mindestlohn von 11,50€, auch das trägt dazu bei, dass Deutschland mit seinem Niedriglohn den europäischen Nachbarn schwer zusetzt!
Der Fingerabdrucksensor hat nichts mit NSA zu tun. Wer das denkt ist auf dem Holzweg. Der Fingerabdrucksensor ist ja nur für die Entsperrung des Telefons. 64Bit ist nice to have.
1
Mindestens ein Quellen-beleg genannt
Der Kommentator enthält Verweise auf Quellen, mit
Die Tagesschau meldete gestern neben den 20 Toten auch über 70 Verletzte.
63
denen Fakten oder sonstige Aussagen belegt werden. Dadurch sind sie prinzipiell für jeden intersubjektiv nachprüfbar. Ein Verweis auf Quellen ist ausreichend, sie müssen nicht (können aber) verlinkt sein.
Die CDU kommt auf 41,5 Prozent, SPD, Grüne und Linke zusammen auf 42, 7. Und das kann sich jedes Kind auf Wikipedia ansehen: https://de.wikipedia.org/wiki/Bundestagswahl_2013
99
Nicht eindeutig zuzuordnen.


Inziviles im Kommentar/ v711 - check definition
(Operationalisierung erstellt in Anlehnung an Ziegele & Quiring, 2015; vgl. auch Coe, Kenski & Rains, 2014; Papacharissi, 2004)
Inzivilität wird kodiert, wenn im Kommentar die folgenden Aussagetypen vorkommen, die einen unnötig respektlosen Ton gegenüber den Diskussionsteilnehmern, Themen, nicht-anwesenden Dritten oder Journalisten/Medien transportieren:
•
Beschimpfungen: Bösartige oder herabsetzende Wörter, die an Personen oder Personengruppen gerichtet sind.
o
Das verachtenswerte politische Unkraut FDP ist jetzt dort, wo es hingehört. (Thema Landtagswahl in Hessen)
•
Abfällige Bewertungen: Bösartige oder herabsetzende Wörter und Sätze, die gegen eine Idee, einen Plan, ein Verhalten oder eine Politik gerichtet sind.
70
o
Klassisches Eigentor geschossen mit diesem Schrott Handy! (Thema iPhone-Vorstellung)
•
Lügen: Explizite oder implizite Aussagen, dass eine Idee, ein Plan, ein Verhalten oder eine Politik unehrlich sind.
o
Und welche "Glaubwürdigkeit" hat ein Präsident, der die Dauer-Lügen seiner Geheimdienste vertritt, ÜBERHAUPT noch? (Thema Syrien-Konflikt)
•
Vulgarität: Obszönitäten oder eine vulgäre Sprache, die im persönlichen Gespräch unangebracht wäre.
o
Es gibt genug Spinner, die ihre eigene unzureichende Schwanzlänge kompensieren müssen. (Thema iPhone-Vorstellung)
•
Abschätzige Bemerkungen: Herabsetzende Bemerkungen über die Art und Weise, wie eine Person auftritt oder kommuniziert.
o
Damit fühlst du dich nun überlegen und toll, hm? Infantiles Gehabe und Profilierung. (Thema Veggie-Day)
•
Stereotype: Eine vereinfachende, verallgemeinernde, schematische Reduzierung einer Erfahrung, Meinung oder Vorstellung auf ein meist verfestigtes, oft ungerechtfertigtes und emotional aufgeladenes Vorurteil. Stereotype bilden eine Voraussetzung für die Diskriminierung von Minderheiten und die Ausbildung von Feindbildern, Rassismus und Sexismus. Oftmals – aber nicht immer – ist die Kommunikation von Stereotypen politisch unkorrekt. So werden zum Beispiel einzelne Personen mit einer Gruppe gleichgesetzt, oder auf die Haupteigenschaften einer Gruppe reduziert.
o
Der Deutsche hasst den Mainstream, kauft bei KIK und Aldi, schmeißt 2 Jahre alte Handys auf den Müll und fährt mit Monatskarte zur Atomkraftdemo. So ambivalent spießig wie der/die Deutsche ist wohl keine Nation.
o
Frauen sind zum Putzen da!
o
Du bist doch ein Hauptschüler, so beschränkt wie du dich ausdrückst.
•
Absprechen von Rechten (individuell): Anderen Diskussionsteilnehmern werden Menschen- und Persönlichkeitsrechte abgesprochen
z. B. Angriffe auf die Meinungsfreiheit oder persönliche Freiheit (Recht, Leben frei zu gestalten, auf finanziellen Wohlstand und gesicherte Existenz), Androhung von Gewalt, einschließlich impliziter Drohungen.
o
Dir gehört der Mund verboten!; Leute mit Einstellungen wie deiner sollten nicht wählen dürfen.
o
Die sollten alle im Meer ersaufen;
•
Bedrohung der Demokratie/ Gefährdung demokratischer Werte (gesellschaftlich): Der Kommentar trifft Aussagen, die vermuten lassen, dass die demokratischen Werte/ die Demokratie nicht wertgeschätzt oder diese gar bedroht werden.
! Achtung: Die Artikulation von Kritik und Meinungsverschiedenheiten wird nicht als Inzivilität kodiert, sofern diese Artikulation angemessen höflich und sachlich erfolgt. In anderen Worten: Inzivilität ist nicht die Artikulation von Kritik und Meinungsverschiedenheiten an sich, sondern eine unangemessene Art und Weise, diese zu formulieren.
0
Keine Inzivilität
Der Kommentar ist zivil und höflich formuliert. Er transportiert keinen unnötig respektlosen Ton ggü. Anderen bzw. enthält keine der oben genannten Aussagetypen.
71
1
Vereinzelt inzivil
Der Kommentar ist überwiegend zivil und höflich formuliert. Er enthält jedoch vereinzelt die o.g. Aussagetypen.
2
Überwiegend / ausschließlich inzivil
Der Kommentar ist kaum oder nicht zivil und höflich formuliert. Er enthält überwiegend oder ausschließlich die o.g. Aussagetypen.
99
Nicht eindeutig zuzuordnen.


Fragen/ v7135 -> interactivity if either?
(Operationalisierung erstellt in Anlehnung an Ziegele & Quiring, 2015)
Hier wird kodiert, ob Autor innerhalb eines Kommentars Informations-, Wissens- oder Verständnisdefizite aktiv kommunizieren und ausgleichen wollen, oder ob rhetorische Fragen gestellt werden, auf die eigentlich keine Antwort erwünscht ist. Es wird kodiert, ob überhaupt Fragen gestellt werden, nicht aber, an wen diese ggf. gerichtet sind.
a)
Echte Fragen
Als echte Fragen werden alle Aussagen betrachtet, die mit einem Fragezeichen enden oder von ihrer Satzstruktur her als Frage aufgefasst werden können und die ein echtes Informationsbedürfnis anzeigen. Nur wenn eindeutig erkennbar ist, dass der Autor am Erhalt von Informationen/Meinungen interessiert ist, wird die Frage als „echt“ kodiert. Typen von „echten“ Fragen sind u. a. Wissens-, Einstellungs- und Verständnisfragen.
Bsp.: Kennt sich hier einer mit dem Völkerrecht aus? Wieso kann Obama überhaupt überlegen einen ‘Alleingang‘ zu starten?
b)
Rhetorische Fragen
Enthält der Kommentar eine oder mehrere rhetorische Fragen? Rhetorische Fragen werden vor allem als Argumentationsstrategie einsetzt, um die eigenen Positionen durchzusetzen. Sie zielen nicht auf Informationsgewinn ab, sondern der Autor will vorrangig Reaktionen wie Zustimmung bzw. Ablehnung erhalten.
! Achtung: Rhetorische Fragen zusätzlich beim Diskussionsfaktor Kontroverse kodieren.
Kodieranweisung: Rhetorische Frage schlägt echte Frage → sobald eine rhetorische Frage formuliert wird, wird eine „2“ kodiert.

maybe do the following analyses:
-baseline differences between slightly differing prompts on Boukes - best vs long vs best with slight word change vs long exact example vs prompt_dum (indicator aggregation) -> only have this for Llama3.1:70b
-model differences on Boukes - best prompt Llama3.1:70b vs 8b vs GPT4T vs GPT4o
-codebook differences on Boukes - Jaidka vs Boukes 
-dataset differences - best prompt Llama3.1:70b vs 8b vs GPT4o Jaidka vs Boukes -> need to run Llama3.1:70b on Jaidka, but no longer available via Trier -> could run both datasets with Llama3.3:70b instead
-error analysis - how do differences between models overlap with differences between models and manual data
-downstream effect on Boukes2024

maybe leave out indicator aggregation prompt, only focus on best vs slight word change vs arbitrary change (Barrie ea 2025 prompt stability),  try run baseline for Boukes and Jaidka with GPT4o (azure) and Llama3.3:70b (Trier)
compare model differences on Boukes also for other models if possible (but at least add L3.3:70b)

drop Naab data? 
ask MH for english data for out of sample comparison on political_dum and ideology
note we only have low-temperature anotations for Llama available

#TO DO:
-classify Boukes, Jaidka and MHdata with options_low, Llama3.3:70b for Boukes and Jaidka prompts
-classify Boukes with prompt stability/slightly different prompts with Llama3.3:70b and gpt4o -> use Barrie ea 2025 approach of paraphrasing prompt with increasing temperature, perhaps use Llama instead of pegasus for ease of use? - report prompt variations in appendix for manual validation
-error analysis
-downstream effects on Boukes

In [3]:
#set up helper variables and functions:
CFG = config.Config()

def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
#set option variables:

#set options to low temperature (0,1):
options_low_str = """
seed: 42
temperature: 0.1
"""
options_low = yaml.safe_load(options_low_str)

#add alternative seed to options_low:
options_low2_str = """
seed: 2
temperature: 0.1
"""
options_low_seed2 = yaml.safe_load(options_low2_str)

#apparently the 3.1 70b model is no longer available via Trier...
MODELsmall: str = 'llama3.1:8b'
MODEL33large: str = 'llama3.3:70b' # options: 'gemma:7b-instruct-q6_K', 'gemma2:27b-instruct-q6_K', 'llama3.1:8b-instruct-q6_K', 'llama3.1:70b-instruct-q6_K', 'mistral:7b-instruct-v0.3-q6_K', 'mistral-large:123b-instruct-2407-q6_K', 'mixtral:8x7b-instruct-v0.1-q6_K', 'mixtral:8x22b-instruct-v0.1-q6_K', 'phi3:14b-medium-128k-instruct-q6_K' or 'qwen2:72b-instruct-q6_K'
MODELgpt4o = "nf-gpt-4o-2024-08-06" # in principe is er nu van elk model een nf (no filter) en een normale versie beschikbaar, de no filter versies zijn alleen voor onderzoekers beschikbaar voor analyze van content die niet door de filter heen zou komen.
MODELgpt4T = "nf-gpt-4-turbo" # Can be gpt-35-turbo, gpt-4-turbo, gpt-4 or Meta-Llama-3-8B-Instruct.
#MODELGPT41 = "nf-gpt-4.1-2025-04-14" # Can be gpt-35-turbo, gpt-4-turbo, gpt-4 or Meta-Llama-3-8B-Instruct.
MODEL33largeAzure = 'Llama-3.3-70B-Instruct' #azureml://registries/azureml-meta/models/Llama-3.3-70B-Instruct/versions/4 / options:  "data": [
MODEL31largeAzureNF = 'nf-Llama-3.1-70b-instruct'
MODELDSR1_70b = 'deepseek-r1:70b' # DeepSeek-R1 (china)
MODELDSR1_7b = 'deepseek-r1:7b' # DeepSeek-R1 (china)
MODELQ25_72b = 'qwen2.5:72b' # Qwen2.5 70b (china)


options_zero_str = """
seed: 42
temperature: 0
"""
options_zero = yaml.safe_load(options_zero_str)

options_zero2_str = """
seed: 2
temperature: 0
"""
options_zero_seed2 = yaml.safe_load(options_zero2_str)

options_DS_zero_str = """
seed: 42
temperature: 0
max_tokens: 1
"""
options_DS_zero = yaml.safe_load(options_DS_zero_str)

temperature_0 : int = 0
temperature_01 : float = 0.1
SEED: int = 42
SEED43: int = 43
SEED2: int = 2
MAX10: int = 10
MAX15: int = 15
TOPP1: int = 1

options_creative_str = """
seed: 42
temperature: 0.7
topp: 0.8
"""
options_creative = yaml.safe_load(options_creative_str)

options_large_str = """
seed: 42
temperature: 0
num_predict: 2000
"""
options_large = yaml.safe_load(options_large_str)

#load environment variables:
api_key = os.environ.get('sjoerd_key')

#setttings:
api_endpoint = "https://ai-research-proxy.azurewebsites.net/chat/completions"
api_endpoint_embed = "https://ai-research-proxy.azurewebsites.net/embeddings"
####### API REQUEST FORMATTING ######
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + api_key
}

In [4]:
#load the datasets to merge results with:
dataset_w_pred_2 = pd.read_json(f'{CFG.report_dir}/publicsphere.cardiff_prompt_classify_s.json')
dataset_w_pred_anon = pd.read_parquet('data/publicsphere/publicsphere.cardiff_prompt_classify_anon.parquet')

In [None]:
#classify Boukes with Llama3.3:70b

#pubspherepromptsrunall = ['rationality_simple2', 'rationality_jaidka', 'incivility_simple2', 'incivility_jaidka',  'civility_jaidka', 'interactivity_acknowledgement_simple', 
# 'interactivity_acknowledgement_jaidka', 'political_ideology_US', 'political_post', 'political_post_jaidka', 'rationality_simple2_para1', 'incivility_simple2_para1', 
# 'interactivity_acknowledgement_simple_para1', 'political_ideology_US_para1', 'political_post_para1', 'rationality_simple2_para2', 'incivility_simple2_para2', 
# 'interactivity_acknowledgement_simple_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simple_simpa1', 'rationality_simple2_simpa1', 
# 'incivility_simple2_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1'] 
 

predictions4b: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODEL33large, options=options_low)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun4b
}

classifying incivility_para1: 100%|██████████| 3132/3132 [15:44<00:00,  3.32it/s]
classifying incivility_para2: 100%|██████████| 3132/3132 [15:20<00:00,  3.40it/s]
classifying incivility_simpa1: 100%|██████████| 3132/3132 [15:19<00:00,  3.41it/s]
classifying interactivity_acknowledgement_simple_para1: 100%|██████████| 3132/3132 [15:20<00:00,  3.40it/s]
classifying interactivity_acknowledgement_simple_para2: 100%|██████████| 3132/3132 [15:20<00:00,  3.40it/s]
classifying interactivity_acknowledgement_simple_simpa1: 100%|██████████| 3132/3132 [15:14<00:00,  3.42it/s]


In [189]:
#classify Boukes with Llama3.3:8b

#then change seed to 2 (options_low_seed2) to run the intraprompt annotation reliability benchmark for the Boukes2024 prompts:
pubspherepromptsrun_intra_L8b = ['rationality_simple2', 'incivility_simple2',  'interactivity_acknowledgement_simple',
    'political_ideology_US', 'political_post']


predictions_intra_L8b: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELsmall, options=options_low_seed2)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_intra_L8b
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [08:01<00:00,  6.50it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [18:13<00:00,  2.87it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [08:29<00:00,  6.15it/s]
classifying political_post: 100%|██████████| 3132/3132 [07:47<00:00,  6.70it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [07:44<00:00,  6.74it/s]


In [85]:
#zero-temperature annotation reliability benchmark for the Boukes2024 prompts:
pubspherepromptsrun_L8b = ['rationality_jaidka', 'incivility_jaidka']
pubspherepromptsrun_L8b2 = ['civility_jaidka', 'interactivity_acknowledgement_jaidka', 'political_post_jaidka']
pubspherepromptsrun_L8bpara = [ 'rationality_simple2_para1']
pubspherepromptsrun_L8bpara2 = [ 'incivility_para1']

pubspherepromptsrun_L8bpara3 = ['interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1']
pubspherepromptsrun_L8bpara4 = ['rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2']
pubspherepromptsrun_L8bsimpa1 = ['interactivity_acknowledgement_simpa1']
pubspherepromptsrun_L8bsimpa2 = [ 'rationality_simple2_simpa1'] 
pubspherepromptsrun_L8bsimpa3 = ['incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']


predictions_intra_L8b_zero_simpa3: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELsmall, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_L8bsimpa3
}

classifying incivility_simpa1: 100%|██████████| 3132/3132 [08:29<00:00,  6.15it/s]
classifying political_ideology_US_simpa1: 100%|██████████| 3132/3132 [10:58<00:00,  4.76it/s]
classifying political_post_simpa1: 100%|██████████| 3132/3132 [09:06<00:00,  5.73it/s]


In [16]:
#zero-temperature annotation reliability benchmark for the Boukes2024 prompts:
pubspherepromptsrun_L33 = ['rationality_jaidka', 'incivility_jaidka', 'civility_jaidka', 'interactivity_acknowledgement_jaidka', 'political_post_jaidka', 'rationality_simple2_para1', 'incivility_para1', 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1']
pubspherepromptsrun_L33para2 = ['rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2']
pubspherepromptsrun_L33simpa1 = ['interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1', 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']


predictions_L33_zero1: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODEL33large, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_L33
}

classifying civility_jaidka:   0%|          | 0/3132 [00:00<?, ?it/s]

classifying civility_jaidka: 100%|██████████| 3132/3132 [19:37<00:00,  2.66it/s] 
classifying incivility_jaidka: 100%|██████████| 3132/3132 [17:48<00:00,  2.93it/s]
classifying incivility_para1: 100%|██████████| 3132/3132 [16:08<00:00,  3.23it/s]
classifying reciprocity_jaidka: 100%|██████████| 3132/3132 [16:42<00:00,  3.12it/s] 
classifying interactivity_acknowledgement_simple_para1: 100%|██████████| 3132/3132 [16:22<00:00,  3.19it/s]
classifying political_ideology_US_para1: 100%|██████████| 3132/3132 [15:34<00:00,  3.35it/s]
classifying political_post_jaidka: 100%|██████████| 3132/3132 [15:34<00:00,  3.35it/s]
classifying political_post_para1: 100%|██████████| 3132/3132 [15:35<00:00,  3.35it/s]
classifying rationality_jaidka: 100%|██████████| 3132/3132 [15:25<00:00,  3.38it/s]
classifying rationality_simple2_para1: 100%|██████████| 3132/3132 [15:21<00:00,  3.40it/s]


In [23]:
#save output:
pd.DataFrame(predictions_L33_zero1).to_parquet('data/publicsphere/predictions_L33_zero1.parquet')

In [18]:
#zero-temperature annotation reliability benchmark for the Boukes2024 prompts:
pubspherepromptsrun_L33 = ['rationality_jaidka', 'incivility_jaidka', 'civility_jaidka', 'interactivity_acknowledgement_jaidka', 'political_post_jaidka', 'rationality_simple2_para1', 'incivility_para1', 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1']
pubspherepromptsrun_L33para2 = ['rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2']
pubspherepromptsrun_L33simpa1 = ['interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1', 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']


predictions_L33_zero_para2: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODEL33large, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_L33para2
}

classifying incivility_para2: 100%|██████████| 3132/3132 [15:21<00:00,  3.40it/s]
classifying interactivity_acknowledgement_simple_para2: 100%|██████████| 3132/3132 [15:17<00:00,  3.41it/s]
classifying political_ideology_US_para2: 100%|██████████| 3132/3132 [15:17<00:00,  3.41it/s]
classifying political_post_para2: 100%|██████████| 3132/3132 [15:19<00:00,  3.41it/s]
classifying rationality_simple2_para2: 100%|██████████| 3132/3132 [15:18<00:00,  3.41it/s]


In [22]:
#save output:
pd.DataFrame(predictions_L33_zero_para2).to_parquet('data/publicsphere/predictions_L33_zero_para2.parquet')

In [20]:
#zero-temperature annotation reliability benchmark for the Boukes2024 prompts:
pubspherepromptsrun_L33 = ['rationality_jaidka', 'incivility_jaidka', 'civility_jaidka', 'interactivity_acknowledgement_jaidka', 'political_post_jaidka', 'rationality_simple2_para1', 'incivility_para1', 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1']
pubspherepromptsrun_L33para2 = ['rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2']
pubspherepromptsrun_L33simpa1 = ['interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1', 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']


predictions_L33_zero_simpa1: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODEL33large, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_L33simpa1
}

classifying incivility_simpa1: 100%|██████████| 3132/3132 [15:26<00:00,  3.38it/s]
classifying interactivity_acknowledgement_simple_simpa1: 100%|██████████| 3132/3132 [15:26<00:00,  3.38it/s]
classifying political_ideology_US_simpa1: 100%|██████████| 3132/3132 [15:22<00:00,  3.40it/s]
classifying political_post_simpa1: 100%|██████████| 3132/3132 [15:29<00:00,  3.37it/s]
classifying rationality_simple2_simpa1: 100%|██████████| 3132/3132 [15:24<00:00,  3.39it/s]


In [21]:
#save output:
pd.DataFrame(predictions_L33_zero_simpa1).to_parquet('data/publicsphere/predictions_L33_zero_simpa1.parquet')

In [53]:
#why so many None-values in interactivity for Llama3.1:8b?

requests.post(
                            'https://inf.cl.uni-trier.de/',
                            json={
                                'model': MODELsmall,
                                'system': "Is this comment an acknowledgment or reply to another user's remark?\nInstructions: Assign Yes (1) if the comment expresses agreement or disagreement with a particular user's statement, typically indicated by a username or words such as ‘Yes,’ ‘No,’ or ‘I agree.’ Assign No (0) if it does not clearly acknowledge or merely insults.\\n\\nReply with only the anticipated category (0 or 1) of the request.\\n\\nText: {text}\\nCategory:",
                                'prompt': f"Is this comment an acknowledgment or reply to another user's remark?\nInstructions: Assign Yes (1) if the comment expresses agreement or disagreement with a particular user's statement, typically indicated by a username or words such as ‘Yes,’ ‘No,’ or ‘I agree.’ Assign No (0) if it does not clearly acknowledge or merely insults.\\n\\nReply with only the anticipated category (0 or 1) of the request.\\n\\nText: {boukesTYT.commentText[1]}\\nCategory:",
                                'options': options_zero
                                }).json()

{'id': '4b60481c-4d3d-11f0-ac8e-7bac359924a9',
 'timestamp': '2025-06-19T18:43:30.143155',
 'model': 'llama3.1:8b',
 'prompt': [{'role': 'system',
   'content': "Is this comment an acknowledgment or reply to another user's remark?\nInstructions: Assign Yes (1) if the comment expresses agreement or disagreement with a particular user's statement, typically indicated by a username or words such as ‘Yes,’ ‘No,’ or ‘I agree.’ Assign No (0) if it does not clearly acknowledge or merely insults.\\n\\nReply with only the anticipated category (0 or 1) of the request.\\n\\nText: {text}\\nCategory:"},
  {'role': 'user',
   'content': "Is this comment an acknowledgment or reply to another user's remark?\nInstructions: Assign Yes (1) if the comment expresses agreement or disagreement with a particular user's statement, typically indicated by a username or words such as ‘Yes,’ ‘No,’ or ‘I agree.’ Assign No (0) if it does not clearly acknowledge or merely insults.\\n\\nReply with only the anticipated

In [None]:
#model insists on giving explanations...

In [75]:
#save output:
pd.DataFrame(predictions_intra_L8b_zero_simpa1).to_parquet('data/publicsphere/predictions_intra_L8b_zero_simpa1.parquet')

In [148]:
#save output:
pd.DataFrame(predictions4b).to_parquet('data/publicsphere/predictions4b.parquet', index=False)

In [150]:
#save output:
pd.DataFrame(predictions_intra_L8b).to_parquet('data/publicsphere/predictions_intra_L8b.parquet', index=False)

In [151]:
#classify Boukes with Llama3.1:8b
pubspherepromptsrun_L8b = ['rationality_jaidka', 'incivility_jaidka',  'civility_jaidka',
 'interactivity_acknowledgement_jaidka', 'political_post', 'political_post_jaidka', 'rationality_simple2_para1', 'incivility_para1', 
 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1', 'rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1', 
 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']

predictions_L8b: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELsmall, options=options_low)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_L8b
}

classifying civility_jaidka: 100%|██████████| 3132/3132 [07:53<00:00,  6.61it/s]
classifying incivility_jaidka: 100%|██████████| 3132/3132 [07:54<00:00,  6.60it/s]
classifying incivility_para1: 100%|██████████| 3132/3132 [15:49<00:00,  3.30it/s]
classifying incivility_para2: 100%|██████████| 3132/3132 [07:48<00:00,  6.68it/s]
classifying incivility_simpa1: 100%|██████████| 3132/3132 [07:37<00:00,  6.85it/s]
classifying reciprocity_jaidka: 100%|██████████| 3132/3132 [07:54<00:00,  6.61it/s]
classifying interactivity_acknowledgement_simple_para1: 100%|██████████| 3132/3132 [07:33<00:00,  6.91it/s]
classifying interactivity_acknowledgement_simple_para2: 100%|██████████| 3132/3132 [22:28<00:00,  2.32it/s]
classifying interactivity_acknowledgement_simple_simpa1: 100%|██████████| 3132/3132 [07:57<00:00,  6.56it/s]
classifying political_ideology_US_para1: 100%|██████████| 3132/3132 [10:13<00:00,  5.10it/s]
classifying political_ideology_US_para2: 100%|██████████| 3132/3132 [10:18<00:00,  5.07

In [152]:
#save output:
pd.DataFrame(predictions_L8b).to_parquet('data/publicsphere/predictions_L8b.parquet', index=False)

In [153]:
#run prompts of concepts annotated in Jaidka2024 for Jaidka2024 data with Llama3.3:70b:
pubspherepromptsrun_jaidka = ['rationality_simple2', 'rationality_jaidka', 'incivility_simple2', 'incivility_jaidka',  'civility_jaidka',
 'interactivity_acknowledgement_simple', 'interactivity_acknowledgement_jaidka',  'political_post', 'political_post_jaidka']

predictions_jaidka: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (jaidka["message"], model=MODEL33large, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_jaidka
}

classifying civility_jaidka: 100%|██████████| 5585/5585 [27:00<00:00,  3.45it/s]
classifying incivility_jaidka: 100%|██████████| 5585/5585 [27:11<00:00,  3.42it/s]
classifying incivility_simple2: 100%|██████████| 5585/5585 [26:39<00:00,  3.49it/s]
classifying reciprocity_jaidka: 100%|██████████| 5585/5585 [26:44<00:00,  3.48it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 5585/5585 [26:39<00:00,  3.49it/s]
classifying political_post: 100%|██████████| 5585/5585 [26:43<00:00,  3.48it/s]
classifying political_post_jaidka: 100%|██████████| 5585/5585 [26:39<00:00,  3.49it/s]
classifying rationality_jaidka: 100%|██████████| 5585/5585 [26:45<00:00,  3.48it/s]
classifying rationality_simple2: 100%|██████████| 5585/5585 [26:39<00:00,  3.49it/s]


In [196]:
#repeat for Jaidka2024 data with Llama3.1:8b, for the prompts not already classified earlier for this model:
pubspherepromptsrun_jaidka_L8b = ['incivility_simple2', 'incivility_jaidka',  'civility_jaidka',
 'interactivity_acknowledgement_simple', 'interactivity_acknowledgement_jaidka',  'political_post', 'political_post_jaidka']

predictions_jaidka_L8b: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (jaidka["message"], model=MODELsmall, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_jaidka_L8b
}

classifying civility_jaidka: 100%|██████████| 5585/5585 [13:25<00:00,  6.93it/s]
classifying incivility_jaidka: 100%|██████████| 5585/5585 [13:52<00:00,  6.71it/s]
classifying incivility_simple2: 100%|██████████| 5585/5585 [13:25<00:00,  6.93it/s]
classifying reciprocity_jaidka: 100%|██████████| 5585/5585 [13:38<00:00,  6.82it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 5585/5585 [22:20<00:00,  4.17it/s]
classifying political_post: 100%|██████████| 5585/5585 [13:28<00:00,  6.91it/s]
classifying political_post_jaidka: 100%|██████████| 5585/5585 [13:05<00:00,  7.11it/s]


In [198]:
#save output:
pd.DataFrame(predictions_jaidka_L8b).to_parquet('data/publicsphere/predictions_jaidka_L8b.parquet')

In [None]:
#note that lower performance of models on Jaidka2024 is noteworthy since these annotations are openly available and potentially part of the training data for these models, so they should perform better than on Boukes2024 data.

In [255]:
#compare to annotations of Qwen2.5:72b:
#pubspherepromptsrun_Q = ['rationality_simple2', 'incivility_simple2', 'rationality_jaidka', 'incivility_jaidka',  'civility_jaidka', 'interactivity_acknowledgement_simple',
# 'interactivity_acknowledgement_jaidka', 'political_post', 'political_post_jaidka', 'political_ideology_US', 'rationality_simple2_para1', 'incivility_para1', 
# 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1', 'rationality_simple2_para2', 'incivility_para2', 
# 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1', 
# 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']

pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_Q72b_boukes: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELQ25_72b, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [16:56<00:00,  3.08it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [17:03<00:00,  3.06it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [17:01<00:00,  3.07it/s]
classifying political_post: 100%|██████████| 3132/3132 [17:09<00:00,  3.04it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [16:54<00:00,  3.09it/s]


In [256]:
#save output:
pd.DataFrame(predictions_Q72b_boukes).to_parquet('data/publicsphere/predictions_Q72b_boukes.parquet')

In [299]:
#compare to annotations of Qwen2.5:72b:
pubspherepromptsrun_Q2 = [ 'rationality_jaidka', 'incivility_jaidka',  'civility_jaidka', 
 'interactivity_acknowledgement_jaidka', 'political_post_jaidka', 'rationality_simple2_para1', 'incivility_para1', 
 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1']
pubspherepromptsrun_Q3 = ['rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1', 
 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']


predictions_Q72b_boukes3: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELQ25_72b, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q3
}

classifying incivility_para2:   0%|          | 0/3132 [00:00<?, ?it/s]

classifying incivility_para2: 100%|██████████| 3132/3132 [17:10<00:00,  3.04it/s]
classifying incivility_simpa1: 100%|██████████| 3132/3132 [16:54<00:00,  3.09it/s]
classifying interactivity_acknowledgement_simple_para2: 100%|██████████| 3132/3132 [16:56<00:00,  3.08it/s]
classifying interactivity_acknowledgement_simple_simpa1: 100%|██████████| 3132/3132 [16:53<00:00,  3.09it/s]  
classifying political_ideology_US_para2: 100%|██████████| 3132/3132 [16:51<00:00,  3.10it/s]
classifying political_ideology_US_simpa1: 100%|██████████| 3132/3132 [16:47<00:00,  3.11it/s]
classifying political_post_para2: 100%|██████████| 3132/3132 [16:56<00:00,  3.08it/s]
classifying political_post_simpa1: 100%|██████████| 3132/3132 [16:57<00:00,  3.08it/s]
classifying rationality_simple2_para2: 100%|██████████| 3132/3132 [16:51<00:00,  3.10it/s]
classifying rationality_simple2_simpa1: 100%|██████████| 3132/3132 [16:48<00:00,  3.11it/s]


In [None]:
#save output:
pd.DataFrame(predictions_Q72b_boukes3).to_parquet('data/publicsphere/predictions_Q72b_boukes3.parquet')

In [326]:
#compare to annotations of Qwen2.5:72b:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_Q72b_boukes_seed2: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELQ25_72b, options=options_zero_seed2)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [16:39<00:00,  3.13it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [16:45<00:00,  3.12it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [16:42<00:00,  3.12it/s]
classifying political_post: 100%|██████████| 3132/3132 [16:50<00:00,  3.10it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [16:44<00:00,  3.12it/s]


In [327]:
#save output:
pd.DataFrame(predictions_Q72b_boukes_seed2).to_parquet('data/publicsphere/predictions_Q72b_boukes_seed2.parquet')

In [328]:
#compare to annotations of Llama3.3:70b with zero temperature:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_boukes_L33_70b_zero: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODEL33large, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [15:39<00:00,  3.33it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [15:20<00:00,  3.40it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [15:14<00:00,  3.43it/s]
classifying political_post: 100%|██████████| 3132/3132 [15:15<00:00,  3.42it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [15:14<00:00,  3.42it/s]


In [13]:
#join to the dataset:   
for _, preds in predictions_intra_L8b.items():
    print(preds.value_counts())
    print("-" * 42)
    dataset_w_pred_2 = dataset_w_pred_2.join(preds, rsuffix='_L31_8b_zero')
    dataset_w_pred_anon = dataset_w_pred_anon.join(preds, rsuffix='_L31_8b_zero')

incivility_jaidka
No     2061
Yes    1056
Name: count, dtype: int64
------------------------------------------
rationality_jaidka
Yes    2212
No      536
Name: count, dtype: int64
------------------------------------------


In [329]:
#save output:
pd.DataFrame(predictions_boukes_L33_70b_zero).to_parquet('data/publicsphere/predictions_boukes_L33_70b_zero.parquet')

In [330]:
#compare to annotations of Llama3.3:70b with zero temperature and different seed:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_boukes_L33_70b_zero_seed2: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODEL33large, options=options_zero_seed2)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [15:28<00:00,  3.37it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [15:21<00:00,  3.40it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [15:21<00:00,  3.40it/s]
classifying political_post: 100%|██████████| 3132/3132 [15:20<00:00,  3.40it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [15:16<00:00,  3.42it/s]


In [331]:
#save output:
pd.DataFrame(predictions_boukes_L33_70b_zero_seed2).to_parquet('data/publicsphere/predictions_boukes_L33_70b_zero_seed2.parquet')

In [332]:
#compare to annotations of Llama3.1:8b with zero temperature:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_boukes_L31_8b_zero: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELsmall, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [07:56<00:00,  6.57it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [18:04<00:00,  2.89it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [08:24<00:00,  6.20it/s]
classifying political_post: 100%|██████████| 3132/3132 [07:53<00:00,  6.61it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [07:45<00:00,  6.72it/s]


In [333]:
#save output:
pd.DataFrame(predictions_boukes_L31_8b_zero).to_parquet('data/publicsphere/predictions_boukes_L31_8b_zero.parquet')

In [334]:
#compare to annotations of Llama3.1:8b with zero temperature and different seed:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_boukes_L31_8b_zero_seed2: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELsmall, options=options_zero_seed2)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [07:46<00:00,  6.72it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [17:22<00:00,  3.00it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [08:27<00:00,  6.18it/s]
classifying political_post: 100%|██████████| 3132/3132 [07:53<00:00,  6.62it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [07:46<00:00,  6.71it/s]


In [335]:
#save output:
pd.DataFrame(predictions_boukes_L31_8b_zero_seed2).to_parquet('data/publicsphere/predictions_boukes_L31_8b_zero_seed2.parquet')

In [336]:
#compare to annotations of Qwen2.5:72b with low temperature:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_Q72b_boukes_low: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELQ25_72b, options=options_low)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [17:03<00:00,  3.06it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [16:56<00:00,  3.08it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [16:54<00:00,  3.09it/s]
classifying political_post: 100%|██████████| 3132/3132 [16:58<00:00,  3.08it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [16:56<00:00,  3.08it/s]


In [337]:
#save output:
pd.DataFrame(predictions_Q72b_boukes_low).to_parquet('data/publicsphere/predictions_Q72b_boukes_low.parquet')

In [338]:
#compare to annotations of Qwen2.5:72b with low temperature and different seed:
pubspherepromptsrun_Q1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_post', 'political_ideology_US']

predictions_Q72b_boukes_low_seed2: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (boukesTYT["commentText"], model=MODELQ25_72b, options=options_low_seed2)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_Q1
}

classifying incivility_simple2: 100%|██████████| 3132/3132 [16:55<00:00,  3.08it/s]
classifying interactivity_acknowledgement_simple: 100%|██████████| 3132/3132 [16:57<00:00,  3.08it/s]
classifying political_ideology_US: 100%|██████████| 3132/3132 [16:55<00:00,  3.09it/s]
classifying political_post: 100%|██████████| 3132/3132 [17:03<00:00,  3.06it/s]
classifying rationality_simple2: 100%|██████████| 3132/3132 [16:59<00:00,  3.07it/s]


In [339]:
#save output:
pd.DataFrame(predictions_Q72b_boukes_low_seed2).to_parquet('data/publicsphere/predictions_Q72b_boukes_low_seed2.parquet')

In [264]:
#save output:
pd.DataFrame(predictions_Q72b_boukes2).to_parquet('data/publicsphere/predictions_Q72b_boukes2.parquet')

In [None]:

requests.post(
                            'https://inf.cl.uni-trier.de/',
                            json={
                                'model': MODELDSR1_7b,
                                'system': "Does this comment provide rational analysis?\nInstructions: Code Yes (1) if the comment includes:\nContext or background,\nEvidence (facts, sources, authorities),\nReasoning or structured argument.\nCode No (0) if these are absent\n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: {text}\\nClass:",
                                'prompt': f"Does this comment provide rational analysis?\nInstructions: Code Yes (1) if the comment includes:\nContext or background,\nEvidence (facts, sources, authorities),\nReasoning or structured argument.\nCode No (0) if these are absent\n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: {boukesTYT["commentText"][0]}\\nClass:",
                                'options': options_DS_zero
                                }).json()

{'id': 'ff32c61e-49b7-11f0-ac8e-7bac359924a9',
 'timestamp': '2025-06-15T07:11:45.784760',
 'model': 'deepseek-r1:7b',
 'prompt': [{'role': 'system',
   'content': 'Does this comment provide rational analysis?\nInstructions: Code Yes (1) if the comment includes:\nContext or background,\nEvidence (facts, sources, authorities),\nReasoning or structured argument.\nCode No (0) if these are absent\n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: {text}\\nClass:'},
  {'role': 'user',
   'content': 'Does this comment provide rational analysis?\nInstructions: Code Yes (1) if the comment includes:\nContext or background,\nEvidence (facts, sources, authorities),\nReasoning or structured argument.\nCode No (0) if these are absent\n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: sad\\nClass:'}],
 'response': '<think>\nOkay, so I need to figure out whether the comment "sad" provides a rational analysis based on the given instructions. 

In [None]:
#deepseek-r1:7b only returns missing values, since it insists on yielding long explanations

In [242]:
pubspherepromptsrun_DStest = ['rationality_simple2', 'incivility_simple2']

def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)

results = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubspherepromptsrun_DStest: 
        template = load_json(path).get('template')
        for index, row in tqdm.tqdm(boukesTYT[:2].iterrows()):
            try:
                response = requests.post(
                    'https://inf.cl.uni-trier.de/',
                    json={
                        'model': MODELDSR1_7b,
                        'system': template,
                        'prompt': 'do not print thinking steps, only class labels'+template.format(text=row['commentText']),
                        'options': options_DS_zero
                    }
                ).json()
                # Extract the annotation
                annotation = response['response'].split('<think>')[-1].split('</think>')[-1].strip().split('\n')[-1].strip()
                results.append({
                    'Mark_ID': row['Mark_ID'],
                    'label': label,
                    'annotation': annotation
                })
            except Exception as e:
                print(f"Error for Mark_ID {row['Mark_ID']} and label {label}: {e}")

# Convert to DataFrame and pivot
df_results = pd.DataFrame(results)
test = df_results.pivot(index='Mark_ID', columns='label', values='annotation').reset_index()

# Now df_pivot has one row per Mark_ID and one column per label
test.head()


2it [00:08,  4.43s/it]
2it [00:06,  3.35s/it]


label,Mark_ID,incivility_simple2,rationality_simple2
0,119,Class: 0,Class: 0
1,282,Class: 1,Class: 0


This is not workable, since results still contain non-parseable output tokens, default back to qwen2.5

In [324]:
#compare to annotations of Qwen2.5:72b for Jaidka2024 data:
jaidkapromptsrun_Q72b2 = ['rationality_simple2']
#, 'rationality_jaidka', 'incivility_simple2', 'incivility_jaidka',  'civility_jaidka',
# 'interactivity_acknowledgement_simple', 'interactivity_acknowledgement_jaidka',  'political_post', 'political_post_jaidka']

predictions_Q72b_jaidka2: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (jaidka["message"], model=MODELQ25_72b, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in jaidkapromptsrun_Q72b2
}

classifying rationality_simple2: 100%|██████████| 5585/5585 [29:04<00:00,  3.20it/s]


In [325]:
#save output:
pd.DataFrame(predictions_Q72b_jaidka2).to_parquet('data/publicsphere/predictions_Q72b_jaidka2.parquet', index=False)

In [314]:
#open data:
Q72b_jaidka = pd.read_parquet('data/publicsphere/predictions_Q72b_jaidka.parquet')

In [315]:
Q72b_jaidka.tail()

Unnamed: 0,civility_jaidka,incivility_jaidka,incivility_simple2,interactivity_acknowledgement_jaidka,interactivity_acknowledgement_simple,political_post,political_post_jaidka,rationality_jaidka,rationality_simple2
5580,Yes,No,No,No,Yes,political,Yes,No,
5581,No,No,No,No,Yes,political,Yes,Yes,
5582,No,Yes,Yes,No,No,political,Yes,No,
5583,No,No,No,No,Yes,political,Yes,No,
5584,No,No,No,No,Yes,political,Yes,Yes,


In [276]:
pubspherepromptsrun_MH = ['political_ideology_US', 'political_ideology_US_para1']

predictions_MH_L33_70b: typing.Dict[str, np.ndarray] = {
    label: (
        src.PromptClassify
        .from_json(path)
        (MHclemm['text'], model=MODEL33large, options=options_zero)
    )
    for label, path in CFG.prompt_classify_files.items() if label in pubspherepromptsrun_MH
}

classifying political_ideology_US: 100%|██████████| 635/635 [03:04<00:00,  3.44it/s]
classifying political_ideology_US_para1: 100%|██████████| 635/635 [03:04<00:00,  3.43it/s]


In [342]:
#join to the dataset:   
for _, preds in predictions_Q72b_jaidka2.items():
    print(preds.value_counts())
    print("-" * 42)
    jaidka = jaidka.join(preds, rsuffix='_Q25_72b_zero')

rationality_simple2
No     5461
Yes     124
Name: count, dtype: int64
------------------------------------------


In [293]:
#join to the dataset:   
for _, preds in predictions_MH_Q72b.items():
    print(preds.value_counts())
    print("-" * 42)
    MHclemm = MHclemm.join(preds, rsuffix='_Q25_72b')

political_ideology_US
neutral         308
liberal         199
conservative    128
Name: count, dtype: int64
------------------------------------------
political_ideology_US_para1
neutral         333
liberal         184
conservative    117
Name: count, dtype: int64
------------------------------------------


In [291]:
#need to make sure all new columns have the proper suffix:
MHclemm = MHclemm.rename(columns={
  'political_ideology_US': 'political_ideology_US_L31_8b',
 'political_ideology_US_para1': 'political_ideology_US_para1_L31_8b',
 })

In [None]:
#save new annotations:
MHclemm.to_parquet('data/MH_BClemm_data/Ideo_Val_GPT_USA_L33_70b.parquet', index=False)

In [None]:
#L31:8b zero temperature might have a missing value problem


In [27]:
#join to the dataset:   
for _, preds in predictions_L33_zero_simpa1.items():
    print(preds.value_counts())
    print("-" * 42)
    dataset_w_pred_2 = dataset_w_pred_2.join(preds, rsuffix='_L33_70b_zero')
    dataset_w_pred_anon = dataset_w_pred_anon.join(preds, rsuffix='_L31_8b_zero')

incivility_simpa1
Yes    1975
No     1157
Name: count, dtype: int64
------------------------------------------
interactivity_acknowledgement_simple_simpa1
No     2105
Yes    1027
Name: count, dtype: int64
------------------------------------------
political_ideology_US_simpa1
neutral         1519
liberal          932
conservative     681
Name: count, dtype: int64
------------------------------------------
political_post_simpa1
political        2067
non-political    1064
Name: count, dtype: int64
------------------------------------------
rationality_simple2_simpa1
No     2654
Yes     478
Name: count, dtype: int64
------------------------------------------


In [344]:
#need to make sure all new columns have the proper suffix:
jaidka = jaidka.rename(columns={
 'rationality_simple2':'rationality_simple2_Q25_72b_zero'
 })

In [346]:
#save the results:
jaidka.to_parquet('data/jaidka2022/TwitterDeliberativePolitics2.parquet', index=False)

In [28]:
dataset_w_pred_anon

Unnamed: 0,StartDate,RecordedDate,IPAddress,Finished,Coder,ID,Mark_ID,Genre,topiccode,Platform,...,incivility_para2,interactivity_acknowledgement_simple_para2,political_ideology_US_para2,political_post_para2,rationality_simple2_para2,incivility_simpa1,interactivity_acknowledgement_simple_simpa1,political_ideology_US_simpa1,political_post_simpa1,rationality_simple2_simpa1
0,5/30/2021 13:03:17,5/30/2021 13:04:17,62.194.51.29,1,6,UgyPHwv8G0cDE6-wEgl4AaABAg.8_0ZjJKSJty8_0kXGkAd2U,119,0,0,1,...,No,No,neutral,non-political,No,No,No,neutral,non-political,No
1,10/11/2021 10:34:05,10/11/2021 10:36:46,213.127.109.191,1,6,Ugx2WXq9UdV8mPPjejJ4AaABAg.8yHCKV0Boe58yYRxEQEF45,282,1,2,1,...,Yes,No,neutral,political,No,Yes,No,neutral,political,No
2,9/9/2021 18:49:48,9/9/2021 18:51:32,213.127.110.0,1,6,1110578710648890000,372,2,4,2,...,,,,,,,,,,
3,6/6/2021 16:12:46,6/6/2021 16:16:16,213.127.76.145,1,6,UgwUPFScjJ0MCeaP2F54AaABAg.8lvp3fc9Euf8lvvgsUgEgV,769,0,0,1,...,No,Yes,neutral,political,Yes,No,Yes,neutral,political,Yes
4,6/13/2021 13:25:49,6/13/2021 13:27:28,213.127.82.232,1,6,UgwWKCWtSJdFvjGHvTp4AaABAg.8kUC5dGrQ2H8kUDRihE2f3,1206,0,0,1,...,No,No,neutral,non-political,No,No,No,neutral,non-political,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3857,8/19/2021 14:50:13,8/19/2021 14:54:28,62.194.51.29,1,6,1152219467579100000,10000695,0,4,2,...,,,,,,,,,,
3858,8/19/2021 15:10:27,8/19/2021 15:12:21,62.194.51.29,1,6,1085362296472430000,10007008,1,4,2,...,,,,,,,,,,
3859,10/6/2021 16:08:39,10/6/2021 16:10:42,213.127.113.113,1,6,UghFY3QJ6nmT_ngCoAEC.7-H0Z7--wxd8goqpaPs-bl,20000102,0,3,1,...,No,Yes,neutral,non-political,Yes,No,Yes,neutral,non-political,Yes
3860,10/15/2021 18:30:04,10/15/2021 18:35:40,213.127.109.191,1,6,UgyWabsmmnq3zam4DgZ4AaABAg,20000418,2,3,1,...,No,No,neutral,political,No,No,No,neutral,political,No


In [31]:
dataset_w_pred_anon.columns.to_list()

['StartDate',
 'RecordedDate',
 'IPAddress',
 'Finished',
 'Coder',
 'ID',
 'Mark_ID',
 'Genre',
 'topiccode',
 'Platform',
 'Anonymity',
 'Anonymity_9_TEXT',
 'codable',
 'Interaction',
 'Acknowledgement',
 'TopicRelevance',
 'Reasoning',
 'BackgroundInfo',
 'ExternalEvidence',
 'ExternalEvidence_1_TEXT',
 'Opinion',
 'disagreement',
 'Ideologicaldirection',
 'Name_calling',
 'Vulgarity',
 'Attack_reputation',
 'Question_Intelligenc',
 'All_caps_function',
 'Sarcasm_to_criticize',
 'Individual_right',
 'discrimination',
 'Invoke_violence',
 'Tone',
 'INTERACTIVITY_DUMMY',
 'RATIONALITY_DUMMY',
 'HAS_OPINION_DUMMY',
 'LIBERAL_NEUTRAL_CONSERVATIVE',
 'LIBERAL_DUMMY',
 'CONSERVATIVE_DUMMY',
 'NAMECALLING_DUMMY',
 'VULGAR_DUMMY',
 'NAMECALLING_VULGAR_DUMMY',
 'INCIVILITY_ORDINAL',
 'INCIVILITY_DUMMY',
 'INTOLERANCE_DUMMY',
 'filter_$',
 'IMPOLITENESS_DUMMY',
 'showName',
 'genre',
 'Time_comment',
 'likeCount_comment',
 'entities',
 'place',
 'retweet_count',
 'platform',
 'retweeted',
 '

In [30]:
#need to make sure all new columns have the proper suffix:
#rename the columns ['civility_jaidka', 'incivility_jaidka', 'rationality_jaidka']:
dataset_w_pred_2 = dataset_w_pred_2.rename(columns={
 'civility_jaidka': 'civility_jaidka_L33_70b_zero',
 'incivility_jaidka': 'incivility_jaidka_L33_70b_zero',
 'incivility_para1': 'incivility_para1_L33_70b_zero',
 'reciprocity_jaidka': 'reciprocity_jaidka_L33_70b_zero',
 'interactivity_acknowledgement_simple_para1': 'interactivity_acknowledgement_simple_para1_L33_70b_zero',
 'political_ideology_US_para1': 'political_ideology_US_para1_L33_70b_zero',
 'political_post_jaidka': 'political_post_jaidka_L33_70b_zero',
 'political_post_para1': 'political_post_para1_L33_70b_zero',
 'rationality_jaidka': 'rationality_jaidka_L33_70b_zero',
 'rationality_simple2_para1': 'rationality_simple2_para1_L33_70b_zero',
 'incivility_para2': 'incivility_para2_L33_70b_zero',
 'interactivity_acknowledgement_simple_para2': 'interactivity_acknowledgement_simple_para2_L33_70b_zero',
 'political_ideology_US_para2': 'political_ideology_US_para2_L33_70b_zero',
 'political_post_para2': 'political_post_para2_L33_70b_zero',
 'rationality_simple2_para2': 'rationality_simple2_para2_L33_70b_zero',
 'incivility_simpa1': 'incivility_simpa1_L33_70b_zero',
 'interactivity_acknowledgement_simple_simpa1': 'interactivity_acknowledgement_simple_simpa1_L33_70b_zero',
 'political_ideology_US_simpa1': 'political_ideology_US_simpa1_L33_70b_zero',
 'political_post_simpa1': 'political_post_simpa1_L33_70b_zero',
 'rationality_simple2_simpa1': 'rationality_simple2_simpa1_L33_70b_zero'
 })
dataset_w_pred_anon = dataset_w_pred_anon.rename(columns={
 'civility_jaidka': 'civility_jaidka_L33_70b_zero',
 'incivility_jaidka': 'incivility_jaidka_L33_70b_zero',
 'incivility_para1': 'incivility_para1_L33_70b_zero',
 'reciprocity_jaidka': 'reciprocity_jaidka_L33_70b_zero',
 'interactivity_acknowledgement_simple_para1': 'interactivity_acknowledgement_simple_para1_L33_70b_zero',
 'political_ideology_US_para1': 'political_ideology_US_para1_L33_70b_zero',
 'political_post_jaidka': 'political_post_jaidka_L33_70b_zero',
 'political_post_para1': 'political_post_para1_L33_70b_zero',
 'rationality_jaidka': 'rationality_jaidka_L33_70b_zero',
 'rationality_simple2_para1': 'rationality_simple2_para1_L33_70b_zero',
 'incivility_para2': 'incivility_para2_L33_70b_zero',
 'interactivity_acknowledgement_simple_para2': 'interactivity_acknowledgement_simple_para2_L33_70b_zero',
 'political_ideology_US_para2': 'political_ideology_US_para2_L33_70b_zero',
 'political_post_para2': 'political_post_para2_L33_70b_zero',
 'rationality_simple2_para2': 'rationality_simple2_para2_L33_70b_zero',
 'incivility_simpa1': 'incivility_simpa1_L33_70b_zero',
 'interactivity_acknowledgement_simple_simpa1': 'interactivity_acknowledgement_simple_simpa1_L33_70b_zero',
 'political_ideology_US_simpa1': 'political_ideology_US_simpa1_L33_70b_zero',
 'political_post_simpa1': 'political_post_simpa1_L33_70b_zero',
 'rationality_simple2_simpa1': 'rationality_simple2_simpa1_L33_70b_zero'
})

In [32]:
#save the results:
dataset_w_pred_2.to_json(f'{CFG.report_dir}/publicsphere.cardiff_prompt_classify_s.json', orient='records', force_ascii=False, indent=4)
dataset_w_pred_anon.to_parquet('data/publicsphere/publicsphere.cardiff_prompt_classify_anon.parquet', index=False)

In [78]:
dataset_w_pred_anon.loc[:, 'rationality_simple2_L33_70b_dum'] = dataset_w_pred_anon.loc[:, 'rationality_simple2_L33_70b'].map({"Yes": 1, "No":0}).fillna(0).astype(int)
dataset_w_pred_anon.loc[:, 'rationality_jaidka_L33_70b_dum'] = dataset_w_pred_anon.loc[:, 'rationality_jaidka_L33_70b'].map({"Yes": 1, "No":0}).fillna(0).astype(int)
dataset_w_pred_anon.loc[:, 'incivility_simple2_L33_70b_dum'] = dataset_w_pred_anon.loc[:, 'incivility_simple2_L33_70b'].map({"Yes": 1, "No":0}).fillna(0).astype(int)
dataset_w_pred_anon.loc[:, 'incivility_jaidka_L33_70b_dum'] = dataset_w_pred_anon.loc[:, 'incivility_jaidka_L33_70b'].map({"Yes": 1, "No":0}).fillna(0).astype(int)
dataset_w_pred_anon.loc[:, 'political_post_jaidka_L33_70b_dum'] = dataset_w_pred_anon.loc[:, 'political_post_jaidka_L33_70b'].map({"Yes": 1, "No":0}).fillna(0).astype(int)
dataset_w_pred_anon.loc[:, 'political_post_L33_70b_dum'] = dataset_w_pred_anon.loc[:, 'political_post_L33_70b'].map({"political": 1, "non-political":0}).fillna(0).astype(int)


In [96]:
#feasability check:
#do annotations of Llama3.3:70b correlate with gpt4o?
dataset_w_pred_anon.loc[:, ['rationality_simple2_L33_70b_dum', 'rationality_jaidka_L33_70b_dum', 'rationality_simple2_small_dum', 'rationality_simple2_gpt4o_dum', 'rationality_simple_dum', 'RATIONALITY_DUMMY']] \
    .corr(method='pearson').round(2)

Unnamed: 0,rationality_simple2_L33_70b_dum,rationality_jaidka_L33_70b_dum,rationality_simple2_small_dum,rationality_simple2_gpt4o_dum,rationality_simple_dum,RATIONALITY_DUMMY
rationality_simple2_L33_70b_dum,1.0,0.29,0.46,0.39,0.71,0.41
rationality_jaidka_L33_70b_dum,0.29,1.0,0.12,0.11,0.31,0.3
rationality_simple2_small_dum,0.46,0.12,1.0,0.54,0.42,0.25
rationality_simple2_gpt4o_dum,0.39,0.11,0.54,1.0,0.32,0.25
rationality_simple_dum,0.71,0.31,0.42,0.32,1.0,0.33
RATIONALITY_DUMMY,0.41,0.3,0.25,0.25,0.33,1.0


improved performance of L33_70b compared to L31_70b (default), and the two models also show the highest overlap

In [80]:
#and in crosstabulations:
pd.crosstab(dataset_w_pred_anon['RATIONALITY_DUMMY'], [dataset_w_pred_anon['rationality_simple2_L33_70b_dum'], dataset_w_pred_anon['rationality_simple2_small_dum']], margins=True, margins_name='Total')

rationality_simple2_L33_70b_dum,0,0,1,1,Total
rationality_simple2_small_dum,0,1,0,1,Unnamed: 5_level_1
RATIONALITY_DUMMY,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
0,2930,14,181,40,3165
1,394,1,202,100,697
Total,3324,15,383,140,3862


#L33_70b and small share 100+2930=3030 correct classifications (78%) and share 40+394=434 errors (11%) and differ on 14+1+181+202=398 errors (10%)-> they differ on 48% of errors
#L33_70b makes 2930+14+202+100=3246 correct classifications
and 394+1+181+40=616 errors = 16%
#small makes 2930+181+1+100=3212 correct classifications
and 394+14+202+40=650 errors = 17%
#we would thus expect 0.17*0.16 = only 3% overlap between errors if the models were random -> they thus do a lot better than that


In [82]:
#and in crosstabulations:
pd.crosstab(dataset_w_pred_anon['RATIONALITY_DUMMY'], [dataset_w_pred_anon['rationality_simple2_L33_70b_dum'], dataset_w_pred_anon['rationality_simple2_gpt4o_dum']], margins=True, margins_name='Total')

rationality_simple2_L33_70b_dum,0,1,1,Total
rationality_simple2_gpt4o_dum,0,0,1,Unnamed: 4_level_1
RATIONALITY_DUMMY,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,2944,205,16,3165
1,395,230,72,697
Total,3339,435,88,3862


#L33_70b and gpt4o share only 16 errors (0%) and differ on 205+230=435 errors (11%) -> they differ on 96% of errors
#L33_70b and gpt4o correctly classify 2944+72=3016 (78%)
#so L33_70b shares the same share of correct classifications in combination with small and gpt4o, but errors overlap much more with small than with gpt4o, which makes sense, overlap between errors of L33_70b and gpt4o is equal to chance.
#this indicates that these two models don't agree on which manual coding are actually coding errors -> together they only mark 16 comments as potentially wrong coded even though they haven't seen our annotations in their training data, so should be independently judging the rationality of the comments.

In [76]:
#do annotations of Llama3.3:70b correlate with gpt4o?
dataset_w_pred_anon.loc[:, ['incivility_simple2_L33_70b_dum', 'incivility_jaidka_L33_70b_dum', 'incivility_simple2_small_dum', 'incivility_simple2_gpt4o_dum', 'INCIVILITY_DUMMY']] \
    .corr(method='pearson').round(2)

Unnamed: 0,incivility_simple2_L33_70b_dum,incivility_jaidka_L33_70b_dum,incivility_simple2_small_dum,incivility_simple2_gpt4o_dum,INCIVILITY_DUMMY
incivility_simple2_L33_70b_dum,1.0,0.76,0.49,0.58,0.54
incivility_jaidka_L33_70b_dum,0.76,1.0,0.53,0.63,0.51
incivility_simple2_small_dum,0.49,0.53,1.0,0.68,0.48
incivility_simple2_gpt4o_dum,0.58,0.63,0.68,1.0,0.55
INCIVILITY_DUMMY,0.54,0.51,0.48,0.55,1.0


In [79]:
#do annotations of Llama3.3:70b correlate with gpt4o?
dataset_w_pred_anon.loc[:, ['political_post_L33_70b_dum', 'political_post_jaidka_L33_70b_dum', 'TopicRelevance']] \
    .corr(method='pearson').round(2)

Unnamed: 0,political_post_L33_70b_dum,political_post_jaidka_L33_70b_dum,TopicRelevance
political_post_L33_70b_dum,1.0,0.49,0.58
political_post_jaidka_L33_70b_dum,0.49,1.0,0.38
TopicRelevance,0.58,0.38,1.0


In [None]:
#what is the infuence of temperature on the results of intraprompt annotation reliability?

#Note the logic of our comparisons:
#we compare the results of the same prompt with different models, and different seeds, to see if the model and/or seed influences the results.
#we use similar options per model, but the options are not the same for all models, they differ in temperature and seed (since the same seed might mean something different for different models).
#but since we compare intraprompt annotation reliability for the same prompt with different seeds, the difference in temperature is not a problem, the annotation might differ per output of the model for that seed/temperature, but the difference with another seed should be minimal.
#anyway we can test the origins of intraprompt reliability by comparing the result of the same prompt with the same seed and low temperature, different seed and low temperature and same seed and zero temperature
#if it turns out that temperature does have a larger influence, but still the influence of different models or prompts is larger, we can still conclude that the model and prompt are more important than the temperature.
#stronger still a higher temperature intraprompt benchmark is harder to beat especially for the simpa-prompts
#for the between model comparisons temperature per model should not be a problem, since it will only vary the result of the model, not the comparison between models, only downside of low temperature is potential slightly lower reproducibility of the exact results and potetially slightly lower performance due to less creativity, but Barry ea 2025 does not seem to suggest this is the case for such low temperatures.

In [None]:
#use GPT4o to compile paraphrased prompts::
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    

#pubspherepromptsrunall = ['rationality_simple2', 'rationality_jaidka', 'incivility_simple2', 'incivility_jaidka',  'civility_jaidka', 'interactivity_simple2', 'interactivity_acknowledgement_jaidka', 'political_ideology_US', 'political_post', 'political_post_jaidka' ]  

pubsphereparaphraserun1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple2', 'political_ideology_US', 'political_post'] 
pubsphereparaphraserun2 = ['interactivity_acknowledgement_simple']


chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubsphereparaphraserun2: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        retry_count = 0
        max_retries = 5
        while retry_count < max_retries:
            try: 
                response = requests.post(
                        url=api_endpoint,
                        headers=headers,
                        json={
                            'model': MODELgpt4o,
                            'messages': [
                                {
                                    "role": "system",
                                    "content": 'You restate a prompt in different words while preserving its meaning and formatting. The prompt is: "{}"'
                                },
                                {
                                    "role": "user",
                                    "content": 'You restate a prompt in different words while preserving its meaning and formatting. The prompt is: "{}"'.format(template)
                                }
                            ],
                            'temperature': 0.7,
                            'top_p': 0.8,  
                            'seed': SEED43
                        }
                    )  
                if response.status_code == 200:
                    data_response = response.json()
                    chunked_result.append(
                        pd.DataFrame(
                            data=[[label, SEED43, data_response["choices"][0]["message"]["content"]]],                                
                            columns=['prompt_label', 'seed', 'paraphrased_prompt']
                        )
                    )
                    break  # Exit retry loop for this label
                elif response.status_code == 429:
                    retry_count += 1
                    wait_time = 20
                    print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                elif response.status_code == 500:
                    retry_count += 1
                    wait_time = 20
                    print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                else:
                    print(f"Failed to connect to API. Status code: {response.status_code}")
                    print(response.text)
                    # Optionally append a row with error info
                    chunked_result.append(
                        pd.DataFrame(
                            data=[[label, f"ERROR: {response.status_code}"]],
                            columns=['prompt_label', 'paraphrased_prompt']
                        )
                    )
                    break
            except requests.exceptions.RequestException as e:   
                print(f"Failed to connect to API: {e}")
                retry_count += 1
                wait_time = 60
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
        else:
            # If all retries failed, append a row with error info
            chunked_result.append(
                pd.DataFrame(
                    data=[[label, "ERROR: Max retries exceeded"]],
                    columns=['prompt_label', 'paraphrased_prompt']
                )
            )

para1 = pd.concat(chunked_result, ignore_index=True)
print(para1)


                           prompt_label  seed  \
0  interactivity_acknowledgement_simple    43   

                                                                                    paraphrased_prompt  
0  Determine if this remark recognizes or replies to a different user's remark.\nGuidelines: Mark Y...  


This rewording appears as dissimilar as the Jaidka:
-Boukes: "Does this comment provide rational analysis?\nInstructions: Code Yes (1) if the comment includes:\nContext or background,\nEvidence (facts, sources, authorities),\nReasoning or structured argument.\nCode No (0) if these are absent\n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: {text}\\nClass:",
   
-Jaidka: "This tweet is a reply on Twitter (i.e., a Tweet) to a United States member of the Congress. Please classify this tweet according to whether it has a justification. Read the tweet. Determine which category best describes the tweet. Code YES (1): If this tweet contains personal feelings or experiences. Also code YES (1) If this tweet contains facts, links or evidence from other sources. Code NO (0) If this tweet does not offer a justification. \n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: {text}\\nClass:",

-GPT4o_temp0.7_topp0.8_seed1: 'Evaluate whether the comment offers logical examination. \n\nGuidelines: Assign Yes (1) if the comment contains:\n- Context or background information,\n- Supporting evidence (facts, references, experts),\n- Logical reasoning or organized argumentation.\n\nAssign No (0) if these elements are missing.\n\nReply with just the anticipated category (0 or 1) for the request.\n\nText: {text}\nClass:',

-GPT4o_temp0.7_topp0.8_seed2:  'Assess whether the comment offers logical evaluation.\nGuidelines: Assign Yes (1) if the comment contains:\nContext or background information,\nSupporting evidence (facts, references, expert opinions),\nLogical reasoning or organized argumentation.\nAssign No (0) if these elements are missing.\n\\n\\nProvide solely the anticipated classification (0 or 1) of the inquiry.\\n\\nText: {text}\\nClass:',
   

-GPT4o_temp0.5_topp0.5: 'Evaluate whether the comment offers logical examination.\nInstructions: Assign Yes (1) if the comment contains:\nContext or background information,\nEvidence (facts, references, experts),\nLogical reasoning or organized argument.\nAssign No (0) if these elements are missing.\n\\n\\nReply with just the anticipated category (0 or 1) of the inquiry.\\n\\nText: {text}\\nClass:',
    

-> maybe we can generate two paraphrases per boukes prompt with different seeds, which look like they would be rather similar to each other to get a grasp of very small prompt differences, compared to different codebooks or paraphrases of a prompt.
also helps to address the potential critizism that our prompts favor Llama, since most prompt engineering was done on lama (but prompt wording of both simple and paraphrased prompts are based on GPT4o)

    
      

In [26]:
#load the existing paraphrased prompts:
existing_paraphrases = pd.read_parquet(f'{CFG.report_dir}/Boukes_paraphrased_prompts.parquet')

In [27]:
#test join:
pd.concat([existing_paraphrases, para1], ignore_index=True)


Unnamed: 0,prompt_label,seed,paraphrased_prompt
0,incivility_simple2,43,"Is this comment uncivil?\nGuidelines: Mark Yes (1) if the comment contains name-calling, insults..."
1,interactivity_acknowledgement_simple2,43,"Is the comment a reference to, an acknowledgment of, or a reply to another user's remark?\nInstr..."
2,political_ideology_US,43,"Categorize the given message according to its ideological stance as liberal (0), neutral (1), or..."
3,political_post,43,Determine whether the given message should be categorized as political (1) or non-political (0)....
4,rationality_simple2,43,Assess whether the comment offers logical evaluation.\nGuidelines: Assign Yes (1) if the comment...
5,incivility_simple2,42,Determine if the comment shows incivility.\nGuidelines: Assign Yes (1) if the comment contains n...
6,interactivity_acknowledgement_simple2,42,"Determine if this comment is referencing, recognizing, or replying to another user's comment.\nG..."
7,political_ideology_US,42,"Determine whether the given text aligns with liberal views (0), is ideologically neutral (1), or..."
8,political_post,42,Determine whether the given message is political (1) or non-political (0). A message is consider...
9,rationality_simple2,42,Evaluate whether the comment offers logical examination.\nInstructions: Assign Yes (1) if the co...


In [28]:
#add the new paraphrases with the existing ones:
para1 = pd.concat([existing_paraphrases, para1], ignore_index=True)

In [29]:
#inspect the results:
print(para1)

                             prompt_label  seed  \
0                      incivility_simple2    43   
1   interactivity_acknowledgement_simple2    43   
2                   political_ideology_US    43   
3                          political_post    43   
4                     rationality_simple2    43   
5                      incivility_simple2    42   
6   interactivity_acknowledgement_simple2    42   
7                   political_ideology_US    42   
8                          political_post    42   
9                     rationality_simple2    42   
10   interactivity_acknowledgement_simple    42   
11   interactivity_acknowledgement_simple    43   

                                                                                     paraphrased_prompt  
0   Is this comment uncivil?\nGuidelines: Mark Yes (1) if the comment contains name-calling, insults...  
1   Is the comment a reference to, an acknowledgment of, or a reply to another user's remark?\nInstr...  
2   Categorize the

In [30]:
#save the paraphrased prompts to parquet file:
para1.to_parquet(f'{CFG.report_dir}/Boukes_paraphrased_prompts.parquet', index=False)
#save to json file:
para1.to_json(f'{CFG.report_dir}/Boukes_paraphrased_prompts.json', orient='records', force_ascii=False, indent=4)

In [None]:
#prompts appear still quite different between seeds, so maybe add some irrelevant difference manually, maybe in formatting or so?

In [32]:
#use GPT4o to compile slightly altered prompts:
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    

#pubspherepromptsrunall = ['rationality_simple2', 'rationality_jaidka', 'incivility_simple2', 'incivility_jaidka',  'civility_jaidka', 'interactivity_simple2', 'interactivity_acknowledgement_jaidka', 'political_ideology_US', 'political_post', 'political_post_jaidka' ]  

pubsphereparaphraserun1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_ideology_US', 'political_post'] 



chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubsphereparaphraserun1: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        retry_count = 0
        max_retries = 5
        while retry_count < max_retries:
            try: 
                response = requests.post(
                        url=api_endpoint,
                        headers=headers,
                        json={
                            'model': MODELgpt4o,
                            'messages': [
                                {
                                    "role": "system",
                                    "content": 'You restate a prompt in the same words only alter formatting. The prompt is: "{}"'
                                },
                                {
                                    "role": "user",
                                    "content": 'You restate a prompt in the same words only alter formatting. The prompt is: "{}"'.format(template)
                                }
                            ],
                            'temperature': 0.1,
                            'top_p': 0.8,  
                            'seed': SEED
                        }
                    )  
                if response.status_code == 200:
                    data_response = response.json()
                    chunked_result.append(
                        pd.DataFrame(
                            data=[[label, SEED, data_response["choices"][0]["message"]["content"]]],                                
                            columns=['prompt_label', 'seed', 'paraphrased_prompt']
                        )
                    )
                    break  # Exit retry loop for this label
                elif response.status_code == 429:
                    retry_count += 1
                    wait_time = 20
                    print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                elif response.status_code == 500:
                    retry_count += 1
                    wait_time = 20
                    print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                else:
                    print(f"Failed to connect to API. Status code: {response.status_code}")
                    print(response.text)
                    # Optionally append a row with error info
                    chunked_result.append(
                        pd.DataFrame(
                            data=[[label, f"ERROR: {response.status_code}"]],
                            columns=['prompt_label', 'paraphrased_prompt']
                        )
                    )
                    break
            except requests.exceptions.RequestException as e:   
                print(f"Failed to connect to API: {e}")
                retry_count += 1
                wait_time = 60
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
        else:
            # If all retries failed, append a row with error info
            chunked_result.append(
                pd.DataFrame(
                    data=[[label, "ERROR: Max retries exceeded"]],
                    columns=['prompt_label', 'similarized_prompt', 'seed']
                )
            )

simpa1 = pd.concat(chunked_result, ignore_index=True)
print(simpa1)


                           prompt_label  seed  \
0                    incivility_simple2    42   
1  interactivity_acknowledgement_simple    42   
2                 political_ideology_US    42   
3                        political_post    42   
4                   rationality_simple2    42   

                                                                                    paraphrased_prompt  
0  Does this comment display incivility?  \nInstructions: Code Yes (1) if the comment includes name...  
1  Does this comment acknowledge or respond to another user's comment?  \nInstructions:  \nCode Yes...  
2  Classify the following message as ideologically liberal (0), ideologically neutral (1), or ideol...  
3  Classify the following message as following messages as political (1) or non-political (0). Poli...  
4  Does this comment provide rational analysis?  \nInstructions: Code Yes (1) if the comment includ...  


In [34]:
print(simpa1.paraphrased_prompt.unique())

['Does this comment display incivility?  \nInstructions: Code Yes (1) if the comment includes name-calling, insults, inflammatory language, sarcasm, shouting (ALL CAPS), vulgarity, discrimination, threats, or restrictions on rights. Code No (0) if none of these are present.  \n\nRespond with only the predicted class (0 or 1) of the request.  \n\nText: {text}  \nClass:'
 "Does this comment acknowledge or respond to another user's comment?  \nInstructions:  \nCode Yes (1) if the comment shows agreement or disagreement with a specific user's statement, often signaled by a username or phrases like ‘Yes,’ ‘No,’ or ‘I agree.’  \nCode No (0) if it lacks a clear acknowledgment or is only an insult.  \n  \nRespond with only the predicted class (0 or 1) of the request.  \n  \nText: {text}  \nClass:"
 'Classify the following message as ideologically liberal (0), ideologically neutral (1), or ideologically conservative (2). Ideology here is defined in the context of the US political system. Messag

new:      'Does this comment provide rational analysis?  \nInstructions: Code Yes (1) if the comment includes:  \n- Context or background,  \n- Evidence (facts, sources, authorities),  \n- Reasoning or structured argument.  \nCode No (0) if these are absent  \n\nRespond with only the predicted class (0 or 1) of the request.  \n\nText: {text}  \nClass:'

original: "Does this comment provide rational analysis?\nInstructions: Code Yes (1) if the comment includes:\nContext or background,\nEvidence (facts, sources, authorities),\nReasoning or structured argument.\nCode No (0) if these are absent\n\\n\\nRespond with only the predicted class (0 or 1) of the request.\\n\\nText: {text}\\nClass:",

In [35]:
#save the paraphrased prompts to parquet file:
simpa1.to_parquet(f'{CFG.report_dir}/Boukes_similarized_prompts.parquet', index=False)
#save to json file:
simpa1.to_json(f'{CFG.report_dir}/Boukes_similarized_prompts.json', orient='records', force_ascii=False, indent=4)

we now have annotations for boukes, para1, para2, simpa1 and Jaidka prompts for L33_70b (low), L31_8b (low), Q25_72b (zero)
and for boukes only L33_70b_seed2, L33_70b_seed2_run2, L31_8b_seed2, DS7b (all None), Q25_72b_seed2 (zero), L33_70b_zero, L33_70b_zero_seed2, L31_8b_zero, L31_8b_zero_seed2, Q25_72b_low, Q25_72b_low_seed2, L31_70b_low, gpt4o_zero, gpt4Turbo_zero

for Jaidka we have annotations for boukes and Jaidka prompts for L31_8b_zero, L33_70b_zero, Q25_72b_zero, gpt4o_zero

For MHClemm we have ideology and ideology para1 for L31_8b_zero, L33_70b_zero, Q25_72b_zero

#TODO: 
-gpt4o for  para1, para2, simpa1 and Jaidka prompts on boukes
-gpt4o seed2 for boukes
-gpt4o MHClemm ideology and ideology para1

-change variable label to include temperature low/zero
-make dummy variables for all annotations

In [93]:
#gpt4o for  para1, para2, simpa1 and Jaidka prompts on boukes

#check GPT4o:
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
pubspheregpt4orun1 = ['rationality_jaidka']
pubspheregpt4orun2 = ['incivility_jaidka',  'civility_jaidka',  
 'interactivity_acknowledgement_jaidka', 'political_post_jaidka']

pubspheregpt4orun3 = [ 'political_post', 'rationality_simple2_para1', 'incivility_para1', 
 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1', 'rationality_simple2_para2', 'incivility_para2', 
 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1',
 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1']

pubspheregpt4orun4 = [ 'political_post']
pubspheregpt4orun5 = [ 'rationality_simple2_para1', 'political_ideology_US_para1', 'political_post_para1', 'rationality_simple2_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1',
  'political_ideology_US_simpa1', 'political_post_simpa1']




chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubspheregpt4orun5: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        for index, row in tqdm.tqdm(boukesTYT["commentText"].items()):
            retry_count = 0
            max_retries = 5
            while retry_count < max_retries:
                try: 
                    response = requests.post(
                            url=api_endpoint,
                            headers=headers,
                            json={
                                'model': MODELgpt4o,
                                'messages': [
                                    {
                                        "role": "system",
                                        "content": template
                                    },
                                    {
                                        "role": "user",
                                        "content": template.format(text=row)
                                    }
                                ],
                                'temperature': temperature_0,  
                                'seed': SEED,
                                "max_tokens": MAX15
                            }
                        )  

                    if response.status_code == 200:
                        data_response = response.json()
                        chunked_result.append(
                        pd.DataFrame(
                            data=[[index, classes.get(data_response["choices"][0]["message"]["content"], None)]],                                
                            columns=['index', label]
                            )
                        )
                        break  # Exit the retry loop on success
                    elif response.status_code == 429:
                        retry_count += 1
                        wait_time = 20
                        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    elif response.status_code == 500:
                        retry_count += 1
                        wait_time = 20
                        print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        print(f"Failed to connect to API. Status code: {response.status_code}")
                        print(response.text)
                        break
                except requests.exceptions.RequestException as e:   
                    print(f"Failed to connect to API: {e}")
                    retry_count += 1
                    wait_time = 60
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)                 
                
            #Add a pause of 1 minute after every 70 iterations
            #if (index + 1) % 250 == 0:
            #    print("Pausing for 10 seconds...")
            #    time.sleep(10)

predictions_boukes_GPT4o_zero5 = pd.concat(chunked_result, ignore_index=True)
print(predictions_boukes_GPT4o_zero5) 



3132it [17:44,  2.94it/s]
3132it [17:52,  2.92it/s]
3132it [17:14,  3.03it/s]
3132it [17:24,  3.00it/s]
3132it [20:00,  2.61it/s]
3132it [19:49,  2.63it/s]
3132it [20:26,  2.55it/s]
3132it [17:19,  3.01it/s]
3132it [16:52,  3.09it/s]
3132it [16:58,  3.08it/s]


       index interactivity_acknowledgement_simpa1 political_ideology_US_para1  \
0          0                                   No                         NaN   
1          1                                   No                         NaN   
2          3                                   No                         NaN   
3          4                                   No                         NaN   
4          5                                   No                         NaN   
...      ...                                  ...                         ...   
31315   3854                                  NaN                         NaN   
31316   3855                                  NaN                         NaN   
31317   3859                                  NaN                         NaN   
31318   3860                                  NaN                         NaN   
31319   3861                                  NaN                         NaN   

      political_ideology_US

In [94]:
#save output:
predictions_boukes_GPT4o_zero5.to_parquet('data/publicsphere/predictions_boukes_GPT4o_zero5.parquet', index=False)

In [None]:
#join to the dataset:   
incivility_jaidka_gpt4o_system = predictions_boukes_GPT4o_zero2.loc[:, ["index", "incivility_jaidka"]].dropna()
civility_jaidka_gpt4o_system = predictions_boukes_GPT4o_zero2.loc[:, ["index", "civility_jaidka"]].dropna()
interactivity_acknowledgement_jaidka_gpt4o_system = predictions_boukes_GPT4o_zero2.loc[:, ["index", "interactivity_acknowledgement_jaidka"]].dropna()
political_post_jaidka_gpt4o_system = predictions_boukes_GPT4o_zero2.loc[:, ["index", "political_post_jaidka"]].dropna()

incivility_jaidka_gpt4o_system.set_index('index', drop=True, inplace=True)
civility_jaidka_gpt4o_system.set_index('index', drop=True, inplace=True)
interactivity_acknowledgement_jaidka_gpt4o_system.set_index('index', drop=True, inplace=True)
political_post_jaidka_gpt4o_system.set_index('index', drop=True, inplace=True)

dataset_w_pred_anon = dataset_w_pred_anon.join(incivility_jaidka_gpt4o_system, rsuffix='_gpt4o_system_zero').join(civility_jaidka_gpt4o_system, rsuffix='_gpt4o_system_zero').join(interactivity_acknowledgement_jaidka_gpt4o_system, rsuffix='_gpt4o_system_zero').join(political_post_jaidka_gpt4o_system, rsuffix='_gpt4o_system_zero')  

In [97]:
#join 'political_post', 'rationality_simple2_para1', 'incivility_para1', 
# 'interactivity_acknowledgement_para1', 'political_ideology_US_para1', 'political_post_para1', 'rationality_simple2_para2', 'incivility_para2', 
# 'interactivity_acknowledgement_para2', 'political_ideology_US_para2', 'political_post_para2', 'interactivity_acknowledgement_simpa1', 'rationality_simple2_simpa1',
# 'incivility_simpa1', 'political_ideology_US_simpa1', 'political_post_simpa1'
# to the dataset:
for label in pubspheregpt4orun5:
    preds = predictions_boukes_GPT4o_zero5.loc[:, ["index", label]].dropna()
    preds.set_index('index', drop=True, inplace=True)
    dataset_w_pred_anon = dataset_w_pred_anon.join(preds, rsuffix=f'_gpt4o_system_zero')

In [98]:
#rename the columns:
dataset_w_pred_anon = dataset_w_pred_anon.rename(columns={
    'rationality_simple2_para1': 'rationality_simple2_para1_gpt4o_system_zero',
       'political_ideology_US_para1':'political_ideology_US_para1_gpt4o_system_zero' , 
 'political_post_para1': 'political_post_para1_gpt4o_system_zero' , 
 'rationality_simple2_para2':'rationality_simple2_para2_gpt4o_system_zero', 
  'political_ideology_US_para2':'political_ideology_US_para2_gpt4o_system_zero', 'political_post_para2':'political_post_para2_gpt4o_system_zero', 
 'interactivity_acknowledgement_simpa1': 'interactivity_acknowledgement_simpa1_gpt4o_system_zero', 'rationality_simple2_simpa1': 'rationality_simple2_simpa1_gpt4o_system_zero',
 'incivility_simpa1': 'incivility_simpa1_gpt4o_system_zero', 
 'political_ideology_US_simpa1': 'political_ideology_US_simpa1_gpt4o_system_zero', 
 'political_post_simpa1': 'political_post_simpa1_gpt4o_system_zero',
})

In [99]:
dataset_w_pred_anon.columns.to_list()

['StartDate',
 'RecordedDate',
 'IPAddress',
 'Finished',
 'Coder',
 'ID',
 'Mark_ID',
 'Genre',
 'topiccode',
 'Platform',
 'Anonymity',
 'Anonymity_9_TEXT',
 'codable',
 'Interaction',
 'Acknowledgement',
 'TopicRelevance',
 'Reasoning',
 'BackgroundInfo',
 'ExternalEvidence',
 'ExternalEvidence_1_TEXT',
 'Opinion',
 'disagreement',
 'Ideologicaldirection',
 'Name_calling',
 'Vulgarity',
 'Attack_reputation',
 'Question_Intelligenc',
 'All_caps_function',
 'Sarcasm_to_criticize',
 'Individual_right',
 'discrimination',
 'Invoke_violence',
 'Tone',
 'INTERACTIVITY_DUMMY',
 'RATIONALITY_DUMMY',
 'HAS_OPINION_DUMMY',
 'LIBERAL_NEUTRAL_CONSERVATIVE',
 'LIBERAL_DUMMY',
 'CONSERVATIVE_DUMMY',
 'NAMECALLING_DUMMY',
 'VULGAR_DUMMY',
 'NAMECALLING_VULGAR_DUMMY',
 'INCIVILITY_ORDINAL',
 'INCIVILITY_DUMMY',
 'INTOLERANCE_DUMMY',
 'filter_$',
 'IMPOLITENESS_DUMMY',
 'showName',
 'genre',
 'Time_comment',
 'likeCount_comment',
 'entities',
 'place',
 'retweet_count',
 'platform',
 'retweeted',
 '

In [36]:
#rename the columns:
dataset_w_pred_anon = dataset_w_pred_anon.rename(columns={
     'incivility_para1': 'incivility_para1_gpt4o_system_zero',
 'incivility_para2': 'incivility_para2_gpt4o_system_zero',
 'interactivity_acknowledgement_para1': 'interactivity_acknowledgement_para1_gpt4o_system_zero',
 'interactivity_acknowledgement_para2': 'interactivity_acknowledgement_para2_gpt4o_system_zero',
 'incivility_simpa1': 'incivility_simpa1_gpt4o_system_zero'
})

In [100]:
dataset_w_pred_anon

Unnamed: 0,StartDate,RecordedDate,IPAddress,Finished,Coder,ID,Mark_ID,Genre,topiccode,Platform,...,rationality_simple2_para1_gpt4o_system_zero,political_ideology_US_para1_gpt4o_system_zero,political_post_para1_gpt4o_system_zero,rationality_simple2_para2_gpt4o_system_zero,political_ideology_US_para2_gpt4o_system_zero,political_post_para2_gpt4o_system_zero,interactivity_acknowledgement_simpa1_gpt4o_system_zero,rationality_simple2_simpa1_gpt4o_system_zero,political_ideology_US_simpa1_gpt4o_system_zero,political_post_simpa1_gpt4o_system_zero
0,5/30/2021 13:03:17,5/30/2021 13:04:17,62.194.51.29,1,6,UgyPHwv8G0cDE6-wEgl4AaABAg.8_0ZjJKSJty8_0kXGkAd2U,119,0,0,1,...,No,neutral,non-political,No,neutral,non-political,No,No,neutral,non-political
1,10/11/2021 10:34:05,10/11/2021 10:36:46,213.127.109.191,1,6,Ugx2WXq9UdV8mPPjejJ4AaABAg.8yHCKV0Boe58yYRxEQEF45,282,1,2,1,...,No,neutral,non-political,No,neutral,non-political,No,No,neutral,non-political
2,9/9/2021 18:49:48,9/9/2021 18:51:32,213.127.110.0,1,6,1110578710648890000,372,2,4,2,...,,,,,,,,,,
3,6/6/2021 16:12:46,6/6/2021 16:16:16,213.127.76.145,1,6,UgwUPFScjJ0MCeaP2F54AaABAg.8lvp3fc9Euf8lvvgsUgEgV,769,0,0,1,...,No,neutral,non-political,No,neutral,non-political,No,No,neutral,non-political
4,6/13/2021 13:25:49,6/13/2021 13:27:28,213.127.82.232,1,6,UgwWKCWtSJdFvjGHvTp4AaABAg.8kUC5dGrQ2H8kUDRihE2f3,1206,0,0,1,...,No,neutral,non-political,No,neutral,non-political,No,No,neutral,non-political
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3857,8/19/2021 14:50:13,8/19/2021 14:54:28,62.194.51.29,1,6,1152219467579100000,10000695,0,4,2,...,,,,,,,,,,
3858,8/19/2021 15:10:27,8/19/2021 15:12:21,62.194.51.29,1,6,1085362296472430000,10007008,1,4,2,...,,,,,,,,,,
3859,10/6/2021 16:08:39,10/6/2021 16:10:42,213.127.113.113,1,6,UghFY3QJ6nmT_ngCoAEC.7-H0Z7--wxd8goqpaPs-bl,20000102,0,3,1,...,No,neutral,non-political,No,neutral,non-political,No,No,neutral,non-political
3860,10/15/2021 18:30:04,10/15/2021 18:35:40,213.127.109.191,1,6,UgyWabsmmnq3zam4DgZ4AaABAg,20000418,2,3,1,...,No,neutral,non-political,No,neutral,non-political,No,No,neutral,non-political


In [101]:
dataset_w_pred_anon.to_parquet('data/publicsphere/publicsphere.cardiff_prompt_classify_anon.parquet', index=False)

In [22]:
#gpt4T for political_post
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
pubspheregptrun4 = [ 'political_post']


chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubspheregptrun4: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        for index, row in tqdm.tqdm(boukesTYT["commentText"].items()):
            retry_count = 0
            max_retries = 5
            while retry_count < max_retries:
                try: 
                    response = requests.post(
                            url=api_endpoint,
                            headers=headers,
                            json={
                                'model': MODELgpt4T,
                                'messages': [
                                    {
                                        "role": "system",
                                        "content": template
                                    },
                                    {
                                        "role": "user",
                                        "content": template.format(text=row)
                                    }
                                ],
                                'temperature': temperature_0,  
                                'seed': SEED,
                                "max_tokens": MAX15
                            }
                        )  

                    if response.status_code == 200:
                        data_response = response.json()
                        chunked_result.append(
                        pd.DataFrame(
                            data=[[index, classes.get(data_response["choices"][0]["message"]["content"], None)]],                                
                            columns=['index', label]
                            )
                        )
                        break  # Exit the retry loop on success
                    elif response.status_code == 429:
                        retry_count += 1
                        wait_time = 20
                        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    elif response.status_code == 500:
                        retry_count += 1
                        wait_time = 20
                        print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        print(f"Failed to connect to API. Status code: {response.status_code}")
                        print(response.text)
                        break
                except requests.exceptions.RequestException as e:   
                    print(f"Failed to connect to API: {e}")
                    retry_count += 1
                    wait_time = 60
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)                 
                
            #Add a pause of 1 minute after every 70 iterations
            #if (index + 1) % 250 == 0:
            #    print("Pausing for 10 seconds...")
            #    time.sleep(10)

predictions_boukes_GPT4T_zero4 = pd.concat(chunked_result, ignore_index=True)
print(predictions_boukes_GPT4T_zero4) 


662it [13:41,  2.13it/s]

Rate limit exceeded. Retrying in 20 seconds...


697it [14:34,  2.10it/s]

Rate limit exceeded. Retrying in 20 seconds...


730it [15:21,  1.69it/s]

Rate limit exceeded. Retrying in 20 seconds...


819it [17:22,  1.84it/s]

Rate limit exceeded. Retrying in 20 seconds...


901it [19:20,  1.22it/s]

Rate limit exceeded. Retrying in 20 seconds...


1039it [22:20,  2.19it/s]

Rate limit exceeded. Retrying in 20 seconds...


1177it [25:23,  1.94it/s]

Rate limit exceeded. Retrying in 20 seconds...


1287it [28:09,  1.91it/s]

Rate limit exceeded. Retrying in 20 seconds...


1471it [32:13,  1.87it/s]

Rate limit exceeded. Retrying in 20 seconds...


1744it [38:17,  1.92it/s]

Rate limit exceeded. Retrying in 20 seconds...


1837it [40:20,  1.20it/s]

Rate limit exceeded. Retrying in 20 seconds...


1884it [41:22,  1.27it/s]

Rate limit exceeded. Retrying in 20 seconds...


1930it [42:23,  1.28it/s]

Rate limit exceeded. Retrying in 20 seconds...


2221it [49:01,  1.48it/s]

Rate limit exceeded. Retrying in 20 seconds...


2359it [52:04,  1.71it/s]

Rate limit exceeded. Retrying in 20 seconds...


2405it [53:05,  1.61it/s]

Rate limit exceeded. Retrying in 20 seconds...


2452it [54:05,  1.56it/s]

Rate limit exceeded. Retrying in 20 seconds...


2917it [1:04:26,  2.11it/s]

Rate limit exceeded. Retrying in 20 seconds...


2965it [1:05:29,  1.28it/s]

Rate limit exceeded. Retrying in 20 seconds...


3132it [1:09:24,  1.33s/it]

      index political_post
0         0  non-political
1         1  non-political
2         3  non-political
3         4  non-political
4         5      political
...     ...            ...
3127   3854  non-political
3128   3855  non-political
3129   3859  non-political
3130   3860      political
3131   3861      political

[3132 rows x 2 columns]





In [95]:
#gpt4o seed2 for boukes

#check GPT4o:
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
pubspheregpt4orun1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_ideology_US', 'political_post']


chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubspheregpt4orun1: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        for index, row in tqdm.tqdm(boukesTYT["commentText"].items()):
            retry_count = 0
            max_retries = 5
            while retry_count < max_retries:
                try: 
                    response = requests.post(
                            url=api_endpoint,
                            headers=headers,
                            json={
                                'model': MODELgpt4o,
                                'messages': [
                                    {
                                        "role": "system",
                                        "content": template
                                    },
                                    {
                                        "role": "user",
                                        "content": template.format(text=row)
                                    }
                                ],
                                'temperature': temperature_0,  
                                'seed': SEED2,
                                "max_tokens": MAX15
                            }
                        )  

                    if response.status_code == 200:
                        data_response = response.json()
                        chunked_result.append(
                        pd.DataFrame(
                            data=[[index, classes.get(data_response["choices"][0]["message"]["content"], None)]],                                
                            columns=['index', label]
                            )
                        )
                        break  # Exit the retry loop on success
                    elif response.status_code == 429:
                        retry_count += 1
                        wait_time = 20
                        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    elif response.status_code == 500:
                        retry_count += 1
                        wait_time = 20
                        print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        print(f"Failed to connect to API. Status code: {response.status_code}")
                        print(response.text)
                        break
                except requests.exceptions.RequestException as e:   
                    print(f"Failed to connect to API: {e}")
                    retry_count += 1
                    wait_time = 60
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)                 
                
            #Add a pause of 1 minute after every 70 iterations
            #if (index + 1) % 250 == 0:
            #    print("Pausing for 10 seconds...")
            #    time.sleep(10)

predictions_boukes_GPT4o_zero_seed2 = pd.concat(chunked_result, ignore_index=True)
print(predictions_boukes_GPT4o_zero_seed2) 



3132it [19:29,  2.68it/s]
3132it [16:56,  3.08it/s]
3132it [16:51,  3.10it/s]
3132it [20:20,  2.57it/s]
3132it [16:45,  3.12it/s]


       index incivility_simple2 interactivity_acknowledgement_simple  \
0          0                 No                                  NaN   
1          1                 No                                  NaN   
2          3                 No                                  NaN   
3          4                 No                                  NaN   
4          5                Yes                                  NaN   
...      ...                ...                                  ...   
15655   3854                NaN                                  NaN   
15656   3855                NaN                                  NaN   
15657   3859                NaN                                  NaN   
15658   3860                NaN                                  NaN   
15659   3861                NaN                                  NaN   

      political_ideology_US political_post rationality_simple2  
0                       NaN            NaN                 NaN  
1    

In [96]:
#save output:
predictions_boukes_GPT4o_zero_seed2.to_parquet('data/publicsphere/predictions_boukes_GPT4o_zero_seed2.parquet', index=False)

In [102]:
for label in pubspheregpt4orun1:
    preds = predictions_boukes_GPT4o_zero_seed2.loc[:, ["index", label]].dropna()
    preds.set_index('index', drop=True, inplace=True)
    dataset_w_pred_anon = dataset_w_pred_anon.join(preds, rsuffix=f'_{label}_gpt4o_system_zero_seed2')

In [68]:
#gpt4o MHClemm ideology and ideology para1

#check GPT4o:
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
MHClemmGPT4orun1 = ['political_ideology_US', 'political_ideology_US_para1']


chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in MHClemmGPT4orun1: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        for index, row in tqdm.tqdm(MHclemm['text'].items()):
            retry_count = 0
            max_retries = 5
            while retry_count < max_retries:
                try: 
                    response = requests.post(
                            url=api_endpoint,
                            headers=headers,
                            json={
                                'model': MODELgpt4o,
                                'messages': [
                                    {
                                        "role": "system",
                                        "content": template
                                    },
                                    {
                                        "role": "user",
                                        "content": template.format(text=row)
                                    }
                                ],
                                'temperature': temperature_0,  
                                'seed': SEED,
                                "max_tokens": MAX15
                            }
                        )  

                    if response.status_code == 200:
                        data_response = response.json()
                        chunked_result.append(
                        pd.DataFrame(
                            data=[[index, classes.get(data_response["choices"][0]["message"]["content"], None)]],                                
                            columns=['index', label]
                            )
                        )
                        break  # Exit the retry loop on success
                    elif response.status_code == 429:
                        retry_count += 1
                        wait_time = 20
                        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    elif response.status_code == 500:
                        retry_count += 1
                        wait_time = 20
                        print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        print(f"Failed to connect to API. Status code: {response.status_code}")
                        print(response.text)
                        break
                except requests.exceptions.RequestException as e:   
                    print(f"Failed to connect to API: {e}")
                    retry_count += 1
                    wait_time = 60
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)                 
                
            #Add a pause of 1 minute after every 70 iterations
            #if (index + 1) % 250 == 0:
            #    print("Pausing for 10 seconds...")
            #    time.sleep(10)

predictions_MHClemm_GPT4o_zero = pd.concat(chunked_result, ignore_index=True)
print(predictions_MHClemm_GPT4o_zero) 



0it [00:00, ?it/s]

635it [03:35,  2.95it/s]
635it [03:34,  2.96it/s]

      index political_ideology_US political_ideology_US_para1
0         0          conservative                         NaN
1         1               neutral                         NaN
2         2               neutral                         NaN
3         3               neutral                         NaN
4         4               neutral                         NaN
...     ...                   ...                         ...
1265    630                   NaN                     liberal
1266    631                   NaN                     neutral
1267    632                   NaN                     neutral
1268    633                   NaN                conservative
1269    634                   NaN                     neutral

[1270 rows x 3 columns]





In [57]:
MHclemmW= pd.read_parquet('data/MH_BClemm_data/Ideo_Val_GPT_USA_L33_70b.parquet')

In [72]:
MHclemmW.columns.to_list()

['text',
 'label',
 'GPT1',
 'GPT2',
 'GPT_Reconciled',
 'political_ideology_US_L33_70b_zero',
 'political_ideology_US_para1_L33_70b_zero',
 'political_ideology_US_L31_8b_zero',
 'political_ideology_US_para1_L31_8b_zero',
 'political_ideology_US_Q25_72b_zero',
 'political_ideology_US_para1_Q25_72b_zero',
 'political_ideology_US_L33_70b_zero_con_dum',
 'political_ideology_US_L33_70b_zero_lib_dum',
 'political_ideology_US_para1_L33_70b_zero_con_dum',
 'political_ideology_US_para1_L33_70b_zero_lib_dum',
 'political_ideology_US_L31_8b_zero_con_dum',
 'political_ideology_US_L31_8b_zero_lib_dum',
 'political_ideology_US_para1_L31_8b_zero_con_dum',
 'political_ideology_US_para1_L31_8b_zero_lib_dum',
 'political_ideology_US_Q25_72b_zero_con_dum',
 'political_ideology_US_Q25_72b_zero_lib_dum',
 'political_ideology_US_para1_Q25_72b_zero_con_dum',
 'political_ideology_US_para1_Q25_72b_zero_lib_dum',
 'rationality_jaidka_gpt4o_system_zero',
 'political_ideology_US_gpt4o_system_zero',
 'political_i

In [71]:
#need to make sure all new columns have the proper suffix:
MHclemmW = MHclemmW.rename(columns={
 'political_ideology_US': 'political_ideology_US_gpt4o_system_zero', 'political_ideology_US_para1':'political_ideology_US_para1_gpt4o_system_zero',
  })

In [70]:
for label in MHClemmGPT4orun1:
    preds = predictions_MHClemm_GPT4o_zero.loc[:, ["index", label]].dropna()
    preds.set_index('index', drop=True, inplace=True)
    MHclemmW = MHclemmW.join(preds, rsuffix=f'_{label}_gpt4o_system_zero')

In [73]:
#save data:
MHclemmW.to_parquet('data/MH_BClemm_data/Ideo_Val_GPT_USA_L33_70b.parquet', index=False)

In [6]:
#gpt4o temperature 0.1 for boukes

#check GPT4o:
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
pubspheregpt4orun1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_ideology_US', 'political_post']


chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubspheregpt4orun1: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        for index, row in tqdm.tqdm(boukesTYT["commentText"].items()):
            retry_count = 0
            max_retries = 5
            while retry_count < max_retries:
                try: 
                    response = requests.post(
                            url=api_endpoint,
                            headers=headers,
                            json={
                                'model': MODELgpt4o,
                                'messages': [
                                    {
                                        "role": "system",
                                        "content": template
                                    },
                                    {
                                        "role": "user",
                                        "content": template.format(text=row)
                                    }
                                ],
                                'temperature': temperature_01,  
                                'seed': SEED,
                                "max_tokens": MAX15
                            }
                        )  

                    if response.status_code == 200:
                        data_response = response.json()
                        chunked_result.append(
                        pd.DataFrame(
                            data=[[index, classes.get(data_response["choices"][0]["message"]["content"], None)]],                                
                            columns=['index', label]
                            )
                        )
                        break  # Exit the retry loop on success
                    elif response.status_code == 429:
                        retry_count += 1
                        wait_time = 20
                        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    elif response.status_code == 500:
                        retry_count += 1
                        wait_time = 20
                        print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        print(f"Failed to connect to API. Status code: {response.status_code}")
                        print(response.text)
                        break
                except requests.exceptions.RequestException as e:   
                    print(f"Failed to connect to API: {e}")
                    retry_count += 1
                    wait_time = 60
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)                 
                
            #Add a pause of 1 minute after every 70 iterations
            #if (index + 1) % 250 == 0:
            #    print("Pausing for 10 seconds...")
            #    time.sleep(10)

predictions_boukes_GPT4o_low = pd.concat(chunked_result, ignore_index=True)
print(predictions_boukes_GPT4o_low) 



3132it [19:49,  2.63it/s]
3132it [19:54,  2.62it/s]
3132it [19:41,  2.65it/s]
3132it [20:39,  2.53it/s]
3132it [20:00,  2.61it/s]


       index incivility_simple2 interactivity_acknowledgement_simple  \
0          0                 No                                  NaN   
1          1                Yes                                  NaN   
2          3                 No                                  NaN   
3          4                 No                                  NaN   
4          5                Yes                                  NaN   
...      ...                ...                                  ...   
15655   3854                NaN                                  NaN   
15656   3855                NaN                                  NaN   
15657   3859                NaN                                  NaN   
15658   3860                NaN                                  NaN   
15659   3861                NaN                                  NaN   

      political_ideology_US political_post rationality_simple2  
0                       NaN            NaN                 NaN  
1    

In [7]:
#save output:
predictions_boukes_GPT4o_low.to_parquet('data/publicsphere/predictions_boukes_GPT4o_low.parquet', index=False)

In [10]:
for label in pubspheregpt4orun1:
    preds = predictions_boukes_GPT4o_low.loc[:, ["index", label]].dropna()
    preds.set_index('index', drop=True, inplace=True)
    dataset_w_pred_anon = dataset_w_pred_anon.join(preds, rsuffix=f'_{label}_gpt4o_system_low')

In [8]:
#gpt4o temperature 0.1 for boukes with seed 2

#check GPT4o:
def load_json(path: str):
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)
    
pubspheregpt4orun1 = ['rationality_simple2', 'incivility_simple2', 'interactivity_acknowledgement_simple', 'political_ideology_US', 'political_post']


chunked_result: typing.List[pd.DataFrame] = []

for label, path in CFG.prompt_classify_files.items():
    if label in pubspheregpt4orun1: 
        template = load_json(path).get('template')
        classes = load_json(path).get('classes')
        for index, row in tqdm.tqdm(boukesTYT["commentText"].items()):
            retry_count = 0
            max_retries = 5
            while retry_count < max_retries:
                try: 
                    response = requests.post(
                            url=api_endpoint,
                            headers=headers,
                            json={
                                'model': MODELgpt4o,
                                'messages': [
                                    {
                                        "role": "system",
                                        "content": template
                                    },
                                    {
                                        "role": "user",
                                        "content": template.format(text=row)
                                    }
                                ],
                                'temperature': temperature_01,  
                                'seed': SEED2,
                                "max_tokens": MAX15
                            }
                        )  

                    if response.status_code == 200:
                        data_response = response.json()
                        chunked_result.append(
                        pd.DataFrame(
                            data=[[index, classes.get(data_response["choices"][0]["message"]["content"], None)]],                                
                            columns=['index', label]
                            )
                        )
                        break  # Exit the retry loop on success
                    elif response.status_code == 429:
                        retry_count += 1
                        wait_time = 20
                        print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    elif response.status_code == 500:
                        retry_count += 1
                        wait_time = 20
                        print(f"Failed to connect to API. Status code: {response.status_code}. Retrying in {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        print(f"Failed to connect to API. Status code: {response.status_code}")
                        print(response.text)
                        break
                except requests.exceptions.RequestException as e:   
                    print(f"Failed to connect to API: {e}")
                    retry_count += 1
                    wait_time = 60
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)                 
                
            #Add a pause of 1 minute after every 70 iterations
            #if (index + 1) % 250 == 0:
            #    print("Pausing for 10 seconds...")
            #    time.sleep(10)

predictions_boukes_GPT4o_low_seed2 = pd.concat(chunked_result, ignore_index=True)
print(predictions_boukes_GPT4o_low_seed2) 



3132it [20:05,  2.60it/s]
3132it [22:44,  2.30it/s]
3132it [21:29,  2.43it/s]
3132it [21:12,  2.46it/s]
3132it [22:38,  2.31it/s]


       index incivility_simple2 interactivity_acknowledgement_simple  \
0          0                 No                                  NaN   
1          1                 No                                  NaN   
2          3                 No                                  NaN   
3          4                 No                                  NaN   
4          5                Yes                                  NaN   
...      ...                ...                                  ...   
15655   3854                NaN                                  NaN   
15656   3855                NaN                                  NaN   
15657   3859                NaN                                  NaN   
15658   3860                NaN                                  NaN   
15659   3861                NaN                                  NaN   

      political_ideology_US political_post rationality_simple2  
0                       NaN            NaN                 NaN  
1    

In [9]:
#save output:
predictions_boukes_GPT4o_low_seed2.to_parquet('data/publicsphere/predictions_boukes_GPT4o_low_seed2.parquet', index=False)

In [12]:
for label in pubspheregpt4orun1:
    preds = predictions_boukes_GPT4o_low_seed2.loc[:, ["index", label]].dropna()
    preds.set_index('index', drop=True, inplace=True)
    dataset_w_pred_anon = dataset_w_pred_anon.join(preds, rsuffix=f'_{label}_gpt4o_system_low_seed2')

In [14]:
dataset_w_pred_anon.columns.to_list()

['StartDate',
 'RecordedDate',
 'IPAddress',
 'Finished',
 'Coder',
 'ID',
 'Mark_ID',
 'Genre',
 'topiccode',
 'Platform',
 'Anonymity',
 'Anonymity_9_TEXT',
 'codable',
 'Interaction',
 'Acknowledgement',
 'TopicRelevance',
 'Reasoning',
 'BackgroundInfo',
 'ExternalEvidence',
 'ExternalEvidence_1_TEXT',
 'Opinion',
 'disagreement',
 'Ideologicaldirection',
 'Name_calling',
 'Vulgarity',
 'Attack_reputation',
 'Question_Intelligenc',
 'All_caps_function',
 'Sarcasm_to_criticize',
 'Individual_right',
 'discrimination',
 'Invoke_violence',
 'Tone',
 'INTERACTIVITY_DUMMY',
 'RATIONALITY_DUMMY',
 'HAS_OPINION_DUMMY',
 'LIBERAL_NEUTRAL_CONSERVATIVE',
 'LIBERAL_DUMMY',
 'CONSERVATIVE_DUMMY',
 'NAMECALLING_DUMMY',
 'VULGAR_DUMMY',
 'NAMECALLING_VULGAR_DUMMY',
 'INCIVILITY_ORDINAL',
 'INCIVILITY_DUMMY',
 'INTOLERANCE_DUMMY',
 'filter_$',
 'IMPOLITENESS_DUMMY',
 'showName',
 'genre',
 'Time_comment',
 'likeCount_comment',
 'entities',
 'place',
 'retweet_count',
 'platform',
 'retweeted',
 '

In [15]:
#save the results:
dataset_w_pred_2.to_json(f'{CFG.report_dir}/publicsphere.cardiff_prompt_classify_s.json', orient='records', force_ascii=False, indent=4)
dataset_w_pred_anon.to_parquet('data/publicsphere/publicsphere.cardiff_prompt_classify_anon.parquet', index=False)