# Test Notebook

For debugging the individual chains

## Setup

In [1]:
%load_ext dotenv
%dotenv ../.env

In [2]:
import os
assert "HUGGINGFACEHUB_API_TOKEN" in os.environ

In [3]:
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI(
    model="tgi",
    base_url="http://kriton.philosophie.kit.edu:8080/v1/",
    api_key="no-key-required",
    temperature=0.3,
)

In [4]:
# from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# 
# llm = HuggingFaceEndpoint(
#     model="HuggingFaceH4/zephyr-7b-beta",
#     task="text-generation",
#     max_new_tokens=512,
#     do_sample=True,
#     temperature=0.4,
# )
# 
# chat_model = ChatHuggingFace(
#     llm=llm,
# )

In [5]:
# test API
from langchain_core.prompts import ChatPromptTemplate
chain = ChatPromptTemplate.from_messages([("user","Tell me a joke about {x}!")]) | chat_model
chain.invoke({"x":"kids"})

AIMessage(content='Why did the kid bring a ladder to school?\n\nBecause he wanted to reach his full potential! (get it?)', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 17, 'total_tokens': 41, 'completion_tokens_details': None}, 'model_name': 'cstr/llama3-8b-spaetzle-v20', 'system_fingerprint': '2.0.4-sha-f426a33', 'finish_reason': 'eos_token', 'logprobs': None}, id='run-e9c654f0-3377-4699-801c-4fb5b741cea0-0', usage_metadata={'input_tokens': 17, 'output_tokens': 24, 'total_tokens': 41})

## Test Debate Design Chains

In [6]:
import random 

with open('../data/universal_tags.txt') as file:
    tags = [line.rstrip() for line in file]
testtags = random.sample(tags, 8)
print(f"Loaded {len(tags)} tags. Choose: {testtags}.")


Loaded 280 tags. Choose: ['Taxes', 'Belief', 'Referendum', 'Money', 'Sports', 'USA', 'Rights', 'Antinatalism'].


In [7]:
from syncialo.chains.debate_design import SuggestTopicsChain

chain = SuggestTopicsChain.build(chat_model)

In [8]:
import ujson

from langchain.globals import set_debug

set_debug(False)

output = chain.invoke({
    "tags": testtags,
    "debates_per_tag_cluster": 5
})

print(ujson.dumps(output, indent=2))

[
  {
    "idx": 1,
    "topic": "The United States should adopt a universal basic income to combat poverty and income inequality."
  },
  {
    "idx": 2,
    "topic": "The United States should prioritize the development of renewable energy sources over fossil fuels."
  },
  {
    "idx": 3,
    "topic": "The United States should have a national referendum on the legalization of recreational marijuana use."
  },
  {
    "idx": 4,
    "topic": "The United States should adopt a single-payer healthcare system."
  },
  {
    "idx": 5,
    "topic": "The United States should increase its military spending to counter the growing threat of China."
  },
  {
    "idx": 6,
    "topic": "The United States should prioritize the protection of the environment over economic growth."
  },
  {
    "idx": 7,
    "topic": "The United States should have stricter gun control laws."
  },
  {
    "idx": 8,
    "topic": "The United States should have a more progressive tax system."
  },
  {
    "idx": 9,
    "t

In [9]:
from syncialo.chains.debate_design import SuggestMotionChain

from langchain.globals import set_debug

set_debug(False)

chain_motion = SuggestMotionChain.build(chat_model)

output_motion = chain_motion.invoke({
    "topic": "How could changes in artistic expression reflect and influence significant social issues such as poverty?",
    "tags": testtags
})

print(output_motion)

{'motion': 'Increased government funding for the arts is a necessary step to address poverty in the United States.', 'title': 'Arts for the Poor'}


## Argumentation Chains

In [10]:
from syncialo.chains.argumentation import IdentifyPremisesChain, Valence

chain_premises = IdentifyPremisesChain.build(chat_model)

output = chain_premises.invoke({
    "argument": "Meat contains major nutrients.",
    "conclusion": "It's okay to eat meat.",
    "valence": Valence.PRO
})

print(output)


['Meat is a significant source of essential nutrients.', 'Nutrients are crucial for a healthy diet.', 'A healthy diet is important for overall well-being.', 'Consuming a diet that is not healthy can have negative consequences.', 'The benefits of nutrients in meat outweigh the potential drawbacks of eating meat.']


In [11]:
from syncialo.chains.argumentation import RankPropsByPlausibilityChain

chain_rank = RankPropsByPlausibilityChain.build(chat_model)

output = chain_rank.invoke({
    "premises": [
        'Meat contains major nutrients',
        'Consuming major nutrients is important for overall health and well-being', 
        'Eating meat provides major nutrients', 
        'Maintaining good health through proper nutrition is desirable', 
        'The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns'
    ],
    "tags": ["climate change", "animals", "health"],
    "persona": "A foodtruck owner in Chicago."
})

print(output)

[1, 0, 3, 2, 4]


In [12]:
from langchain.globals import set_debug
from syncialo.chains.argumentation import GenSupportingArgumentChain

set_debug(True)

chain_support = GenSupportingArgumentChain.build(chat_model)

output = chain_support.invoke({
    "premises": [
        'Meat contains major nutrients',
        'Consuming major nutrients is important for overall health and well-being', 
        'Eating meat provides major nutrients', 
        'Maintaining good health through proper nutrition is desirable', 
        'The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns'
    ],
    "tags_pro": ["climate change", "animals", "health"],
    "persona": "A foodtruck owner in Chicago.",
    "ranking": [0,1,2,3,4],
    "n": 4
})

print(output)


[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "premises": [
    "Meat contains major nutrients",
    "Consuming major nutrients is important for overall health and well-being",
    "Eating meat provides major nutrients",
    "Maintaining good health through proper nutrition is desirable",
    "The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns"
  ],
  "tags_pro": [
    "climate change",
    "animals",
    "health"
  ],
  "persona": "A foodtruck owner in Chicago.",
  "ranking": [
    0,
    1,
    2,
    3,
    4
  ],
  "n": 4
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<target_idx>] Entering Chain run with input:
[0m{
  "premises": [
    "Meat contains major nutrients",
    "Consuming major nutrients is important for overall health and well-being",
    "Eating meat provides major nutrients",
    "Maintaining good health throu

In [13]:
from syncialo.chains.argumentation import GenAttackingArgumentChain

# Use a dedicated name for the attacking chain so it does not overwrite
# `chain_support` (the supporting-argument chain built in the previous cell).
chain_attack = GenAttackingArgumentChain.build(chat_model)

# `output` keeps its name because the following cell inspects it.
output = chain_attack.invoke({
    "premises": [
        'Meat contains major nutrients',
        'Consuming major nutrients is important for overall health and well-being', 
        'Eating meat provides major nutrients', 
        'Maintaining good health through proper nutrition is desirable', 
        'The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns'
    ],
    "tags_con": ["climate change", "books", "India"],
    "persona": "A foodtruck owner in Chicago.",
    "ranking": [0,1,2,3,4],
    "n": 4
})

print(output)

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "premises": [
    "Meat contains major nutrients",
    "Consuming major nutrients is important for overall health and well-being",
    "Eating meat provides major nutrients",
    "Maintaining good health through proper nutrition is desirable",
    "The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns"
  ],
  "tags_con": [
    "climate change",
    "books",
    "India"
  ],
  "persona": "A foodtruck owner in Chicago.",
  "ranking": [
    0,
    1,
    2,
    3,
    4
  ],
  "n": 4
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<target_idx>] Entering Chain run with input:
[0m{
  "premises": [
    "Meat contains major nutrients",
    "Consuming major nutrients is important for overall health and well-being",
    "Eating meat provides major nutrients",
    "Maintaining good health through 

In [14]:
from syncialo.chains.argumentation import ArgumentModel, Valence
from syncialo.chains.argumentation import SelectMostSalientChain

assert isinstance(output[0], ArgumentModel)
print(len(output))
print(output)

chain_salient = SelectMostSalientChain.build(chat_model)

s_output = chain_salient.invoke({
    "args": output,
    "k": 2,
    "conclusion": "Meat contains major nutrients.",
    "valence": Valence.CON,
})

print(len(s_output))
print(s_output)


4
[ArgumentModel(label='Environmental Impact', claim='The production of meat contributes significantly to greenhouse gas emissions, deforestation, and water pollution, outweighing the benefits of its nutritional value.', target_idx=4, valence=<Valence.PRO: 'PRO'>), ArgumentModel(label='Animal Welfare', claim="The meat industry's animal cruelty, mistreatment, and inhumane practices are a moral concern that outweighs the nutritional benefits of consuming meat.", target_idx=4, valence=<Valence.PRO: 'PRO'>), ArgumentModel(label='Resource Inefficiency', claim='Producing meat requires significantly more resources (land, water, and energy) than plant-based alternatives, making it an inefficient use of resources that outweighs its nutritional benefits.', target_idx=4, valence=<Valence.PRO: 'PRO'>), ArgumentModel(label='Health Risks', claim='Consuming excessive amounts of red and processed meat has been linked to increased risk of chronic diseases, such as heart disease, diabetes, and certain c

In [15]:
from syncialo.chains.argumentation import GenerateProAndConChain

# This chain generates both pro and con arguments, so it gets its own name
# instead of reusing `chain_support` from the earlier cells.
chain_pro_con = GenerateProAndConChain.build(chat_model)

output = chain_pro_con.invoke({
    "premises": [
        'Meat contains major nutrients',
        'Consuming major nutrients is important for overall health and well-being', 
        'Eating meat provides major nutrients', 
        'Maintaining good health through proper nutrition is desirable', 
        'The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns'
    ],
    "tags": ["climate change", "animals", "health"],
    "tags_universal": ["climate change", "books", "India", "Trees"],
    "tags_per_cluster": 3,
    "persona": "A foodtruck owner in Chicago.",
    "ranking": [0,1,2,3,4],
    "n": 2
})

print(output)

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "premises": [
    "Meat contains major nutrients",
    "Consuming major nutrients is important for overall health and well-being",
    "Eating meat provides major nutrients",
    "Maintaining good health through proper nutrition is desirable",
    "The benefits of consuming meat, including the provision of major nutrients, outweigh any potential drawbacks or concerns"
  ],
  "tags": [
    "climate change",
    "animals",
    "health"
  ],
  "tags_universal": [
    "climate change",
    "books",
    "India",
    "Trees"
  ],
  "tags_per_cluster": 3,
  "persona": "A foodtruck owner in Chicago.",
  "ranking": [
    0,
    1,
    2,
    3,
    4
  ],
  "n": 2
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<persona,tags_pro,tags_con>] Entering Chain run with input:
[0m{
  "premises": [
    "Meat contains major nutrients",
    "Consuming major nutrients is i

## Tags

In [16]:
import os

UNIVERSAL_TAGS_PATH = "../data/universal_tags.txt"
EVAL_TAGS_PATH = "../data/eval_tags.txt"
TEST_TAGS_PATH = "../data/test_tags.txt"


def load_tags(path):
    """Read one tag per line from `path`.

    Args:
        path: Path of a text file containing one tag per line.

    Returns:
        List of lines with trailing whitespace (including the newline) removed.
    """
    # Explicit encoding: the default depends on the platform locale.
    with open(path, encoding="utf-8") as file:
        return [line.rstrip() for line in file]


# Stay None if loading fails, so later cells can tell that nothing was loaded.
tags_universal = None
tags_eval = None
tags_test = None

# Check all files up front and name the missing ones in the error message.
missing_paths = [
    path
    for path in (UNIVERSAL_TAGS_PATH, EVAL_TAGS_PATH, TEST_TAGS_PATH)
    if not os.path.exists(path)
]
if missing_paths:
    raise FileNotFoundError(f"Failed to load tags; data files missing: {missing_paths}")

tags_universal = load_tags(UNIVERSAL_TAGS_PATH)
print(f"Loaded {len(tags_universal)} universal tags.")
tags_eval = load_tags(EVAL_TAGS_PATH)
print(f"Loaded {len(tags_eval)} eval tags.")
tags_test = load_tags(TEST_TAGS_PATH)
print(f"Loaded {len(tags_test)} test tags.")

Loaded 280 universal tags.
Loaded 10 eval tags.
Loaded 10 test tags.


# Test DebateBuilder

In [17]:
from langchain.globals import set_debug

set_debug(False)

In [18]:
from syncialo.debate_builder import DebateBuilder

debateBuilder = DebateBuilder(
    model=chat_model,
    tags_universal=tags_universal,
    tags_per_cluster=6,
)


  from .autonotebook import tqdm as notebook_tqdm


In [19]:
built_debate = await debateBuilder.build_debate(
    motion="Museums should be free for everyone.",
    topic="Should we pay for culture?",
    tag_cluster=tags_universal[:6],
    degree_config=[2,2,0],
)

[32m2024-10-04 18:40:04.795[0m | [34m[1mDEBUG   [0m | [36msyncialo.debate_builder[0m:[36mbuild_subtree[0m:[36m104[0m - [34m[1mProcessing at depth 0[0m
[32m2024-10-04 18:40:04.797[0m | [34m[1mDEBUG   [0m | [36msyncialo.debate_builder[0m:[36mbuild_subtree[0m:[36m105[0m - [34m[1mDegree = 2[0m
[32m2024-10-04 18:40:04.797[0m | [34m[1mDEBUG   [0m | [36msyncialo.debate_builder[0m:[36mbuild_subtree[0m:[36m106[0m - [34m[1mReason claim Museums should be free for everyone.[0m
[32m2024-10-04 18:40:26.580[0m | [34m[1mDEBUG   [0m | [36msyncialo.debate_builder[0m:[36mbuild_subtree[0m:[36m104[0m - [34m[1mProcessing at depth 1[0m
[32m2024-10-04 18:40:26.581[0m | [34m[1mDEBUG   [0m | [36msyncialo.debate_builder[0m:[36mbuild_subtree[0m:[36m105[0m - [34m[1mDegree = 2[0m
[32m2024-10-04 18:40:26.582[0m | [34m[1mDEBUG   [0m | [36msyncialo.debate_builder[0m:[36mbuild_subtree[0m:[36m106[0m - [34m[1mReason claim Free museums prom

In [20]:
# pretty print the networkx graph `built_debate`

import pprint
pprint.pprint(list(built_debate.nodes(data=True)))
pprint.pprint(list(built_debate.edges(data=True)))



[('d15b2578-a375-4f7e-8133-a1b284921bd0',
  {'claim': 'Museums should be free for everyone.', 'label': ''}),
 ('2125c46b-a956-4a3f-8643-d49e98525b22',
  {'claim': 'Free museums promote equal access to information, bridging the '
            'knowledge gap between the affluent and the underprivileged, '
            'thereby fostering a more informed and just society.',
   'label': 'Democratizing Knowledge',
   'premises': ['Access to information is a key factor in bridging the '
                'knowledge gap between the affluent and the underprivileged.',
                'Museums are a significant source of information.',
                'The affluent have easier access to museums due to their '
                'financial means.',
                'The underprivileged have limited access to museums due to '
                'financial constraints.',
                'Equal access to information is essential for a more informed '
                'and just society.']}),
 ('cceca3aa-9b81-4b1

In [12]:
import networkx as nx

from syncialo.chains.argumentation import Valence

data = nx.node_link_data(built_debate)
#pprint.pprint(data)

def get_parents(data, node_id, valence):
    """Return the source ids of all links that point at `node_id` with the given valence.

    Args:
        data: Node-link dict; only its "links" list is read. Every link must
            provide the keys "source", "target" and "valence".
        node_id: Id of the node whose incoming links are collected.
        valence: Valence value a link must carry to be included.

    Returns:
        List of source ids in the order the links appear in `data["links"]`.
    """
    return [
        link["source"]
        for link in data["links"]
        if link["target"] == node_id and link["valence"] == valence
    ]

root_id = data["nodes"][0]["id"]

def print_argdown_line(data, node_id, valence=None, level=0):
    """Print the node `node_id` and, recursively, every reason that targets it.

    The output is an indented outline: pro reasons are prefixed with "+ ",
    con reasons with "- ", and the starting node carries no prefix.

    Args:
        data: Node-link dict with a "nodes" list (each node has "id" and
            "claim", optionally "label") and a "links" list.
        node_id: Id of the node to print.
        valence: Valence of the link through which this node was reached;
            None for the node the traversal starts from.
        level: Nesting depth; every level indents by two spaces.

    Raises:
        StopIteration: If no node with id `node_id` exists in `data["nodes"]`.
    """
    node = next(n for n in data["nodes"] if n["id"]==node_id)
    indent = level * "  "
    marker = "+ " if valence == Valence.PRO else "- " if valence == Valence.CON else ""
    # The root has no (or an empty) label; drop the title instead of
    # printing a bogus "[None]: " / "[]: " prefix.
    label = node.get("label")
    title = f"[{label}]: " if label else ""
    print(f"{indent}{marker}{title}{node['claim']}")
    # Pro reasons first, then con reasons, each one level deeper.
    for child_valence in (Valence.PRO, Valence.CON):
        for reason_id in get_parents(data, node_id, child_valence):
            print_argdown_line(data, reason_id, child_valence, level + 1)

print_argdown_line(data, root_id)


[None]: Museums should be free for everyone.
  + [Knowledge Equity]: Free museums promote equal access to knowledge, a fundamental human right, by bridging the information gap between those who can afford education and those who cannot.
    + [Informed Society]: A well-informed population is better equipped to make informed decisions, participate in the democratic process, and hold those in power accountable, making knowledge a fundamental right.
    + [Freedom of Thought]: Knowledge is the foundation of critical thinking, and the ability to think critically is essential for individual freedom and autonomy, making knowledge a fundamental human right.
    - [Museum Misconception]: Museums are not widely accessible, especially in rural or underprivileged areas, undermining the notion that they provide equal access to knowledge for all.
    - [Privileged Pursuit]: Museums often focus on specific, niche topics, catering to a limited audience with the means and interest to explore such area

## Pipeline

1. partition 300 tags:

    * 280 universal tags
    * 10 validation tags
    * 10 test tags

2. determine debate tag-clusters 

    * train: 100 clusters composed of universal tags
    * eval: 5 mixed tag-clusters with 50% eval tags, 50% universal tags each
    * test: 5 mixed tag-clusters with 50% test tags, 50% universal tags each

3. topics and motions

4. recursively generate balanced argument tree
    


In [7]:
TAGS_PER_CLUSTER = 8
DEBATES_PER_TAG_CLUSTER = 10
TRAIN_SPLIT_SIZE = 1000
EVAL_SPLIT_SIZE = 50
TEST_SPLIT_SIZE = 50
DEGREE_CONFIGS = [
    [6,6,1,0],
    [5,5,2,0],
    [3,2,2,1,1,0],
    [4,3,2,1,0],
    [3,4,2,1,0]     
]


In [10]:
# Tag clusters per split.
#
# Every cluster is drawn with its own seeded generator, so the result is
# reproducible. Each split uses its own seed range: previously eval and test
# both used seeds 42+i on the same universal pool with the same k, so cluster i
# of both splits contained exactly the same universal tags.
CLUSTER_SEED = 42
SPLIT_SEED_STRIDE = 10_000  # distance between the seed ranges of two splits


def sample_tag_clusters(split, n_clusters, pools, seed):
    """Sample `n_clusters` tag clusters for one split.

    Args:
        split: Split name stored under the "split" key of every cluster.
        n_clusters: Number of clusters to create.
        pools: Sequence of `(tag_list, k)` pairs; `k` tags are drawn without
            replacement from each `tag_list` and concatenated.
        seed: Seed of the first cluster; cluster `i` uses `seed + i`.

    Returns:
        List of dicts of the form `{"tags": [...], "split": split}`.
    """
    clusters = []
    for i in range(n_clusters):
        rng = random.Random(seed + i)  # one generator per cluster
        tc = []
        for pool, k in pools:
            tc += rng.sample(pool, k=k)
        if len(pools) > 1:
            rng.shuffle(tc)  # mix the tags drawn from the different pools
        clusters.append({"tags": tc, "split": split})
    return clusters


# train tag clusters (same seeds and draw as before, hence unchanged clusters)
tag_clusters_train = sample_tag_clusters(
    "train",
    TRAIN_SPLIT_SIZE // DEBATES_PER_TAG_CLUSTER,
    [(tags_universal, TAGS_PER_CLUSTER)],
    seed=CLUSTER_SEED,
)

# eval tag clusters: half universal tags, half eval tags
tag_clusters_eval = sample_tag_clusters(
    "eval",
    EVAL_SPLIT_SIZE // DEBATES_PER_TAG_CLUSTER,
    [(tags_universal, TAGS_PER_CLUSTER//2), (tags_eval, TAGS_PER_CLUSTER//2)],
    seed=CLUSTER_SEED + SPLIT_SEED_STRIDE,
)

# test tag clusters: half universal tags, half test tags
tag_clusters_test = sample_tag_clusters(
    "test",
    TEST_SPLIT_SIZE // DEBATES_PER_TAG_CLUSTER,
    [(tags_universal, TAGS_PER_CLUSTER//2), (tags_test, TAGS_PER_CLUSTER//2)],
    seed=CLUSTER_SEED + 2 * SPLIT_SEED_STRIDE,
)

## Topics and motions

In [11]:
import os
import pandas as pd
import random
from tqdm import tqdm
import uuid

import queries

if os.path.isdir(corpus_path):
    raise ValueError(f"Directory {corpus_path} exists. Delete before creating topics and motions.")
os.makedirs(corpus_path)

metadata = []

logger.info("Creating topics")
for tags in tqdm(tag_clusters_train+tag_clusters_eval+tag_clusters_test):
    topics = await queries.suggest_topics(
        model=model,
        tags=tags["tags"],
        debates_per_tag_cluster=DEBATES_PER_TAG_CLUSTER,
        decoder="sample",
        temperature=.6,
    )
    for topic in topics:
        degree_config = random.choice(DEGREE_CONFIGS)
        metadata.append({**tags, "topic": topic, "degree_config": degree_config})

        
logger.info("Creating motions")
for record in tqdm(metadata):
    motion = await queries.suggest_motion(
        model=model,
        tags=record["tags"],
        topic=record["topic"],
        decoder="sample",
        temperature=.6,
    )
    record.update({"motion": motion, "uid": str(uuid.uuid4())})

    
df_metadata = pd.DataFrame(metadata)
df_metadata.to_csv(os.path.join(corpus_path,"metadata.csv"), index=False)

df_metadata.head()    

ValueError: Directory ./debates/SOLAR-10.7B-Instruct-v1.0-AWQ exists. Delete before creating topics and motions.

In [None]:
# number of claims in each debate
import math 

def n_claims(profile):
    """Sum the cumulative products of the doubled entries of `profile`.

    Each entry is doubled (presumably one pro and one con reason per unit of
    degree — confirm against the debate builder). The size of a level is the
    product of all doubled entries up to and including it, and the result is
    the sum of all level sizes.

    Args:
        profile: Sequence of numbers, one per level.

    Returns:
        The summed level sizes; 0 for an empty profile.
    """
    level_size = 1
    count = 0
    for degree in profile:
        level_size *= 2 * degree
        count += level_size
    return count

nc = df_metadata.degree_config.apply(n_claims)

for split in ["train", "eval", "test"]:
    print(f"{split}: {nc[df_metadata.split.eq(split)].sum()}")

## Debate Generation

In [None]:
# concurrent: debates are built as asyncio tasks (single thread)

from ast import literal_eval
import asyncio
import json
import os
import sys  # needed for the stderr log sink below
import uuid

import networkx as nx
import pandas as pd
from loguru import logger

from builder import DebateBuilder, to_kialo

BATCH_SIZE = 128

# Replace loguru's default sink with a log file plus a compact stderr sink.
logger.remove()
logger.add("debate_creation_{time}.log", level="INFO")
logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")

# NOTE(review): `model` and `corpus_path` are not defined in any cell visible
# in this notebook — confirm where they are supposed to come from.
builder = DebateBuilder(
    tags_universal=tags_universal,
    tags_per_cluster=TAGS_PER_CLUSTER,
    model=model,
)

df_metadata = pd.read_csv(os.path.join(corpus_path,"metadata.csv"))


async def build(i, row):
    """Build the debate described by one metadata row and write it to disk.

    The debate is stored twice below `corpus_path`: as Kialo-style text in
    `kialo/<split>/<uid>.txt` and as node-link JSON in `json/<split>/<uid>.json`.
    If either file already exists the debate is skipped.

    Args:
        i: Zero-based index of the row; only used in log messages.
        row: Row of `df_metadata` providing `split`, `uid`, `topic`, `motion`,
            `tags` and `degree_config`.

    Returns:
        None.
    """
    kialo_file_path = os.path.join(corpus_path,"kialo",row.split,f"{row.uid}.txt") 
    json_file_path = os.path.join(corpus_path,"json",row.split,f"{row.uid}.json") 

    for existing_path in (kialo_file_path, json_file_path):
        if os.path.exists(existing_path):
            logger.warning(f"File '{existing_path}' exists, skipping debate #{i+1}.")
            return

    logger.info(f"Building debate #{i+1} (of {len(df_metadata)}) with topic '{row.topic}' and central claim '{row.motion}' ...")    

    # List-valued columns come back from metadata.csv as their string repr,
    # so both `degree_config` and `tags` have to be parsed back into lists.
    degree_config = literal_eval(row.degree_config)
    tag_cluster = literal_eval(row.tags) if isinstance(row.tags, str) else row.tags

    tree = await builder.build_debate(
        motion=row.motion,
        topic=row.topic,
        tag_cluster=tag_cluster,
        degree_config=degree_config,
    )

    # Explicit encoding: generated claims may contain non-ASCII characters and
    # the default encoding depends on the platform locale.
    os.makedirs(os.path.dirname(kialo_file_path), exist_ok=True)
    with open(kialo_file_path, 'w', encoding="utf-8") as f:
        for line in to_kialo(tree, topic=row.topic):
            f.write(f"{line}\n")

    os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
    with open(json_file_path, 'w', encoding="utf-8") as f:
        json.dump(nx.node_link_data(tree), f)

        
sem = asyncio.Semaphore(BATCH_SIZE)

async def safe_build(i, row):
    async with sem:  # semaphore limits num of simultaneous debate builds
        return await build(i, row)


tasks = [
    asyncio.ensure_future(safe_build(i, row))  # creating task starts coroutine
    for i, row 
    in df_metadata.iterrows()
]
await asyncio.gather(*tasks)  # await moment all debates built    
    


2024-01-22T09:41:50.046323+0000 INFO Building debate #4 (of 1100) with topic 'The Ethics of Genetic Engineering: Can it Enhance or Endanger Future Generations?' and central claim 'Genetic engineering, when strictly regulated and ethically applied, has the potential to enhance future generations without causing significant endangerment to human life and values.' ...
tokenizer_config.json: 100%|██████████| 1.41k/1.41k [00:00<00:00, 9.62MB/s]
tokenizer.model: 100%|██████████| 493k/493k [00:00<00:00, 59.0MB/s]
tokenizer.json: 100%|██████████| 1.80M/1.80M [00:00<00:00, 55.8MB/s]
special_tokens_map.json: 100%|██████████| 551/551 [00:00<00:00, 4.30MB/s]
2024-01-22T09:41:50.829504+0000 INFO Building debate #5 (of 1100) with topic 'Parental Controls in the Digital Age: Should Apple Play a Greater Role or Leave it to Parents?' and central claim 'Apple should significantly enhance parental controls in their digital products to ensure a safer online environment for children, beyond the responsibil

In [None]:
# no threading

from ast import literal_eval
import json
import networkx as nx
import pandas as pd
import uuid

from loguru import logger

from builder import DebateBuilder, to_kialo

logger.remove()
logger.add("debate_creation_{time}.log", level="INFO")
logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")

builder = DebateBuilder(
    tags_universal=tags_universal,
    tags_per_cluster=TAGS_PER_CLUSTER,
    model=model,
)

df_metadata = pd.read_csv(os.path.join(corpus_path,"metadata.csv"))

for i, row in df_metadata.iterrows():

    kialo_file_path = os.path.join(corpus_path,"kialo",row.split,f"{row.uid}.txt") 
    json_file_path = os.path.join(corpus_path,"json",row.split,f"{row.uid}.json") 

    if os.path.exists(kialo_file_path):
        logger.warning(f"File '{kialo_file_path}' exists, skipping debate #{i+1}.")
        continue
    if os.path.exists(json_file_path):
        logger.warning(f"File '{json_file_path}' exists, skipping debate #{i+1}.")
        continue

    logger.info(f"Building debate #{i+1} (of {len(df_metadata)}) with topic '{row.topic}' and central claim '{row.motion}' ...")    
        
    degree_config = literal_eval(row.degree_config)
        
    tree = await builder.build_debate(
        motion=row.motion,
        topic=row.topic,
        tag_cluster=row.tags,
        degree_config=degree_config,
    )

    os.makedirs(os.path.dirname(kialo_file_path), exist_ok=True)
    with open(kialo_file_path, 'w') as f:
        for line in to_kialo(tree, topic=row.topic):
            f.write(f"{line}\n")

    os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
    with open(json_file_path, 'w') as f:
        json.dump(nx.node_link_data(tree), f)
    
    


In [None]:
def to_kialo(tree, topic = ""):
    """Render an argument tree as the lines of a Kialo-style text export.

    The root is the first node without outgoing edges. Every claim gets a
    hierarchical counter ("1.", "1.1.", "1.2.", ...), and every non-root
    claim is tagged " PRO: " or " CON: " according to the valence of the edge
    connecting it to its target.

    Args:
        tree: Directed graph offering `nodes`, `in_edges(node, data=True)` and
            `out_edges(node)`; nodes carry a "claim", edges a "valence".
        topic: Title written into the header line.

    Returns:
        List of text lines: the title line, an empty line, then one line per
        claim in depth-first order.
    """
    output = [f"Discussion Title: {topic}", ""]

    def emit(node_id, prefix, valence = None):
        # The root has no valence and therefore no PRO/CON tag.
        if valence is None:
            tag = " "
        elif valence==queries.PRO:
            tag = " PRO: "
        else:
            tag = " CON: "

        output.append(prefix + tag + tree.nodes[node_id]["claim"])

        # Reasons are the sources of the edges pointing at this claim.
        incoming = tree.in_edges(node_id, data=True)
        for position, (reason_id, _, attrs) in enumerate(incoming, start=1):
            emit(reason_id, f"{prefix}{position}.", attrs['valence'])

    root = next(n for n in tree.nodes if len(tree.out_edges(n))==0)
    emit(root, "1.")

    return output
    
print("\n".join(to_kialo(tree)))

# Appendices

## LMQL Queries Tests

In [None]:
import queries

In [None]:
# test topic suggestion
result = await queries.suggest_topics(
    model=model,
    tags=tag_clusters_train[0]["tags"],
    debates_per_tag_cluster=DEBATES_PER_TAG_CLUSTER,
)
result

In [None]:
result = await queries.suggest_motion(
    model=model,
    tags=tag_clusters_train[0]["tags"],
    topic="The Effects of Lockdowns on Children's Mental Health: A Worldwide Analysis."
)
import pprint
pprint.pprint(result)

In [None]:
result = await queries.identify_premises(
    model=model,
    argument="Racial and Ethnic Disparities: Systemic racism has historically restricted educational opportunities for certain minority communities, such as Black, Hispanic, and Native American students, who continue to face barriers to accessing higher education.",
    conclusion="Improved Equity and Inclusion: Free college education addresses historical disparities faced by underrepresented groups in higher education, promoting inclusivity and equal opportunity across various racial, ethnic, gender, and socioeconomic identities.",
    valence=queries.PRO,
)

In [None]:
pprint.pprint(result)

In [None]:
result = await queries.identify_premises(
    model=model,
    argument="Improved Equity and Inclusion: Free college education addresses historical disparities faced by underrepresented groups in higher education, promoting inclusivity and equal opportunity across various racial, ethnic, gender, and socioeconomic identities.",
    conclusion="College education should be free.",
    valence=queries.CON,
)

In [None]:
pprint.pprint(result)

In [None]:
premises = ['Free college education addresses historical disparities in higher education.',
 'Promoting inclusivity and equal opportunity is important across various '
 'racial, ethnic, gender, and socioeconomic identities.',
 'Underrepresented groups face challenges in higher education access.',
 'Improving equity and inclusion in higher education can be achieved through '
 'free college education.']

In [None]:
result = await queries.rank_by_plausibility(
    model=model,
    premises=premises,
    tags=tag_clusters_train[0]["tags"],
    decoder="beam",
    n=2,
)
print(result[0])

In [None]:
result = await queries.supporting_argument(
    model=model,
    premises=premises,
    target_idx=1,
    n=4,
    tags=tag_clusters_train[0]["tags"],
    decoder="sample",
    temperature=.6,
)
pprint.pprint(result)

In [None]:
result = await queries.supporting_argument(
    model=model,
    premises=premises,
    target_idx=2,
    tags=tag_clusters_train[0]["tags"],
    decoder="sample",
    temperature=.6,    
)
pprint.pprint(result)

In [None]:
result = await queries.attacking_argument(
    model=model,
    premises=premises,
    target_idx=2,
    tags=tag_clusters_train[0]["tags"],
    decoder="sample",
    temperature=.6,
)
pprint.pprint(result)

In [None]:
# back of envelope: args per debate

import math 

# Degree profile to evaluate; alternatives: [5,5,2,0]  [3,2,2,1,1,0]  [4,3,2,1,0]  [3,4,2,1,0]
profile = [2 * degree for degree in [6,6,1,0]]
total = 0
level_size = 1  # product of all doubled degrees up to the current level
for doubled in profile:
    level_size *= doubled
    total += level_size
    print(total)  # running total after each level
total

In [None]:
def print_argtree(tree):
    """Print an argument tree as an indented outline of claims.

    The root is the first node without outgoing edges. Reasons are printed two
    spaces deeper than the claim they target, prefixed with "+ " for pro and
    "- " for con edges.

    Args:
        tree: Directed graph offering `nodes`, `in_edges(node, data=True)` and
            `out_edges(node)`; nodes carry a "claim", edges a "valence".
    """
    def walk(node_id, depth, valence = None):
        # The root has no valence and therefore no bullet.
        if valence is None:
            bullet = ""
        elif valence==queries.PRO:
            bullet = "+ "
        else:
            bullet = "- "

        print(" " * depth + bullet + tree.nodes[node_id]["claim"])

        # Reasons are the sources of the edges pointing at this claim.
        for reason_id, _, attrs in tree.in_edges(node_id, data=True):
            walk(reason_id, depth + 2, attrs['valence'])

    root = next(n for n in tree.nodes if len(tree.out_edges(n))==0)
    walk(root, 0)
    
print_argtree(tree)