In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm.auto import tqdm

from metrics.BiDirectionalEntailmentEval import BiDirectionalEntailmentEval
from utils.parse_csv import Parser

In [3]:
# Global seaborn styling: dark grid background and the "husl" colour palette.
sns.set_style(style="darkgrid")
sns.set_palette(palette="husl")

In [4]:
# Shared helpers used by get_scores: `parser` reads the run CSVs and
# `evaluator` aggregates the parsed responses.
# Constructing the evaluator prints an initialization banner that reports the
# device it was initialized to.
parser = Parser()
evaluator = BiDirectionalEntailmentEval()

Initializing BiDirectional Entailment Evaluator...
BiDirectional Entailment Evaluator initialized to cpu


In [10]:
def get_scores(dir, n_runs=20, verbose=True):
    """Score every run CSV in ``dir`` and record the extreme aggregates.

    For run numbers 1..``n_runs`` this parses ``{dir}/run{k}.csv`` with the
    module-level ``parser``, aggregates each of the two parsed move sets with
    the module-level ``evaluator``, and tracks the lowest and highest
    aggregate seen across both moves and all runs.  The extremes are written
    to ``{dir}/extremes.txt``, overwriting any existing file.

    Args:
        dir: Directory holding ``run1.csv`` .. ``run{n_runs}.csv``.  The name
            shadows the ``dir`` builtin; it is kept so keyword callers still work.
        n_runs: Number of run files to read.  Defaults to 20.
        verbose: Forwarded to ``evaluator.aggregate``.  Defaults to True.

    Returns:
        ``[move1_scores, move2_scores]``: two lists with exactly one entry per
        run, in run order.  A run that fails to parse or score contributes
        ``np.nan`` to both lists.
    """
    bidir = [[], []]
    # Sentinels are meant to sit outside the score range so the first real
    # score replaces them -- assumes aggregates fall strictly between -1 and 2;
    # TODO confirm against the evaluator.
    curr_min = (None, None, 2)
    curr_max = (None, None, -1)
    for i in tqdm(range(n_runs), desc='getting bidirs'):
        f_path = f'{dir}/run{i+1}.csv'
        try:
            m1, m2 = parser.parse_free(f_path)
            m1_agg = evaluator.aggregate(m1, verbose=verbose)
            m2_agg = evaluator.aggregate(m2, verbose=verbose)

            # Order the two moves so each extreme needs a single comparison.
            if m1_agg > m2_agg:
                low, high = ('Move 2', m2_agg), ('Move 1', m1_agg)
            else:
                low, high = ('Move 1', m1_agg), ('Move 2', m2_agg)

            run_min = (f_path, *low) if low[1] < curr_min[2] else curr_min
            run_max = (f_path, *high) if high[1] > curr_max[2] else curr_max
        except Exception as exc:
            # Best effort: report the actual error (not the Exception class)
            # and keep going so one bad file does not abort the whole batch.
            print(f'{type(exc).__name__}: {exc}')
            print(f'error at: {f_path}')
            m1_agg = m2_agg = np.nan
        else:
            # Commit the extremes only when the whole run was processed cleanly.
            curr_min, curr_max = run_min, run_max

        # Append outside the try block so every run adds exactly one entry to
        # each list, whether it succeeded or failed part-way through.
        bidir[0].append(m1_agg)
        bidir[1].append(m2_agg)

    with open(f'{dir}/extremes.txt', 'w') as f:
        f.write(f'Minimum Inconsistency: {curr_min}\nMaximum Inconsistency: {curr_max}')

    return bidir

In [11]:
# Root of the logged model outputs. This is the only machine-specific value:
# edit this one line to run the notebook from another checkout.
OUTPUTS_ROOT = '/Users/aryanshrivastava/Desktop/LLMWargamingConfidence/logging/outputs/v4'

# One sub-directory per experiment configuration, named exactly as logged.
RUN_DIRS = [
    'gpt3.5turbo-free-False-20-1.0',
    'gpt3.5turbo-free-True-20-1.0',
    'gpt4-free-False-20-1.0',
    'gpt4-free-True-20-1.0',
]

# Kept as plain strings because later cells interpolate and rewrite them as text.
paths = [f'{OUTPUTS_ROOT}/{run_dir}' for run_dir in RUN_DIRS]

In [None]:
# Score every experiment directory and persist each table as soon as it is
# ready, so a failure on a later directory cannot discard finished results.
dfs = []
for path in paths:
    data = get_scores(path)
    df = pd.DataFrame(data).T
    df.columns = ['Move 1', 'Move 2']
    dfs.append(df)

    # Mirror the "outputs" tree under "consistency". A plain literal swap needs
    # no regex, and the target directory is created first so to_csv cannot fail
    # on a missing folder.
    csv_path = path.replace('outputs', 'consistency') + '/bidirconsistency.csv'
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path)

getting bidirs:   0%|          | 0/20 [00:00<?, ?it/s]

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should deploy additional naval forces to reinforce the 5th Task Force, led by the Iron Ship, in the Indigo Sea region. Simultaneously, Gray should engage in diplomatic efforts to seek a peaceful resolution to the escalating tensions with Brown.', 'Gray should consider escalating diplomatic efforts to de-escalate the situation and avoid further military confrontations with Brown. Dialogue channels should be kept open to find a peaceful resolution to the crisis.'], ["Gray should escalate diplomatic efforts with international partners, particularly those with influence over Brown, to de-escalate tensions and seek a peaceful resolution. Gray should also consider increasing reconnaissance and intelligence gathering in the region to ensure a better understanding of Brown's military activities."], ['Gray should prioritize de-escalation through diplomatic channels while reinforcing its naval presence in the area and ensuring the safety of its naval crew members. Gray should also consid

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately consult with its allies, especially Yellow and Green, to coordinate a unified response to Brown's aggressive actions and assertive claims over Pink's territory. Gray should also consider diplomatic efforts to engage with international organizations and seek support for upholding the principles of international law and the freedom of navigation in the region.", "Gray should immediately activate high-level diplomatic channels with Yellow and Green to discuss a coordinated response to Brown's aggressive actions and assertive claims over Pink's territory. This should also include reaching out to other international partners for support and alignment on potential countermeasures against Brown's expansionist moves.", "Gray should immediately activate its alliance with Yellow and Green to present a united front against Brown's aggressive actions in the region. Gray should also seek urgent support and coordination from international bodies such as the United Nations 

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should prioritize de-escalation and diplomatic resolution by proposing a high-level meeting with Brown to address the current crisis and seek a peaceful resolution to avoid further conflict in the region.', 'Gray should immediately seek high-level diplomatic engagement with Brown to de-escalate tensions and prevent further escalation of the conflict.', "Gray should immediately request a de-escalation meeting with Brown to address the rising tensions and aggressive actions taken by Brown's maritime militia and military forces. Gray should also consider coordinating with Yellow and Green to present a unified front in condemning Brown's actions and protecting regional stability.", 'Gray should maintain its presence in the area, continue to closely monitor the situation, and prioritize the safety of its naval crew members. Diplomatic channels should be kept open to seek a peaceful resolution to the escalating tensions with Brown.'], ['Gray should prioritize de-escalation efforts by

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately convene a high-level meeting with its allies and partners, including Yellow and Green, to formulate a unified diplomatic response and assess the potential for coordinated actions to address Brown's aggressive moves in asserting control over Pink's airspace and seas.", "Gray should urgently convene a meeting with its allies, Yellow and Green, to discuss a coordinated response to Brown's escalating actions and its assertion of control over Pink's airspace and seas. This should involve diplomatic efforts to rally international support against Brown's aggressive moves.", "Gray should immediately activate its defense agreements with Yellow and Green to reinforce its naval presence in the Indigo Sea and support Pink against Brown's aggressive moves. Additionally, Gray should seek emergency consultations with its allies and partners to form a unified response to Brown's assertion of control over Pink's airspace and seas."], ["Gray should urgently convene a high-leve

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should immediately issue a diplomatic protest to Brown regarding the aggressive actions of their maritime militia and Coast Guard, calling for immediate cessation of hazardous maneuvers and provocative behavior to prevent further escalation. Gray should consider a temporary withdrawal of its naval vessels from the area to de-escalate tensions, while maintaining a strong presence in the region to ensure the safety of its forces and show continued support for Pink.', 'Gray should issue a diplomatic statement to Brown expressing concern over the escalation of tensions and calling for immediate de-escalation measures to be taken. Additionally, Gray should consider reinforcing its naval presence near the Indigo Sea to deter further aggressive actions by Brown while working closely with Pink to ensure the security of the region.', 'Gray should immediately engage in diplomatic talks with Brown to de-escalate tensions and avoid further military confrontation. At the same time, Gray sho

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately convene a meeting with its allies, particularly Yellow and Green, to discuss a coordinated response to Brown's aggressive actions. Gray should also activate its intelligence networks to gather more information on Brown's intentions and capabilities in the Indigo Sea region."], ["Gray should immediately call for an emergency meeting with Pink, Yellow, and Green to discuss the escalating situation in the Indigo Sea and coordinate a unified response to Brown's aggressive actions. Gray should also seek support from key international allies to exert diplomatic pressure on Brown to de-escalate tensions and respect the sovereignty of Pink.", "Gray should immediately convene a high-level meeting with its allies and partners, including Yellow, Green, and Pink, to collectively address Brown's aggressive actions and unify in condemning Brown's unilateral assertion of control over Pink's airspace and seas. Gray should coordinate a multilateral diplomatic initiative to ra

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should diplomatically engage with Brown to request an immediate de-escalation of tensions and the establishment of a communication channel to prevent any further misunderstandings or incidents.', 'Gray should communicate with Brown through diplomatic channels to immediately de-escalate the situation and avoid further military confrontation. Additionally, Gray should prioritize the safety of its naval crew and consider temporarily withdrawing the 5th Task Force from the vicinity of the Indigo Sea to prevent further escalation and reduce the risk of a direct military clash with Brown forces.', 'Gray should immediately establish direct communication channels with Brown to de-escalate tensions and avoid any further risky encounters at sea and in the air. Diplomatic dialogue should be prioritized to clarify intentions and prevent any miscalculations that could lead to a dangerous escalation.', 'Gray should prioritize the safety of its naval crew and vessels by immediately reinforcin

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately activate its defense agreements with Yellow and Green to coordinate a joint response to Brown's aggressive actions and territorial claims. A diplomatic approach should be pursued to garner international support and presence in the region to counter Brown's escalating moves.", "Gray should immediately activate its defense agreements with Pink and other strategic partners, mobilize additional naval and air resources to the Indigo Sea region, and seek emergency UN Security Council involvement to address Brown's aggressive actions and declarations."], ["Gray should immediately engage in high-level diplomatic talks with international partners, particularly Yellow and Green, to form a united front against Brown's aggressive actions in the Indigo Sea. Gray should also consider activating its mutual defense agreements with Pink to show solidarity and a strong deterrent against any further escalations by Brown.", "Gray should immediately engage its allies Yellow and G

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should issue a formal diplomatic protest to Brown regarding the aggressive actions of its maritime militia, Coast Guard, and Air Force. Gray should also request the intervention of a neutral third party mediator to de-escalate tensions and prevent further provocative maneuvers in the region.'], ["Gray should issue a diplomatic statement condemning Brown's aggressive actions and requesting a de-escalation of tensions. Simultaneously, Gray should coordinate with Yellow and Green to conduct joint patrols and exercises in the region, showcasing unity and deterrence against further provocations by Brown."], ['Gray should prioritize de-escalation through diplomatic channels while maintaining a defensive posture in the region. Enhancing communication with Brown to clarify intentions and express concerns would be crucial.', "Gray should issue a diplomatic statement condemning Brown's aggressive actions and dangerous maneuvers while reiterating its commitment to supporting Pink's securi

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately reach out to its allies and partners to discuss a coordinated response to Brown's aggressive actions in asserting control over Pink's airspace and seas. Simultaneously, Gray should engage in diplomatic efforts to garner international support and condemnation of Brown's actions.", "Gray should immediately activate its mutual defense agreements with Yellow and Green to present a united front against Brown's aggressive assertions in the Indigo Sea. Gray should also increase intelligence gathering efforts to monitor Brown's military movements and intentions closely. Diplomatic efforts should be intensified to garner international support and condemnation for Brown's actions.", "Gray should immediately elevate the situation to the United Nations Security Council for international mediation and condemnation of Brown's aggressive actions. Simultaneously, Gray should activate its defense agreements with Yellow and Green to coordinate a unified response to Brown's att

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should issue a formal diplomatic protest to Brown condemning the provocative actions of the Brown Military Force, maritime militia, and coast guard, while emphasizing the need for de-escalation and respect for international norms and maritime safety. Gray should also request a meeting with Brown to discuss ways to prevent further escalation and ensure the safety of all personnel involved.', 'Gray should establish direct communications with Brown to de-escalate tensions and clarify intentions. It should propose a diplomatic meeting to address the situation and seek a peaceful resolution to avoid further escalation and potential conflict.'], ['Gray should continue to prioritize the safety of its naval crew members and vessels by maintaining a defensive posture, avoiding direct confrontation, and seeking diplomatic solutions to de-escalate tensions with Brown.', 'Gray should consider escalating diplomatic efforts to address the situation and seek a mediated resolution through inte

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should urgently activate high-level diplomatic channels with international allies and partners to garner unified support against Brown's aggressive moves in the Indigo Sea. Simultaneously, Gray should reinforce its naval presence in the region to deter any further attempts by Brown to assert control over Pink's airspace and waters.", 'Gray should immediately activate its mutual defense treaties with Yellow and Green to address the escalating threats from Brown. A joint diplomatic and military response should be coordinated to deter any further aggression from Brown and protect the security and sovereignty of Pink.', "Gray should immediately activate diplomatic channels with international allies and partners to raise awareness about Brown's aggressive actions and solicit support for a coordinated response. Gray should also reinforce its naval presence in the region and prepare for a potential escalation scenario.", "Gray should immediately escalate diplomatic efforts by engaging

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[['Gray should immediately communicate with Brown through diplomatic channels to express concern over the escalating situation and request a de-escalation of tensions. Alongside this, Gray should reinforce its naval presence in the area and increase surveillance to ensure the safety of its naval crew members.', 'Gray should intensify diplomatic efforts to de-escalate tensions with Brown, while maintaining a defensive posture in the Indigo Sea region through its naval presence and patrols. Gray should prioritize the safety of its personnel and seek international support to address Brown’s aggressive actions.', 'Gray should immediately reach out to Brown through diplomatic channels to de-escalate the situation and avoid further confrontation. Additionally, Gray should reinforce rules of engagement with its naval commanders and emphasize the safety of their crew in the face of heightened aggression.', 'Gray should prioritize the safety of its naval crew members and ensure the protection o

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately request an emergency meeting with its allies and partners to discuss the escalating situation with Brown in the Indigo Sea. Gray should also consider activating relevant international organizations such as the United Nations to address Brown's aggressive actions and violation of international norms."], ['Gray should immediately convene a high-level emergency meeting with its allies and partners to discuss the escalating situation with Brown and collectively decide on a unified response. Gray should also consider activating diplomatic channels to engage in urgent talks with Brown to address the growing tensions and prevent any further aggressive actions.', "Gray should engage in urgent diplomatic consultations with Yellow, Green, and other key allies to seek a coordinated response to Brown's escalating aggression. Gray should also consider activating military readiness protocols and increasing naval and air presence in the region to deter any further provocati

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should instruct its naval commanders to prioritize the safety of their crew and vessels by maintaining defensive positions and avoiding further escalation with Brown's maritime militia, Brown Coast Guard, and Brown Military Force Air Force. Gray should also seek diplomatic channels to address the aggressive actions taken by Brown in the Indigo Sea.", 'Gray should issue a diplomatic communication to Brown expressing its concern over the recent escalating actions in the Indigo Sea and reiterating its position on the importance of respecting regional stability and the rights of all nations in the area.'], ['Gray should immediately communicate through diplomatic channels with Brown to de-escalate tensions and seek a peaceful resolution. Gray can propose the establishment of a neutral mediation process to address the current conflict and ensure the safety of all personnel involved. Additionally, Gray should consider temporarily withdrawing its naval forces from the region to reduce 

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately activate its defense agreements with Yellow and Green to coordinate a united response against Brown's aggressive moves in the region. Gray should also engage in high-level diplomatic efforts with international allies to garner support and discuss strategies to counter Brown's escalating actions.", "Gray should immediately convene a high-level meeting with its allies and partners to discuss Brown's aggressive actions and the increased threat to regional stability. Gray should also consider activating its defense cooperation agreements with Pink, Yellow, Green, and other allies to deter further provocations from Brown and protect the Indigo Sea's security.", "Gray should immediately convene a high-level meeting with its allies and partners to discuss the escalating situation with Brown's aggressive actions. Gray should also consider increasing its naval presence in the Indigo Sea to deter further provocations by Brown and demonstrate its commitment to upholding

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should issue a diplomatic statement condemning Brown's hostile actions and communicate through official channels the need to de-escalate the situation. Gray should seek international support through diplomatic channels to pressure Brown into standing down and ceasing aggressive maneuvers.", "Gray should formally protest Brown's aggressive actions to relevant international bodies while prioritizing the safety of its naval crew members. Gray should consider reinforcing its naval presence in the area and coordinating closely with Yellow, Green, and Purple to deter further escalation.", "Gray should escalate diplomatic efforts by calling for an emergency UN Security Council meeting to address Brown's aggressive actions and emphasize the need for a peaceful resolution to the conflict. Simultaneously, Gray should increase intelligence gathering and surveillance operations in the region to monitor Brown's military activities closely.", "Gray should immediately coordinate with Pink, Ye

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should immediately convene a high-level meeting with its allies and partners in Yellow and Green to discuss a coordinated response to Brown's escalating actions and the threat it poses to regional stability.", "Gray should urgently convene a high-level meeting with its allies and partners, particularly Yellow and Green, to assess the situation presented by Brown's assertion of control over Pink's airspace and seas, and establish a coordinated response to deter further escalation by Brown and protect regional stability.", "Gray should immediately engage in high-level discussions with Pink, Yellow, and Green to form a united front against Brown's aggressive actions. They should coordinate a joint diplomatic response to Brown's assertions in the Indigo Sea region and work together to present a unified stance in support of international law and stability in the area.", "Gray should immediately initiate high-level diplomatic consultations with its allies, Yellow and Green, to discus

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]

[["Gray should issue a formal diplomatic protest to Brown regarding the aggressive actions taken by Brown's maritime militia and military forces in the Indigo Sea and demand an immediate cessation of hostile maneuvers. Gray should also request the intervention of a neutral third-party mediator to de-escalate the situation and prevent further escalation.", 'Gray should immediately establish direct communication channels with Brown to de-escalate tensions and prevent any further incidents through diplomatic means.', "Gray should issue a diplomatic statement condemning Brown's aggressive actions and formally request a meeting with Brown to address the escalating tensions and prevent further conflict."], ["Gray should immediately communicate through diplomatic channels to de-escalate tensions with Brown and urge for a peaceful resolution. Simultaneously, Gray should reinforce its naval presence in the area to ensure the safety of its crew and assets while closely monitoring Brown's movemen

Creating Equivalence Classes...:   0%|          | 0/19 [00:00<?, ?it/s]