In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys

if not os.path.exists(os.path.join(os.getcwd(), 'config.json')):
  # preventing to go always to the parent folder everytime we run the jupyter notebook without restarting
  os.chdir("..")
if not os.getcwd() in sys.path:
  sys.path.append(os.getcwd())

import json
from graph import Graph
from rank import Rank
from strategies import *
import matplotlib.pyplot as plt

config = None
with open('config.json') as config_file:
  config = json.load(config_file)
  config["database"]["database"] = "wikilite"

strategies_to_run = [
  'rnd', 
  'greedy',
  MultiObjectiveStrategy
]

to_ignore = [
  # 'Declarative Language vs Procedural Language',
  # 'Optimism vs Pessimism',
  # 'Rationalism vs Empiricism',
  # 'Classical Economics vs Keynesian Economics',
]

flipped = False

In [2]:
def label_for_topic(topic_name):
  return topic_name[:3].lower()

def check_repeated_keys(datavoids_per_topic_json):
  keys_dict = set()
  for _, d in datavoids_per_topic_json.items():
    for k in [label_for_topic(d['mitigator']), label_for_topic(d['disinformer'])]:
      if k in keys_dict:
        raise Exception("Repeated key " + k)
      keys_dict.add(k)
  return True

In [3]:
# functions from analysis commons module
import csv
import io
import gzip

TABLEAU_PALETTE = ["#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7", "#9C755F", "#BAB0AC" ]

f = open('data/datavoids_per_topic_filtered.json', 'r')
datavoids_per_topic = json.load(f)
check_repeated_keys(datavoids_per_topic)

selected_topics = [] 
with open('data/contrasting_arguments_labeled_stats_selected.csv', 'r') as f:
  reader = csv.DictReader(f)
  for r in reader:
    if int(r['Selected']) != 1:
      continue
    selected_topics.append(r['Mitigator'] + ' vs ' + r['Disinformer'])
datavoids_per_topic = {k: v for k, v in datavoids_per_topic.items() if k in selected_topics}
datavoids_per_topic = {k: v for k, v in datavoids_per_topic.items() if k not in to_ignore}

def clone_config_with_target(config, mitigator_keyword, disinformer_keyword,
                             mitigator_node, disinformer_node,
                             mitigator_keywords=[], disinformer_keywords=[]):
  """ returns a configuration copy where the nodes and groups have been changed """

  new_config = config.copy()

  new_config["target_groups"] = [
    mitigator_keyword,
    disinformer_keyword,
    "None"
  ]

  new_config["groups_colors"] = {
    mitigator_keyword: config["groups_colors"]["mit"],
    disinformer_keyword: config["groups_colors"]["dis"],
    "None": config["groups_colors"]["None"]
  }

  new_config["mitigator_keyword"] = mitigator_keyword
  new_config["disinformer_keyword"] = disinformer_keyword

  new_config["target_node"] = {
    mitigator_keyword: int(mitigator_node),
    disinformer_keyword: int(disinformer_node),
  }
  # print("target_nodes", new_config["target_node"])

  new_config[mitigator_keyword + "_keywords"] = mitigator_keywords
  new_config[disinformer_keyword + "_keywords"] = disinformer_keywords

  return new_config

def create_simulation_name(config, label):
  name = "_".join(list(filter(lambda g: g != "None", config['target_groups'])))
  return f"{name}-{label}.csv"

def get_output(config, output_filename=None):
  csv_reader = None

  curr_config = config
  if output_filename is not None:
    curr_config = copy.deepcopy(config)
    curr_config["output_filename"] = output_filename

  if "output_filename" in curr_config and curr_config['output_filename'] is not None:
    if curr_config["output_filename"].endswith('.gz'):
      # Open and read gzip file
      f = gzip.open(curr_config["output_filename"], 'rt')
      csv_reader = csv.DictReader(row for row in f if not row.startswith('#'))
    else:
      f = open(curr_config["output_filename"], 'r')
      csv_reader = csv.DictReader(row for row in f if not row.startswith('#'))
  else:
    file_content = curr_config["output"]["file"].getvalue()
    read_file_object = io.StringIO(file_content)
    csv_reader = csv.DictReader(row for row in read_file_object if not row.startswith('#'))

  header = csv_reader.fieldnames
  if header[0] != "run_no" or header[1] != "strategy" or header[2] != "step_no":
    raise ("Invalid csv file")

  return read_csv_vals(csv_reader, curr_config)

def read_csv_vals(csv_reader, config):
  steps_per_strategy = {}
  strategies = set()
  id_max_rank = {}
  for g in config["target_groups"]:
    id_max_rank[g] = (None, 0)
  for r in csv_reader:
    strategy = r['strategy']
    if strategy not in steps_per_strategy:
      steps_per_strategy[strategy] = {}
    if strategy not in strategies:
      strategies.add(strategy)
    step_no = int(r['step_no'])
    if step_no not in steps_per_strategy[strategy]:
      steps_per_strategy[strategy][step_no] = {gr: {
          "avg": [],
          "max": [],
          "min": [],
          "nodes_in_top_k": [],
          "weighted_avg_rank": [],
          "cost": [],
          "nodes_added": [],
          "edges_added": []
      } for gr in config["target_groups"]}
    for grp in config["target_groups"]:
      vals = r["group_" + grp].split("|")
      if vals[0] != "None":
        steps_per_strategy[strategy][step_no][grp]["avg"].append(float(vals[0]))
      if vals[1] != "None":
        steps_per_strategy[strategy][step_no][grp]["max"].append(float(vals[1]))
      if vals[2] != "None":
        steps_per_strategy[strategy][step_no][grp]["min"].append(float(vals[2]))
      steps_per_strategy[strategy][step_no][grp]["nodes_in_top_k"].append(float(vals[3]))
      steps_per_strategy[strategy][step_no][grp]["weighted_avg_rank"].append(float(vals[4]))
      steps_per_strategy[strategy][step_no][grp]["cost"].append(float(vals[5]))
      steps_per_strategy[strategy][step_no][grp]["nodes_added"].append(float(vals[6]))
      steps_per_strategy[strategy][step_no][grp]["edges_added"].append(float(vals[7]))

  return steps_per_strategy, list(strategies), id_max_rank

def close_output(config):
  if "output" not in config:
    return
  if not isinstance(config["output"]["file"], io.StringIO):
    config["output"]["file"].close()
    config["output"] = None

def plot_differences_against_opponent_base_ranges(target_group, opponent_group,
                                                  strategy_base, strategy,
                                                  steps_per_strategy_base, steps_per_strategy, config):
  max_y_val = 0
  min_y_val = 0

  steps_no = min(
    max(steps_per_strategy[strategy].keys()),
    max(steps_per_strategy_base[strategy_base].keys())
  )

  for step_no in range(steps_no + 1):
    vals = steps_per_strategy_base[strategy_base][step_no][opponent_group]["weighted_avg_rank"]
    y_val_opponent_base = sum(vals) / len(vals)

    vals = steps_per_strategy[strategy][step_no][opponent_group]["weighted_avg_rank"]
    y_val_opponent = sum(vals) / len(vals)

    vals = steps_per_strategy_base[strategy_base][step_no][target_group]["weighted_avg_rank"]
    y_val_base = sum(vals) / len(vals)

    vals = steps_per_strategy[strategy][step_no][target_group]["weighted_avg_rank"]
    y_val = sum(vals) / len(vals)

    if max_y_val < y_val - y_val_opponent:
      max_y_val = y_val - y_val_opponent
    if max_y_val < y_val_base - y_val_opponent_base:
      max_y_val = y_val_base - y_val_opponent_base
    if min_y_val > y_val - y_val_opponent:
      min_y_val = y_val - y_val_opponent
    if min_y_val > y_val_base - y_val_opponent_base:
      min_y_val = y_val_base - y_val_opponent_base

  return [min_y_val, max_y_val, step_no]


def readable_group_name(group):
  if group == "d":
    return "Disinformer"
  if group == "m":
    return "Mitigator"
  if group == "n":
    return "Neutral"
  else:
    return group

def plot_differences_against_opponent_base(title, target_group, opponent_group, strategy_base, strategy,
                                           steps_per_strategy_base, steps_per_strategy, config, y_min=None, y_max=None,
                                           show=True, steps_no=None, plot_x_axis=True, plot_y_axis=True, log_scale=False, ylabel=None, xlabel=None, 
                                           show_legend=True):
  plt.rc('font', family='Helvetica Neue', size=25)
  plt.rcParams['figure.dpi'] = 200
  plt.rcParams['axes.prop_cycle'] = plt.cycler(color=TABLEAU_PALETTE)

  fig, axs = plt.subplots(1, 1, squeeze=False)

  if steps_no is None:
    steps_no = max(steps_per_strategy[strategy].keys())

  xvals = []
  yvals = {
    "weighted_avg_rank": {gr: [] for gr in [opponent_group + '_base', opponent_group, target_group + '_base', target_group]},
    "diff": {gr: [] for gr in ['base', 'test']}
  }
  for step_no in range(steps_no + 1):
    if step_no not in steps_per_strategy_base[strategy_base] \
      or step_no not in steps_per_strategy[strategy]:
      continue

    xvals.append(step_no)

    vals = steps_per_strategy_base[strategy_base][step_no][opponent_group]["weighted_avg_rank"]
    y_val_opponent_base = sum(vals) / len(vals)
    yvals["weighted_avg_rank"][opponent_group + '_base'].append(y_val_opponent_base)

    vals = steps_per_strategy[strategy][step_no][opponent_group]["weighted_avg_rank"]
    y_val_opponent = sum(vals) / len(vals)
    yvals["weighted_avg_rank"][opponent_group].append(y_val_opponent)

    vals = steps_per_strategy_base[strategy_base][step_no][target_group]["weighted_avg_rank"]
    y_val_base = sum(vals) / len(vals)
    yvals["weighted_avg_rank"][target_group + '_base'].append(y_val_base)

    vals = steps_per_strategy[strategy][step_no][target_group]["weighted_avg_rank"]
    y_val = sum(vals) / len(vals)
    yvals["weighted_avg_rank"][target_group].append(y_val)

    yvals["diff"]['test'].append(y_val - y_val_opponent)
    yvals["diff"]['base'].append(y_val_base - y_val_opponent_base)

  for g in ['base', 'test']:
    axs[0][0].plot(
        xvals,
        yvals['diff'][g],
        label=readable_group_name(g),
        color='black',
        linewidth=2,
        linestyle='--' if g == 'base' else '-',
        alpha=0.3 if g == 'base' else 1.0
    )
    if title:
      axs[0][0].set_title(title, fontsize=8)
    axs[0][0].autoscale(enable=True, axis='y')

    if y_min is not None and y_max is not None:
      axs[0][0].set_ylim(bottom=y_min, top=y_max)
      # axs[0][0].set_yticks([y_min, 0, y_max])
    elif y_max is not None:
      axs[0][0].set_ylim(bottom=0, top=y_max)
      # axs[0][0].set_yticks([0, y_max])
    elif y_min is not None:
      axs[0][0].set_ylim(bottom=y_min)
      # axs[0][0].set_yticks([y_min, 0])
    axs[0][0].set_xlim(left=0, right=step_no)

  label_above = "Outperform"
  label_below = "Underperform"

  axs[0][0].fill_between(xvals,
                         yvals["diff"]['test'],
                         yvals["diff"]['base'],
                         color=TABLEAU_PALETTE[4],
                         interpolate=True,
                         label=label_above,
                         where=[yvals["diff"]['test'][i] >= yvals["diff"]['base'][i]
                                for i in range(len(xvals))]
                         )
  axs[0][0].fill_between(xvals,
                         yvals["diff"]['test'],
                         yvals["diff"]['base'],
                         color=TABLEAU_PALETTE[2],
                         interpolate=True,
                         label=label_below,
                         where=[yvals["diff"]['test'][i] <= yvals["diff"]['base'][i]
                                for i in range(len(xvals))]
                         )

  axs[0][0].axhline(y=0, color='gray', linestyle=':')

  fig.set_size_inches(8, 4)

  plt.tight_layout()

  axs[0][0].spines['top'].set_visible(False)
  axs[0][0].spines['right'].set_visible(False)

  axs[0][0].tick_params(axis="x", direction="in")
  axs[0][0].tick_params(axis="y", direction="in")
  axs[0][0].tick_params(axis='both', which='both', length=1, labelbottom=plot_x_axis, labelleft=plot_y_axis)

  if show_legend:
    handles, labels = axs[0][0].get_legend_handles_labels()
    legend = fig.legend(
      handles,
      labels + [label_above, label_below],  
      loc='lower right',
      borderaxespad=1,
      bbox_to_anchor=(1, -0.0),
      fontsize=20,
      ncol=len(labels) + 2
    )
    legend.get_frame().set_linewidth(0.0)
  plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.3)

  if xlabel:
    plt.xlabel(xlabel)
  
  if ylabel:
    plt.ylabel(ylabel)

  if show:
    plt.show()

  return fig

def plot_differences_against_base_ranges(target_group, strategy_base_csv_name, strategy_csv_name,
                                         steps_per_strategy_base, steps_per_strategy, config):
  max_y_val = 0
  max_steps = min(
    max([int(s) for s in steps_per_strategy[strategy_csv_name].keys()]),
    max([int(s) for s in steps_per_strategy_base[strategy_base_csv_name].keys()])
  )
  for step_no in range(max_steps + 1):
    vals_base = steps_per_strategy_base[strategy_base_csv_name][step_no][target_group]["weighted_avg_rank"]
    y_val_base = sum(vals_base) / len(vals_base)
    vals = steps_per_strategy[strategy_csv_name][step_no][target_group]["weighted_avg_rank"]
    y_val = sum(vals) / len(vals)
    if y_val_base > max_y_val:
      max_y_val = y_val_base
    if y_val > max_y_val:
      max_y_val = y_val

  return [0, max_y_val]




In [6]:
import math 

results_tb = open('results/evaluation-all-topics-against-disinformer.html', 'w')

results_tb.write("""
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Strategy evaluation against disinformer</title>
  <script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML"> </script>
  <style>
    body, html {
      font-family: Helvetica Neue;
      font-size: 23px;
    }

    @font-face {
      font-family: 'Computer Modern Classical Serif Italic';
      src: url('cmunci.eot');
      src: url('cmunci.eot?#iefix') format('embedded-opentype'),
        url('cmunci.woff') format('woff'),
        url('cmunci.ttf') format('truetype'),
        url('cmunci.svg#cmunci') format('svg');
      font-weight: normal;
      font-style: italic;
    }

    i {
      font-family: "Computer Modern Classical Serif Italic", serif;
    }
    table, thead, tbody, tfoot, tr, th, td {
      width: auto;
      height: auto;
      margin: 0;
      padding: 0;
      border: none;
      border-collapse: inherit;
      border-spacing: 0;
      border-color: inherit;
      vertical-align: inherit;
      text-align: center;
      font-weight: inherit;
      -webkit-border-horizontal-spacing: 0;
      -webkit-border-vertical-spacing: 0;
    }
    th {
      font-size: 300%;
      vertical-align: middle;
    }
    th[rowspan] {
      writing-mode: vertical-lr;
      transform: rotate(180deg);
      padding: 5px;
    }
    th[rowspan] {
      padding: 5px;
      font-weight: bold;
    }
    th.mitigator-strategy {
      padding: 5px;
      min-width: 200px;
      white-space: nowrap;
    }
    .test-label, .base-label {
      font-size: 300%;
    }
    .split-line { 
      padding: 50px;
    }
    .topics {
      font-weight: bold;
    }
  </style>
</head>
<body>
 <p>\( x =  E_m(U_t) - E_d(U_t) \)</p>
""")

results_tb.write('<table>\n')

results_tb.write('<tr>\n')
results_tb.write(f"  <th> </th>\n")
results_tb.write(f"  <th> </th>\n")
for topic in datavoids_per_topic.keys():
  topic_parts = topic.split(' vs ')
  if flipped:
    results_tb.write(f"  <th>\( m = \) {topic_parts[1]} </br> \( d = \) {topic_parts[0]}</th>\n")
  else:
    results_tb.write(f"  <th>\( m = \) {topic_parts[0]} </br> \( d = \) {topic_parts[1]}</th>\n")
results_tb.write("</tr>\n")

plot_y_max = 0
plot_y_min = 0
min_step_no = {topic: math.inf for topic in datavoids_per_topic.keys()}
for find_y_max in [True, False]:
  for dis_idx, disinformer_strategy_name in enumerate(strategies_to_run):
    if not find_y_max:
      results_tb.write("<tr>\n")
    disinformer_strategy_base_name = disinformer_strategy_name
    mitigator_strategy_base = RandomStrategy()
    if not find_y_max:
      results_tb.write(f"  <th rowspan=\"{len(strategies_to_run) - 1}\">\( d = \) " + disinformer_strategy_base_name + "</th>\n")
    simulation_label_base = f'eval-all-base-{mitigator_strategy_base.get_strategy_name()}-{disinformer_strategy_base_name}'

    for mit_idx, mitigator_strategy_name in enumerate(strategies_to_run):
      disinformer_strategy = disinformer_strategy_name
      simulation_label = f'eval-all-{mitigator_strategy_name}-{disinformer_strategy.get_strategy_name()}'

      if disinformer_strategy_name == disinformer_strategy_base_name and \
          mitigator_strategy_name == mitigator_strategy_base.get_strategy_name():
        continue

      if not find_y_max:
        results_tb.write(f"  <th class=\"mitigator-strategy\">\( m = \) " + mitigator_strategy_name + "<br/>Differences in Effect \( E_m(U_t) - E_d(U_t) \) " + "</th>\n")

      for topic_idx, topic in enumerate(datavoids_per_topic.keys()):
        topic_parts = topic.split(' vs ')
        mitigator_id = datavoids_per_topic[topic]['mitigator_id']
        disinformer_id = datavoids_per_topic[topic]['disinformer_id']
        mitigator_label = label_for_topic(datavoids_per_topic[topic]['mitigator'])
        disinformer_label = label_for_topic(datavoids_per_topic[topic]['disinformer'])
        current_config = None
        topic_label = f"{mitigator_label}-{disinformer_label}"

        if flipped:
          current_config = clone_config_with_target(config, mitigator_label, disinformer_label, mitigator_id, disinformer_id)
          current_config['topic_name'] = topic_parts[1] + ' vs ' + topic_parts[0]
        else:
          current_config = clone_config_with_target(config, disinformer_label, mitigator_label, disinformer_id, mitigator_id)
          current_config['topic_name'] = topic 

        datavoids = datavoids_per_topic[topic]['datavoids']
        current_config['datavoids'] = [datavoids]

        sim_filename_base = 'results/' + create_simulation_name(current_config, simulation_label_base)
        sim_filename = 'results/' + create_simulation_name(current_config, simulation_label)
        plot_filename = f'results/images_against_disinformer/{topic_label}-{simulation_label_base}-{simulation_label}.png'
        plot_filename_pdf = f'results/images_against_disinformer/{topic_label}-{simulation_label_base}-{simulation_label}.pdf'

        if not os.path.exists(sim_filename_base) or not os.path.exists(sim_filename):
          continue

        current_config["output_filename"] = sim_filename_base
        steps_per_strategy_base, strategies_base, id_max_rank_base = get_output(current_config)

        current_config["output_filename"] = sim_filename
        steps_per_strategy, strategies, id_max_rank = get_output(current_config)

        if find_y_max:
          res = plot_differences_against_opponent_base_ranges(
            current_config['target_groups'][0],
            current_config['target_groups'][1],
            strategies_base[0],
            strategies[0], 
            steps_per_strategy_base, 
            steps_per_strategy, 
            current_config,
          )
          if res[1] > plot_y_max:
            plot_y_max = res[1]
          if res[0] < plot_y_min:
            plot_y_min = res[0]
          if res[2] < min_step_no[topic]:
            min_step_no[topic] = res[2]

          abs_y_max = max(abs(plot_y_max), abs(plot_y_min))
          plot_y_min = - abs_y_max
          plot_y_max = abs_y_max
        else:
          plot_fig = plot_differences_against_opponent_base(
            None,
            current_config['target_groups'][0],
            current_config['target_groups'][1],
            strategies_base[0],
            strategies[0], 
            steps_per_strategy_base, 
            steps_per_strategy, 
            current_config,
            y_max=plot_y_max,
            y_min=plot_y_min,
            steps_no=min_step_no[topic],
            show=False,
            plot_y_axis= (topic_idx == 0),
            plot_x_axis= (dis_idx == len(strategies_to_run) - 1 and mit_idx == len(strategies_to_run) - 1),
            show_legend=False
          )
          plot_fig.savefig(plot_filename, bbox_inches='tight')
          plot_fig.savefig(plot_filename_pdf, bbox_inches='tight')

        if not find_y_max:
          results_tb.write(f"""  <td>
                <div>{plot_filename_pdf.replace("results/images_against_disinformer/", "")}</div>
                <img src=\"{plot_filename.replace("results/", "")}\" /></td>\n
              """)

        print('.', end='')

      if not find_y_max:
        results_tb.write("</tr>\n")

    if not find_y_max:
      results_tb.write("</tr>")
      if dis_idx < len(strategies_to_run) - 1:
        results_tb.write(f"<tr><td class=\"split-line\" colspan=\"{len(datavoids_per_topic.keys()) + 2}\"></td></tr>")
      
results_tb.write('<tr>\n')
results_tb.write(f"  <th> </th>\n")
results_tb.write(f"  <th> </th>\n")
for topic in datavoids_per_topic.keys():
  topic_parts = topic.split(' vs ')
  results_tb.write(f"  <th>Simulation Steps</th>\n")
results_tb.write("</tr>\n")

results_tb.write("</table>")

results_tb.write("</body></html>")

results_tb.close()

plot_y_max

AttributeError: 'str' object has no attribute 'get_strategy_name'