Copyright 2024 DeepMind Technologies Limited.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [None]:
#@title Imports.
import io
import math
import tempfile
from typing import Dict, List, Sequence, Tuple, Union
import warnings
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.core.groupby import DataFrameGroupBy
import requests
from scipy import stats
import seaborn as sns
from sklearn import linear_model

warnings.simplefilter('ignore', category=RuntimeWarning)
from IPython.display import clear_output

In [None]:
# Download the experiment data and load it into the `df` DataFrame used by the
# analysis cells.
file_location = (
    'https://storage.googleapis.com/sustainable_behavior/sustainable_behavior.csv'
)
response = requests.get(file_location)
# Fail loudly on an HTTP error instead of handing an error page to the CSV
# parser.
response.raise_for_status()
with io.BytesIO(response.content) as f:
  df = pd.read_csv(f)
clear_output()

In [None]:
#@title Define mapping between mechanisms and colours.


# Maps each mechanism name to the matplotlib colour name that the plotting
# functions look up for it.
MECH_COLOURS = {
    # Baselines: red, purple, orange.
    'Proportional Baseline': 'red',
    'Mixed Baseline': 'purple',
    'Interpolating Baseline': 'orange',

    # Equal (uniform) baseline: blue.
    'Equal Baseline': 'blue',

    # Learned agents (M1 and M2): greens.
    'RL Agent (M1)': 'forestgreen',
    'RL Agent (M2)': 'limegreen',

    # Learned agent M2, one entry per game: orange, red, maroon.
    'RL Agent (M2) Game 1': 'orange',
    'RL Agent (M2) Game 2': 'red',
    'RL Agent (M2) Game 3': 'maroon',

    # Proportional baseline, one entry per game: aqua, blue, green.
    'Proportional Baseline Game 1': 'aqua',
    'Proportional Baseline Game 2': 'blue',
    'Proportional Baseline Game 3': 'green',
}

In [None]:
# @title Util functions for trial analysis.


def calculate_gini(x: Sequence[float]) -> float:
  """Computes the Gini coefficient of a sequence of values.

  The coefficient is half of the relative mean absolute difference: the mean
  of |a - b| over every ordered pair (a, b) drawn from `x`, divided by the
  mean of `x`. Identical values give 0; larger values indicate more
  inequality.

  Args:
    x: Sequence of numerical values.

  Returns:
    The Gini coefficient.
  """
  # |x_i - x_j| for every ordered pair, including i == j.
  pairwise_gaps = np.abs(np.subtract.outer(x, x))

  # Normalise the mean pairwise gap by the mean value.
  relative_gap = pairwise_gaps.mean() / np.mean(x)

  return 0.5 * relative_gap


def get_mean_and_conf(data: Sequence[float]) -> Tuple[float, float]:
  """Returns the mean of `data` and the half-width of its 95% interval.

  The half-width is 1.96 times the standard error, where the standard error is
  `np.std(data)` (NumPy's default, i.e. population standard deviation) divided
  by the square root of the number of samples.

  Args:
    data: Sequence of numerical data.

  Returns:
    A `(mean, confidence_interval)` tuple.
  """
  sample_size = len(data)
  std_err = np.std(data) / math.sqrt(sample_size)
  return np.mean(data), 1.96 * std_err


def get_surplus_and_ginis(
    df: pd.DataFrame,
    mechanism: str,
    player: str,
) -> Tuple[np.ndarray, np.ndarray]:
  """Computes per-launch surplus and Gini coefficient for one condition.

  Rows are selected where 'mech_name_by_player' equals
  '<mechanism> <player>'. For each launch, the four 'player_reward_<i>'
  columns are averaged over the launch's rows; the surplus is the mean of
  those four averages and the Gini coefficient is computed across them.

  Args:
    df: DataFrame with the columns 'mech_name_by_player', 'launch_id' and
      'player_reward_0' .. 'player_reward_3'.
    mechanism: Name of the mechanism.
    player: Name of the player.

  Returns:
    Tuple containing:
      - surpluses: One mean reward per launch ID.
      - ginis: One Gini coefficient per launch ID.
  """
  condition = f'{mechanism} {player}'
  by_launch = df[df['mech_name_by_player'] == condition].groupby('launch_id')

  # Rows are launches, columns are the four players' mean rewards.
  reward_matrix = np.array([
      by_launch[f'player_reward_{idx}'].mean().values for idx in range(4)
  ]).T

  surpluses = np.mean(reward_matrix, axis=1)
  ginis = np.array([calculate_gini(rewards) for rewards in reward_matrix])
  return surpluses, ginis


def find_first_depletion_trial(subgroup: DataFrameGroupBy) -> int:
  """Returns the 1-based position of the first value below 1.

  Used through `groupby(...).apply`, which hands this function the pool values
  of a single launch.

  Args:
    subgroup: Pool values for one launch ID, in trial order.

  Returns:
    The 1-based position of the first entry that is smaller than 1, or 40 if
    no entry is smaller than 1 (pool never depleted).
  """
  depleted_positions = np.asarray(subgroup < 1).nonzero()[0]
  if depleted_positions.size:
    return depleted_positions[0] + 1  # Convert to 1-based indexing.
  return 40  # Pool not depleted.


def get_last_trial(subgroup: DataFrameGroupBy) -> int:
  """Returns the last value of a per-launch series.

  Used through `groupby(...).apply` on the 'players_kept_active' column, where
  the result is that column's value in the final row of the launch.

  Args:
    subgroup: Values for one launch ID, in trial order.

  Returns:
    The final element of `subgroup`.
  """
  values = subgroup.to_numpy()
  return values[-1]


def get_depletion_and_active_players(
    df: pd.DataFrame,
    mechanism: str,
    player: str,
) -> Tuple[np.ndarray, ...]:
  """Computes per-launch depletion and active-player statistics.

  Rows are selected where 'mech_name_by_player' equals
  '<mechanism> <player>' and then grouped by 'launch_id'.

  Args:
    df: DataFrame with the columns 'mech_name_by_player', 'launch_id',
      'players_kept_active' and 'mechanism_observation.pool'.
    mechanism: Name of the mechanism.
    player: Name of the player.

  Returns:
    A tuple of arrays with one entry per launch ID:
      - depletion_trials: First depletion trial (40 when never depleted).
      - depletion_binary: True where the depletion trial is below 40.
      - avg_active_players: Mean of 'players_kept_active' over the launch.
      - last_trial_active_players: Last 'players_kept_active' value of the
      launch.
      - launch_ids: The launch IDs themselves.
  """
  condition = f'{mechanism} {player}'
  by_launch = df[df['mech_name_by_player'] == condition].groupby('launch_id')
  pool_by_launch = by_launch['mechanism_observation.pool']
  active_by_launch = by_launch['players_kept_active']

  # Depletion statistics.
  depletion_trials = pool_by_launch.apply(find_first_depletion_trial).values
  depletion_binary = depletion_trials < 40

  # Active-player statistics; the index of the mean gives the launch IDs.
  mean_active = active_by_launch.mean()
  last_trial_active_players = active_by_launch.apply(get_last_trial).values

  return (
      depletion_trials,
      depletion_binary,
      mean_active.values,
      last_trial_active_players,
      mean_active.index.values,
  )


def get_exp_info(exp: int) -> Tuple[List[str], str, str]:
  """Looks up the plotting configuration of an experiment.

  Args:
    exp: Experiment number (1, 2, 3, or 4).

  Returns:
    Tuple containing:
      - mechanisms_to_plot: Mechanism names associated with the experiment.
      - human_exp_name: Name of the corresponding human experiment.
      - bc_used: Name of the behavioral cloning (BC) model used for the
      experiment.

  Raises:
    ValueError: If `exp` is not one of the defined experiments.
  """
  exp1_mechanisms = [
      'Equal Baseline',
      'Mixed Baseline',
      'Proportional Baseline',
      'RL Agent (M1)',
  ]
  exp2_mechanisms = [
      'Proportional Baseline',
      'Interpolating Baseline',
      'RL Agent (M1)',
  ]
  exp3_mechanisms = [
      'Interpolating Baseline',
      'RL Agent (M2)',
  ]
  exp4_mechanisms = [
      'RL Agent (M2) Game 1',
      'RL Agent (M2) Game 2',
      'RL Agent (M2) Game 3',
      'Proportional Baseline Game 1',
      'Proportional Baseline Game 2',
      'Proportional Baseline Game 3',
  ]

  # experiment number -> (mechanisms, human experiment name, BC name).
  exp_table = {
      1: (exp1_mechanisms, 'Exp 1', 'BC 1'),
      2: (exp2_mechanisms, 'Exp 2', 'BC 1'),
      3: (exp3_mechanisms, 'Exp 3', 'BC 2'),
      4: (exp4_mechanisms, 'Exp 4', 'Exp 4'),
  }

  info = exp_table.get(exp)
  if info is None:
    raise ValueError('Experiment undefined.')
  return info

In [None]:
# @title Colour plotting functions.


def desaturate_color(
    input_color: Union[str, Tuple[float, float, float]],
    factor: float,
    brighten: float,
) -> Tuple[float, float, float]:
  """Blends a color towards mid-gray and then brightens it.

  Each RGB channel is moved towards 0.5 by `factor` of its distance to 0.5, so
  a factor of 0.0 leaves the color unchanged and a factor of 1.0 yields
  mid-gray. Afterwards `brighten` is added to every channel and the result is
  clipped to [0.0, 1.0].

  Args:
    input_color: Color to transform, either as a color string or an RGB tuple.
    factor: Blend weight towards gray; 0.0 is no change, 1.0 is fully gray.
    brighten: Amount added to every channel; 0.0 is no change.

  Returns:
    Tuple holding the transformed RGB channels.
  """
  gray_intensity = 0.5
  rgb = np.array(matplotlib.colors.to_rgb(input_color))

  # Move every channel towards the neutral gray.
  blended = rgb + (gray_intensity - rgb) * factor

  # Brighten and keep the channels inside the valid range.
  return tuple(np.clip(blended + brighten, 0.0, 1.0))


def cmap_between_colors(
    start_color: Tuple[float, float, float],
    end_color: Tuple[float, float, float],
    levels: int,
) -> List[Tuple[float, float, float]]:
  """Builds a list of colors interpolated linearly between two RGB colors.

  Args:
    start_color: RGB tuple of the first color, values in [0.0, 1.0].
    end_color: RGB tuple of the last color, values in [0.0, 1.0].
    levels: Number of colors to generate.

  Returns:
    List of `levels` RGB tuples running from `start_color` to `end_color`.
  """
  # One linearly spaced ramp per channel (R, G, B).
  channel_ramps = [
      np.linspace(start_color[channel], end_color[channel], num=levels)
      for channel in range(3)
  ]

  # Regroup the three ramps into per-level (r, g, b) tuples.
  return list(zip(*channel_ramps))


In [None]:
# @title Leaderboard plot function.


def plot_leaderboard(df: pd.DataFrame, exp: int, verbose: bool = True) -> None:
  """Plots surplus against Gini coefficient for each mechanism of an experiment.

  One panel is drawn per player type (the behavioral clone and the human
  experiment). Each panel shows one small desaturated marker per launch and
  one large marker at the per-mechanism mean. Mechanisms without any data for
  a player type are left out of that panel.

  Args:
      df: Pandas DataFrame containing experiment data. It's assumed to have
        columns compatible with the `get_surplus_and_ginis` function.
      exp: The experiment number (1, 2, 3, or 4).
      verbose: Whether to print pairwise rank-sum statistics (for debugging).
  """

  mech_to_plot, human_exp_name, bc_used = get_exp_info(exp)
  players_to_plot = [bc_used, human_exp_name]

  fig, axes = plt.subplots(
      1,
      len(players_to_plot),
      facecolor='none',
      figsize=(5 * len(players_to_plot), 5),
  )

  surplus_dict: Dict[str, np.ndarray] = {}
  gini_dict: Dict[str, np.ndarray] = {}

  for i, player in enumerate(players_to_plot):
    # Only mechanisms that have data for this player end up in here.
    mean_coordinates: Dict[str, Tuple[float, float]] = {}

    for mech_name in mech_to_plot:
      surpluses, ginis = get_surplus_and_ginis(df, mech_name, player)
      surplus_dict[mech_name] = surpluses
      gini_dict[mech_name] = ginis

      if surpluses.size == 0:  # Only plot if data exists.
        continue

      surplus_mean, _ = get_mean_and_conf(surpluses)
      gini_mean, _ = get_mean_and_conf(ginis)

      axes[i].scatter(
          x=ginis,
          y=surpluses,
          marker='o',
          color=desaturate_color(MECH_COLOURS[mech_name], 0.4, 0.3),
          s=60,
          alpha=1,
          edgecolors='white',
          linewidths=1,
      )
      mean_coordinates[mech_name] = (gini_mean, surplus_mean)

    # Draw the means last so they sit on top of the per-launch markers.
    # Iterating the dict (insertion-ordered like `mech_to_plot`) skips
    # mechanisms without data instead of failing with a KeyError.
    for mech_name, (gini_mean, surplus_mean) in mean_coordinates.items():
      axes[i].scatter(
          x=gini_mean,
          y=surplus_mean,
          marker='o',
          color=MECH_COLOURS[mech_name],
          label=mech_name,
          s=220,
          alpha=1,
          edgecolors='white',
          linewidths=1,
      )
    # For experiment 4 both panels use the human data name.
    experiment_title = (
        'Human Evaluation'
        if i == 1 or exp == 4
        else 'Behavioral Clone Simulation'
    )
    axes[i].set_title(f'{experiment_title} {player}')
    axes[i].set_xlim(0, 0.7)
    axes[i].set_ylim(0, 14)

    if i == 0:
      axes[i].set_ylabel('Surplus')
    axes[i].set_xlabel('Gini')

    if verbose:
      print(f'Statistical tests for {player}:')
      for j, name1 in enumerate(mech_to_plot):
        for name2 in mech_to_plot[j + 1 :]:
          stat, pval = stats.ranksums(surplus_dict[name1], surplus_dict[name2])
          print(f'{name1} vs {name2} surplus z= {stat:.2f}, p= {pval:.3f}')

          stat, pval = stats.ranksums(gini_dict[name1], gini_dict[name2])
          print(f'{name1} vs {name2} gini z= {stat:.2f}, p= {pval:.3f}')
  plt.legend(bbox_to_anchor=(0.68, 0.3), fontsize=10)
  plt.tight_layout()
  plt.show()

In [None]:
# @title Scatter plot function.


def _contour_levels(
    depletion_binary: np.ndarray, max_levels: int
) -> Tuple[int, int]:
  """Returns (failed_levels, success_levels) scaled by the outcome ratio."""
  n_depleted = int(np.sum(depletion_binary))
  n_sustained = int(np.sum(np.logical_not(depletion_binary)))
  if n_depleted:
    ratio_success = n_sustained / n_depleted
  elif n_sustained:
    # No depleted game: the failed contours get the minimum number of levels.
    ratio_success = math.inf
  else:
    # No games at all: the ratio is undefined, use full levels for both.
    ratio_success = 1.0

  if ratio_success >= 1:
    return max(2, int(max_levels / ratio_success)), max_levels
  return max_levels, max(2, int(max_levels * ratio_success))


def _kde_by_outcome(
    x_name,
    x_values,
    y_name,
    y_values,
    depleted,
    failed_colormap,
    failed_levels,
    success_colormap,
    success_levels,
):
  """Draws depleted then sustained KDE contours on the current axes."""
  data = np.stack((x_values, y_values, depleted)).T
  frame = pd.DataFrame(
      data=data,
      index=range(data.shape[0]),
      columns=[x_name, y_name, 'depletion_binary'],
  )
  ax = None
  for outcome, colormap, levels in (
      ('True', failed_colormap, failed_levels),
      ('False', success_colormap, success_levels),
  ):
    ax = sns.kdeplot(
        legend=False,
        data=frame.query(f'depletion_binary == {outcome}'),
        x=x_name,
        y=y_name,
        cmap=colormap,
        fill=True,
        levels=levels,
        alpha=0.5,
        thresh=0.01,
    )
  return ax


def _scatter_games(
    x_values,
    y_values,
    depleted,
    success_color,
    failed_color,
    alpha,
    with_labels=True,
):
  """Scatters one point per game, coloured by whether the pool was depleted."""
  labelled = set()
  for x_value, y_value, is_depleted in zip(x_values, y_values, depleted):
    if is_depleted:
      color, label = failed_color, 'Depleted Game'
    else:
      color, label = success_color, 'Sustained Game'
    label_kwargs = {}
    if with_labels:
      # Only the first point of each outcome gets a legend entry.
      label_kwargs['label'] = '_nolegend_' if label in labelled else label
      labelled.add(label)
    plt.scatter(
        x_value,
        y_value,
        c=color,
        alpha=alpha,
        s=70,
        edgecolors='none',
        **label_kwargs,
    )


def _pearson_or_nan(x: np.ndarray, y: np.ndarray) -> Tuple[float, float]:
  """Returns Pearson (r, p), or (nan, nan) when fewer than two points exist."""
  if len(x) < 2:
    return float('nan'), float('nan')
  result = stats.pearsonr(x, y)
  return result.statistic, result.pvalue


def make_contour_scatter(
    df: pd.DataFrame,
    exp: int,
    verbose: bool = True,
) -> None:
  """Make the contour scatter plot.

  For every mechanism of the experiment a column of four panels is drawn. Each
  panel overlays KDE contours of the behavioral clone games with one scatter
  point per human game, coloured by whether the pool was depleted:
    1. Gini vs. surplus.
    2. Trial of pool depletion vs. surplus (clone depletion trials are
       randomly jittered before the density estimate).
    3. Surplus vs. average number of active players.
    4. Average number of active players vs. Gini.

  The number of contour levels of the rarer outcome is scaled down by the
  ratio of sustained to depleted human games (never below 2).

  Args:
    df: Dataframe containing per experiment data.
    exp: Index of the experiment to plot.
    verbose: Whether to print aggregate values (for debugging).
  """
  mech_to_plot, p, p_bc = get_exp_info(exp)
  nplotrows = 4
  size = 4
  surplus_max = 14
  gini_max = 0.7
  active_player_max = 4.2
  alpha = 0.6
  max_levels = 8
  failed_color = 'grey'
  nplotscol = len(mech_to_plot)
  plt.subplots(
      nplotrows,
      nplotscol,
      figsize=(nplotscol * size, nplotrows * size),
      facecolor='none',
  )

  trial_of_depletion_dict = {}
  active_players_dict = {}

  for ii, m in enumerate(mech_to_plot):

    # Quantities from human games.
    surplus, gini = get_surplus_and_ginis(df, m, p)
    (
        depletion,
        depletion_binary,
        active_players,
        active_players_last_trial,
        _,
    ) = get_depletion_and_active_players(df, m, p)

    # Quantities from behavioral clone games.
    bc_surplus, bc_gini = get_surplus_and_ginis(df, m, p_bc)
    bc_depletion, bc_depletion_binary, bc_active_players, _, _ = (
        get_depletion_and_active_players(df, m, p_bc)
    )

    # Scale the contours.
    success_color = MECH_COLOURS[m]
    failed_levels, success_levels = _contour_levels(
        depletion_binary, max_levels
    )
    success_colormap = matplotlib.colors.ListedColormap(
        cmap_between_colors(
            desaturate_color(success_color, 0.5, 0.4),
            desaturate_color(success_color, 0.0, 0.0),
            levels=max_levels,
        )[:success_levels]
    )
    failed_colormap = matplotlib.colors.ListedColormap(
        cmap_between_colors(
            desaturate_color('grey', 0.5, 0.4),
            desaturate_color('grey', 0.0, 0.0),
            levels=max_levels,
        )[:failed_levels]
    )
    contour_style = (
        failed_colormap,
        failed_levels,
        success_colormap,
        success_levels,
    )

    last_trial_active_fraction = np.mean(active_players_last_trial == 4)
    if verbose:
      print(
          p,
          m,
          'fraction pool maintained:',
          np.round(1 - np.mean(depletion_binary), decimals=2),
          'fraction 4 active last trial:',
          np.round(last_trial_active_fraction, decimals=2),
      )

    # Row 1: Gini vs. surplus.
    plt.subplot(nplotrows, nplotscol, ii + 1)
    ax = _kde_by_outcome(
        'Gini', bc_gini, 'Surplus', bc_surplus, bc_depletion_binary,
        *contour_style,
    )
    _scatter_games(
        gini, surplus, depletion_binary, success_color, failed_color, alpha
    )

    # Print the gini surplus correlation.
    if verbose:
      sustained = np.logical_not(depletion_binary)
      sustained_r, sustained_p = _pearson_or_nan(
          gini[sustained], surplus[sustained]
      )
      depleted_r, depleted_p = _pearson_or_nan(
          gini[depletion_binary], surplus[depletion_binary]
      )
      print(
          p,
          m,
          'sustained: r=',
          np.round(sustained_r, decimals=2),
          'p = ',
          np.round(sustained_p, decimals=3),
          'depleted: r=',
          np.round(depleted_r, decimals=2),
          'p = ',
          np.round(depleted_p, decimals=3),
      )

    plt.xlim(0, gini_max)
    plt.xlabel('Gini', labelpad=-15)
    plt.xticks((0, gini_max / 2, gini_max), (0, '', gini_max))
    plt.ylabel('Surplus', labelpad=-10)
    ax.set_title(m, fontsize=16, color=MECH_COLOURS[m], fontweight='bold')
    plt.ylim(0, surplus_max)
    plt.yticks((0, surplus_max / 2, surplus_max), (0, '', surplus_max))

    # Row 2: trial of pool depletion vs. surplus. The clone depletion trials
    # are jittered so the density estimate is not fed a discrete variable.
    plt.subplot(nplotrows, nplotscol, ii + 1 + nplotscol * 1)
    _kde_by_outcome(
        'Depletion',
        bc_depletion + (3 * np.random.random(bc_depletion.shape) - 0.5),
        'Surplus',
        bc_surplus,
        bc_depletion_binary,
        *contour_style,
    )
    _scatter_games(
        depletion, surplus, depletion_binary, success_color, failed_color,
        alpha,
    )
    if ii == 0:
      plt.legend()
    plt.xlim(0, 42)
    plt.xlabel('Trial of pool depletion', labelpad=-15)
    plt.xticks((0, 20, 40), (0, '', 40))
    plt.ylabel('Surplus', labelpad=-10)
    plt.ylim(0, surplus_max)
    plt.yticks((0, surplus_max / 2, surplus_max), (0, '', surplus_max))

    # Row 3: surplus vs. average number of active players.
    plt.subplot(nplotrows, nplotscol, ii + 1 + nplotscol * 2)
    _kde_by_outcome(
        'Surplus', bc_surplus, 'Active players', bc_active_players,
        bc_depletion_binary, *contour_style,
    )
    _scatter_games(
        surplus, active_players, depletion_binary, success_color,
        failed_color, alpha,
    )

    trial_of_depletion_dict[m] = depletion
    active_players_dict[m] = active_players
    plt.ylim(0, active_player_max)
    plt.ylabel('Active players', labelpad=-10)
    plt.yticks((0, 1, 2, 3, 4), (0, '', '', '', 4))
    plt.xlabel('Surplus', labelpad=-15)
    plt.xlim(0, surplus_max)
    plt.xticks((0, surplus_max / 2, surplus_max), (0, '', surplus_max))

    # Row 4: average number of active players vs. Gini (no legend labels).
    plt.subplot(nplotrows, nplotscol, ii + 1 + nplotscol * 3)
    _kde_by_outcome(
        'Active players', bc_active_players, 'Gini', bc_gini,
        bc_depletion_binary, *contour_style,
    )
    _scatter_games(
        active_players, gini, depletion_binary, success_color, failed_color,
        alpha, with_labels=False,
    )
    plt.xlim(0, active_player_max)
    plt.xlabel('Active players', labelpad=-15)
    plt.xticks((0, 1, 2, 3, 4), (0, '', '', '', 4))
    plt.ylabel('Gini', labelpad=-17)
    plt.ylim(0, gini_max)
    plt.yticks((0, gini_max / 2, gini_max), (0, '', gini_max))

  if verbose:
    # Pairwise rank-sum tests between mechanisms on the human games.
    for i, name1 in enumerate(mech_to_plot):
      for name2 in mech_to_plot[i + 1 :]:
        stat, pval = stats.ranksums(
            trial_of_depletion_dict[name1], trial_of_depletion_dict[name2]
        )
        print(
            name1,
            'vs',
            name2,
            'depletion z=',
            np.round(stat, decimals=2),
            'p=',
            np.round(pval, decimals=3),
        )
        stat, pval = stats.ranksums(
            active_players_dict[name1], active_players_dict[name2]
        )
        print(
            name1,
            'vs',
            name2,
            'active_players z=',
            np.round(stat, decimals=2),
            'p=',
            np.round(pval, decimals=3),
        )
  print(' ')

In [None]:
#@title Dot plot function.


def make_dot_plot(df: pd.DataFrame, exp: int):
  """Make the dot plot showing the offer/pool relationship.

  One column is drawn per mechanism, with the behavioral clone data in the top
  row and the human data in the bottom row. Every panel shows one small grey
  dot per row of the data and one larger, mechanism-coloured dot per launch
  (the per-launch mean of pool and offer Gini).

  Args:
    df: dataframe containing experiments.
    exp: which experiment to plot.
  """
  mech_to_plot, p, p_bc = get_exp_info(exp)
  markersize = 7
  alpha = 0.3
  n_cols = len(mech_to_plot)
  n_rows = 2
  _, ax = plt.subplots(
      n_rows, n_cols, figsize=(n_cols * 4, 2 * 4), facecolor='none'
  )

  # With a single mechanism `plt.subplots` returns a 1-D array of axes.
  if n_cols == 1:
    ax = ax.reshape(2, 1)

  def draw_panel(panel, mech, player_name):
    """Draws the trial-level and game-level dots for one condition."""
    mdf = df.query(f"mech_name_by_player == '{mech} {player_name}'")
    per_game = mdf.groupby('launch_id')
    game_wise_pool = per_game['mechanism_observation.pool'].mean()
    game_wise_gini = per_game['offer_gini'].mean()

    # Trial-level dots.
    panel.scatter(
        mdf['mechanism_observation.pool'],
        mdf['offer_gini'],
        c='grey',
        s=markersize,
        alpha=alpha,
        edgecolors='none',
    )
    # Game-level means.
    panel.scatter(
        game_wise_pool,
        game_wise_gini,
        c=MECH_COLOURS[mech],
        s=markersize * 8,
        alpha=alpha * 2,
        edgecolors='white',
        linewidths=1,
    )

    panel.set_ylim(0, 0.75)
    panel.set_xlim(0, 200)
    panel.set_yticks([0, 0.25, 0.5, 0.75], ['0', '', '', '.75'])

  for i, m in enumerate(mech_to_plot):
    top_panel = ax[0, i]
    bottom_panel = ax[1, i]
    is_first_column = i == 0

    # Top row: behavioral clones.
    draw_panel(top_panel, m, p_bc)
    top_panel.set_title(
        m, fontsize=16, color=MECH_COLOURS[m], fontweight='bold'
    )
    if is_first_column:
      top_panel.set_ylabel('Offer Gini', labelpad=-10)
      top_panel.legend(['Trial', 'Game'])

    # Bottom row: humans.
    draw_panel(bottom_panel, m, p)
    bottom_panel.set_xlabel('Pool')
    if is_first_column:
      bottom_panel.set_ylabel('Offer Gini', labelpad=-10)

In [None]:
#@title Regression plot function.


def get_regr(df_subselection: pd.DataFrame) -> List[np.ndarray]:
  """Fits a lagged linear regression of offers on actions per game and player.

  For every launch ID and each of the four players, the player's offer is
  regressed on the player's action shifted by -4 .. 4 rows. The first and last
  four rows are dropped because shifting leaves NaN there.

  Args:
    df_subselection: DataFrame containing the data subset to analyze.

  Returns:
    A list with one coefficient array (one weight per shift, in the order
    -4 .. 4) for every (game, player) pair.
  """
  shifts = range(-4, 5)
  coefficients = []
  for game_id in df_subselection['launch_id'].unique():
    game_df = df_subselection.query(f"launch_id == '{game_id}'")
    for player_idx in range(4):
      actions = game_df[f'player_action_{player_idx}']

      # Columns are the shifted copies of the action series; the slice trims
      # the rows that contain NaN values from shifting.
      lagged_actions = np.stack(
          [actions.shift(shift).to_numpy() for shift in shifts]
      ).T[4:-4, :]
      offers = (
          game_df[f'offer_{player_idx}'].to_numpy().reshape(-1, 1)[4:-4, :]
      )

      model = linear_model.LinearRegression()
      model.fit(lagged_actions, offers)
      coefficients.append(np.squeeze(model.coef_))
  return coefficients


def get_regression_plot(df: pd.DataFrame, exp: int):
  """Generates a plot showing regression weights of offer/reciprocation.

  One panel is drawn per mechanism. Each panel shows the median regression
  weight per shift for the behavioral clones (light grey line) and for the
  humans (black line), plus one grey dot per human regression. The weights
  are plotted in reversed shift order.

  Args:
    df: DataFrame containing the experimental data.
    exp: The experiment number to plot.
  """
  mech_to_plot, p, p_bc = get_exp_info(exp)

  shiftrange = range(-4, 5)
  last_shift_idx = len(shiftrange) - 1
  col = len(mech_to_plot)
  _, ax = plt.subplots(1, col, figsize=(col * 4, 4))

  # `plt.subplots` returns a bare Axes for a single column and a 1-D array
  # otherwise; normalise both to shape (col, 1).
  ax = np.array(ax).reshape(col, 1)

  for i, m in enumerate(mech_to_plot):
    # Rows are individual regressions, columns are the shifts.
    weights_bc = np.stack(
        get_regr(df.query(f"mech_name_by_player == '{m} {p_bc}'"))
    )
    weights_human = np.stack(
        get_regr(df.query(f"mech_name_by_player == '{m} {p}'"))
    )

    plt.sca(ax[i, 0])

    plt.plot(
        shiftrange,
        np.flip(np.median(weights_bc, axis=0)),
        c='lightgrey',
        linewidth=3,
        label='Behavioral Clones',
    )

    # Label with the experiment actually plotted rather than a fixed 'Exp 1'.
    plt.plot(
        shiftrange,
        np.flip(np.median(weights_human, axis=0)),
        c='black',
        linewidth=3,
        label=f'Human {p}',
    )

    # Individual human regressions, using the same reversed column order as
    # the flipped medians above.
    for j, shift in enumerate(shiftrange):
      shift_weights = weights_human[:, last_shift_idx - j]
      plt.scatter(
          [shift] * shift_weights.shape[0],
          shift_weights,
          c='grey',
          s=5,
          alpha=0.5,
          edgecolors='none',
      )

    plt.ylim(-2, 2)
    plt.xticks(shiftrange)

    if i == 0:
      plt.ylabel('Regression weights offer/reciprocation')
      plt.legend()

    plt.xlabel('Shift of reciprocation')
    plt.title(m, fontsize=16, color=MECH_COLOURS[m], fontweight='bold')
    plt.yticks([-2, -1, 0, 1, 2])

In [None]:
#@title Exclusion plot functions.


def find_exlusions(
    input: pd.DataFrame, output_type: str, player: int
) -> np.ndarray:
  """Find exclusion events of one player within one game.

  A trial counts as an exclusion when the player's offer is below 1.0.
  Consecutive excluded trials are merged into a single exclusion event.

  Args:
    input: Rows of a single game (one row per trial). Must provide the
      columns `offer_<player>`, `player_action_<player>` and
      `mechanism_observation.pool`.
    output_type: Which quantity to return per event:
      'durations' - number of consecutive excluded trials;
      'when' - row index of the first excluded trial;
      'amount_exclusion' - the player's action on the trial just before the
        exclusion started (NaN if the exclusion starts on the first trial);
      'amount_reinclusion' - the offer on the first trial after the exclusion;
      'pool' - the pool on the first trial after the exclusion.
      The last two skip events that last until the final trial, so they can
      be shorter than the other outputs.
    player: Number of player.

  Returns:
    Array with one entry per exclusion event (empty if there are none).

  Raises:
    ValueError: If `output_type` is not one of the values listed above.
  """
  valid_output_types = (
      'durations',
      'when',
      'amount_exclusion',
      'amount_reinclusion',
      'pool',
  )
  # Validate up front so a bad argument is reported regardless of the data.
  if output_type not in valid_output_types:
    raise ValueError(f'Unknown output type: {output_type}')

  offer = input[f'offer_{player}'].to_numpy().astype(float)
  reciprocation = input[f'player_action_{player}'].to_numpy().astype(float)
  pool = input['mechanism_observation.pool'].to_numpy().astype(float)

  exclusion = offer < 1.0
  if not exclusion.any():
    return np.array([])

  # Pad with "not excluded" on both sides so that runs touching either end of
  # the game still produce a start (+1) and an end (-1) edge.
  padded = np.concatenate(([False], exclusion, [False])).astype(int)
  edges = np.diff(padded)
  exclusion_when_first = np.flatnonzero(edges == 1)
  exclusion_when_last = np.flatnonzero(edges == -1) - 1

  if output_type == 'durations':
    return (exclusion_when_last - exclusion_when_first) + 1
  if output_type == 'when':
    return exclusion_when_first
  if output_type == 'amount_exclusion':
    # An exclusion starting on the first trial has no preceding action;
    # indexing with -1 would silently read the last trial instead.
    previous = np.full(exclusion_when_first.shape, np.nan)
    has_previous = exclusion_when_first > 0
    previous[has_previous] = reciprocation[
        exclusion_when_first[has_previous] - 1
    ]
    return previous

  # Drop events that last until the end of the game: there is no trial after
  # them to read the reinclusion values from.
  reinclusion = exclusion_when_last + 1
  reinclusion = reinclusion[reinclusion < len(offer)]
  if output_type == 'amount_reinclusion':
    return offer[reinclusion]
  return pool[reinclusion]


def get_exclusion_events(
    subset_df: pd.DataFrame,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
  """Get exclusion events.

  Runs `find_exlusions` for players 0-3 on every game (rows sharing a
  `launch_id`) and concatenates the per-game results, player by player.

  Args:
    subset_df: Dataframe containing subset of the data.

  Returns:
    Tuple of float arrays, in this order: exclusion durations, trial at which
    each exclusion started, offer at reinclusion, pool at reinclusion, and
    the player's action just before each exclusion. An empty `subset_df`
    yields five empty arrays.
  """
  output_types = (
      'durations',
      'when',
      'amount_reinclusion',
      'pool',
      'amount_exclusion',
  )
  # Seeding each list with an empty float array keeps the result float-typed
  # and lets np.concatenate succeed when there are no games at all.
  chunks = {name: [np.array([])] for name in output_types}

  # Split into games once; every player/output combination reuses the split.
  games = [game for _, game in subset_df.groupby('launch_id')]
  for player in range(4):
    for game in games:
      for name in output_types:
        chunks[name].append(
            find_exlusions(game, output_type=name, player=player)
        )

  events = {name: np.concatenate(parts) for name, parts in chunks.items()}
  # Durations and start trials describe the same events one-to-one.
  assert events['durations'].shape == events['when'].shape
  return (
      events['durations'],
      events['when'],
      events['amount_reinclusion'],
      events['pool'],
      events['amount_exclusion'],
  )


def get_exclusion_plot(df: pd.DataFrame, exp: int, verbose: bool = True):
  """Get plot about the exclusions from the game.

  Builds a 2 x n_mechanisms grid: the top row uses the behavioral-clone data
  and the bottom row the human data. Each panel scatters the trial at which
  an exclusion started against its duration and overlays, on a twin y-axis,
  a histogram of durations divided by the number of games.

  Args:
    df: dataframe containing experiments.
    exp: which experiment to plot.
    verbose: Whether to print aggregate values (for debugging). When set,
      pairwise rank-sum tests on the human exclusion durations are printed.
  """
  mech_to_plot, p, p_bc = get_exp_info(exp)
  col = len(mech_to_plot)
  row = 2
  # Row counts are converted to game counts by dividing by 40.
  # NOTE(review): assumes every game has exactly 40 rows - confirm.
  trials_per_game = 40
  durations_dict = {}
  # squeeze=False keeps `ax` 2-D even when there is a single mechanism.
  _, ax = plt.subplots(
      row, col, figsize=(col * 4, 2 * 4), facecolor='none', squeeze=False
  )

  for i, m in enumerate(mech_to_plot):
    # (data, scatter alpha) for the top (clones) and bottom (human) row.
    row_specs = (
        (df[df['mech_name_by_player'] == m + ' ' + p_bc], 0.2),
        (df[df['mech_name_by_player'] == m + ' ' + p], 0.4),
    )
    for r, (subset_df, alpha) in enumerate(row_specs):
      panel = ax[r, i]
      games = subset_df.shape[0] / trials_per_game
      exclusion_durations, exclusion_times, _, _, _ = get_exclusion_events(
          subset_df
      )
      if r == 1:
        # Human durations feed the statistics printed below.
        durations_dict[m] = exclusion_durations

      panel.scatter(
          exclusion_durations,
          exclusion_times,
          c='grey',
          s=15,
          alpha=alpha,
          edgecolors='none',
      )

      counts, bins = np.histogram(exclusion_durations, bins=range(0, 41, 2))
      count_axis = panel.twinx()
      # Only the right-most column labels the count axis.
      if i == col - 1:
        count_axis.set_ylabel('Count of exlusions', labelpad=-5)
        count_axis.set_yticks([0, 5], ['0', '5'])
      else:
        count_axis.set_yticks([0, 5], ['', ''])
      count_axis.bar(
          x=bins[:-1],
          height=counts / games,
          color=MECH_COLOURS[m],
          width=2.0,
          align='edge',
          alpha=0.6,
          edgecolor='None',
      )
      count_axis.set_ylim(0, 5)

      if r == 0:
        panel.set_title(
            m, fontsize=16, color=MECH_COLOURS[m], fontweight='bold'
        )
      else:
        panel.set_xlabel('Exclusion duration')
      if i == 0:
        panel.set_ylabel('Trial of exclusion')
      panel.set_yticks([0, 10, 20, 30, 40], [0, 10, 20, 30, 40])
      panel.set_ylim(0, 40)
      panel.set_xlim(0, 40)

  if verbose:
    print(p, 'Human data')
    for i, name1 in enumerate(mech_to_plot):
      for name2 in mech_to_plot[i + 1 :]:
        stat, pval = stats.ranksums(durations_dict[name1], durations_dict[name2])
        print(
            name1,
            'vs',
            name2,
            'durations z=',
            np.round(stat, decimals=2),
            'p=',
            np.round(pval, decimals=3),
        )
    # The trailing separator belongs to the verbose output only.
    print(' ')


def get_reinclusion_plot(df: pd.DataFrame, exp: int):
  """Get the plot about the reinclusions into the game.

  Builds a 2 x n_mechanisms grid: the top row uses the behavioral-clone data
  and the bottom row the human data. Each panel scatters the pool at
  reinclusion against the reinclusion amount and overlays, on a twin y-axis,
  a histogram of reinclusion amounts divided by the number of games.

  Args:
    df: dataframe containing experiments.
    exp: which experiment to plot.
  """
  mech_to_plot, p, p_bc = get_exp_info(exp)

  col = len(mech_to_plot)
  row = 2
  # Row counts are converted to game counts by dividing by 40.
  # NOTE(review): assumes every game has exactly 40 rows - confirm.
  trials_per_game = 40
  # squeeze=False keeps `ax` 2-D even when there is a single mechanism.
  _, ax = plt.subplots(row, col, figsize=(col * 4, 2 * 4), squeeze=False)

  for i, m in enumerate(mech_to_plot):
    # (data, scatter alpha, count-axis label) for the top (clones) and
    # bottom (human) row. The two labels differ in the original figure and
    # are kept as they were.
    row_specs = (
        (
            df[df['mech_name_by_player'] == m + ' ' + p_bc],
            0.2,
            'Count of reinclusion',
        ),
        (
            df[df['mech_name_by_player'] == m + ' ' + p],
            0.4,
            'Count of reinclusions',
        ),
    )
    for r, (subset_df, alpha, count_label) in enumerate(row_specs):
      panel = ax[r, i]
      games = subset_df.shape[0] / trials_per_game
      _, _, reinclusion_amounts, reinclusion_pools, _ = get_exclusion_events(
          subset_df
      )

      panel.scatter(
          reinclusion_amounts,
          reinclusion_pools,
          c='grey',
          s=15,
          alpha=alpha,
          edgecolors='none',
      )

      counts, bins = np.histogram(reinclusion_amounts, bins=range(0, 51, 2))
      count_axis = panel.twinx()
      # Only the right-most column labels the count axis. Ticks sit at the
      # axis limits (0 and 2) in both rows.
      if i == col - 1:
        count_axis.set_ylabel(count_label, labelpad=-5)
        count_axis.set_yticks([0, 2], ['0', '2'])
      else:
        count_axis.set_yticks([0, 2], ['', ''])
      count_axis.bar(
          x=bins[:-1],
          height=counts / games,
          color=MECH_COLOURS[m],
          width=2.0,
          align='edge',
          alpha=0.6,
          edgecolor='None',
      )
      count_axis.set_ylim(0, 2)

      if r == 0:
        panel.set_title(
            m, fontsize=16, color=MECH_COLOURS[m], fontweight='bold'
        )
      else:
        panel.set_xlabel('Reinclusion amount')
      if i == 0:
        panel.set_ylabel('Pool at reinclusion')
      panel.set_yticks([0, 50, 100, 150, 200], [0, 50, 100, 150, 200])
      panel.set_ylim(0, 220)
      panel.set_xlim(0, 50)


def get_exclusion_amount_plot(df: pd.DataFrame, exp: int):
  """Create the plot about the amount of exclusions from the game.

  Builds a 2 x n_mechanisms grid: the top row uses the behavioral-clone data
  and the bottom row the human data. Each panel scatters the exclusion
  duration against the player's action just before the exclusion and
  overlays, on a twin y-axis, a histogram of those actions divided by the
  number of games.

  Args:
    df: dataframe containing experiments.
    exp: which experiment to plot.
  """
  mech_to_plot, p, p_bc = get_exp_info(exp)
  col = len(mech_to_plot)
  row = 2
  # Row counts are converted to game counts by dividing by 40.
  # NOTE(review): assumes every game has exactly 40 rows - confirm.
  trials_per_game = 40
  # Both rows share one binning that matches the x-axis limit of 30, so the
  # clone and human histograms are directly comparable.
  amount_bins = range(0, 31, 2)
  # squeeze=False keeps `ax` 2-D even when there is a single mechanism.
  _, ax = plt.subplots(row, col, figsize=(col * 4, 2 * 4), squeeze=False)

  for i, m in enumerate(mech_to_plot):
    # (data, scatter alpha) for the top (clones) and bottom (human) row.
    row_specs = (
        (df[df['mech_name_by_player'] == m + ' ' + p_bc], 0.2),
        (df[df['mech_name_by_player'] == m + ' ' + p], 0.4),
    )
    for r, (subset_df, alpha) in enumerate(row_specs):
      panel = ax[r, i]
      games = subset_df.shape[0] / trials_per_game
      exclusion_durations, _, _, _, exclusion_amounts = get_exclusion_events(
          subset_df
      )

      panel.scatter(
          exclusion_amounts,
          exclusion_durations,
          c='grey',
          s=15,
          alpha=alpha,
          edgecolors='none',
      )

      counts, bins = np.histogram(exclusion_amounts, bins=amount_bins)
      count_axis = panel.twinx()
      # Only the right-most column labels the count axis.
      if i == col - 1:
        count_axis.set_ylabel('Count of exlusions', labelpad=-5)
        count_axis.set_yticks([0, 5], ['0', '5'])
      else:
        count_axis.set_yticks([0, 5], ['', ''])
      count_axis.bar(
          x=bins[:-1],
          height=counts / games,
          color=MECH_COLOURS[m],
          width=2.0,
          align='edge',
          alpha=0.6,
          edgecolor='None',
      )
      count_axis.set_ylim(0, 5)

      if r == 0:
        panel.set_title(
            m, fontsize=16, color=MECH_COLOURS[m], fontweight='bold'
        )
      else:
        panel.set_xlabel('Reciprocation size before exclusion')
      if i == 0:
        panel.set_ylabel('Exclusion duration')
      panel.set_yticks([0, 10, 20, 30, 40], [0, 10, 20, 30, 40])
      panel.set_ylim(0, 40)
      panel.set_xlim(0, 30)

In [None]:
# @title Opinion plot function.


def make_opinion_plot(df: pd.DataFrame, exp: int):
  """Make the plot about participants opinions.

  For every mechanism of the experiment and every feedback question, draws a
  violin of the answers given by the four players. Violins of different
  mechanisms for the same question are placed side by side, 0.25 apart.

  Args:
   df: dataframe containing experiments.
   exp: which experiment to plot.

  """
  questions = [
      'feedback_fair',
      'feedback_contribute_me',
      'feedback_contribute_others',
      'feedback_understandable',
      'feedback_i_can_think',
      'feedback_money',
      'feedback_again',
      'feedback_beneficial_to_others',
  ]
  qnames = [
      'Fair',
      'Encouraging me',
      'Encouraging others',
      'Understandable',
      'Replaceable',
      'Satisfied',
      'Play again',
      'Beneficial for others',
  ]

  mech_to_plot, p, _ = get_exp_info(exp)
  plt.figure(figsize=(12, 6), facecolor='none')
  legend_handles = []
  for i, m in enumerate(mech_to_plot):
    colour = MECH_COLOURS[m]
    subset_df = df[df['mech_name_by_player'] == m + ' ' + p]
    # Answers are read from the rows with round_id == 0 only.
    first_round = subset_df.query('round_id == 0')
    for q, question in enumerate(questions):
      answers = np.stack(first_round[question].to_numpy())
      # Each cell is a string such as '[3, 4, 5, 2]': strip the brackets and
      # split on commas to recover the integers.
      parsed = np.array([
          [int(x) for x in item.replace('[', '').replace(']', '').split(',')]
          for item in answers
      ])
      # Pool the answers of the four players into one sample.
      agreement = np.concatenate([parsed[:, k] for k in range(4)])
      # Values above 5 are discarded.
      # NOTE(review): presumably these encode missing answers - confirm.
      agreement = agreement[agreement <= 5.]

      parts = plt.violinplot(
          agreement, positions=[q + i * 0.25 - 0.25], showmeans=True
      )
      # One handle per mechanism is enough for the legend.
      if q == 0:
        legend_handles.append(parts['bodies'][0])
      for pc in parts['bodies']:
        pc.set_color(colour)
        pc.set_alpha(.4)
      # Customize line colors
      for line_key in ('cbars', 'cmins', 'cmaxes', 'cmeans'):
        parts[line_key].set_color(colour)

  plt.xticks(range(len(qnames)), qnames, rotation=15)
  plt.ylim(0, 6)
  plt.legend(legend_handles, mech_to_plot, bbox_to_anchor=(1.25, 1))

## Plot leaderboard

In [None]:
# Draw the leaderboard for experiments 1-4 without the debug printout.
for exp in (1, 2, 3, 4):
  plot_leaderboard(df, exp, verbose=False)

## Scatter plots

In [None]:
# Draw the contour scatter plot for experiments 1-3 without the debug printout.
for exp in (1, 2, 3):
  make_contour_scatter(df, exp, verbose=False)

## Dot plots.

In [None]:
# Cast the two columns to float before drawing the dot plots.
for column in ('mechanism_observation.pool', 'offer_gini'):
  df[column] = df[column].astype(float)

# Draw the dot plot for experiments 1-3.
for exp in (1, 2, 3):
  make_dot_plot(df, exp)

## Regression plots.

In [None]:
# Cast launch ids to strings before drawing the regression plots.
df['launch_id'] = df['launch_id'].astype(str)
for exp in (1, 2, 3):
  get_regression_plot(df, exp)

## Exclusion plots.

In [None]:
# Draw the exclusion plot for experiments 1-3 without the statistics printout.
for exp in (1, 2, 3):
  get_exclusion_plot(df, exp, verbose=False)

In [None]:
# Draw the exclusion-amount plot for experiments 1-3.
for exp in (1, 2, 3):
  get_exclusion_amount_plot(df, exp)

## Reinclusion plots.

In [None]:
# Draw the reinclusion plot for experiments 1-3.
for exp in (1, 2, 3):
  get_reinclusion_plot(df, exp)

## Opinion plots.

In [None]:
# Draw the opinion plot for experiments 2 and 3.
for exp in (2, 3):
  make_opinion_plot(df, exp)