# Two Point Convergence Study: Preprocessing Data

### Imports

In [1]:
from tau import StandardPosition, DAGDialecticalStructure, BDDDialecticalStructure
from rethon import StandardGlobalReflectiveEquilibrium

from tau.util import inferential_density

from itertools import product, combinations, chain
from random import choice

from os import getcwd, path

import warnings
warnings.filterwarnings("ignore")

In [2]:
import tarfile
from pandas import DataFrame
import pandas as pd
from typing import List
from ast import literal_eval

### Utility

In [3]:
def literal_eval_cols(data: DataFrame, cols: List[str]):
    """Parse string-encoded Python literals in the given columns, in place.

    Every cell of each column named in ``cols`` is passed through
    ``ast.literal_eval`` and the column is overwritten with the parsed
    objects. Nothing is returned; ``data`` itself is modified.

    Args:
        data: Frame whose columns are converted in place.
        cols: Names of the columns that hold literals as strings.

    Raises:
        KeyError: If a name in ``cols`` is not a column of ``data``.
        ValueError, SyntaxError: Propagated from ``literal_eval`` for cells
            that are not valid Python literals.
    """
    for col_name in cols:
        # Map over the single column instead of applying a function to every
        # row of the whole frame: only the needed cells are touched, and an
        # empty frame yields an empty column instead of a failing assignment.
        data[col_name] = data[col_name].map(literal_eval)

In [4]:
def normalized_agreement(pos1, pos2, re):
    """Normalized agreement of two positions.

    Adapted to partial positions from Betz (2013, 39): the Hamming distance
    between ``pos1`` and ``pos2`` is computed by ``re`` with the penalties
    ``[0, 1, 1, 2]``, divided by twice the size of the sentence pool of
    ``re``'s dialectical structure, and subtracted from 1.
    """
    pool_size = re.dialectical_structure().sentence_pool().size()
    distance = re.hamming_distance(pos1, pos2, [0, 1, 1, 2])

    # alternative with uniform penalties:
    #return 1 - ( re.hamming_distance(pos1, pos2, [0,1,1,1]) / pool_size )
    return 1 - (distance / (2 * pool_size))
    
    
def similarity_list(group1, group2, re):
    """Normalized agreement of every cross-group pair of positions.

    Returns a flat list with one value per pair (pos1, pos2), pos1 taken from
    ``group1`` (outer loop) and pos2 from ``group2`` (inner loop).
    """
    return [
        normalized_agreement(first, second, re)
        for first in group1
        for second in group2
    ]

    
def group_normalized_agreement(group1, group2, re):
    """Mean normalized agreement between two groups of positions.

    Averages ``normalized_agreement`` over all pairs (pos1, pos2) with pos1
    from ``group1`` and pos2 from ``group2``. Corresponds to normalized
    agreement if both groups have length 1.

    Raises ZeroDivisionError if one of the groups is empty.
    """
    total = 0
    for first, second in product(group1, group2):
        total = total + normalized_agreement(first, second, re)

    # average over the number of cross-group pairs
    n_pairs = len(group1) * len(group2)
    return total / n_pairs

def population_normalized_agreement(population, re):
    """Debate-wide mean normalized agreement from Betz (2013, 40) adapted to partial positions.

    Averages ``normalized_agreement`` over all unordered pairs of distinct
    members of ``population``.

    Raises ZeroDivisionError if ``population`` has fewer than two members.
    """
    total = 0
    n_pairs = 0

    # enumerate from 1 so that n_pairs ends up as the number of pairs seen
    for n_pairs, (first, second) in enumerate(combinations(population, 2), start=1):
        total += normalized_agreement(first, second, re)

    return total / n_pairs

In [5]:
def group_compatibility(group1, group2, re):
    """Share of compatible cross-group pairs of positions.

    Counts the pairs (pos1, pos2), pos1 from ``group1`` and pos2 from
    ``group2``, that the dialectical structure of ``re`` reports as
    compatible, and divides by the number of such pairs.

    Raises ZeroDivisionError if one of the groups is empty.
    """
    n_compatible = sum(
        1
        for first in group1
        for second in group2
        if re.dialectical_structure().are_compatible(first, second)
    )

    # average over the number of cross-group pairs
    return n_compatible / (len(group1) * len(group2))

def compatibility_list(group1, group2, re):
    """Compatibility flag (1 or 0) of every cross-group pair of positions.

    Returns a flat list with one entry per pair (pos1, pos2), pos1 taken from
    ``group1`` (outer loop) and pos2 from ``group2`` (inner loop): 1 if the
    dialectical structure of ``re`` reports the pair as compatible, else 0.
    """
    return [
        1 if re.dialectical_structure().are_compatible(first, second) else 0
        for first in group1
        for second in group2
    ]

def population_compatibility(population, re):
    """Share of compatible pairs within a population of positions.

    Checks every unordered pair of distinct members of ``population`` with the
    dialectical structure of ``re`` and returns the fraction of pairs reported
    as compatible.

    Raises ZeroDivisionError if ``population`` has fewer than two members.
    """
    flags = [
        bool(re.dialectical_structure().are_compatible(first, second))
        for first, second in combinations(population, 2)
    ]

    # average over the number of pairs
    return sum(flags) / len(flags)

In [6]:
# utility for streamlined output
def get_remainders(dia, position):
    """Return the largest consistent subpositions of ``position``.

    If ``dia`` deems ``position`` itself consistent, it is returned as the
    only element. Otherwise all subpositions are checked and the consistent
    ones of maximal size are returned, in the order in which
    ``position.subpositions()`` yields them.
    """
    if dia.is_consistent(position):
        return [position]

    # consistent subpositions together with their sizes
    consistent = [sub for sub in position.subpositions() if dia.is_consistent(sub)]
    sizes = [sub.size() for sub in consistent]

    # keep every subposition that attains the maximal size
    largest = max(sizes, default=0)
    return [sub for sub, size in zip(consistent, sizes) if size == largest]

def get_streamlined_output(dia, init_coms):
    """Streamlined outputs for the initial commitments ``init_coms``.

    For every remainder returned by ``get_remainders(dia, init_coms)``, each
    axiomatization that ``dia.axioms`` yields for the remainder (with the
    remainder's subpositions as source) is paired with its closure under
    ``dia``. Returns the list of these ``(axioms, closure)`` tuples.
    """
    return [
        (axioms, dia.closure(axioms))
        for remainder in get_remainders(dia, init_coms)
        # axiomatization of remainder with subpositions as source
        for axioms in dia.axioms(remainder, remainder.subpositions())
    ]

## Preprocessing Raw Data

In [7]:
# raw simulation results are expected in a "data" folder below the current working directory
data_dir = path.join(getcwd(), "data")
#data_file_name = 'data_two_point_convergence_binned_sp__7_00.csv.tar.gz'
# file that is loaded and preprocessed in the following cells
data_file_name = 'data_two_point_convergence_binned_sp_7_all_configs.csv.tar.gz'

In [8]:
# loading the data

# Test the suffix directly: slicing from the first '.' in the name would
# misclassify any archive whose base name itself contains a dot.
if data_file_name.endswith('.csv.tar.gz'):
    with tarfile.open(path.join(data_dir, data_file_name)) as tar:
        # remember the name of the (last) archive member; it is the csv file read below
        for tarinfo in tar:
            file_name = tarinfo.name
        # NOTE(review): extractall trusts the member paths in the archive;
        # only use it on archives from a trusted source
        tar.extractall(data_dir)
    re_data = pd.read_csv(path.join(data_dir, file_name))

else:
    # plain csv file, no extraction needed
    re_data = pd.read_csv(path.join(data_dir, data_file_name))

print(re_data.columns)
re_data.shape

Index(['model_name', 'ds', 'n_sentence_pool', 'ds_infer_dens',
       'ds_n_consistent_complete_positions', 'account_penalties',
       'faithfulness_penalties', 'weight_account', 'weight_systematicity',
       'weight_faithfulness', 'init_coms', 'init_coms_dia_consistent',
       'fixed_point_coms', 'fixed_point_coms_consistent', 'fixed_point_theory',
       'fixed_point_dia_consistent', 'n_branches', 'fixed_points',
       'n_fixed_points', 'fp_coms_consistent', 'fp_union_consistent',
       'fixed_point_is_global_optimum', 'fixed_point_is_re_state',
       'fixed_point_is_full_re_state', 'global_optima', 'n_global_optima',
       'go_coms_consistent', 'go_union_consistent', 'go_full_re_state',
       'full_re_states', 'n_full_re_states', 'go_fixed_point',
       'fp_full_re_state', 'fp_global_optimum', 'configuration'],
      dtype='object')


(130000, 35)

In [9]:
# these columns come out of the csv as strings; parse them back into Python objects (in place)
literal_eval_cols(re_data, ["fixed_points", "global_optima", "fp_full_re_state", "go_full_re_state"])

In [10]:
# configuration = tuple of the three weights (account, systematicity, faithfulness);
# this overwrites the "configuration" column that was loaded from the file
re_data["configuration"] = re_data.apply(lambda row: (row["weight_account"], 
                                                      row["weight_systematicity"], 
                                                      row["weight_faithfulness"]), axis=1)

In [11]:
# columns needed for the fixed point analysis; taken as an explicit copy because
# further columns are added to fp_df in the following cells
fp_df = re_data.loc[:, ['n_sentence_pool', "ds", "configuration", "init_coms",
                        "fixed_points", "fp_full_re_state"]].copy()

#go_df = re_data[['n_sentence_pool', "ds", "configuration", "init_coms", "global_optima", "go_full_re_state"]]

In [12]:
fp_df.shape

(130000, 6)

In [13]:
fp_df["ds"].nunique()

13000

In [14]:
# per simulation setup: for every fixed point the pair
# (length of its first component, length of its second component)
fp_df["fixed_points_lengths"] = fp_df["fixed_points"].map(
    lambda fixed_points: [(len(fp[0]), len(fp[1])) for fp in fixed_points]
)

In [15]:
# explode fixed point information
# every non-index column is exploded, so each element of the list-valued columns gets
# its own row; the index columns (setup identifiers) are repeated on every such row
ex_fp_df = fp_df.set_index(['n_sentence_pool', "ds", "configuration", "init_coms"]).apply(pd.Series.explode).reset_index()
ex_fp_df.shape

(230780, 7)

In [16]:
# restrict to full RE fixed points
# (the fp_full_re_state column is used directly as a boolean row mask)
ex_fp_df = ex_fp_df[ex_fp_df["fp_full_re_state"]]
ex_fp_df.shape

(80851, 7)

In [17]:
# restrict further to non-trivial full RE fixed points
# (a fixed point counts as trivial here if its lengths entry equals (1, 1))
ex_fp_df = ex_fp_df[ex_fp_df["fixed_points_lengths"]!=(1,1)]
ex_fp_df.shape

(78976, 7)

In [18]:
# count the number of unique initial commitments that yield non-trivial full RE fixed point
# per dialectical structure and configuration
# (result: one row per (ds, configuration) with the count in the "init_coms" column)
ex_fp_df_grouped = ex_fp_df.groupby(["ds", "configuration"])["init_coms"].nunique().reset_index()

In [19]:
# select pairs of simulation setups that both yield at least one non-trivial full RE fixed point
# i.e. keep the (ds, configuration) groups with exactly two distinct initial commitments left
# NOTE(review): ffull_re_fp_df is not referenced again in this part of the notebook;
# it appears to serve as a cross-check for full_re_fp_df, which is built further down
ffull_re_fp_df = fp_df.merge(ex_fp_df_grouped[ex_fp_df_grouped["init_coms"]==2][["ds", "configuration"]], 
                         on=["ds", "configuration"], how="inner")
ffull_re_fp_df.shape

(33510, 7)

In [20]:
# flag the simulation setups that have at least one fixed point which is a
# full RE state and whose lengths entry differs from (1, 1)
fp_df["any_nontrivial_full_re_fixed_point"] = fp_df.apply(
    lambda row: any(
        is_full_re and lengths != (1, 1)
        for is_full_re, lengths in zip(row["fp_full_re_state"], row["fixed_points_lengths"])
    ),
    axis=1,
)

In [21]:
fp_df["any_nontrivial_full_re_fixed_point"].sum()

61055

In [22]:
# is there any full RE fixed point per simulation setup?
#fp_df["any_fp_full_re_state"] = fp_df["fp_full_re_state"].apply(any)

In [23]:
# a (sentence pool size, ds, configuration) group is flagged True only if every
# simulation setup in it has a non-trivial full RE fixed point (builtin `all` over the flags)
fp_df_grouped = fp_df.groupby(['n_sentence_pool', "ds", "configuration"]).aggregate({"any_nontrivial_full_re_fixed_point":all}).reset_index()
#fp_df_grouped = fp_df.groupby(['n_sentence_pool', "ds", "configuration"]).aggregate({"any_fp_full_re_state":all}).reset_index()

In [24]:
# select pairs of simulation setups that both yield at least one non-trivial full RE fixed point:
# the inner merge keeps only the rows of fp_df whose (ds, configuration) group is flagged True
full_re_fp_df = fp_df.merge(fp_df_grouped[fp_df_grouped["any_nontrivial_full_re_fixed_point"]][["ds", "configuration"]], 
                         on=["ds", "configuration"], how="inner")
full_re_fp_df.shape

# variant without the non-triviality condition:
# select pairs of simulation setups that both yield at least one full RE fixed point
#full_re_fp_df = fp_df.merge(fp_df_grouped[fp_df_grouped["any_fp_full_re_state"]][["ds", "configuration"]], 
#                         on=["ds", "configuration"], how="inner")
#full_re_fp_df.shape

(33510, 8)

In [25]:
# only keep full RE fixed points: per setup, the fixed points whose flag in fp_full_re_state is set
full_re_fp_df["full_re_fixed_points"] = full_re_fp_df.apply(
    lambda row: [
        fixed_point
        for fixed_point, is_full_re in zip(row["fixed_points"], row["fp_full_re_state"])
        if is_full_re
    ],
    axis=1,
)

# same filter for the lengths, so that both columns stay aligned entry by entry
full_re_fp_df["full_re_fixed_points_lengths"] = full_re_fp_df.apply(
    lambda row: [
        lengths
        for lengths, is_full_re in zip(row["fixed_points_lengths"], row["fp_full_re_state"])
        if is_full_re
    ],
    axis=1,
)

In [26]:
# drop the helper flag and the unfiltered fixed point columns; their full RE counterparts remain
full_re_fp_df.drop(["any_nontrivial_full_re_fixed_point", "fixed_points","fixed_points_lengths", "fp_full_re_state"], axis=1, inplace=True)

In [27]:
full_re_fp_df.shape

(33510, 6)

In [28]:
#full_re_fp_df["full_re_fp_lenghts"] = full_re_fp_df.apply(lambda row: [(len(fp[0]), len(fp[1])) 
#                                                                       for fp in row["full_re_fixed_points"]], axis=1)

In [29]:
#full_re_fp_df["any_fp_nontrivial"] = full_re_fp_df.apply(lambda row: any([fp_len!=(1,1) 
#                                                                         for fp_len in row["full_re_fixed_points"]]), axis=1)

In [30]:
#full_re_fp_df["any_fp_nontrivial"].sum()

In [31]:
#full_re_fp_df_grouped = full_re_fp_df.groupby(['n_sentence_pool', "ds", "configuration"]).aggregate({"any_fp_nontrivial":all}).reset_index()

In [32]:
# select pairs of simulation setups that both yield a nontrivial RE fixed point
#nontrivial_full_re_fp_df = fp_df.merge(full_re_fp_df_grouped[full_re_fp_df_grouped["any_fp_nontrivial"]][["ds", "configuration"]], 
#                         on=["ds", "configuration"], how="inner")
#nontrivial_full_re_fp_df.shape

In [33]:
#nontrivial_full_re_fp_df.head()

In [34]:
#nontrivial_full_re_fp_df.drop(["any_fp_nontrivial"], axis=1, inplace=True)

In [35]:
#ex_fp_df = nontrivial_full_re_fp_df.set_index(['n_sentence_pool', "ds", "configuration", "init_coms"]).apply(pd.Series.explode).reset_index()
#print(ex_fp_df.shape)
#ex_go_df = go_df.set_index(['n_sentence_pool', "ds", "configuration", "init_coms"]).apply(pd.Series.explode).reset_index()
#print(ex_go_df.shape)

In [36]:
# explode the two list-valued columns in parallel: one row per full RE fixed point
# (this replaces the ex_fp_df built earlier from fp_df)
ex_fp_df = full_re_fp_df.set_index(['n_sentence_pool', "ds", "configuration", "init_coms"]).apply(pd.Series.explode).reset_index()
print(ex_fp_df.shape)

(43663, 6)


In [37]:
ex_fp_df.head()

Unnamed: 0,n_sentence_pool,ds,configuration,init_coms,full_re_fixed_points,full_re_fixed_points_lengths
0,7,"[[-2, 7, 1], [-4, 2], [5, 1], [3, 6, 5], [4, 3...","(0.55, 0.35, 0.1)","{-7, -5, -4, -3, -2}","({-1, 6}, {2, 6, -5, -4, -3, -1})","(2, 6)"
1,7,"[[-2, 7, 1], [-4, 2], [5, 1], [3, 6, 5], [4, 3...","(0.55, 0.35, 0.1)","{-7, -4, -3, -2}","({-3}, {2, -4, -3})","(1, 3)"
2,7,"[[-2, 7, 1], [-4, 2], [5, 1], [3, 6, 5], [4, 3...","(0.46, 0.1, 0.44)","{-7, -5, -4, -3, -2}","({-7, -1, 6}, {2, 6, -7, -5, -4, -3, -1})","(3, 7)"
3,7,"[[-2, 7, 1], [-4, 2], [5, 1], [3, 6, 5], [4, 3...","(0.46, 0.1, 0.44)","{-7, -4, -3, -2}","({-7, -3}, {-7, 2, -4, -3})","(2, 4)"
4,7,"[[7, -6, -2], [-5, -4, 2], [-3, -1, -6], [-1, ...","(0.46, 0.1, 0.44)","{7, -2, -4, -3, -1}","({-4, -2, 7}, {3, 5, 6, 7, -2, -4, -1})","(3, 7)"


In [38]:
# restrict exploded data frame to full RE states
#ex_fp_df = ex_fp_df[ex_fp_df["fp_full_re_state"]]
#print(ex_fp_df.shape)
#ex_go_df = ex_go_df[ex_go_df["go_full_re_state"]]
#print(ex_go_df.shape)

In [39]:
# check whether there are at least 2 different initial commitments per structure and configuration left
ex_fp_df.groupby(["ds", "configuration"])["init_coms"].nunique().reset_index()["init_coms"].describe()

count    16755.0
mean         2.0
std          0.0
min          2.0
25%          2.0
50%          2.0
75%          2.0
max          2.0
Name: init_coms, dtype: float64

In [40]:
ex_fp_df.groupby(["configuration"])["full_re_fixed_points"].size().reset_index()

Unnamed: 0,configuration,full_re_fixed_points
0,"(0.35, 0.55, 0.1)",5458
1,"(0.46, 0.1, 0.44)",16205
2,"(0.55, 0.2, 0.25)",7368
3,"(0.55, 0.35, 0.1)",10216
4,"(0.7, 0.2, 0.1)",4416


In [41]:
ex_fp_df.columns

Index(['n_sentence_pool', 'ds', 'configuration', 'init_coms',
       'full_re_fixed_points', 'full_re_fixed_points_lengths'],
      dtype='object')

In [42]:
#rename columns
# (positional assignment; compared with the current names only the fifth column changes:
#  'full_re_fixed_points' becomes 'fixed_points')
ex_fp_df.columns = ['n_sentence_pool', 'ds', 'configuration', 'init_coms',
       'fixed_points', 'full_re_fixed_points_lengths']

### Streamlined outputs

In [43]:
# streamlined outputs
# one row per (sentence pool size, dialectical structure, initial commitments);
# built as an independent frame because a column is added to it in a later cell
so_df = (
    ex_fp_df
    .drop_duplicates(['n_sentence_pool', "ds", "init_coms"])
    .drop(["configuration", "fixed_points",  'full_re_fixed_points_lengths'], axis=1)
    .copy()
)
so_df.shape

(18916, 3)

In [44]:
so_df.columns

Index(['n_sentence_pool', 'ds', 'init_coms'], dtype='object')

In [45]:
# compute the streamlined outputs for every (structure, initial commitments) row;
# "ds" and "init_coms" are stored as strings, hence the literal_eval calls before the
# dialectical structure and the position are constructed
so_df["streamlined_output"] = so_df.apply(lambda row: get_streamlined_output(BDDDialecticalStructure(row['n_sentence_pool'], literal_eval(row["ds"])),
                                                                        StandardPosition.from_set(literal_eval(row["init_coms"]), row['n_sentence_pool'])), axis=1)

In [46]:
print(so_df.columns)
print(so_df.shape)

Index(['n_sentence_pool', 'ds', 'init_coms', 'streamlined_output'], dtype='object')
(18916, 4)


In [47]:
# explode the list of streamlined outputs: one row per streamlined output
ex_so_df = so_df.set_index(['n_sentence_pool', "ds", "init_coms"]).apply(pd.Series.explode).reset_index()
ex_so_df.shape

(27946, 4)

In [48]:
# overall number of streamlined outputs reached from individual commitments
so_df["streamlined_output"].map(len).describe()

count    18916.000000
mean         1.477374
std          0.784441
min          1.000000
25%          1.000000
50%          1.000000
75%          2.000000
max          9.000000
Name: streamlined_output, dtype: float64

### Additional data generation

In [49]:
# empty frame that receives the result rows generated in the next cell
result_df = pd.DataFrame()

In [50]:
# Generate one result row per dialectical structure and configuration.
# The rows are collected in a list and turned into a dataframe once after the loop:
# `DataFrame.append` was removed in pandas 2.0 and copied the whole frame on every call.
result_rows = []

for args in ex_fp_df["ds"].unique():

    # restrict dataframes to dialectical structure (filtered once and reused below)
    sub_ex_fp_df = ex_fp_df[ex_fp_df["ds"] == args]
    #sub_ex_go_df = ex_go_df[(ex_go_df["ds"]==args)]
    sub_ex_so_df = ex_so_df[ex_so_df["ds"] == args]

    n = sub_ex_fp_df["n_sentence_pool"].unique()[0]

    # `args` is the string stored in the "ds" column; parse it to build the structure
    dia = BDDDialecticalStructure(n, literal_eval(args))
    re = StandardGlobalReflectiveEquilibrium(dia)

    result_row = {}
    result_row["ds"] = args
    result_row["n"] = n
    result_row["infer_dens"] = inferential_density(dia)

    # initial commitments (identical for fp, go and so)
    init_coms = [StandardPosition.from_set(literal_eval(coms), n) for coms in sub_ex_fp_df["init_coms"].unique()]

    result_row["init_coms_agreement"] = normalized_agreement(init_coms[0],
                                                             init_coms[1], re)

    result_row["init_coms_compat"] = 1 if dia.are_compatible(init_coms[0], init_coms[1]) else 0

    result_row["init_coms_size"] = sum(coms.size() for coms in init_coms)/len(init_coms)

    # streamlined outputs (results: independent of configuration)

    # initial coms
    ic1, ic2 = sub_ex_so_df["init_coms"].unique()

    # streamlined outputs per initial commitments; every entry is a pair (theory, commitments)
    so_outputs1 = sub_ex_so_df[sub_ex_so_df["init_coms"]==ic1]["streamlined_output"]
    so_outputs2 = sub_ex_so_df[sub_ex_so_df["init_coms"]==ic2]["streamlined_output"]

    # streamlined commitments
    so_coms1 = [so[1] for so in so_outputs1]
    so_coms2 = [so[1] for so in so_outputs2]

    #so_coms1 = set(so_coms1)
    #so_coms2 = set(so_coms2)
    #so_coms = set(chain(so_coms1, so_coms2))
    so_coms = list(chain(so_coms1, so_coms2))

    result_row["so_coms_group_agreement"] = group_normalized_agreement(so_coms1, so_coms2, re)
    result_row["so_coms_group_compat"] = group_compatibility(so_coms1, so_coms2, re)
    #result_row["so_coms_pop_agreement"] = population_normalized_agreement(so_coms, re)
    #result_row["so_coms_pop_compat"] = population_compatibility(so_coms, re)
    result_row["so_coms_size"] = sum(coms.size() for coms in so_coms)/len(so_coms)
    #result_row["so_coms_share"] = len(so_coms1.intersection(so_coms2))/len(so_coms)

    # streamlined theories
    so_thes1 = [so[0] for so in so_outputs1]
    so_thes2 = [so[0] for so in so_outputs2]

    # closure of so theories
    so_thes1 = [dia.closure(so_the) for so_the in so_thes1]
    so_thes2 = [dia.closure(so_the) for so_the in so_thes2]

    #so_thes1 = set(so_thes1)
    #so_thes2 = set(so_thes2)
    #so_thes = set(chain(so_thes1, so_thes2))
    so_thes = list(chain(so_thes1, so_thes2))

    result_row["so_thes_group_agreement"] = group_normalized_agreement(so_thes1, so_thes2, re)
    result_row["so_thes_group_compat"] = group_compatibility(so_thes1, so_thes2, re)
    result_row["so_coms_compat_list"] = compatibility_list(so_coms1, so_coms2, re)
    result_row["so_coms_simil_list"] = similarity_list(so_coms1, so_coms2, re)
    #result_row["so_thes_pop_agreement"] = population_normalized_agreement(so_thes, re)
    #result_row["so_thes_pop_compat"] = population_compatibility(so_thes, re)
    result_row["so_thes_size"] = sum(the.size() for the in so_thes)/len(so_thes)
    #result_row["so_thes_share"] = len(so_thes1.intersection(so_thes2))/len(so_thes)

    # loop through configurations
    for config in sub_ex_fp_df["configuration"].unique():

        result_row["configuration"] = config

        # restrict dataframes further
        fp_dff = sub_ex_fp_df[sub_ex_fp_df["configuration"]==config]
        #go_dff = sub_ex_go_df[sub_ex_go_df["configuration"]==config]

        # fixed points

        #result_row["all_fp_full_re"] = fp_dff["fp_full_re_state"].all()

        # initial coms (identical for go and fp)
        ic1, ic2 = fp_dff["init_coms"].unique()

        # fixed points per initial commitments; every entry is a pair (theory, commitments)
        fixed_points1 = fp_dff[fp_dff["init_coms"]==ic1]["fixed_points"]
        fixed_points2 = fp_dff[fp_dff["init_coms"]==ic2]["fixed_points"]

        # fixed points commitments
        fp_coms1 = [StandardPosition.from_set(fp[1], n) for fp in fixed_points1]
        fp_coms2 = [StandardPosition.from_set(fp[1], n) for fp in fixed_points2]

        #fp_coms1 = set(fp_coms1)
        #fp_coms2 = set(fp_coms2)
        #fp_coms = set(chain(fp_coms1, fp_coms2))
        fp_coms = list(chain(fp_coms1, fp_coms2))

        result_row["fp_coms_group_agreement"] = group_normalized_agreement(fp_coms1, fp_coms2, re)
        result_row["fp_coms_group_compat"] = group_compatibility(fp_coms1, fp_coms2, re)
        result_row["fp_coms_compat_list"] = compatibility_list(fp_coms1, fp_coms2, re)
        result_row["fp_coms_simil_list"] = similarity_list(fp_coms1, fp_coms2, re)

        #result_row["fp_coms_pop_agreement"] = population_normalized_agreement(fp_coms, re)
        #result_row["fp_coms_pop_compat"] = population_compatibility(fp_coms, re)
        result_row["fp_coms_size"] = sum(coms.size() for coms in fp_coms)/len(fp_coms)
        #result_row["fp_coms_share"] = len(fp_coms1.intersection(fp_coms2))/len(fp_coms)

        # fixed points theories
        fp_thes1 = [StandardPosition.from_set(fp[0], n) for fp in fixed_points1]
        fp_thes2 = [StandardPosition.from_set(fp[0], n) for fp in fixed_points2]

        # closure of fp theories
        fp_thes1 = [dia.closure(fp_the) for fp_the in fp_thes1]
        fp_thes2 = [dia.closure(fp_the) for fp_the in fp_thes2]

        #fp_thes1 = set(fp_thes1)
        #fp_thes2 = set(fp_thes2)
        #fp_thes = set(chain(fp_thes1, fp_thes2))
        fp_thes = list(chain(fp_thes1, fp_thes2))

        result_row["fp_thes_group_agreement"] = group_normalized_agreement(fp_thes1, fp_thes2, re)
        result_row["fp_thes_group_compat"] = group_compatibility(fp_thes1, fp_thes2, re)
        #result_row["fp_thes_pop_agreement"] = population_normalized_agreement(fp_thes, re)
        #result_row["fp_thes_pop_compat"] = population_compatibility(fp_thes, re)
        result_row["fp_thes_size"] = sum(the.size() for the in fp_thes)/len(fp_thes)
        #result_row["fp_thes_share"] = len(fp_thes1.intersection(fp_thes2))/len(fp_thes)

        # global optima

        #result_row["all_go_full_re"] = go_dff["go_full_re_state"].all()

        # initial coms (identical for go and fp)
        #ic1, ic2 = go_dff["init_coms"].unique()

        # global optima commitments
        #go_coms1 = [StandardPosition.from_set(go[1], n) for go in go_dff[go_dff["init_coms"]==ic1]["global_optima"]]
        #go_coms2 = [StandardPosition.from_set(go[1], n) for go in go_dff[go_dff["init_coms"]==ic2]["global_optima"]]

        #go_coms1 = set(go_coms1)
        #go_coms2 = set(go_coms2)
        #go_coms = set(chain(go_coms1, go_coms2))
        #go_coms = list(chain(go_coms1, go_coms2))

        #result_row["go_coms_agreement"] = group_normalized_agreement(go_coms1, go_coms2, re)
        #result_row["go_coms_compat"] = group_compatibility(go_coms1, go_coms2, re)
        #result_row["go_coms_agreement"] = population_normalized_agreement(go_coms, re)
        #result_row["go_coms_compat"] = population_compatibility(go_coms, re)
        #result_row["go_coms_size"] = sum(coms.size() for coms in go_coms)/len(go_coms)
        #result_row["go_coms_share"] = len(go_coms1.intersection(go_coms2))/len(go_coms)

        # global optima theories
        #go_thes1 = [StandardPosition.from_set(go[0], n) for go in go_dff[go_dff["init_coms"]==ic1]["global_optima"]]
        #go_thes2 = [StandardPosition.from_set(go[0], n) for go in go_dff[go_dff["init_coms"]==ic2]["global_optima"]]

        # closure of go theories
        #go_thes1 = [dia.closure(go_the) for go_the in go_thes1]
        #go_thes2 = [dia.closure(go_the) for go_the in go_thes2]

        #go_thes1 = set(go_thes1)
        #go_thes2 = set(go_thes2)
        #go_thes = set(chain(go_thes1, go_thes2))
        #go_thes = list(chain(go_thes1, go_thes2))

        #result_row["go_thes_agreement"] = group_normalized_agreement(go_thes1, go_thes2, re)
        #result_row["go_thes_compat"] = group_compatibility(go_thes1, go_thes2, re)
        #result_row["go_thes_agreement"] = population_normalized_agreement(go_thes, re)
        #result_row["go_thes_compat"] = population_compatibility(go_thes, re)
        #result_row["go_thes_size"] = sum(the.size() for the in go_thes)/len(go_thes)
        #result_row["go_thes_share"] = len(go_thes1.intersection(go_thes2))/len(go_thes)

        # store a copy of the row: result_row is reused and overwritten for the next configuration
        result_rows.append(dict(result_row))

# append the collected rows to the result dataframe
result_df = pd.concat([result_df, pd.DataFrame(result_rows)], ignore_index=True)

In [51]:
result_df.shape

(16755, 23)

In [52]:
# group similarity
result_df["fp_coms_group_agreement"].describe()

count    16755.000000
mean         0.574948
std          0.291135
min          0.000000
25%          0.357143
50%          0.571429
75%          0.857143
max          1.000000
Name: fp_coms_group_agreement, dtype: float64

In [108]:
result_df.groupby("init_coms_agreement")["ds"].size().reset_index()

Unnamed: 0,init_coms_agreement,ds
0,0.071429,130
1,0.142857,134
2,0.214286,104
3,0.285714,115
4,0.357143,153
5,0.428571,167
6,0.5,154
7,0.571429,186
8,0.642857,202
9,0.714286,202


In [109]:
result_df.groupby("init_coms_agreement")["fp_coms_group_agreement"].median()

init_coms_agreement
0.071429    0.285714
0.142857    0.321429
0.214286    0.357143
0.285714    0.428571
0.357143    0.428571
0.428571    0.500000
0.500000    0.500000
0.571429    0.642857
0.642857    0.714286
0.714286    0.785714
0.785714    0.928571
0.857143    1.000000
0.928571    1.000000
Name: fp_coms_group_agreement, dtype: float64

In [53]:
#import tarfile

# target location of the preprocessed data
data_dir = path.join(getcwd(), "data")
file_name="preprocessed_data_two_point_convergence_binned_sp_7_all_configs.csv"
csv_path = path.join(data_dir, file_name)

# write results to a .csv file
result_df.to_csv(csv_path, index=False)

# add the csv file to a gzip-compressed tar file next to it;
# inside the archive the file is stored under its bare name
tar_file = csv_path + '.tar.gz'
with tarfile.open(tar_file, "w:gz") as archive:
    archive.add(csv_path, recursive=False, arcname=file_name)