In [None]:
import math
from matplotlib import pyplot as plt
import numpy as np
import numpy.random as nprand
import pandas as pd
import random
import scipy.stats as spstats
%matplotlib inline

from loomio import *
from netdelib import *
from socialchoice import *
from timeseries import *

## Evolution - Group Level

In [None]:
class NetDelibEvoInitialMedian(object):
    """Relate each participant's preference change to their movement relative
    to the initial median ranking(s) of their treatment group.

    Three measures are available: Kendall tau (``kendall``), ballot
    dissimilarity (``ballot``) and crossing dissimilarity (``crossing``).
    Each of those methods returns per-participant numbers for one treatment
    frame; the matching ``plot_*`` method scatters them for both treatment
    groups on the current matplotlib axes.
    """

    # Positions of the initial and final preference inside a participant's
    # sequence.  INITIAL_STAGE is also handed to the make_*_set helpers as
    # their second argument (presumably the stage whose median is wanted --
    # confirm against those helpers).
    INITIAL_STAGE = 0
    FINAL_STAGE = 3

    def __init__(self, df_score):
        """Split the score frame by its ``treatment`` column.

        Rows with ``treatment == 1`` are plotted as 'Control' and rows with
        ``treatment == 2`` as 'Random-Pod'.
        """
        self.df_control = df_score[df_score.treatment == 1]
        self.df_random = df_score[df_score.treatment == 2]

    def plot_kendall(self):
        """Scatter participant shift against change in Kendall distance to the median."""
        self._plot_shift(
            self.kendall,
            title='Distance to Initial Median (Kendall)',
            xlabel='Participant Shift',
            ylabel='Median Distance Shift',
            xlim=[0, 14],
            ylim=[-14, 14],
        )

    def plot_ballot(self):
        """Scatter participant change against change in ballot dissimilarity to the median."""
        self._plot_shift(
            self.ballot,
            title='Dissimilarity to Initial Median (Ballot)',
            xlabel='Participant Change',
            ylabel='Change in Dissimilarity to Median',
            xlim=[0, 1],
            # The y value is a difference (final - initial) and is negative
            # for participants who moved toward the median, so the axis must
            # extend below zero, exactly as in plot_crossing.
            ylim=[-1, 1],
        )

    def plot_crossing(self):
        """Scatter participant change against change in crossing dissimilarity to the median."""
        self._plot_shift(
            self.crossing,
            title='Distance to Initial Median (Crossing)',
            xlabel='Participant Change',
            ylabel='Change in Dissimilarity to Median',
            xlim=[0, 1],
            ylim=[-1, 1],
        )

    def _plot_shift(self, measure, title, xlabel, ylabel, xlim, ylim):
        """Draw one scatter panel on the current axes.

        ``measure`` is one of ``kendall`` / ``ballot`` / ``crossing``; it is
        called once per treatment frame and must return
        ``(shifts, init, final)``.  The x value is ``shifts`` and the y value
        is ``final - init``.
        """
        control = measure(self.df_control)
        random_pod = measure(self.df_random)

        groups = (
            (control, "#00007f7f", 'Control'),
            (random_pod, "#007f007f", 'Random-Pod'),
        )
        for (shifts, init, final), color, label in groups:
            median_shift = np.array(final) - np.array(init)
            plt.plot(shifts, median_shift, '.', color=color, markersize=30, label=label)

        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.xlim(xlim)
        plt.ylim(ylim)

    def kendall(self, df_treatment):
        """Kendall tau distances for every participant in ``df_treatment``.

        Returns a tuple ``(shifts, init_kendall, final_kendall)`` of lists
        with one entry per participant id:

        * ``shifts`` -- Kendall tau between the participant's initial and
          final preference;
        * ``init_kendall`` / ``final_kendall`` -- mean Kendall tau between
          each ranking returned by ``make_kemeny_young_set`` and the
          participant's initial / final preference.
        """
        collection = make_preference_sequence_collection(df_treatment)

        # There might be multiple medians; distances to them are averaged.
        medians = [
            Preference(ky)
            for ky in make_kemeny_young_set(df_treatment, self.INITIAL_STAGE)
        ]

        shifts = []
        init_kendall = []
        final_kendall = []
        for participant_id in collection.participant_ids():
            sequence = collection[participant_id]
            pref_init = Preference(sequence[self.INITIAL_STAGE])
            pref_final = Preference(sequence[self.FINAL_STAGE])

            shifts.append(pref_init.kendall_tau(pref_final))
            init_kendall.append(
                np.mean([median.kendall_tau(pref_init) for median in medians]))
            final_kendall.append(
                np.mean([median.kendall_tau(pref_final) for median in medians]))
        return (shifts, init_kendall, final_kendall)

    def ballot(self, df_treatment):
        """Ballot dissimilarities for every participant in ``df_treatment``.

        Returns ``(shifts, init_ballot, final_ballot)``; see
        ``_dissimilarity_to_initial_median`` for the meaning of each list.
        The medians come from ``make_ballot_set``.
        """
        return self._dissimilarity_to_initial_median(
            df_treatment,
            make_ballot_set,
            lambda pref, other: pref.ballot_dissimilarity(other),
        )

    def crossing(self, df_treatment):
        """Crossing dissimilarities for every participant in ``df_treatment``.

        Returns ``(shifts, init, final)``; see
        ``_dissimilarity_to_initial_median`` for the meaning of each list.
        The medians come from ``make_crossing_set``.
        """
        return self._dissimilarity_to_initial_median(
            df_treatment,
            make_crossing_set,
            lambda pref, other: pref.crossing_dissimilarity(other),
        )

    def _dissimilarity_to_initial_median(self, df_treatment, make_median_set, dissimilarity):
        """Shared body of ``ballot`` and ``crossing``.

        ``make_median_set(df_treatment, stage)`` supplies the median
        rankings and ``dissimilarity(preference, other)`` measures one pair.

        Returns ``(shifts, init, final)`` with one entry per participant id:
        the dissimilarity between the participant's initial and final
        preference, and the mean dissimilarity between the medians and the
        participant's initial / final sequence entry.
        """
        collection = make_preference_sequence_collection(df_treatment)
        median_init = make_median_set(df_treatment, self.INITIAL_STAGE)
        # Built once instead of once per participant.
        medians = [Preference(ranking) for ranking in median_init]

        shifts = []
        init = []
        final = []
        for participant_id in collection.participant_ids():
            sequence = collection[participant_id]
            entry_init = sequence[self.INITIAL_STAGE]
            entry_final = sequence[self.FINAL_STAGE]
            pref_init = Preference(entry_init)
            pref_final = Preference(entry_final)

            shift = dissimilarity(pref_init, pref_final)
            shifts.append(shift)

            # NOTE(review): the medians are compared with the raw sequence
            # entries here, whereas kendall() compares with the Preference
            # objects built from them.  Kept as found -- confirm that the
            # dissimilarity methods accept raw entries.
            mean_init = np.mean([dissimilarity(median, entry_init) for median in medians])
            mean_final = np.mean([dissimilarity(median, entry_final) for median in medians])

            if mean_final - mean_init > shift:
                self._report_unexpected_shift(
                    mean_init, mean_final, shift, pref_init, pref_final, median_init)

            init.append(mean_init)
            final.append(mean_final)
        return (shifts, init, final)

    @staticmethod
    def _report_unexpected_shift(mean_init, mean_final, shift, pref_init, pref_final, median_init):
        """Print a diagnostic for a participant whose distance to the median
        grew by more than their own preference changed, which a measure
        obeying the triangle inequality would not allow.
        """
        print(mean_final - mean_init - shift)
        print("d_i", mean_init)
        print("d_f", mean_final)
        print("shift", shift)
        print(pref_init)
        print(pref_final)
        print(median_init)
        print('---')



In [None]:
# Read the score table together with the two proposal lookup maps, then pass
# the scores through fill_attrition (presumably filling in participants who
# dropped out -- confirm against that helper) before building the analysis object.
df_score_raw, proposal_map, proposal_rev_map = load_loomio_score('results/results_2_3.tsv')
df_score = fill_attrition(df_score_raw)

netdelib = NetDelibEvoInitialMedian(df_score)

In [None]:
# Two panels side by side: Kendall on the left, crossing on the right.
# Blue points are the control group, green points the random-pod group.
plt.figure(figsize=(8, 4))
panels = (netdelib.plot_kendall, netdelib.plot_crossing)
for position, draw_panel in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    draw_panel()
plt.tight_layout()

In [None]:
# Per-participant crossing results for each treatment group.  The second
# result is deliberately not called `random`: that name belongs to the
# `random` module imported at the top of the notebook.
control = netdelib.crossing(netdelib.df_control)
random_pod = netdelib.crossing(netdelib.df_random)

# Change in dissimilarity to the median (final - initial) and each
# participant's own initial-to-final change.
control_deviation_shift = np.array(control[2]) - np.array(control[1])
random_deviation_shift = np.array(random_pod[2]) - np.array(random_pod[1])
control_pref_shift = np.array(control[0])
random_pref_shift = np.array(random_pod[0])

y1, y2 = control_deviation_shift, random_deviation_shift
plt.hist([y1, y2], label=['Control', 'Random-Pod'])
plt.xlabel('Change in Dissimilarity to Median')
plt.ylabel('Participants')

# Do the two groups differ in how far they moved relative to the median?
t, p = spstats.ttest_ind(y1, y2, nan_policy="omit")
print(t, p)
# Does each group's mean change differ from zero?  NaNs are dropped here too,
# so all three tests treat missing values the same way.
t, p = spstats.ttest_1samp(y1, 0, nan_policy="omit")
print(t, p)
t, p = spstats.ttest_1samp(y2, 0, nan_policy="omit")
print(t, p)

plt.legend()