In [1]:
import pandas as pd
import json
from tqdm import tqdm
from scipy.stats import rankdata

# Rank most common techniques employed by politicians in each community

## Read propaganda techniques by politicians over time

In [2]:
with open('../../../data/06_propaganda/tweets_vips_annotated_clean.json') as f:
    prop_politicians = json.load(f)

In [6]:
def get_time_period(date):
    start_before = pd.to_datetime('2022-07-01')
    start_during = pd.to_datetime('2022-08-26')
    start_after = pd.to_datetime('2022-09-25')
    end_after = pd.to_datetime('2022-10-31')

    # date to datetime
    date = pd.to_datetime(date)

    if date >= start_before and date < start_during:
        return 'B'
    elif date >= start_during and date < start_after:
        return 'D'
    elif date >= start_after and date <= end_after:
        return 'A'


In [7]:
# for each tweet, add info on time period
for tweet in prop_politicians:
    prop_politicians[tweet]['time_period'] = get_time_period(prop_politicians[tweet]['created_at'])

## Read file about which community politicians belong to

In [8]:
comm_before = pd.read_csv("../../../data/before_communities.csv")
comm_during = pd.read_csv("../../../data/during_communities.csv")
comm_after = pd.read_csv("../../../data/after_communities.csv")

In [9]:
# for each tweet, add info on community based on time period
for tweet in prop_politicians:
    user_id_prop = prop_politicians[tweet]['author_id']
    if prop_politicians[tweet]['time_period'] == 'B':
        try:
            prop_politicians[tweet]['community'] = list(comm_before[comm_before['user_id'] == int(user_id_prop)]['comm_label'].values)[0]
        except:
            prop_politicians[tweet]['community'] = None
    elif prop_politicians[tweet]['time_period'] == 'D':
        try:
            prop_politicians[tweet]['community'] = list(comm_during[comm_during['user_id'] == int(user_id_prop)]['comm_label'].values)[0]
        except:
            prop_politicians[tweet]['community'] = None
    elif prop_politicians[tweet]['time_period'] == 'A':
        try:
            prop_politicians[tweet]['community'] = list(comm_after[comm_after['user_id'] == int(user_id_prop)]['comm_label'].values)[0]
        except:
            prop_politicians[tweet]['community'] = None

In [10]:
# for each community in each time period, get the ranking of the most frequent techniques
communities = {'B': comm_before, 'D': comm_during, 'A': comm_after}

overall_ranking = []
for time_period in communities:
    ranking = []
    for comm in tqdm(communities[time_period]['comm_label'].unique()):
        # if empty community, skip
        if comm is None:
            continue
        tweets_comm = [tweet for tweet in prop_politicians if prop_politicians[tweet]['community'] == comm and prop_politicians[tweet]['time_period'] == time_period]
        techniques = []
        for tweet in tweets_comm:
            if "annotations" in prop_politicians[tweet]:
                for technique in prop_politicians[tweet]['annotations']:
                    techniques.append(technique["label"])
        techniques = pd.Series(techniques)
        techniques = techniques.value_counts()
        techniques = techniques / techniques.sum()
        # separate techniques names and frequencies
        ranking.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})
    overall_ranking.append({'time_period': time_period, 'ranking': ranking})

  techniques = pd.Series(techniques)
100%|██████████| 7/7 [00:00<00:00, 105.61it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

  techniques = pd.Series(techniques)
100%|██████████| 6/6 [00:00<00:00, 160.86it/s]
  techniques = pd.Series(techniques)
  techniques = pd.Series(techniques)
100%|██████████| 9/9 [00:00<00:00, 235.69it/s]


# Rank techniques which swingers in a given community are most vulnerable to

## Read community to which swingers belong

In [11]:
comm_bd_swingers = pd.read_csv("../../../data/05_swing_voters/swing_voters_bd.csv")
comm_da_swingers = pd.read_csv("../../../data/05_swing_voters/swing_voters_da.csv")
comm_ba_swingers = pd.read_csv("../../../data/05_swing_voters/swing_voters_ba.csv")

In [12]:
hard_swing = pd.read_csv("../../../data/hard_swing.csv")
hard_swing["single_party"] = hard_swing["party1"] + "_" + hard_swing["party2"]
comm_bd_swingers["single_party"] = comm_bd_swingers["party1"] + "_" + comm_bd_swingers["party2"]
comm_da_swingers["single_party"] = comm_da_swingers["party1"] + "_" + comm_da_swingers["party2"]
comm_ba_swingers["single_party"] = comm_ba_swingers["party1"] + "_" + comm_ba_swingers["party2"]

In [13]:
comm_bd_swingers = comm_bd_swingers[comm_bd_swingers["single_party"].isin(hard_swing["single_party"].values)]
comm_da_swingers = comm_da_swingers[comm_da_swingers["single_party"].isin(hard_swing["single_party"].values)]
comm_ba_swingers = comm_ba_swingers[comm_ba_swingers["single_party"].isin(hard_swing["single_party"].values)]

Size of communities

In [14]:
comm_bd_swingers["single_party"].value_counts()

FdI_PD                108
L;Az-Iv_PD;AVS        100
PD;AVS_Az-Iv           76
PD;AVS_FdI;M5s         64
M5s_PD;AVS             61
FdI_PD;AVS             39
M5s_PD                 34
FdI_Az-Iv              32
L;FdI_Az-Iv            20
L;FdI_PD               15
L;Az-Iv_PD             13
M5s_NM;FI;FdI;PD;L      8
L;FdI_PD;AVS            4
FI_PD                   1
FI_Az-Iv                1
Name: single_party, dtype: int64

In [15]:
comm_da_swingers["single_party"].value_counts()

PD_FdI;Az-Iv          200
Az-Iv_PD;AVS          128
PD;AVS_FdI;Az-Iv       94
FdI;M5s_PD;AVS         88
PD;AVS_M5s             50
Az-Iv_FdI;FI           10
PD_M5s                  9
Az-Iv_L;FdI             9
PD_L;FdI                8
PD_FdI;FI               7
PD;AVS_FdI;FI           6
PD;AVS_L;FdI            5
FdI;M5s_PD;Az-Iv        4
NM;FI;FdI;PD;L_M5s      1
PD;AVS_FdI              1
PD;AVS_NM;FdI           1
Az-Iv_M5s               1
Name: single_party, dtype: int64

In [16]:
comm_ba_swingers["single_party"].value_counts()

L;Az-Iv_PD;AVS      85
M5s_PD;AVS          81
M5s_FdI;Az-Iv       81
FdI_PD;AVS          76
PD;AVS_FdI;Az-Iv    52
PD;AVS_M5s          52
FdI_PD;Az-Iv        18
L;FdI_PD;Az-Iv      16
FdI_M5s             14
M5s_L;FdI           13
L;FdI_PD;AVS         8
M5s_FdI;FI           4
PD;AVS_L;FdI         2
PD;AVS_NM;FdI        2
L;Az-Iv_M5s          2
M5s_PD;Az-Iv         1
M5s_NM;FdI           1
PD;AVS_FdI;FI        1
FI_PD;Az-Iv          1
Name: single_party, dtype: int64

## Read vulnerability of swingers to propaganda techniques

In [48]:
with open ('../../../data/07_propaganda_vulnerability/propaganda_swingers.json') as f:
    vulnerability_swingers = json.load(f)

## Match the two info

In [49]:
start_before = '2022-07-01'
start_during = '2022-08-26'
start_after = '2022-09-25'
end_after = '2022-10-31'

### BD

In [20]:
# Create a dictionary where keys are users and values are a list of tweets where they belong to "BD"
user_bd_tweets_before = {}
user_bd_tweets_during = {}
for tweet, instances in vulnerability_swingers.items():
    for instance in instances:
        user, category, date_creation = instance
        if int(user) not in comm_bd_swingers["user"].values:
            continue
        if category == "BD" and date_creation < start_during:
            if user not in user_bd_tweets_before:
                user_bd_tweets_before[user] = []
            user_bd_tweets_before[user].append(tweet)
        elif category == "BD" and date_creation >= start_during and date_creation < start_after:
            if user not in user_bd_tweets_during:
                user_bd_tweets_during[user] = []
            user_bd_tweets_during[user].append(tweet)

In [50]:
# Now build tweets_bd using the precomputed user_bd_tweets dictionary
tweets_bd_before = []
tweets_bd_during = []
tot_vuln_bd_before = 0
tot_vuln_bd_during = 0
for i, row in comm_bd_swingers.iterrows():
    user = row['user']
    community_pre = row['party1']
    community_post = row['party2']
    
    # If the user has any BD tweets, add them to the result
    if str(user) in user_bd_tweets_before:
        for tweet in user_bd_tweets_before[str(user)]:
            tweets_bd_before.append([str(tweet), community_pre])
        tot_vuln_bd_before += 1

    if str(user) in user_bd_tweets_during:
        for tweet in user_bd_tweets_during[str(user)]:
            tweets_bd_during.append([str(tweet), community_post])
        tot_vuln_bd_during += 1

print(f"Perc. of users with BD tweets before: {(tot_vuln_bd_before/len(comm_bd_swingers))*100}%")
print(f"Perc. of users with BD tweets during: {(tot_vuln_bd_during/len(comm_bd_swingers))*100}%")
    


Perc. of users with BD tweets before: 13.368055555555555%
Perc. of users with BD tweets during: 11.805555555555555%


### DA

In [51]:
user_da_tweets_during = {}
user_da_tweets_after = {}
for tweet, instances in vulnerability_swingers.items():
    for instance in instances:
        user, category, date_creation = instance
        if int(user) not in comm_da_swingers["user"].values:
            continue
        if category == "DA" and date_creation >= start_during and date_creation < start_after:
            if user not in user_da_tweets_during:
                user_da_tweets_during[user] = []
            user_da_tweets_during[user].append(tweet)
        elif category == "DA" and date_creation >= start_after:
            if user not in user_da_tweets_after:
                user_da_tweets_after[user] = []
            user_da_tweets_after[user].append(tweet)

In [52]:
tweets_da_during = []
tweets_da_after = []
tot_vuln_da_during = 0
tot_vuln_da_after = 0
for i, row in comm_da_swingers.iterrows():
    user = row['user']
    community_pre = row['party1']
    community_post = row['party2']
    
    if str(user) in user_da_tweets_during:
        for tweet in user_da_tweets_during[str(user)]:
            tweets_da_during.append([str(tweet), community_pre])
        tot_vuln_da_during += 1
    if str(user) in user_da_tweets_after:
        for tweet in user_da_tweets_after[str(user)]:
            tweets_da_after.append([str(tweet), community_post])
        tot_vuln_da_after += 1


print(f"Perc. of users with DA tweets before: {(tot_vuln_da_during/len(comm_da_swingers))*100}%")
print(f"Perc. of users with DA tweets during: {(tot_vuln_da_after/len(comm_da_swingers))*100}%")
    

Perc. of users with DA tweets before: 27.652733118971064%
Perc. of users with DA tweets during: 28.938906752411576%


### BA

In [53]:
user_ba_tweets_before = {}
user_ba_tweets_after = {}
for tweet, instances in vulnerability_swingers.items():
    for instance in instances:
        user, category, date_creation = instance
        if int(user) not in comm_ba_swingers["user"].values:
            continue
        if category == "BA" and date_creation < start_during:
            if user not in user_ba_tweets_before:
                user_ba_tweets_before[user] = []
            user_ba_tweets_before[user].append(tweet)
        elif category == "BA" and date_creation >= start_after:
            if user not in user_ba_tweets_after:
                user_ba_tweets_after[user] = []
            user_ba_tweets_after[user].append(tweet)

In [54]:
tweets_ba_before = []
tweets_ba_after = []
tot_vuln_ba_before = 0
tot_vuln_ba_after = 0
for i, row in comm_ba_swingers.iterrows():
    user = row['user']
    community_pre = row['party1']
    community_post = row['party2']
    
    if str(user) in user_ba_tweets_before:
        for tweet in user_ba_tweets_before[str(user)]:
            tweets_ba_before.append([str(tweet), community_pre])
        tot_vuln_ba_before += 1
    if str(user) in user_ba_tweets_after:
        for tweet in user_ba_tweets_after[str(user)]:
            tweets_ba_after.append([str(tweet), community_post])
        tot_vuln_ba_after += 1

print(f"Perc. of users with BA tweets before: {(tot_vuln_ba_before/len(comm_ba_swingers))*100}%")
print(f"Perc. of users with BA tweets after: {(tot_vuln_ba_after/len(comm_ba_swingers))*100}%")

Perc. of users with BA tweets before: 60.19607843137255%
Perc. of users with BA tweets after: 45.294117647058826%


## For each tweet, extract info on propaganda techniques

In [29]:
tweet_bd_v2_before = []
for tweet in tweets_bd_before:
    tweet_id = tweet[0]
    community = tweet[1]
    if "annotations" in prop_politicians[tweet_id]:
        techniques = [technique["label"] for technique in prop_politicians[tweet_id]["annotations"]]
        tweet_bd_v2_before.append([tweet_id, community, techniques])

tweet_bd_v2_during = []
for tweet in tweets_bd_during:
    tweet_id = tweet[0]
    community = tweet[1]
    if "annotations" in prop_politicians[tweet_id]:
        techniques = [technique["label"] for technique in prop_politicians[tweet_id]["annotations"]]
        tweet_bd_v2_during.append([tweet_id, community, techniques])

In [30]:
tweet_da_v2_during = []
for tweet in tweets_da_during:
    tweet_id = tweet[0]
    community = tweet[1]
    if "annotations" in prop_politicians[tweet_id]:
        techniques = [technique["label"] for technique in prop_politicians[tweet_id]["annotations"]]
        tweet_da_v2_during.append([tweet_id, community, techniques])

tweet_da_v2_after = []
for tweet in tweets_da_after:
    tweet_id = tweet[0]
    community = tweet[1]
    if "annotations" in prop_politicians[tweet_id]:
        techniques = [technique["label"] for technique in prop_politicians[tweet_id]["annotations"]]
        tweet_da_v2_after.append([tweet_id, community, techniques])

In [31]:
tweet_ba_v2_before = []
for tweet in tweets_ba_before:
    tweet_id = tweet[0]
    community = tweet[1]
    if "annotations" in prop_politicians[tweet_id]:
        techniques = [technique["label"] for technique in prop_politicians[tweet_id]["annotations"]]
        tweet_ba_v2_before.append([tweet_id, community, techniques])

tweet_ba_v2_after = []
for tweet in tweets_ba_after:
    tweet_id = tweet[0]
    community = tweet[1]
    if "annotations" in prop_politicians[tweet_id]:
        techniques = [technique["label"] for technique in prop_politicians[tweet_id]["annotations"]]
        tweet_ba_v2_after.append([tweet_id, community, techniques])

## Ranking

In [32]:
# rank in the format of overall_ranking
ranking_bd_pre = []
for comm in comm_bd_swingers['party1'].unique():
    tweets_comm = [tweet for tweet in tweet_bd_v2_before if tweet[1] == comm]
    if len(tweets_comm) == 0:
        continue
    techniques = []
    for tweet in tweets_comm:
        techniques.extend(tweet[2])
    techniques = pd.Series(techniques)
    techniques = techniques.value_counts()
    techniques = techniques / techniques.sum()
    ranking_bd_pre.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})

ranking_bd_post = []
for comm in comm_bd_swingers['party2'].unique():
    tweets_comm = [tweet for tweet in tweet_bd_v2_during if tweet[1] == comm]
    if len(tweets_comm) == 0:
        continue
    techniques = []
    for tweet in tweets_comm:
        techniques.extend(tweet[2])
    techniques = pd.Series(techniques)
    techniques = techniques.value_counts()
    techniques = techniques / techniques.sum()
    ranking_bd_post.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})

In [33]:
ranking_da_pre = []
for comm in comm_da_swingers['party1'].unique():
    tweets_comm = [tweet for tweet in tweet_da_v2_during if tweet[1] == comm]
    if len(tweets_comm) == 0:
        continue
    techniques = []
    for tweet in tweets_comm:
        techniques.extend(tweet[2])
    techniques = pd.Series(techniques)
    techniques = techniques.value_counts()
    techniques = techniques / techniques.sum()
    ranking_da_pre.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})

ranking_da_post = []
for comm in comm_da_swingers['party2'].unique():
    tweets_comm = [tweet for tweet in tweet_da_v2_after if tweet[1] == comm]
    if len(tweets_comm) == 0:
        continue
    techniques = []
    for tweet in tweets_comm:
        techniques.extend(tweet[2])
    techniques = pd.Series(techniques)
    techniques = techniques.value_counts()
    techniques = techniques / techniques.sum()
    ranking_da_post.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})

In [34]:
ranking_ba_pre = []
for comm in comm_ba_swingers['party1'].unique():
    tweets_comm = [tweet for tweet in tweet_ba_v2_before if tweet[1] == comm]
    if len(tweets_comm) == 0:
        continue
    techniques = []
    for tweet in tweets_comm:
        techniques.extend(tweet[2])
    techniques = pd.Series(techniques)
    techniques = techniques.value_counts()
    techniques = techniques / techniques.sum()
    ranking_ba_pre.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})

ranking_ba_post = []
for comm in comm_ba_swingers['party2'].unique():
    tweets_comm = [tweet for tweet in tweet_ba_v2_after if tweet[1] == comm]
    if len(tweets_comm) == 0:
        continue
    techniques = []
    for tweet in tweets_comm:
        techniques.extend(tweet[2])
    techniques = pd.Series(techniques)
    techniques = techniques.value_counts()
    techniques = techniques / techniques.sum()
    ranking_ba_post.append({'community': comm, 'techniques': techniques.index.tolist(), 'frequencies': techniques.values.tolist()})

# Compare rankings

## Before-during swing voters

Before community

In [35]:
all_ranks = overall_ranking[0]['ranking'] # before
overal_avg_perc = 0
iter = 0
for community in all_ranks:
    for rank in ranking_bd_pre:
        if community['community'] == rank['community']:
            # re-order community['techniques'] based on community['frequencies']
            community['techniques'] = [x for _, x in sorted(zip(community['frequencies'], community['techniques']), reverse=True)]
            # re-order rank['techniques'] based on rank['frequencies']
            rank['techniques'] = [x for _, x in sorted(zip(rank['frequencies'], rank['techniques']), reverse=True)]
            
            # print top-10 techniques 
            print(f"Community {community['community']}")
            print(f"Top-10 community: {community['techniques'][:10]}")
            print(f"Top-10 swingers: {rank['techniques'][:10]}")
            # how many techniques in common (% overlap)
            common = set(community['techniques'][:10]).intersection(set(rank['techniques'][:10]))
            tot_techniques = len(set(community['techniques'][:10]))
            print(f"Common %: {len(common)/tot_techniques * 100}")
            overal_avg_perc += len(common)/tot_techniques * 100
            iter += 1
            print()

print(f"Average overlap: {overal_avg_perc/iter}")


Community FdI
Top-10 community: ['Doubt', 'Conversation_Killer', 'Name_Calling-Labeling', 'Loaded_Language', 'Questioning_the_Reputation', 'Slogans', 'Appeal_to_Values', 'Guilt_by_Association', 'False_Dilemma-No_Choice', 'Appeal_to_Fear-Prejudice']
Top-10 swingers: ['Doubt', 'Loaded_Language', 'Questioning_the_Reputation', 'Conversation_Killer', 'Appeal_to_Values', 'Name_Calling-Labeling', 'Slogans', 'Flag_Waving', 'Appeal_to_Hypocrisy', 'Appeal_to_Time']
Common %: 70.0

Community M5s
Top-10 community: ['Doubt', 'Appeal_to_Values', 'Loaded_Language', 'Slogans', 'Questioning_the_Reputation', 'Conversation_Killer', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy', 'Appeal_to_Time', 'Flag_Waving']
Top-10 swingers: ['Doubt', 'Conversation_Killer', 'Appeal_to_Values', 'Questioning_the_Reputation', 'Loaded_Language', 'Slogans', 'Name_Calling-Labeling', 'Straw_Man', 'False_Dilemma-No_Choice', 'Appeal_to_Hypocrisy']
Common %: 80.0

Community PD;AVS
Top-10 community: ['Doubt', 'Appeal_to_Values',

During community

In [36]:
all_ranks = overall_ranking[1]['ranking'] # during
overal_avg_perc = 0
iter = 0
for community in all_ranks:
    for rank in ranking_bd_post:
        if community['community'] == rank['community']:
            # re-order community['techniques'] based on community['frequencies']
            community['techniques'] = [x for _, x in sorted(zip(community['frequencies'], community['techniques']), reverse=True)]
            # re-order rank['techniques'] based on rank['frequencies']
            rank['techniques'] = [x for _, x in sorted(zip(rank['frequencies'], rank['techniques']), reverse=True)]
            
            # print top-10 techniques 
            print(f"Community {community['community']}")
            print(f"Top-10 community: {community['techniques'][:10]}")
            print(f"Top-10 swingers: {rank['techniques'][:10]}")
            # how many techniques in common (% overlap)
            common = set(community['techniques'][:10]).intersection(set(rank['techniques'][:10]))
            tot_techniques = len(set(community['techniques'][:10]))
            print(f"Common %: {len(common)/tot_techniques * 100}")
            overal_avg_perc += len(common)/tot_techniques * 100
            iter += 1
            print()

print(f"Average overlap: {overal_avg_perc/iter}")



Community FdI;M5s
Top-10 community: ['Doubt', 'Slogans', 'Appeal_to_Values', 'Loaded_Language', 'Conversation_Killer', 'Questioning_the_Reputation', 'Appeal_to_Fear-Prejudice', 'False_Dilemma-No_Choice', 'Appeal_to_Hypocrisy', 'Flag_Waving']
Top-10 swingers: ['Doubt', 'Appeal_to_Values', 'Questioning_the_Reputation', 'Name_Calling-Labeling', 'Conversation_Killer', 'Loaded_Language', 'Guilt_by_Association', 'Flag_Waving', 'Exaggeration-Minimisation', 'Appeal_to_Fear-Prejudice']
Common %: 70.0

Community PD
Top-10 community: ['Doubt', 'Questioning_the_Reputation', 'Loaded_Language', 'Conversation_Killer', 'Slogans', 'Name_Calling-Labeling', 'Flag_Waving', 'Appeal_to_Values', 'Appeal_to_Hypocrisy']
Top-10 swingers: ['Doubt', 'Loaded_Language', 'Conversation_Killer', 'Slogans', 'Questioning_the_Reputation', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy']
Common %: 77.77777777777779

Community NM;FI;FdI;PD;L
Top-10 community: ['Doubt', 'Slogans', 'Loaded_Language', 'Appeal_to_Values', 'Flag

## Before-after swing voters

Before community

In [37]:
all_ranks = overall_ranking[0]['ranking'] # before
overal_avg_perc = 0
iter = 0
for community in all_ranks:
    for rank in ranking_ba_pre:
        if community['community'] == rank['community']:
            # re-order community['techniques'] based on community['frequencies']
            community['techniques'] = [x for _, x in sorted(zip(community['frequencies'], community['techniques']), reverse=True)]
            # re-order rank['techniques'] based on rank['frequencies']
            rank['techniques'] = [x for _, x in sorted(zip(rank['frequencies'], rank['techniques']), reverse=True)]
            
            # print top-10 techniques 
            print(f"Community {community['community']}")
            print(f"Top-10 community: {community['techniques'][:10]}")
            print(f"Top-10 swingers: {rank['techniques'][:10]}")
            # how many techniques in common (% overlap)
            common = set(community['techniques'][:10]).intersection(set(rank['techniques'][:10]))
            tot_techniques = len(set(community['techniques'][:10]))
            print(f"Common %: {len(common)/tot_techniques * 100}")
            overal_avg_perc += len(common)/tot_techniques * 100
            iter += 1
            print()

print(f"Average overlap: {overal_avg_perc/iter}")


Community FdI
Top-10 community: ['Doubt', 'Conversation_Killer', 'Name_Calling-Labeling', 'Loaded_Language', 'Questioning_the_Reputation', 'Slogans', 'Appeal_to_Values', 'Guilt_by_Association', 'False_Dilemma-No_Choice', 'Appeal_to_Fear-Prejudice']
Top-10 swingers: ['Doubt', 'Questioning_the_Reputation', 'Loaded_Language', 'Conversation_Killer', 'Appeal_to_Values', 'Slogans', 'Flag_Waving', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy', 'False_Dilemma-No_Choice']
Common %: 80.0

Community M5s
Top-10 community: ['Doubt', 'Appeal_to_Values', 'Loaded_Language', 'Slogans', 'Questioning_the_Reputation', 'Conversation_Killer', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy', 'Appeal_to_Time', 'Flag_Waving']
Top-10 swingers: ['Doubt', 'Questioning_the_Reputation', 'Loaded_Language', 'Conversation_Killer', 'Appeal_to_Values', 'Name_Calling-Labeling', 'Slogans', 'Flag_Waving', 'Appeal_to_Hypocrisy', 'Appeal_to_Fear-Prejudice']
Common %: 90.0

Community PD;AVS
Top-10 community: ['Doubt', 'Appeal

After community

In [38]:
all_ranks = overall_ranking[2]['ranking'] # after
overal_avg_perc = 0
iter = 0
for community in all_ranks:
    for rank in ranking_ba_post:
        if community['community'] == rank['community']:
            # re-order community['techniques'] based on community['frequencies']
            community['techniques'] = [x for _, x in sorted(zip(community['frequencies'], community['techniques']), reverse=True)]
            # re-order rank['techniques'] based on rank['frequencies']
            rank['techniques'] = [x for _, x in sorted(zip(rank['frequencies'], rank['techniques']), reverse=True)]
            
            # print top-10 techniques 
            print(f"Community {community['community']}")
            print(f"Top-10 community: {community['techniques'][:10]}")
            print(f"Top-10 swingers: {rank['techniques'][:10]}")
            # how many techniques in common (% overlap)
            common = set(community['techniques'][:10]).intersection(set(rank['techniques'][:10]))
            tot_techniques = len(set(community['techniques'][:10]))
            print(f"Common %: {len(common)/tot_techniques * 100}")
            overal_avg_perc += len(common)/tot_techniques * 100
            iter += 1
            print()

print(f"Average overlap: {overal_avg_perc/iter}")


Community FdI;Az-Iv
Top-10 community: ['Doubt', 'Loaded_Language', 'Slogans', 'Conversation_Killer', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy', 'Appeal_to_Fear-Prejudice', 'Straw_Man', 'Questioning_the_Reputation', 'False_Dilemma-No_Choice']
Top-10 swingers: ['Doubt', 'Slogans', 'Appeal_to_Values', 'Questioning_the_Reputation', 'Loaded_Language', 'Name_Calling-Labeling', 'Conversation_Killer', 'Flag_Waving', 'False_Dilemma-No_Choice', 'Appeal_to_Fear-Prejudice']
Common %: 80.0

Community PD;AVS
Top-10 community: ['Doubt', 'Appeal_to_Values', 'Loaded_Language', 'Slogans', 'Questioning_the_Reputation', 'Name_Calling-Labeling', 'Conversation_Killer', 'Appeal_to_Hypocrisy', 'Flag_Waving', 'Appeal_to_Fear-Prejudice']
Top-10 swingers: ['Doubt', 'Slogans', 'Loaded_Language', 'Appeal_to_Values', 'Questioning_the_Reputation', 'Name_Calling-Labeling', 'Conversation_Killer', 'Flag_Waving', 'False_Dilemma-No_Choice', 'Appeal_to_Fear-Prejudice']
Common %: 90.0

Community M5s
Top-10 community: 

## During-after swing voters

During community

In [32]:
all_ranks = overall_ranking[1]['ranking'] # during
overal_avg_perc = 0
iter = 0
for community in all_ranks:
    for rank in ranking_da_pre:
        if community['community'] == rank['community']:
            # re-order community['techniques'] based on community['frequencies']
            community['techniques'] = [x for _, x in sorted(zip(community['frequencies'], community['techniques']), reverse=True)]
            # re-order rank['techniques'] based on rank['frequencies']
            rank['techniques'] = [x for _, x in sorted(zip(rank['frequencies'], rank['techniques']), reverse=True)]
            
            # print top-10 techniques 
            print(f"Community {community['community']}")
            print(f"Top-10 community: {community['techniques'][:10]}")
            print(f"Top-10 swingers: {rank['techniques'][:10]}")
            # how many techniques in common (% overlap)
            common = set(community['techniques'][:10]).intersection(set(rank['techniques'][:10]))
            tot_techniques = len(set(community['techniques'][:10]))
            print(f"Common %: {len(common)/tot_techniques * 100}")
            overal_avg_perc += len(common)/tot_techniques * 100
            iter += 1
            print()

print(f"Average overlap: {overal_avg_perc/iter}")


Community FdI;M5s
Top-10 community: ['Doubt', 'Slogans', 'Appeal_to_Values', 'Loaded_Language', 'Conversation_Killer', 'Questioning_the_Reputation', 'Appeal_to_Fear-Prejudice', 'False_Dilemma-No_Choice', 'Appeal_to_Hypocrisy', 'Flag_Waving']
Top-10 swingers: ['Doubt', 'Appeal_to_Values', 'Loaded_Language', 'Slogans', 'Appeal_to_Fear-Prejudice', 'Questioning_the_Reputation', 'Conversation_Killer', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy', 'Straw_Man']
Common %: 80.0

Community PD
Top-10 community: ['Doubt', 'Questioning_the_Reputation', 'Loaded_Language', 'Conversation_Killer', 'Slogans', 'Name_Calling-Labeling', 'Flag_Waving', 'Appeal_to_Values', 'Appeal_to_Hypocrisy']
Top-10 swingers: ['Doubt', 'Questioning_the_Reputation', 'Loaded_Language', 'Slogans', 'Appeal_to_Hypocrisy', 'Appeal_to_Values', 'Name_Calling-Labeling', 'Conversation_Killer', 'Flag_Waving', 'False_Dilemma-No_Choice']
Common %: 100.0

Community PD;AVS
Top-10 community: ['Doubt', 'Appeal_to_Values', 'Loaded_Langua

After community

In [39]:
all_ranks = overall_ranking[2]['ranking'] # after
overal_avg_perc = 0
iter = 0
for community in all_ranks:
    for rank in ranking_da_post:
        if community['community'] == rank['community']:
            # re-order community['techniques'] based on community['frequencies']
            community['techniques'] = [x for _, x in sorted(zip(community['frequencies'], community['techniques']), reverse=True)]
            # re-order rank['techniques'] based on rank['frequencies']
            rank['techniques'] = [x for _, x in sorted(zip(rank['frequencies'], rank['techniques']), reverse=True)]
            
            # print top-10 techniques 
            print(f"Community {community['community']}")
            print(f"Top-10 community: {community['techniques'][:10]}")
            print(f"Top-10 swingers: {rank['techniques'][:10]}")
            # how many techniques in common
            common = set(community['techniques'][:10]).intersection(set(rank['techniques'][:10]))
            tot_techniques = len(set(community['techniques'][:10]))
            if tot_techniques == 0:
                print("No techniques in community")
            else:
                print(f"Common %: {len(common)/tot_techniques * 100}")
                overal_avg_perc += len(common)/tot_techniques * 100
            iter += 1
            print()

print(f"Average overlap: {overal_avg_perc/iter}")


Community FdI;Az-Iv
Top-10 community: ['Doubt', 'Loaded_Language', 'Slogans', 'Conversation_Killer', 'Name_Calling-Labeling', 'Appeal_to_Hypocrisy', 'Appeal_to_Fear-Prejudice', 'Straw_Man', 'Questioning_the_Reputation', 'False_Dilemma-No_Choice']
Top-10 swingers: ['Doubt', 'Appeal_to_Values', 'Slogans', 'Questioning_the_Reputation', 'Loaded_Language', 'Conversation_Killer', 'Name_Calling-Labeling', 'Flag_Waving', 'Exaggeration-Minimisation', 'Causal_Oversimplification']
Common %: 60.0

Community PD;AVS
Top-10 community: ['Doubt', 'Appeal_to_Values', 'Loaded_Language', 'Slogans', 'Questioning_the_Reputation', 'Name_Calling-Labeling', 'Conversation_Killer', 'Appeal_to_Hypocrisy', 'Flag_Waving', 'Appeal_to_Fear-Prejudice']
Top-10 swingers: ['Doubt', 'Conversation_Killer', 'Appeal_to_Values', 'Questioning_the_Reputation', 'Loaded_Language', 'Slogans', 'Name_Calling-Labeling', 'Flag_Waving', 'False_Dilemma-No_Choice', 'Guilt_by_Association']
Common %: 80.0

Community M5s
Top-10 community: [

# Rank correlation

In [40]:
from scipy.stats import kendalltau
import numpy as np

Fix order of techniques

In [41]:
all_techniques = []
for tweet in prop_politicians:
    if "annotations" in prop_politicians[tweet]:
        for technique in prop_politicians[tweet]['annotations']:
            all_techniques.append(technique["label"])

all_techniques = list(set(all_techniques))

## Before-during

Before community

In [42]:
all_ranks = overall_ranking[0]['ranking'] # before
overall_avg_tau = 0
iter = 0
for community in all_ranks:
    for rank in ranking_bd_pre:
        if community['community'] == rank['community']:

            df1 = pd.DataFrame(community)
            df2 = pd.DataFrame(rank)

            merged_df = pd.merge(df1, df2, on='techniques', how='outer', suffixes=('_file1', '_file2'))

            for tech in all_techniques:
                if tech not in merged_df['techniques'].values:
                    # add row
                    merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)

            # fill NaNs on frequencies
            merged_df['frequencies_file1'] = merged_df['frequencies_file1'].fillna(0)
            merged_df['frequencies_file2'] = merged_df['frequencies_file2'].fillna(0)

            # if all techniques are 0, skip community
            if merged_df['frequencies_file1'].sum() == 0 or merged_df['frequencies_file2'].sum() == 0:
                continue

            # Rank based on frequencies
            merged_df['rank_file1'] = merged_df['frequencies_file1'].rank(ascending=False)
            merged_df['rank_file2'] = merged_df['frequencies_file2'].rank(ascending=False)

            # Calculate correlation
            kendalltau_corr = kendalltau(merged_df['rank_file1'], merged_df['rank_file2'])

            overall_avg_tau += kendalltau_corr.correlation
            iter += 1
            
print(f"Average Kendall: {overall_avg_tau/iter}")


Average Kendall: 0.7320780401513199


  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 

During community

In [43]:
all_ranks = overall_ranking[1]['ranking'] # during
overall_avg_tau = 0
iter = 0
for community in all_ranks:
    for rank in ranking_bd_post:
        if community['community'] == rank['community']:

            df1 = pd.DataFrame(community)
            df2 = pd.DataFrame(rank)

            merged_df = pd.merge(df1, df2, on='techniques', how='outer', suffixes=('_file1', '_file2'))

            for tech in all_techniques:
                if tech not in merged_df['techniques'].values:
                    # add row
                    merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)

            # fill NaNs on frequencies
            merged_df['frequencies_file1'] = merged_df['frequencies_file1'].fillna(0)
            merged_df['frequencies_file2'] = merged_df['frequencies_file2'].fillna(0)

            # Rank based on frequencies
            merged_df['rank_file1'] = merged_df['frequencies_file1'].rank(ascending=False)
            merged_df['rank_file2'] = merged_df['frequencies_file2'].rank(ascending=False)

            # Calculate correlation
            kendalltau_corr = kendalltau(merged_df['rank_file1'], merged_df['rank_file2'])

            overall_avg_tau += kendalltau_corr.correlation
            iter += 1

print(f"Average Kendall: {overall_avg_tau/iter}")


Average Kendall: 0.7440664499495303


  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 

## During-after

During community

In [44]:
all_ranks = overall_ranking[1]['ranking'] # during
overall_avg_tau = 0
iter = 0
for community in all_ranks:
    for rank in ranking_da_pre:
        if community['community'] == rank['community']:

            df1 = pd.DataFrame(community)
            df2 = pd.DataFrame(rank)

            merged_df = pd.merge(df1, df2, on='techniques', how='outer', suffixes=('_file1', '_file2'))

            for tech in all_techniques:
                if tech not in merged_df['techniques'].values:
                    # add row
                    merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)

            # fill NaNs on frequencies
            merged_df['frequencies_file1'] = merged_df['frequencies_file1'].fillna(0)
            merged_df['frequencies_file2'] = merged_df['frequencies_file2'].fillna(0)

            # if all techniques are 0, skip community
            if merged_df['frequencies_file1'].sum() == 0 or merged_df['frequencies_file2'].sum() == 0:
                continue

            # Rank based on frequencies
            merged_df['rank_file1'] = merged_df['frequencies_file1'].rank(ascending=False)
            merged_df['rank_file2'] = merged_df['frequencies_file2'].rank(ascending=False)

            # Calculate correlation
            kendalltau_corr = kendalltau(merged_df['rank_file1'], merged_df['rank_file2'])

            overall_avg_tau += kendalltau_corr.correlation
            iter += 1

print(f"Average Kendall: {overall_avg_tau/iter}")


Average Kendall: 0.7945433263751986


  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 

After community

In [45]:
all_ranks = overall_ranking[2]['ranking'] # after
overall_avg_tau = 0
iter = 0
for community in all_ranks:
    for rank in ranking_da_post:
        if community['community'] == rank['community']:

            df1 = pd.DataFrame(community)
            df2 = pd.DataFrame(rank)

            merged_df = pd.merge(df1, df2, on='techniques', how='outer', suffixes=('_file1', '_file2'))

            for tech in all_techniques:
                if tech not in merged_df['techniques'].values:
                    # add row
                    merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)

            # fill NaNs on frequencies
            merged_df['frequencies_file1'] = merged_df['frequencies_file1'].fillna(0)
            merged_df['frequencies_file2'] = merged_df['frequencies_file2'].fillna(0)

            # if all techniques are 0, skip community
            if merged_df['frequencies_file1'].sum() == 0 or merged_df['frequencies_file2'].sum() == 0:
                continue

            # Rank based on frequencies
            merged_df['rank_file1'] = merged_df['frequencies_file1'].rank(ascending=False)
            merged_df['rank_file2'] = merged_df['frequencies_file2'].rank(ascending=False)

            # Calculate correlation
            kendalltau_corr = kendalltau(merged_df['rank_file1'], merged_df['rank_file2'])

            overall_avg_tau += kendalltau_corr.correlation
            iter += 1

print(f"Average Kendall: {overall_avg_tau/iter}")


  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 

Average Kendall: 0.6742469434670478


## Before-after

Before community

In [46]:
all_ranks = overall_ranking[0]['ranking'] # before
overall_avg_tau = 0
iter = 0
for community in all_ranks:
    for rank in ranking_ba_pre:
        if community['community'] == rank['community']:

            df1 = pd.DataFrame(community)
            df2 = pd.DataFrame(rank)

            merged_df = pd.merge(df1, df2, on='techniques', how='outer', suffixes=('_file1', '_file2'))

            for tech in all_techniques:
                if tech not in merged_df['techniques'].values:
                    # add row
                    merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)

            # fill NaNs on frequencies
            merged_df['frequencies_file1'] = merged_df['frequencies_file1'].fillna(0)
            merged_df['frequencies_file2'] = merged_df['frequencies_file2'].fillna(0)

            # if all techniques are 0, skip community
            if merged_df['frequencies_file1'].sum() == 0 or merged_df['frequencies_file2'].sum() == 0:
                continue

            # Rank based on frequencies
            merged_df['rank_file1'] = merged_df['frequencies_file1'].rank(ascending=False)
            merged_df['rank_file2'] = merged_df['frequencies_file2'].rank(ascending=False)

            # Calculate correlation
            kendalltau_corr = kendalltau(merged_df['rank_file1'], merged_df['rank_file2'])

            overall_avg_tau += kendalltau_corr.correlation
            iter += 1

print(f"Average Kendall: {overall_avg_tau/iter}")


Average Kendall: 0.7830923864433923


  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)


After community

In [47]:
all_ranks = overall_ranking[2]['ranking'] # after
overall_avg_tau = 0
iter = 0
for community in all_ranks:
    for rank in ranking_ba_post:
        if community['community'] == rank['community']:

            df1 = pd.DataFrame(community)
            df2 = pd.DataFrame(rank)

            merged_df = pd.merge(df1, df2, on='techniques', how='outer', suffixes=('_file1', '_file2'))

            for tech in all_techniques:
                if tech not in merged_df['techniques'].values:
                    # add row
                    merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)

            # fill NaNs on frequencies
            merged_df['frequencies_file1'] = merged_df['frequencies_file1'].fillna(0)
            merged_df['frequencies_file2'] = merged_df['frequencies_file2'].fillna(0)

            # if all techniques are 0, skip community
            if merged_df['frequencies_file1'].sum() == 0 or merged_df['frequencies_file2'].sum() == 0:
                continue

            # Rank based on frequencies
            merged_df['rank_file1'] = merged_df['frequencies_file1'].rank(ascending=False)
            merged_df['rank_file2'] = merged_df['frequencies_file2'].rank(ascending=False)

            # Calculate correlation
            kendalltau_corr = kendalltau(merged_df['rank_file1'], merged_df['rank_file2'])

            overall_avg_tau += kendalltau_corr.correlation
            iter += 1

print(f"Average Kendall: {overall_avg_tau/iter}")


  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 'frequencies_file2': 0}, ignore_index=True)
  merged_df = merged_df.append({'community_file1': np.nan, 'techniques': tech, 'frequencies_file1': 0, 'community_file2': np.nan, 

Average Kendall: 0.6760176200554981
