In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import os

## Microscopic analysis of swingers

### Loading the data...

In [62]:
basepath = "../../../results/SNAM/communities/comms_composition_csv"

before_labeled_users = pd.read_csv(os.path.join(basepath, 'before_communities.csv'))
during_labeled_users = pd.read_csv(os.path.join(basepath, 'during_communities.csv'))
after_labeled_users = pd.read_csv(os.path.join(basepath, 'after_communities.csv'))
labeled_representatives = pd.read_csv(os.path.join(basepath, 'representatives_communities.csv'))

In [3]:
before_labeled_users.head()

Unnamed: 0,user_id,community_id,comm_label,most_common_party,all_parties
0,1084450777748459520,0,FdI,Fratelli d'Italia,Fratelli d'Italia
1,1524854601295577088,0,FdI,Fratelli d'Italia,Fratelli d'Italia
2,1401427332657713152,0,FdI,Fratelli d'Italia,Fratelli d'Italia
3,1224753522,0,FdI,Fratelli d'Italia,Fratelli d'Italia
4,823182867773669376,0,FdI,Fratelli d'Italia,Fratelli d'Italia


In [63]:
labeled_representatives.head()

Unnamed: 0,user_id,period,comm_id,comm_label
0,425752285,before,4,PD;AVS
1,425752285,during,3,PD;AVS
2,425752285,after,6,PD;AVS
3,1135141640,before,4,PD;AVS
4,1135141640,during,3,PD;AVS


In [64]:
# filter labeled_representatived based on period
before_representatives_df = labeled_representatives[labeled_representatives['period'] == 'before'].reset_index(drop=True)
during_representatives_df = labeled_representatives[labeled_representatives['period'] == 'during'].reset_index(drop=True)
after_representatives_df = labeled_representatives[labeled_representatives['period'] == 'after'].reset_index(drop=True)

### How many users stay active across periods?

##### All users

In [5]:
before_users = set(before_labeled_users['user_id'].values)
during_users = set(during_labeled_users['user_id'].values)
after_users = set(after_labeled_users['user_id'].values)

len(before_users), len(during_users), len(after_users)

(11844, 16992, 14874)

In [6]:
# get how many users remain active and how many users become inactive (either leave the platform or lurk)
active_bd = before_users.intersection(during_users)
inactive_bd = before_users.difference(during_users)
print('active before-during: ', len(active_bd), ' inactive before-during: ', len(inactive_bd))

active_da = during_users.intersection(after_users)
inactive_da = during_users.difference(after_users)
print('active during-after: ', len(active_da), ' inactive during-after: ', len(inactive_da))

active_ba = before_users.intersection(after_users)
inactive_ba = before_users.difference(after_users)
print('active before-after: ', len(active_ba), ' inactive before-after: ', len(inactive_ba))

active before-during:  7564  inactive before-during:  4280
active during-after:  9660  inactive during-after:  7332
active before-after:  6108  inactive before-after:  5736


##### Representatives

In [57]:
before_representatives = set(before_representatives_df['id'].values)
during_representatives = set(during_representatives_df['id'].values)
after_representatives = set(after_representatives_df['id'].values)

len(before_representatives), len(during_representatives), len(after_representatives)

(98, 142, 90)

In [78]:
# get how many users remain active and how many users become inactive (either leave the platform or lurk)
active_repr_bd = before_representatives.intersection(during_representatives)
inactive_repr_bd = before_representatives.difference(during_representatives)
print('active before-during: ', len(active_repr_bd), ' inactive before-during: ', len(inactive_repr_bd))

active_repr_da = during_representatives.intersection(after_representatives)
inactive_repr_da = during_representatives.difference(after_representatives)
print('active during-after: ', len(active_repr_da), ' inactive during-after: ', len(inactive_repr_da))

active_repr_ba = before_users.intersection(after_users)
inactive_repr_ba = before_users.difference(after_users)
print('active before-after: ', len(active_repr_ba), ' inactive before-after: ', len(inactive_repr_ba))

active before-during:  84  inactive before-during:  14
active during-after:  82  inactive during-after:  60
active before-after:  6108  inactive before-after:  5736


### How many users change their party across periods?

#### All users

In [7]:
output_path = "../../../results/SNAM/swinger_detection/communities"

In [8]:
before_labeled_users.head()

Unnamed: 0,user_id,community_id,comm_label,most_common_party,all_parties
0,1084450777748459520,0,FdI,Fratelli d'Italia,Fratelli d'Italia
1,1524854601295577088,0,FdI,Fratelli d'Italia,Fratelli d'Italia
2,1401427332657713152,0,FdI,Fratelli d'Italia,Fratelli d'Italia
3,1224753522,0,FdI,Fratelli d'Italia,Fratelli d'Italia
4,823182867773669376,0,FdI,Fratelli d'Italia,Fratelli d'Italia


In [36]:
def check_party(df1, df2, users_to_analyze):
    n_same_party = 0
    n_diff_party = 0
    party2party = {}
    # {user_id: user, party1: party, party2: party}
    swingers = []

    for row in df1.iterrows():
        user1 = row[1]['user_id']
        comm_label1 = row[1]['comm_label']

        if user1 not in users_to_analyze:
            continue

        comm_label2 = df2[df2['user_id'] == user1]['comm_label'].values[0]

        parties1 = set(comm_label1.split(';')) if not pd.isnull(comm_label1) else set()
        parties2 = set(comm_label2.split(';')) if not pd.isnull(comm_label2) else set()

        if parties1 == parties2:
            n_same_party += 1
        else:
            n_diff_party += 1

            key = (comm_label1, comm_label2)
            party2party[key] = party2party.get(key, 0) + 1

            data = {'user': user1, 'party1': comm_label1, 'party2': comm_label2}
            swingers.append(data)

    return n_same_party, n_diff_party, party2party, swingers

##### Before campaign -> electoral campaign

In [37]:
n_same_party_bd, n_diff_party_bd, party_swingers_bd, swingers_bd = check_party(before_labeled_users, during_labeled_users, active_bd)
print('total active users across the first period', len(active_bd))
print('same party: ', n_same_party_bd, ' diff party: ', n_diff_party_bd)
print('same party: ', n_same_party_bd/len(active_bd), ' diff party: ', n_diff_party_bd/len(active_bd))

total active users across the first period 7564
same party:  2240  diff party:  5324
same party:  0.29613960867266  diff party:  0.70386039132734


In [41]:
len(swingers_bd)

5324

In [39]:
# from list of json objects to pandas dataframe
swingers_df = pd.DataFrame(swingers_bd)
swingers_df.head()

fout = f'{output_path}/swingers_bd.csv'
swingers_df.to_csv(fout, index=False)

In [42]:
# sort by value party_swingers_bd
sorted_party_swingers_bd = sorted(party_swingers_bd.items(), key=lambda kv: kv[1], reverse=True)
sorted_party_swingers_bd

[(('L;Az-Iv', 'Az-Iv'), 1808),
 (('M5s', 'FdI;M5s'), 1320),
 (('L;FdI', 'NM;FI;FdI;PD;L'), 914),
 (('L;FdI', nan), 191),
 ((nan, 'NM;FI;FdI;PD;L'), 161),
 (('FdI', 'PD'), 108),
 (('L;Az-Iv', 'PD;AVS'), 100),
 (('M5s', nan), 81),
 (('PD;AVS', 'Az-Iv'), 76),
 (('PD;AVS', 'FdI;M5s'), 64),
 (('M5s', 'PD;AVS'), 61),
 (('FI', 'NM;FI;FdI;PD;L'), 46),
 (('FdI', 'PD;AVS'), 39),
 (('PD;AVS', nan), 35),
 (('L;Az-Iv', nan), 35),
 (('FdI', nan), 34),
 (('M5s', 'PD'), 34),
 (('FdI', 'Az-Iv'), 32),
 ((nan, 'FdI;M5s'), 26),
 (('L;FdI', 'Az-Iv'), 20),
 (('FdI', 'NM;FI;FdI;PD;L'), 19),
 (('FdI', 'FdI;M5s'), 15),
 (('L;FdI', 'PD'), 15),
 (('PD;AVS', 'PD'), 13),
 (('L;Az-Iv', 'PD'), 13),
 ((nan, 'PD;AVS'), 12),
 (('L;Az-Iv', 'NM;FI;FdI;PD;L'), 9),
 ((nan, 'PD'), 9),
 (('M5s', 'NM;FI;FdI;PD;L'), 8),
 (('FI', nan), 6),
 (('PD;AVS', 'NM;FI;FdI;PD;L'), 5),
 (('L;Az-Iv', 'FdI;M5s'), 5),
 (('L;FdI', 'PD;AVS'), 4),
 (('L;FdI', 'FdI;M5s'), 2),
 ((nan, 'Az-Iv'), 2),
 (('FI', 'PD'), 1),
 (('FI', 'Az-Iv'), 1)]

##### Eletoral campaign -> After elections

In [75]:
n_same_party_da, n_diff_party_da, party_swingers_da, swingers_da = check_party(during_labeled_users, after_labeled_users, active_da)
print('total active users across the second period', len(active_da))
print('same party: ', n_same_party_da, ' diff party: ', n_diff_party_da)
print('same party: ', n_same_party_da/len(active_da), ' diff party: ', n_diff_party_da/len(active_da))

total active users across the second period 9660
same party:  1357  diff party:  8303
same party:  0.14047619047619048  diff party:  0.8595238095238096


In [76]:
swingers_df = pd.DataFrame(swingers_da)
swingers_df.head()

fout = f'{output_path}/swingers_da.csv'
swingers_df.to_csv(fout, index=False)

In [45]:
sorted_party_swingers_da = sorted(party_swingers_da.items(), key=lambda kv: kv[1], reverse=True)
sorted_party_swingers_da

[(('Az-Iv', 'PD;Az-Iv'), 1877),
 ((nan, 'L;FdI'), 1757),
 (('FdI;M5s', 'M5s'), 1551),
 (('NM;FI;FdI;PD;L', 'L;FdI'), 898),
 (('NM;FI;FdI;PD;L', 'FdI;FI'), 673),
 (('PD', 'FdI;Az-Iv'), 200),
 (('Az-Iv', 'PD;AVS'), 128),
 ((nan, 'FdI;Az-Iv'), 110),
 (('Az-Iv', 'FdI;Az-Iv'), 103),
 (('PD;AVS', 'PD;Az-Iv'), 96),
 (('PD;AVS', 'FdI;Az-Iv'), 94),
 (('FdI;M5s', 'FdI;Az-Iv'), 92),
 (('PD', 'PD;AVS'), 91),
 ((nan, 'M5s'), 90),
 (('FdI;M5s', 'PD;AVS'), 88),
 ((nan, 'PD;AVS'), 87),
 (('PD;AVS', 'M5s'), 50),
 ((nan, 'FdI;FI'), 46),
 (('NM;FI;FdI;PD;L', 'FdI;Az-Iv'), 42),
 (('FdI;M5s', 'L;FdI'), 24),
 (('NM;FI;FdI;PD;L', 'PD;AVS'), 24),
 (('PD;AVS', nan), 22),
 (('NM;FI;FdI;PD;L', 'PD;Az-Iv'), 20),
 ((nan, 'PD;Az-Iv'), 19),
 (('NM;FI;FdI;PD;L', 'NM;FdI'), 17),
 (('Az-Iv', 'FdI;FI'), 10),
 (('PD', 'M5s'), 9),
 (('Az-Iv', 'L;FdI'), 9),
 (('FdI;M5s', nan), 8),
 (('PD', 'L;FdI'), 8),
 (('PD', 'PD;Az-Iv'), 8),
 (('PD', nan), 8),
 (('PD', 'FdI;FI'), 7),
 (('NM;FI;FdI;PD;L', nan), 6),
 (('PD;AVS', 'FdI;FI'

##### Before campaign -> After elections

In [46]:
n_same_party_ba, n_diff_party_ba, party_swinger_ba, swingers_ba = check_party(before_labeled_users, after_labeled_users, active_ba)
print('same party: ', n_same_party_ba, ' diff party: ', n_diff_party_ba)

same party:  2409  diff party:  3699


In [47]:
swingers_df = pd.DataFrame(swingers_ba)
swingers_df.head()

fout = f'{output_path}/swingers_ba.csv'
swingers_df.to_csv(fout, index=False)

In [48]:
sorted_party_swingers_ba = sorted(party_swinger_ba.items(), key=lambda kv: kv[1], reverse=True)
sorted_party_swingers_ba

[(('L;Az-Iv', 'PD;Az-Iv'), 1313),
 ((nan, 'L;FdI'), 1046),
 (('L;FdI', 'FdI;FI'), 283),
 (('FdI', 'FdI;Az-Iv'), 122),
 (('L;Az-Iv', 'PD;AVS'), 85),
 (('M5s', 'FdI;Az-Iv'), 81),
 (('M5s', 'PD;AVS'), 81),
 (('FdI', 'PD;AVS'), 76),
 (('L;Az-Iv', 'FdI;Az-Iv'), 75),
 (('PD;AVS', 'PD;Az-Iv'), 55),
 (('PD;AVS', 'M5s'), 52),
 (('PD;AVS', 'FdI;Az-Iv'), 52),
 ((nan, 'FdI;Az-Iv'), 44),
 (('L;FdI', 'FdI;Az-Iv'), 37),
 (('FI', 'FdI;FI'), 33),
 ((nan, 'FdI;FI'), 30),
 ((nan, 'PD;AVS'), 24),
 ((nan, 'M5s'), 21),
 (('FdI', 'PD;Az-Iv'), 18),
 (('PD;AVS', nan), 17),
 (('L;FdI', 'PD;Az-Iv'), 16),
 (('FdI', 'FdI;FI'), 15),
 (('FdI', 'M5s'), 14),
 (('M5s', 'L;FdI'), 13),
 (('FdI', 'L;FdI'), 11),
 (('M5s', nan), 11),
 (('L;Az-Iv', 'FdI;FI'), 9),
 (('L;FdI', nan), 9),
 (('L;Az-Iv', 'L;FdI'), 8),
 (('L;FdI', 'PD;AVS'), 8),
 (('FdI', nan), 5),
 (('FI', nan), 5),
 (('L;FdI', 'NM;FdI'), 5),
 ((nan, 'PD;Az-Iv'), 5),
 (('M5s', 'FdI;FI'), 4),
 (('L;Az-Iv', nan), 3),
 (('PD;AVS', 'L;FdI'), 2),
 (('PD;AVS', 'NM;FdI')

#### Representatives

##### Before campaign -> electoral campaign

In [65]:
repr_n_same_party_bd, repr_n_diff_party_bd, repr_party_swingers_bd, repr_swingers_bd = check_party(before_representatives_df, during_representatives_df, active_repr_bd)
print('total active users across the first period', len(active_repr_bd))
print('same party: ', repr_n_same_party_bd, ' diff party: ', repr_n_diff_party_bd)
print('same party: ', repr_n_same_party_bd/len(active_repr_bd), ' diff party: ', repr_n_diff_party_bd/len(active_repr_bd))

total active users across the first period 84
same party:  28  diff party:  56
same party:  0.3333333333333333  diff party:  0.6666666666666666


In [66]:
# from list of json objects to pandas dataframe
repr_swingers_df = pd.DataFrame(repr_swingers_bd)
repr_swingers_df.head()

fout = f'{output_path}/repr_swingers_bd.csv'
repr_swingers_df.to_csv(fout, index=False)

In [68]:
# sort by value party_swingers_bd
sorted_repr_party_swingers_bd = sorted(repr_party_swingers_bd.items(), key=lambda kv: kv[1], reverse=True)
sorted_repr_party_swingers_bd

[(('FdI;L', 'FdI;L;PD;NM;FI'), 27),
 (('Az-Iv;L', 'Az-Iv'), 18),
 (('FI', 'FdI;L;PD;NM;FI'), 6),
 (('M5s', 'FdI;M5s'), 2),
 (('FdI', 'FdI;L;PD;NM;FI'), 1),
 (('Az-Iv;L', 'FdI;L;PD;NM;FI'), 1),
 (('PD;AVS', 'PD'), 1)]

##### Eletoral campaign -> After elections

In [69]:
repr_n_same_party_da, repr_n_diff_party_da, repr_party_swingers_da, repr_swingers_da = check_party(during_representatives_df, after_representatives_df, active_repr_da)
print('total active users across the second period', len(active_repr_da))
print('same party: ', repr_n_same_party_da, ' diff party: ', repr_n_diff_party_da)
print('same party: ', repr_n_same_party_da/len(active_repr_da), ' diff party: ', repr_n_diff_party_da/len(active_repr_da))

total active users across the second period 82
same party:  18  diff party:  64
same party:  0.21951219512195122  diff party:  0.7804878048780488


In [72]:
repr_swingers_df = pd.DataFrame(repr_swingers_da)
repr_swingers_df.head()

fout = f'{output_path}/repr_swingers_da.csv'
repr_swingers_df.to_csv(fout, index=False)

##### Before campaign -> After elections

In [77]:
repr_n_same_party_ba, repr_n_diff_party_ba, repr_party_swinger_ba, repr_swingers_ba = check_party(before_representatives_df, after_representatives_df, active_repr_ba)
print('same party: ', repr_n_same_party_ba, ' diff party: ', repr_n_diff_party_ba)

same party:  26  diff party:  43


In [79]:
repr_swingers_df = pd.DataFrame(repr_swingers_ba)
repr_swingers_df.head()

fout = f'{output_path}/repr_swingers_ba.csv'
repr_swingers_df.to_csv(fout, index=False)