In [2]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import os

# Microscopic analysis of swing voters

With the term *swing voter* we refer to a user who changes the party they support over a given observation period. To capture the swing-voter phenomenon, we build on the mesoscopic (community-level) analysis to carry out a microscopic (user-level) analysis: in each timeframe, every user is associated with the political label of the community they belong to. We then analyze the migration of users whose community of reference changes between two observation periods.

## Loading the data...

In [16]:
basepath = "../../../data/04_communities/comms_with_political_label_csv"

# Read the three per-period user tables and the representatives table,
# in the same order as the file names listed below.
(before_labeled_users,
 during_labeled_users,
 after_labeled_users,
 labeled_representatives) = (
    pd.read_csv(os.path.join(basepath, csv_name))
    for csv_name in (
        'pre_campaign_communities.csv',
        'during_campaign_communities.csv',
        'post_elections_communities.csv',
        'representatives_communities.csv',
    )
)

In [5]:
# Show the first rows of the pre-campaign user table to inspect its columns.
before_labeled_users.head()

Unnamed: 0,user_id,community_id,comm_label,most_common_party,all_parties
0,431976775,0,FdI,Fratelli d'Italia,Fratelli d'Italia
1,1038909791966572544,0,FdI,Fratelli d'Italia,Fratelli d'Italia
2,72248630,0,FdI,Fratelli d'Italia,Fratelli d'Italia
3,837278820088623104,0,FdI,Fratelli d'Italia,Fratelli d'Italia
4,456172613,0,FdI,Fratelli d'Italia,Fratelli d'Italia


In [6]:
# Show the first rows of the representatives table; it carries a `period`
# column that the next cell uses to split it into per-period frames.
labeled_representatives.head()

Unnamed: 0,user_id,period,community_id,community_label
0,425752285,before,2,PD;AVS
1,425752285,during,4,PD;AVS
2,425752285,after,5,PD;AVS
3,1135141640,before,2,PD;AVS
4,1135141640,during,4,PD;AVS


In [7]:
# Split labeled_representatives into one re-indexed frame per observation period
before_representatives_df, during_representatives_df, after_representatives_df = (
    labeled_representatives[labeled_representatives['period'] == period].reset_index(drop=True)
    for period in ('before', 'during', 'after')
)

## How many users stay active across the observation periods?

### All users

In [8]:
# Distinct user ids observed in each period
before_users, during_users, after_users = (
    set(frame['user_id'].values)
    for frame in (before_labeled_users, during_labeled_users, after_labeled_users)
)

tuple(len(users) for users in (before_users, during_users, after_users))

(11844, 16992, 14874)

In [9]:
# For each pair of periods: users present in both stay "active", users present
# only in the earlier one become "inactive" (they either leave the platform or lurk).
active_bd = before_users & during_users
inactive_bd = before_users - during_users
print('active before-during: ', len(active_bd), ' inactive before-during: ', len(inactive_bd))

active_da = during_users & after_users
inactive_da = during_users - after_users
print('active during-after: ', len(active_da), ' inactive during-after: ', len(inactive_da))

active_ba = before_users & after_users
inactive_ba = before_users - after_users
print('active before-after: ', len(active_ba), ' inactive before-after: ', len(inactive_ba))

active before-during:  7564  inactive before-during:  4280
active during-after:  9660  inactive during-after:  7332
active before-after:  6108  inactive before-after:  5736


### Representatives

In [10]:
# Distinct representative ids observed in each period
before_representatives, during_representatives, after_representatives = (
    set(frame['user_id'].values)
    for frame in (before_representatives_df, during_representatives_df, after_representatives_df)
)

tuple(len(ids) for ids in (before_representatives, during_representatives, after_representatives))

(98, 142, 90)

In [11]:
# Same active / inactive split as for all users, restricted to representatives:
# present in both periods -> active, present only in the earlier one -> inactive.
active_repr_bd = before_representatives & during_representatives
inactive_repr_bd = before_representatives - during_representatives
print('active before-during: ', len(active_repr_bd), ' inactive before-during: ', len(inactive_repr_bd))

active_repr_da = during_representatives & after_representatives
inactive_repr_da = during_representatives - after_representatives
print('active during-after: ', len(active_repr_da), ' inactive during-after: ', len(inactive_repr_da))

active_repr_ba = before_representatives & after_representatives
inactive_repr_ba = before_representatives - after_representatives
print('active before-after: ', len(active_repr_ba), ' inactive before-after: ', len(inactive_repr_ba))

active before-during:  84  inactive before-during:  14
active during-after:  82  inactive during-after:  60
active before-after:  69  inactive before-after:  29


## How many users change their party across periods?

### All users

In [12]:
# Folder that receives the swing-voter CSV exports written by the cells below.
output_path = "../../../data/05_swing_voters"
# DataFrame.to_csv does not create missing parent folders, so make sure the
# target exists before any export cell runs (no-op when it is already there).
os.makedirs(output_path, exist_ok=True)

In [13]:
# Re-inspect the pre-campaign table before comparing labels across periods.
# NOTE(review): the preview saved with this cell lists a `comm_label` column,
# whereas check_party reads `community_label`. The load cell carries a later
# execution count (In [16]), so this output may be stale -- re-run from a fresh
# kernel and confirm the schema of the loaded CSVs.
before_labeled_users.head()

Unnamed: 0,user_id,community_id,comm_label,most_common_party,all_parties
0,431976775,0,FdI,Fratelli d'Italia,Fratelli d'Italia
1,1038909791966572544,0,FdI,Fratelli d'Italia,Fratelli d'Italia
2,72248630,0,FdI,Fratelli d'Italia,Fratelli d'Italia
3,837278820088623104,0,FdI,Fratelli d'Italia,Fratelli d'Italia
4,456172613,0,FdI,Fratelli d'Italia,Fratelli d'Italia


In [19]:
def check_party(df1, df2, users_to_analyze, label_col='community_label'):
    """Compare each user's community label between two periods.

    Labels are ';'-separated party lists; two labels count as the same party
    when they contain the same set of parties, regardless of order
    (e.g. 'FdI;L' and 'L;FdI'). A missing label is treated as an empty set,
    so two missing labels compare as equal.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Tables for the earlier and the later period. Both must have a
        'user_id' column and a ``label_col`` column. If a user appears more
        than once in ``df2``, the first occurrence is used.
    users_to_analyze : container
        Only users for which ``user in users_to_analyze`` is true are
        compared (typically the ids present in both periods).
    label_col : str, default 'community_label'
        Name of the column holding the community label in both tables.

    Returns
    -------
    n_same_party : int
        Number of analyzed users whose party set did not change.
    n_diff_party : int
        Number of analyzed users whose party set changed.
    party2party : dict
        Maps ``(label_in_df1, label_in_df2)`` to the number of users who made
        that transition. Missing labels appear as NaN.
    vote_swingers : list of dict
        One ``{'user', 'party1', 'party2'}`` record per user who changed.

    Notes
    -----
    Users listed in ``users_to_analyze`` but absent from ``df2`` cannot be
    compared and are skipped (they are counted in neither total).
    """
    # One shared NaN object for every missing label: NaN != NaN, so transition
    # keys built from distinct NaN objects would never be merged in the dict.
    missing = float('nan')

    def _normalize(label):
        return missing if pd.isnull(label) else label

    def _party_set(label):
        return set() if pd.isnull(label) else set(label.split(';'))

    # user_id -> label in the second period, built once so that each lookup is
    # O(1) instead of a boolean-mask scan of df2 per row of df1.
    labels2 = {}
    for user2, label2 in zip(df2['user_id'].tolist(), df2[label_col].tolist()):
        labels2.setdefault(user2, label2)  # keep the first occurrence

    n_same_party = 0
    n_diff_party = 0
    party2party = {}
    vote_swingers = []

    # Iterate column-wise rather than with df.iterrows(): iterrows builds one
    # Series per row and may upcast values (integer ids become floats when all
    # columns are numeric), which would corrupt large user ids.
    for user1, label1 in zip(df1['user_id'].tolist(), df1[label_col].tolist()):
        if user1 not in users_to_analyze or user1 not in labels2:
            continue

        comm_label1 = _normalize(label1)
        comm_label2 = _normalize(labels2[user1])

        if _party_set(comm_label1) == _party_set(comm_label2):
            n_same_party += 1
            continue

        n_diff_party += 1

        key = (comm_label1, comm_label2)
        party2party[key] = party2party.get(key, 0) + 1

        vote_swingers.append({'user': user1, 'party1': comm_label1, 'party2': comm_label2})

    return n_same_party, n_diff_party, party2party, vote_swingers

#### Before campaign -> electoral campaign

In [20]:
# Compare labels before vs. during the campaign for users active in both periods
(n_same_party_bd,
 n_diff_party_bd,
 party_vote_swingers_bd,
 vote_swingers_bd) = check_party(
    df1=before_labeled_users,
    df2=during_labeled_users,
    users_to_analyze=active_bd,
)
print('total active users across the first period', len(active_bd))
# absolute counts, then shares of the active users
print('same party: ', n_same_party_bd, ' diff party: ', n_diff_party_bd)
print('same party: ', n_same_party_bd/len(active_bd), ' diff party: ', n_diff_party_bd/len(active_bd))

total active users across the first period 7564
same party:  2240  diff party:  5324
same party:  0.29613960867266  diff party:  0.70386039132734


In [21]:
# Sanity check: check_party appends one record per user counted as
# "diff party", so this length should equal the diff-party count printed above.
len(vote_swingers_bd)

5324

In [22]:
# Turn the list of swing-voter records (dicts with 'user', 'party1', 'party2')
# for before -> during into a table and persist it.
vote_swingers_df = pd.DataFrame(vote_swingers_bd)

# NOTE(review): this export is named swing_voters_bd.csv, while the other two
# periods are written as vote_swingers_da.csv / vote_swingers_ba.csv. The name
# is kept as-is; confirm downstream readers before aligning it.
fout = f'{output_path}/swing_voters_bd.csv'
vote_swingers_df.to_csv(fout, index=False)

# Preview goes last: only a cell's final expression is rendered, so a
# mid-cell .head() is evaluated and silently discarded.
vote_swingers_df.head()

In [23]:
# Rank the before -> during label transitions from most to least frequent
sorted_party_vote_swingers_bd = sorted(
    party_vote_swingers_bd.items(),
    key=lambda transition: -transition[1],
)
sorted_party_vote_swingers_bd

[(('L;Az-Iv', 'Az-Iv'), 1808),
 (('M5s', 'M5s;FdI'), 1320),
 (('L;FdI', 'FI;L;PD;FdI;NM'), 914),
 (('L;FdI', nan), 191),
 ((nan, 'FI;L;PD;FdI;NM'), 161),
 (('FdI', 'PD'), 108),
 (('L;Az-Iv', 'AVS;PD'), 100),
 (('M5s', nan), 81),
 (('AVS;PD', 'Az-Iv'), 76),
 (('AVS;PD', 'M5s;FdI'), 64),
 (('M5s', 'AVS;PD'), 61),
 (('FI', 'FI;L;PD;FdI;NM'), 46),
 (('FdI', 'AVS;PD'), 39),
 (('AVS;PD', nan), 35),
 (('L;Az-Iv', nan), 35),
 (('FdI', nan), 34),
 (('M5s', 'PD'), 34),
 (('FdI', 'Az-Iv'), 32),
 ((nan, 'M5s;FdI'), 26),
 (('L;FdI', 'Az-Iv'), 20),
 (('FdI', 'FI;L;PD;FdI;NM'), 19),
 (('FdI', 'M5s;FdI'), 15),
 (('L;FdI', 'PD'), 15),
 (('AVS;PD', 'PD'), 13),
 (('L;Az-Iv', 'PD'), 13),
 ((nan, 'AVS;PD'), 12),
 (('L;Az-Iv', 'FI;L;PD;FdI;NM'), 9),
 ((nan, 'PD'), 9),
 (('M5s', 'FI;L;PD;FdI;NM'), 8),
 (('FI', nan), 6),
 (('AVS;PD', 'FI;L;PD;FdI;NM'), 5),
 (('L;Az-Iv', 'M5s;FdI'), 5),
 (('L;FdI', 'AVS;PD'), 4),
 (('L;FdI', 'M5s;FdI'), 2),
 ((nan, 'Az-Iv'), 2),
 (('FI', 'PD'), 1),
 (('FI', 'Az-Iv'), 1)]

#### Electoral campaign -> After elections

In [24]:
# Compare labels during the campaign vs. after the elections for users active in both periods
(n_same_party_da,
 n_diff_party_da,
 party_vote_swingers_da,
 vote_swingers_da) = check_party(
    df1=during_labeled_users,
    df2=after_labeled_users,
    users_to_analyze=active_da,
)
print('total active users across the second period', len(active_da))
# absolute counts, then shares of the active users
print('same party: ', n_same_party_da, ' diff party: ', n_diff_party_da)
print('same party: ', n_same_party_da/len(active_da), ' diff party: ', n_diff_party_da/len(active_da))

total active users across the second period 9660
same party:  1357  diff party:  8303
same party:  0.14047619047619048  diff party:  0.8595238095238096


In [25]:
# Tabulate the during -> after swing-voter records and persist them
vote_swingers_df = pd.DataFrame(vote_swingers_da)

fout = f'{output_path}/vote_swingers_da.csv'
vote_swingers_df.to_csv(fout, index=False)

# Preview goes last: only a cell's final expression is rendered, so a
# mid-cell .head() is evaluated and silently discarded.
vote_swingers_df.head()

In [26]:
# Rank the during -> after label transitions from most to least frequent
sorted_party_vote_swingers_da = sorted(
    party_vote_swingers_da.items(),
    key=lambda transition: -transition[1],
)
sorted_party_vote_swingers_da

[(('Az-Iv', 'PD;Az-Iv'), 1877),
 ((nan, 'L;FdI'), 1757),
 (('M5s;FdI', 'M5s'), 1551),
 (('FI;L;PD;FdI;NM', 'L;FdI'), 898),
 (('FI;L;PD;FdI;NM', 'FI;FdI'), 673),
 (('PD', 'Az-Iv;FdI'), 200),
 (('Az-Iv', 'AVS;PD'), 128),
 ((nan, 'Az-Iv;FdI'), 110),
 (('Az-Iv', 'Az-Iv;FdI'), 103),
 (('AVS;PD', 'PD;Az-Iv'), 96),
 (('AVS;PD', 'Az-Iv;FdI'), 94),
 (('M5s;FdI', 'Az-Iv;FdI'), 92),
 (('PD', 'AVS;PD'), 91),
 ((nan, 'M5s'), 90),
 (('M5s;FdI', 'AVS;PD'), 88),
 ((nan, 'AVS;PD'), 87),
 (('AVS;PD', 'M5s'), 50),
 ((nan, 'FI;FdI'), 46),
 (('FI;L;PD;FdI;NM', 'Az-Iv;FdI'), 42),
 (('M5s;FdI', 'L;FdI'), 24),
 (('FI;L;PD;FdI;NM', 'AVS;PD'), 24),
 (('AVS;PD', nan), 22),
 (('FI;L;PD;FdI;NM', 'PD;Az-Iv'), 20),
 ((nan, 'PD;Az-Iv'), 19),
 (('FI;L;PD;FdI;NM', 'NM;FdI'), 17),
 (('Az-Iv', 'FI;FdI'), 10),
 (('PD', 'M5s'), 9),
 (('Az-Iv', 'L;FdI'), 9),
 (('M5s;FdI', nan), 8),
 (('PD', nan), 8),
 (('PD', 'L;FdI'), 8),
 (('PD', 'PD;Az-Iv'), 8),
 (('PD', 'FI;FdI'), 7),
 (('FI;L;PD;FdI;NM', nan), 6),
 (('AVS;PD', 'FI;FdI'

#### Before campaign -> After elections

In [27]:
# Compare labels before the campaign vs. after the elections for users active in both periods
(n_same_party_ba,
 n_diff_party_ba,
 party_swinger_ba,
 vote_swingers_ba) = check_party(
    df1=before_labeled_users,
    df2=after_labeled_users,
    users_to_analyze=active_ba,
)
print('same party: ', n_same_party_ba, ' diff party: ', n_diff_party_ba)

same party:  2409  diff party:  3699


In [28]:
# Tabulate the before -> after swing-voter records and persist them
vote_swingers_df = pd.DataFrame(vote_swingers_ba)

fout = f'{output_path}/vote_swingers_ba.csv'
vote_swingers_df.to_csv(fout, index=False)

# Preview goes last: only a cell's final expression is rendered, so a
# mid-cell .head() is evaluated and silently discarded.
vote_swingers_df.head()

In [29]:
# Rank the before -> after label transitions from most to least frequent
sorted_party_vote_swingers_ba = sorted(
    party_swinger_ba.items(),
    key=lambda transition: -transition[1],
)
sorted_party_vote_swingers_ba

[(('L;Az-Iv', 'PD;Az-Iv'), 1313),
 ((nan, 'L;FdI'), 1046),
 (('L;FdI', 'FI;FdI'), 283),
 (('FdI', 'Az-Iv;FdI'), 122),
 (('L;Az-Iv', 'AVS;PD'), 85),
 (('M5s', 'Az-Iv;FdI'), 81),
 (('M5s', 'AVS;PD'), 81),
 (('FdI', 'AVS;PD'), 76),
 (('L;Az-Iv', 'Az-Iv;FdI'), 75),
 (('AVS;PD', 'PD;Az-Iv'), 55),
 (('AVS;PD', 'M5s'), 52),
 (('AVS;PD', 'Az-Iv;FdI'), 52),
 ((nan, 'Az-Iv;FdI'), 44),
 (('L;FdI', 'Az-Iv;FdI'), 37),
 (('FI', 'FI;FdI'), 33),
 ((nan, 'FI;FdI'), 30),
 ((nan, 'AVS;PD'), 24),
 ((nan, 'M5s'), 21),
 (('FdI', 'PD;Az-Iv'), 18),
 (('AVS;PD', nan), 17),
 (('L;FdI', 'PD;Az-Iv'), 16),
 (('FdI', 'FI;FdI'), 15),
 (('FdI', 'M5s'), 14),
 (('M5s', 'L;FdI'), 13),
 (('FdI', 'L;FdI'), 11),
 (('M5s', nan), 11),
 (('L;Az-Iv', 'FI;FdI'), 9),
 (('L;FdI', nan), 9),
 (('L;Az-Iv', 'L;FdI'), 8),
 (('L;FdI', 'AVS;PD'), 8),
 (('FdI', nan), 5),
 (('FI', nan), 5),
 (('L;FdI', 'NM;FdI'), 5),
 ((nan, 'PD;Az-Iv'), 5),
 (('M5s', 'FI;FdI'), 4),
 (('L;Az-Iv', nan), 3),
 (('AVS;PD', 'L;FdI'), 2),
 (('AVS;PD', 'NM;FdI')

### Representatives

#### Before campaign -> electoral campaign

In [30]:
# Representatives only: compare labels before vs. during the campaign
(repr_n_same_party_bd,
 repr_n_diff_party_bd,
 repr_party_vote_swingers_bd,
 repr_vote_swingers_bd) = check_party(
    df1=before_representatives_df,
    df2=during_representatives_df,
    users_to_analyze=active_repr_bd,
)
print('total active users across the first period', len(active_repr_bd))
# absolute counts, then shares of the active representatives
print('same party: ', repr_n_same_party_bd, ' diff party: ', repr_n_diff_party_bd)
print('same party: ', repr_n_same_party_bd/len(active_repr_bd), ' diff party: ', repr_n_diff_party_bd/len(active_repr_bd))

total active users across the first period 84
same party:  28  diff party:  56
same party:  0.3333333333333333  diff party:  0.6666666666666666


In [31]:
# Turn the list of representative swing-voter records (dicts) for
# before -> during into a table and persist it.
repr_vote_swingers_df = pd.DataFrame(repr_vote_swingers_bd)

fout = f'{output_path}/repr_vote_swingers_bd.csv'
repr_vote_swingers_df.to_csv(fout, index=False)

# Preview goes last: only a cell's final expression is rendered, so a
# mid-cell .head() is evaluated and silently discarded.
repr_vote_swingers_df.head()

In [32]:
# Rank the representatives' before -> during label transitions from most to least frequent
sorted_repr_party_vote_swingers_bd = sorted(
    repr_party_vote_swingers_bd.items(),
    key=lambda transition: -transition[1],
)
sorted_repr_party_vote_swingers_bd

[(('FdI;L', 'FdI;PD;FI;NM;L'), 27),
 (('Az-Iv;L', 'Az-Iv'), 18),
 (('FI', 'FdI;PD;FI;NM;L'), 6),
 (('M5s', 'FdI;M5s'), 2),
 (('FdI', 'FdI;PD;FI;NM;L'), 1),
 (('Az-Iv;L', 'FdI;PD;FI;NM;L'), 1),
 (('PD;AVS', 'PD'), 1)]

#### Electoral campaign -> After elections

In [33]:
# Representatives only: compare labels during the campaign vs. after the elections
(repr_n_same_party_da,
 repr_n_diff_party_da,
 repr_party_vote_swingers_da,
 repr_vote_swingers_da) = check_party(
    df1=during_representatives_df,
    df2=after_representatives_df,
    users_to_analyze=active_repr_da,
)
print('total active users across the second period', len(active_repr_da))
# absolute counts, then shares of the active representatives
print('same party: ', repr_n_same_party_da, ' diff party: ', repr_n_diff_party_da)
print('same party: ', repr_n_same_party_da/len(active_repr_da), ' diff party: ', repr_n_diff_party_da/len(active_repr_da))

total active users across the second period 82
same party:  18  diff party:  64
same party:  0.21951219512195122  diff party:  0.7804878048780488


In [34]:
# Tabulate the representatives' during -> after swing-voter records and persist them
repr_vote_swingers_df = pd.DataFrame(repr_vote_swingers_da)

fout = f'{output_path}/repr_vote_swingers_da.csv'
repr_vote_swingers_df.to_csv(fout, index=False)

# Preview goes last: only a cell's final expression is rendered, so a
# mid-cell .head() is evaluated and silently discarded.
repr_vote_swingers_df.head()

#### Before campaign -> After elections

In [35]:
# Representatives only: compare labels before the campaign vs. after the elections
(repr_n_same_party_ba,
 repr_n_diff_party_ba,
 repr_party_swinger_ba,
 repr_vote_swingers_ba) = check_party(
    df1=before_representatives_df,
    df2=after_representatives_df,
    users_to_analyze=active_repr_ba,
)
print('same party: ', repr_n_same_party_ba, ' diff party: ', repr_n_diff_party_ba)

same party:  26  diff party:  43


In [36]:
# Tabulate the representatives' before -> after swing-voter records and persist them
repr_vote_swingers_df = pd.DataFrame(repr_vote_swingers_ba)

fout = f'{output_path}/repr_vote_swingers_ba.csv'
repr_vote_swingers_df.to_csv(fout, index=False)

# Preview goes last: only a cell's final expression is rendered, so a
# mid-cell .head() is evaluated and silently discarded.
repr_vote_swingers_df.head()