In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import os

### Microscopic analysis of swingers

#### Loading the data...

In [2]:
# Folder containing the labeled-user tables produced for the retweet network.
basepath = "../../../results/SNAM/swinger_detection/retweets"

# One CSV per period, read in chronological order (before the campaign,
# during the campaign, after the elections -- as the file names indicate).
before_labeled_users, during_labeled_users, after_labeled_users = (
    pd.read_csv(os.path.join(basepath, csv_name))
    for csv_name in (
        'before_campaign_labeled_users.csv',
        'during_campaign_labeled_users.csv',
        'after_elections_labeled_users.csv',
    )
)

#### How many users stay active across periods?

In [3]:
# Distinct user ids (the 'source' column) observed in each period.
before_users, during_users, after_users = (
    set(labeled_users['source'].values)
    for labeled_users in (before_labeled_users, during_labeled_users, after_labeled_users)
)

# Number of distinct users per period, in chronological order.
tuple(map(len, (before_users, during_users, after_users)))

(499, 666, 370)

In [4]:
# For every pair of periods, split the users of the earlier period into those
# also present in the later one ("active") and those absent from it
# ("inactive": they either left the platform or only lurk).

# before campaign -> electoral campaign
active_bd = before_users & during_users
inactive_bd = before_users - during_users
print('active before-during: ', len(active_bd), ' inactive before-during: ', len(inactive_bd))

# electoral campaign -> after elections
active_da = during_users & after_users
inactive_da = during_users - after_users
print('active during-after: ', len(active_da), ' inactive during-after: ', len(inactive_da))

# before campaign -> after elections
active_ba = before_users & after_users
inactive_ba = before_users - after_users
print('active before-after: ', len(active_ba), ' inactive before-after: ', len(inactive_ba))

active before-during:  227  inactive before-during:  272
active during-after:  258  inactive during-after:  408
active before-after:  153  inactive before-after:  346


#### How many users change their party across periods?

In [26]:
# Directory the swinger CSVs are written to by the export cells below.
# It is the same folder as `basepath`, where the labeled-user tables are read from.
output_path = "../../../results/SNAM/swinger_detection/retweets"

In [6]:
# Preview the first rows of the "before campaign" table; the 'source' and
# 'Party' columns shown here are the ones check_party reads.
before_labeled_users.head()

Unnamed: 0,source,Party,weight
0,1001091645515935744,Alleanza Verdi Sinistra,15
1,1001344387102728192,Azione - Italia Viva,18
2,1002639111205871616,Lega,8
3,1004874680,Azione - Italia Viva,53
4,1005980574,Azione - Italia Viva,9


In [22]:
def check_party(df1, df2, users_to_analyze):
    """Compare the party label of each user between two periods.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Labeled-user tables of the earlier (df1) and later (df2) period.
        Both must provide a 'source' column (user id) and a 'Party' column.
    users_to_analyze : container
        Users to compare, tested with ``in``. Rows of df1 whose 'source' is
        not in this container are ignored.

    Returns
    -------
    tuple
        ``(n_same_party, n_diff_party, party2party, swingers)`` where

        * n_same_party -- number of analyzed users with the same label in
          both tables;
        * n_diff_party -- number of analyzed users whose label differs;
        * party2party -- dict mapping ``(party1, party2)`` to how many users
          moved from party1 (df1) to party2 (df2);
        * swingers -- list with one dict
          ``{'user': ..., 'party1': ..., 'party2': ...}`` per user whose
          label differs, in df1 row order.

    Notes
    -----
    A user listed in users_to_analyze but missing from df2 cannot be compared
    and is skipped (it counts in neither total) instead of raising
    IndexError.
    """
    # Build the df2 lookup once instead of filtering df2 for every df1 row.
    # setdefault keeps the first label when a user occurs more than once in
    # df2, which is what taking `.values[0]` of the filtered frame selected.
    party_in_df2 = {}
    for user2, label2 in zip(df2['source'], df2['Party']):
        party_in_df2.setdefault(user2, label2)

    n_same_party = 0
    n_diff_party = 0
    party2party = {}
    # one {'user': ..., 'party1': ..., 'party2': ...} record per swinger
    swingers = []

    # Iterate the two needed columns directly: iterrows() builds a Series per
    # row and may upcast ids when the frame holds only numeric columns.
    for user1, party1 in zip(df1['source'], df1['Party']):
        if user1 not in users_to_analyze:
            continue

        if user1 not in party_in_df2:
            # No label in the second period: nothing to compare against.
            continue
        party2 = party_in_df2[user1]

        if party1 == party2:
            n_same_party += 1
        else:
            n_diff_party += 1

            key = (party1, party2)
            party2party[key] = party2party.get(key, 0) + 1

            swingers.append({'user': user1, 'party1': party1, 'party2': party2})

    return n_same_party, n_diff_party, party2party, swingers

##### Before campaign -> electoral campaign

In [23]:
# Party agreement between the "before" and "during" labels, restricted to the
# users present in both periods.
n_same_party_bd, n_diff_party_bd, party_swingers_bd, swingers_bd = check_party(
    before_labeled_users,
    during_labeled_users,
    active_bd,
)

n_active_bd = len(active_bd)
print('total active users across the first period', n_active_bd)
print('same party: ', n_same_party_bd, ' diff party: ', n_diff_party_bd)
# Same two counts, expressed as a share of the active users.
print('same party: ', n_same_party_bd / n_active_bd, ' diff party: ', n_diff_party_bd / n_active_bd)

total active users across the first period 227
same party:  216  diff party:  11
same party:  0.9515418502202643  diff party:  0.048458149779735685


In [27]:
# Turn the list of swinger records (one dict per user, built by check_party)
# into a DataFrame. Naming the columns explicitly keeps the CSV header even
# when no swinger was found and the list is empty.
swingers_df = pd.DataFrame(swingers_bd, columns=['user', 'party1', 'party2'])

fout = f'{output_path}/swingers_bd.csv'
swingers_df.to_csv(fout, index=False)

# The preview goes last: only the final expression of a cell is rendered.
swingers_df.head()

In [9]:
# Rank the (party1, party2) transitions of party_swingers_bd by number of
# users, most frequent first.
sorted_party_swingers_bd = sorted(
    party_swingers_bd.items(),
    key=lambda transition: transition[1],
    reverse=True,
)
sorted_party_swingers_bd

[(('Partito Democratico', 'Azione - Italia Viva'), 3),
 (('Azione - Italia Viva', 'Partito Democratico'), 2),
 (("Fratelli d'Italia", 'Lega'), 2),
 (('Alleanza Verdi Sinistra', 'Partito Democratico'), 2),
 (("Fratelli d'Italia", 'Forza Italia'), 1),
 (('Lega', 'Azione - Italia Viva'), 1)]

##### Electoral campaign -> After elections

In [28]:
# Party agreement between the "during" and "after" labels, restricted to the
# users present in both periods.
n_same_party_da, n_diff_party_da, party_swingers_da, swingers_da = check_party(
    during_labeled_users,
    after_labeled_users,
    active_da,
)

n_active_da = len(active_da)
print('total active users across the second period', n_active_da)
print('same party: ', n_same_party_da, ' diff party: ', n_diff_party_da)
# Same two counts, expressed as a share of the active users.
print('same party: ', n_same_party_da / n_active_da, ' diff party: ', n_diff_party_da / n_active_da)

total active users across the second period 258
same party:  244  diff party:  14
same party:  0.9457364341085271  diff party:  0.05426356589147287


In [30]:
# Turn the list of swinger records (one dict per user, built by check_party)
# into a DataFrame. Naming the columns explicitly keeps the CSV header even
# when no swinger was found and the list is empty.
swingers_df = pd.DataFrame(swingers_da, columns=['user', 'party1', 'party2'])

fout = f'{output_path}/swingers_da.csv'
swingers_df.to_csv(fout, index=False)

# The preview goes last: only the final expression of a cell is rendered.
swingers_df.head()

In [29]:
# Rank the (party1, party2) transitions of party_swingers_da by number of
# users, most frequent first.
sorted_party_swingers_da = sorted(
    party_swingers_da.items(),
    key=lambda transition: transition[1],
    reverse=True,
)
sorted_party_swingers_da

[(("Fratelli d'Italia", 'Lega'), 5),
 (('Azione - Italia Viva', 'Partito Democratico'), 2),
 (('Forza Italia', "Fratelli d'Italia"), 2),
 (("Fratelli d'Italia", 'Azione - Italia Viva'), 1),
 (('Partito Democratico', 'Alleanza Verdi Sinistra'), 1),
 (('Forza Italia', 'Lega'), 1),
 (('Lega', "Fratelli d'Italia"), 1),
 (('Forza Italia', 'Azione - Italia Viva'), 1)]

##### Before campaign -> After elections

In [32]:
# Party agreement between the "before" and "after" labels, restricted to the
# users present in both periods.
n_same_party_ba, n_diff_party_ba, party_swinger_ba, swingers_ba = check_party(
    before_labeled_users,
    after_labeled_users,
    active_ba,
)
print('same party: ', n_same_party_ba, ' diff party: ', n_diff_party_ba)

same party:  141  diff party:  12


In [33]:
# Turn the list of swinger records (one dict per user, built by check_party)
# into a DataFrame. Naming the columns explicitly keeps the CSV header even
# when no swinger was found and the list is empty.
swingers_df = pd.DataFrame(swingers_ba, columns=['user', 'party1', 'party2'])

fout = f'{output_path}/swingers_ba.csv'
swingers_df.to_csv(fout, index=False)

# The preview goes last: only the final expression of a cell is rendered.
swingers_df.head()

In [13]:
# Rank the (party1, party2) transitions of party_swinger_ba by number of
# users, most frequent first.
sorted_party_swingers_ba = sorted(
    party_swinger_ba.items(),
    key=lambda transition: transition[1],
    reverse=True,
)
sorted_party_swingers_ba

[(("Fratelli d'Italia", 'Lega'), 5),
 (('Azione - Italia Viva', 'Partito Democratico'), 2),
 (('Partito Democratico', 'Azione - Italia Viva'), 2),
 (('Lega', "Fratelli d'Italia"), 1),
 (('Movimento 5s', 'Partito Democratico'), 1),
 (('Forza Italia', 'Azione - Italia Viva'), 1)]