In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import os
import pandas as pd

### Microscopic analysis of swingers

#### Loading the data

In [3]:
# Load the representatives table (Name, Twitter-Handle, Party, Gender, ids).
# `ids` is read as text: the edge lists' 'source'/'target' columns are cast to str
# further down, and both `isin` and `merge` need the same dtype on both sides.
# Reading as text also keeps 19-digit ids exact even if the column contains blanks.
representatives_df = pd.read_csv(
    "../../../data/raw_data/twitter_representatives_handles_final.csv",
    dtype={"ids": str},
)

In [4]:
# Preview the first rows of the representatives table to check its columns
representatives_df.head()

Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Twitter-Handle,Party,Gender,ids
0,0,0,0,0,0,Bonelli Angelo,AngeloBonelli1,Alleanza Verdi Sinistra,M,425752285
1,1,1,1,1,1,Borrelli Francesco Emilio,NotizieFrance,Alleanza Verdi Sinistra,M,417961167
2,2,2,2,2,2,Dori Devis,DevisDori,Alleanza Verdi Sinistra,M,1228450685840220160
3,3,3,3,3,3,Evi Eleonora,EleonoraEvi,Alleanza Verdi Sinistra,F,1135141640
4,4,4,4,4,4,Fratoianni Nicola,NFratoianni,Alleanza Verdi Sinistra,M,425686235


In [9]:
# Folder holding the backbone edge lists, one CSV per phase
basepath = '../../../data/backbone_graphs'

# Load the three phases in chronological order
before_data, during_data, after_data = [
    pd.read_csv(os.path.join(basepath, csv_name))
    for csv_name in (
        'before_campaign_backbone.csv',
        'during_campaign_backbone.csv',
        'after_elections_backbone.csv',
    )
]

# Both edge endpoints are stored as strings in every frame
for edges in (before_data, during_data, after_data):
    for endpoint in ('source', 'target'):
        edges[endpoint] = edges[endpoint].astype(str)

# Report (rows, columns) of each edge list
print("Before data has shape: ", before_data.shape)
print("During data has shape: ", during_data.shape)
print("After data has shape: ", after_data.shape)

Before data has shape:  (38209, 4)
During data has shape:  (56953, 4)
After data has shape:  (42864, 4)


In [10]:
# Display the pre-campaign edge list (columns: source, target, weight, p_value)
before_data

Unnamed: 0,source,target,weight,p_value
0,3241806322,29416653,132,3.150182e-51
1,29416653,389131517,38,1.914250e-14
2,29416653,169566296,27,1.061201e-07
3,29416653,1342502335361273856,22,2.116990e-06
4,29416653,3308422191,20,6.992131e-06
...,...,...,...,...
38204,127532707,2301639524,4,4.440879e-02
38205,1489925735401799680,1391451476761923584,4,4.074783e-02
38206,434505068,3005727892,4,4.212454e-02
38207,1586682241,1071557549391011840,4,4.259258e-02


#### Data filtering

In [11]:
# we keep only the edges whose target is one of the representatives.
# 'target' was cast to str when the edge lists were loaded, so the representative
# ids are compared as strings too: `isin` never matches a string against an integer,
# which would silently leave every filtered frame empty.
representative_ids = representatives_df['ids'].astype(str)

w_before_data = before_data[before_data['target'].isin(representative_ids)]
w_during_data = during_data[during_data['target'].isin(representative_ids)]
w_after_data = after_data[after_data['target'].isin(representative_ids)]

# Shapes
print("Before data has shape: ", w_before_data.shape)
print("During data has shape: ", w_during_data.shape)
print("After data has shape: ", w_after_data.shape)


Before data has shape:  (959, 4)
During data has shape:  (1614, 4)
After data has shape:  (705, 4)


In [12]:
# we attach the representative's name and party to each edge through the target column.
# Only the columns that are actually needed are taken from representatives_df, so the
# result no longer depends on how many leftover 'Unnamed: 0.*' index columns the CSV has.
# The key is cast to str because 'target' holds string ids; merging a string column
# against an integer column raises a ValueError.
party_lookup = representatives_df[['ids', 'Name', 'Party']].astype({'ids': str})

# NOTE: this cell overwrites its own inputs; re-run the filtering cell before
# executing it a second time, otherwise Name/Party get merged in twice.
w_before_data = pd.merge(w_before_data, party_lookup, left_on='target', right_on='ids')
w_during_data = pd.merge(w_during_data, party_lookup, left_on='target', right_on='ids')
w_after_data = pd.merge(w_after_data, party_lookup, left_on='target', right_on='ids')

# the join key duplicates 'target', so it is dropped after the merge
w_before_data = w_before_data.drop(columns='ids')
w_during_data = w_during_data.drop(columns='ids')
w_after_data = w_after_data.drop(columns='ids')

In [13]:
# Inspect the pre-campaign edges after Name and Party were attached to each target
w_before_data

Unnamed: 0,source,target,weight,p_value,Name,Party
0,1323186403614875648,2416067982,84,1.322345e-11,Calenda Carlo,Azione - Italia Viva
1,989346433,2416067982,121,2.859149e-25,Calenda Carlo,Azione - Italia Viva
2,1111697667946659840,999578121123848192,35,2.956891e-04,Conte Giuseppe,Movimento 5s
3,841701064118284288,999578121123848192,17,2.230352e-03,Conte Giuseppe,Movimento 5s
4,948578516982956032,999578121123848192,35,3.637663e-06,Conte Giuseppe,Movimento 5s
...,...,...,...,...,...,...
954,1420318759835914240,403544693,4,4.142503e-02,Zingaretti Nicola,Partito Democratico
955,256998498,403544693,5,1.843394e-02,Zingaretti Nicola,Partito Democratico
956,1493698125659320320,12514212,4,2.819991e-02,Orfini Matteo,Partito Democratico
957,723898926,418028983,5,2.583908e-02,Terzi Di Sant'Agata Giuliomaria,Fratelli d'Italia


#### Evaluate users' party

In [14]:
# Sum the edge weights per (source user, party) pair: one row per pair,
# with 'source' and 'Party' kept as regular columns.
party_keys = ['source', 'Party']
w_before_data_grouped = w_before_data.groupby(party_keys, as_index=False)['weight'].sum()
w_during_data_grouped = w_during_data.groupby(party_keys, as_index=False)['weight'].sum()
w_after_data_grouped = w_after_data.groupby(party_keys, as_index=False)['weight'].sum()

In [15]:
# Inspect the summed weight per (source, Party) pair for the pre-campaign phase
w_before_data_grouped

Unnamed: 0,source,Party,weight
0,1001091645515935744,Alleanza Verdi Sinistra,15
1,1001091645515935744,Partito Democratico,8
2,1001344387102728192,Azione - Italia Viva,18
3,1002639111205871616,Lega,8
4,1004874680,Azione - Italia Viva,53
...,...,...,...
530,992686250,Partito Democratico,6
531,993804838705213440,Partito Democratico,10
532,996272602342674432,Forza Italia,11
533,997354591,Partito Democratico,6


In [16]:
# For every source user keep only the row of the party with the largest summed weight.
# idxmax yields one row label per user; on a tie the first matching row is the one kept.
before_top_rows = w_before_data_grouped.groupby('source')['weight'].idxmax()
during_top_rows = w_during_data_grouped.groupby('source')['weight'].idxmax()
after_top_rows = w_after_data_grouped.groupby('source')['weight'].idxmax()

w_before_data_grouped = w_before_data_grouped.loc[before_top_rows]
w_during_data_grouped = w_during_data_grouped.loc[during_top_rows]
w_after_data_grouped = w_after_data_grouped.loc[after_top_rows]

In [17]:
# Inspect the retained rows: one (source, Party, weight) row per source user
w_before_data_grouped

Unnamed: 0,source,Party,weight
0,1001091645515935744,Alleanza Verdi Sinistra,15
2,1001344387102728192,Azione - Italia Viva,18
3,1002639111205871616,Lega,8
4,1004874680,Azione - Italia Viva,53
6,1005980574,Azione - Italia Viva,9
...,...,...,...
530,992686250,Partito Democratico,6
531,993804838705213440,Partito Democratico,10
532,996272602342674432,Forza Italia,11
533,997354591,Partito Democratico,6


In [21]:
# store the labeled users of each phase as CSV, without the index column
output_path = "../../../results/SNAM/swinger_detection/retweets"

# to_csv does not create missing folders, so make sure the target directory exists
# (no-op when it is already there)
os.makedirs(output_path, exist_ok=True)

w_before_data_grouped.to_csv(os.path.join(output_path, 'before_campaign_labeled_users.csv'), index=False)
w_during_data_grouped.to_csv(os.path.join(output_path, 'during_campaign_labeled_users.csv'), index=False)
w_after_data_grouped.to_csv(os.path.join(output_path, 'after_elections_labeled_users.csv'), index=False)