In [5]:
import matplotlib.pyplot as plt
import networkx as nx
import os
import pandas as pd

### Microscopic analysis of swingers

#### Loading the data

In [2]:
# Load the representatives table. `ids` is read as text so that it has the same
# dtype as the string-typed `source`/`target` edge columns it is later compared
# and joined against; left to type inference pandas would parse it as a number
# and those comparisons would never match.
representatives_df = pd.read_csv(
    "../../data/raw_data/twitter_representatives_handles_final.csv",
    dtype={"ids": str},
)

In [3]:
# Peek at the first five representatives to check which columns were loaded.
representatives_df.head(n=5)

Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Name,Twitter-Handle,Party,Gender,ids
0,0,0,0,0,0,Bonelli Angelo,AngeloBonelli1,Alleanza Verdi Sinistra,M,425752285
1,1,1,1,1,1,Borrelli Francesco Emilio,NotizieFrance,Alleanza Verdi Sinistra,M,417961167
2,2,2,2,2,2,Dori Devis,DevisDori,Alleanza Verdi Sinistra,M,1228450685840220160
3,3,3,3,3,3,Evi Eleonora,EleonoraEvi,Alleanza Verdi Sinistra,F,1135141640
4,4,4,4,4,4,Fratoianni Nicola,NFratoianni,Alleanza Verdi Sinistra,M,425686235


In [6]:
# Reading network data
basepath = '../../data/network_data'


def _read_edge_list(filename):
    """Read one edge-list file from `basepath`, keeping account ids as text.

    `source` and `target` are parsed directly as strings instead of being read
    as numbers and converted afterwards, so the ids come out exactly as written
    in the file (a numeric round-trip can upcast a column to float and corrupt
    large ids when a value is missing).

    Parameters
    ----------
    filename : str
        File name relative to `basepath`.

    Returns
    -------
    pandas.DataFrame
        The parsed file with string-typed `source` and `target` columns.
    """
    return pd.read_csv(
        os.path.join(basepath, filename),
        dtype={'source': str, 'target': str},
    )


before_data = _read_edge_list('before_campaign_representative.graph')
during_data = _read_edge_list('campaign_representative.graph')
after_data = _read_edge_list('after_elections_representative.graph')

# Shapes
print("Before data has shape: ", before_data.shape)
print("During data has shape: ", during_data.shape)
print("After data has shape: ", after_data.shape)

Before data has shape:  (72716, 3)
During data has shape:  (92920, 3)
After data has shape:  (91749, 3)


In [7]:
# Inspect the pre-campaign edge list (columns: source, target, weight).
before_data

Unnamed: 0,source,target,weight
0,22659500,2416067982,177
1,22659500,395183088,11
2,22659500,104485125,7
3,22659500,420332560,6
4,22659500,940499085886480384,5
...,...,...,...
72711,1496033584221769728,999578121123848192,1
72712,1495901503277703168,270839361,1
72713,1495901503277703168,337767301,1
72714,1496013824557785088,440815818,1


#### Data filtering

In [11]:
# we filter out the users in Target that are not in the representatives list.
# The edge columns hold ids as strings, so the representative ids are turned
# into strings as well before the membership test; comparing against numeric
# ids would silently match nothing. Missing ids are dropped first so they
# cannot turn into a literal 'nan' key.
representative_ids = set(representatives_df['ids'].dropna().astype(str))

w_before_data = before_data[before_data['target'].isin(representative_ids)]
w_during_data = during_data[during_data['target'].isin(representative_ids)]
w_after_data = after_data[after_data['target'].isin(representative_ids)]

# Shapes
print("Before data has shape: ", w_before_data.shape)
print("During data has shape: ", w_during_data.shape)
print("After data has shape: ", w_after_data.shape)


Before data has shape:  (72094, 3)
During data has shape:  (91823, 3)
After data has shape:  (91208, 3)


In [12]:
# we attach the party to the representatives in the target column.
# Only the lookup columns are taken from the representatives table, so there
# are no leftover index columns to drop by name afterwards, and `ids` is cast
# to str so the join key has the same dtype as the string-typed `target`.
party_lookup = representatives_df[['ids', 'Name', 'Party']].astype({'ids': str})


def _attach_party(edges):
    """Return `edges` with the `Name` and `Party` of each target representative.

    Only the three edge columns are taken from `edges`, so running this again
    on an already-labelled frame yields the same result instead of piling up
    suffixed duplicate columns.

    Parameters
    ----------
    edges : pandas.DataFrame
        Frame with at least `source`, `target` and `weight` columns.

    Returns
    -------
    pandas.DataFrame
        Columns `source`, `target`, `weight`, `Name`, `Party`; rows whose
        `target` has no match in the representatives table are not kept
        (inner join).
    """
    return (
        edges[['source', 'target', 'weight']]
        .merge(party_lookup, how='inner', left_on='target', right_on='ids')
        .drop(columns='ids')
    )


w_before_data = _attach_party(w_before_data)
w_during_data = _attach_party(w_during_data)
w_after_data = _attach_party(w_after_data)

In [13]:
# Inspect the labelled pre-campaign edges (each row now carries the target's Name and Party).
w_before_data

Unnamed: 0,source,target,weight,Name,Party
0,22659500,2416067982,177,Calenda Carlo,Azione - Italia Viva
1,2416067982,2416067982,12,Calenda Carlo,Azione - Italia Viva
2,1025767333988581376,2416067982,128,Calenda Carlo,Azione - Italia Viva
3,1151521670,2416067982,127,Calenda Carlo,Azione - Italia Viva
4,1103360481341902848,2416067982,126,Calenda Carlo,Azione - Italia Viva
...,...,...,...,...,...
72089,1505218435718000640,1556324421794660352,1,Crisanti Andrea,Partito Democratico
72090,2980425346,1556324421794660352,1,Crisanti Andrea,Partito Democratico
72091,2946565571,1556324421794660352,1,Crisanti Andrea,Partito Democratico
72092,815552279797448704,1556324421794660352,1,Crisanti Andrea,Partito Democratico


#### Evaluate users' party

In [14]:
# Total weight that each user (source) directed at each party.
def _weight_per_party(labelled_edges):
    """Sum `weight` over every (source, Party) pair, one row per pair."""
    per_pair = labelled_edges.groupby(['source', 'Party'])['weight']
    totals = per_pair.sum()
    return totals.reset_index()


w_before_data_grouped = _weight_per_party(w_before_data)
w_during_data_grouped = _weight_per_party(w_during_data)
w_after_data_grouped = _weight_per_party(w_after_data)

In [15]:
# Keep, for every user, the single (source, Party) row with the largest weight.
def _dominant_party(party_weights):
    """Return the row with the highest `weight` for each `source`.

    `idxmax` reports the first row label among equal maxima, so a tie between
    parties is resolved by row order in `party_weights`.
    """
    top_row_labels = party_weights.groupby('source')['weight'].idxmax()
    return party_weights.loc[top_row_labels]


w_before_data_grouped = _dominant_party(w_before_data_grouped)
w_during_data_grouped = _dominant_party(w_during_data_grouped)
w_after_data_grouped = _dominant_party(w_after_data_grouped)

In [16]:
# Inspect the per-user party labels for the pre-campaign period (one row per source).
w_before_data_grouped

Unnamed: 0,source,Party,weight
0,1000045820,Azione - Italia Viva,8
1,1000071459396640768,Azione - Italia Viva,7
2,1000087728770383872,Fratelli d'Italia,1
4,1000101522,Fratelli d'Italia,1
5,1000108672352227328,Movimento 5s,1
...,...,...,...
44301,999798813996003328,Fratelli d'Italia,3
44302,999870901,Azione - Italia Viva,1
44303,999908006241357824,Forza Italia,1
44305,999928050,Azione - Italia Viva,3


In [18]:
# store the df, ignoring the index
output_path = "../../data/labeled_users_microscopic"

# Create the target folder if it is missing; `to_csv` does not create parent
# directories and would otherwise fail on a fresh checkout.
os.makedirs(output_path, exist_ok=True)

w_before_data_grouped.to_csv(os.path.join(output_path, 'before_campaign_labeled_users.csv'), index=False)
w_during_data_grouped.to_csv(os.path.join(output_path, 'during_campaign_labeled_users.csv'), index=False)
w_after_data_grouped.to_csv(os.path.join(output_path, 'after_elections_labeled_users.csv'), index=False)