In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import pandas as pd
import pickle

### Loading the data

#### Representatives data

In [2]:
# Raw list of representatives' Twitter handles, and the table assigning
# each representative to a community for each period.
representatives_handles_path = "../../../data/00_raw_data/twitter_representatives_handles_final.csv"
representatives_communities_path = "../../../data/04_communities/comms_with_political_label_csv/representatives_communities.csv"

representatives_df = pd.read_csv(representatives_handles_path)
representatives_comms = pd.read_csv(representatives_communities_path)

In [3]:
# Discard the five leading columns of the freshly loaded table.
# NOTE: this cell is not idempotent -- it rebinds representatives_df, so it
# must be run exactly once after the CSV is (re)loaded.
leading_columns = representatives_df.columns[:5]
representatives_df = representatives_df.drop(columns=leading_columns)

# Keep the IDs as a plain list, then promote them to the index
representative_IDs = representatives_df["ids"].tolist()
representatives_df = representatives_df.set_index("ids")

representatives_df.head()

Unnamed: 0_level_0,Name,Twitter-Handle,Party,Gender
ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
425752285,Bonelli Angelo,AngeloBonelli1,Alleanza Verdi Sinistra,M
417961167,Borrelli Francesco Emilio,NotizieFrance,Alleanza Verdi Sinistra,M
1228450685840220160,Dori Devis,DevisDori,Alleanza Verdi Sinistra,M
1135141640,Evi Eleonora,EleonoraEvi,Alleanza Verdi Sinistra,F
425686235,Fratoianni Nicola,NFratoianni,Alleanza Verdi Sinistra,M


#### Loading the retweet networks

In [5]:
# Load the three backbone networks (before / during / after), which are
# needed to get the indegree of each representative.
basepath = '../../../data/03_backbone_nets'

before_data = pd.read_csv(os.path.join(basepath, 'before_campaign_backbone.csv'))
during_data = pd.read_csv(os.path.join(basepath, 'electoral_campaign_backbone.csv'))
after_data = pd.read_csv(os.path.join(basepath, 'after_elections_backbone.csv'))

# Force both endpoint columns of every edge list to string
for edge_list in (before_data, during_data, after_data):
    for endpoint in ('source', 'target'):
        edge_list[endpoint] = edge_list[endpoint].astype(str)

# Report the size of each edge list
print("Before data has shape: ", before_data.shape)
print("During data has shape: ", during_data.shape)
print("After data has shape: ", after_data.shape)

Before data has shape:  (38209, 4)
During data has shape:  (56953, 4)
After data has shape:  (42864, 4)


In [6]:
# Build one weighted graph per period from its edge list.
# NOTE(review): nx.Graph() is undirected, so the source -> target direction is
# not kept; the loading cell mentions "indegree" -- confirm this is intended.
edgelist_options = dict(source='source', target='target', edge_attr='weight')

before_graph = nx.from_pandas_edgelist(before_data, create_using=nx.Graph(), **edgelist_options)
during_graph = nx.from_pandas_edgelist(during_data, create_using=nx.Graph(), **edgelist_options)
after_graph = nx.from_pandas_edgelist(after_data, create_using=nx.Graph(), **edgelist_options)

# Node and edge counts for each period
print("Before graph has: ", before_graph.number_of_nodes(), " nodes and ", before_graph.number_of_edges(), " edges")
print("During graph has: ", during_graph.number_of_nodes(), " nodes and ", during_graph.number_of_edges(), " edges")
print("After graph has: ", after_graph.number_of_nodes(), " nodes and ", after_graph.number_of_edges(), " edges")

Before graph has:  12008  nodes and  38209  edges
During graph has:  18150  nodes and  56953  edges
After graph has:  15943  nodes and  42864  edges


#### Loading community data

In [7]:
comm_path = '../../../data/04_communities'

# Load the communities for each period.
# Each file is opened in a `with` block so its handle is closed as soon as
# it has been read (the bare `pickle.load(open(...))` form never closed it).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(f'{comm_path}/before_communities_all_data.pickle', 'rb') as before_file:
    before_communities = pickle.load(before_file)

with open(f'{comm_path}/during_communities_all_data.pickle', 'rb') as during_file:
    during_communities = pickle.load(during_file)

with open(f'{comm_path}/after_communities_all_data.pickle', 'rb') as after_file:
    after_communities = pickle.load(after_file)

In [8]:
# Display the first entry of before_communities; later cells read index 0 of
# an entry as the community id and index 1 as its members.
before_communities[0]

(0,
 {'1004967978386026496',
  '1006227546952949760',
  '1007332756601425920',
  '1013879910',
  '1015989050',
  '1023465239478710272',
  '1023972842948452352',
  '1025012509',
  '1033267486500769792',
  '1033280112714756096',
  '1037049997',
  '1038177516',
  '1038909791966572544',
  '1060592551005167616',
  '1064950180410269696',
  '1073496504378290176',
  '1081290202465947648',
  '1082242342395043840',
  '1083073091327143936',
  '1084076649367437312',
  '1084450777748459520',
  '1095668360874999808',
  '1101050610466590720',
  '1101531482508722176',
  '1103360481341902848',
  '1109829834',
  '1112397484058427392',
  '112810232',
  '1130107176075321344',
  '113410319',
  '1138038249182683136',
  '113981849',
  '1139996376060895232',
  '1142667001',
  '1150384973922349056',
  '1150467950723444736',
  '1160238387753865216',
  '1161196922',
  '1162015389288714240',
  '1163020839605985280',
  '1163796132',
  '1165420249',
  '1166155885359652864',
  '1168612912056999936',
  '1186406702',


### Get the political communities

In [9]:
# Preview the first rows of the representatives' community assignments.
representatives_comms.head()

Unnamed: 0,user_id,period,community_id,community_label
0,425752285,before,2,PD;AVS
1,425752285,during,4,PD;AVS
2,425752285,after,5,PD;AVS
3,1135141640,before,2,PD;AVS
4,1135141640,during,4,PD;AVS


In [10]:
# For every period, collect the distinct community ids that contain at
# least one representative.
community_ids_by_period = representatives_comms.groupby('period')['community_id']
comms_to_consider = community_ids_by_period.apply(set).reset_index()

In [11]:
# Display the set of community ids kept for each period.
comms_to_consider

Unnamed: 0,period,community_id
0,after,"{0, 1, 2, 4, 5, 6, 7, 9, 15}"
1,before,"{0, 1, 2, 3, 4, 6}"
2,during,"{0, 1, 2, 4, 5}"


In [12]:
# Preview the first rows of the representatives table (indexed by "ids").
representatives_df.head()

Unnamed: 0_level_0,Name,Twitter-Handle,Party,Gender
ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
425752285,Bonelli Angelo,AngeloBonelli1,Alleanza Verdi Sinistra,M
417961167,Borrelli Francesco Emilio,NotizieFrance,Alleanza Verdi Sinistra,M
1228450685840220160,Dori Devis,DevisDori,Alleanza Verdi Sinistra,M
1135141640,Evi Eleonora,EleonoraEvi,Alleanza Verdi Sinistra,F
425686235,Fratoianni Nicola,NFratoianni,Alleanza Verdi Sinistra,M


### Before campaign

In [13]:
# Set of community ids recorded for the 'before' period
before_period_mask = comms_to_consider['period'] == 'before'
before_polical_comms = comms_to_consider.loc[before_period_mask, 'community_id'].values[0]
before_polical_comms

{0, 1, 2, 3, 4, 6}

In [14]:
# community id -> {node id: 1-based VoteRank position} for the 'before' graph
before_voterank = {}

# find the top-x representatives in each political community
before_top_reprs_by_comm = {}
top = 15

# Built once: set membership is O(1), whereas the ID list was previously
# scanned for every candidate node inside the loop.
representative_id_set = set(representative_IDs)

for comm in before_polical_comms:
    # Each community entry is read as (community id, member ids).
    community_entry = before_communities[comm]
    comm_id, users = community_entry[0], community_entry[1]

    print(f"Community {comm_id} has {len(users)} members")

    # VoteRank on the subgraph induced by the community members;
    # nx.voterank returns the nodes already ordered by rank.
    subgraph = before_graph.subgraph(users)
    ranked_nodes = [str(node) for node in nx.voterank(subgraph)]

    subgraph_voterank = {node: rank for rank, node in enumerate(ranked_nodes, start=1)}
    before_voterank[comm_id] = subgraph_voterank

    # Among the `top` best-ranked nodes keep only the representatives,
    # reported as (name, rank).
    top_users = [
        (representatives_df.loc[node].Name, rank)
        for rank, node in enumerate(ranked_nodes[:top], start=1)
        if node in representative_id_set
    ]

    before_top_reprs_by_comm[comm_id] = top_users

Community 0 has 406 members
Community 1 has 2057 members
Community 2 has 1832 members
Community 3 has 2827 members
Community 4 has 85 members
Community 6 has 1845 members


In [15]:
# Display, per community, the representatives found among the top-ranked nodes (before period).
before_top_reprs_by_comm

{0: [],
 1: [('Conte Giuseppe', 1)],
 2: [('Letta Enrico', 3), ('Boldrini Laura', 12)],
 3: [('Calenda Carlo', 1), ('Renzi Matteo', 3), ('Marattin Luigi', 5)],
 4: [('Berlusconi Silvio', 1),
  ('Tajani Antonio', 2),
  ("Mule' Giorgio", 6),
  ('Gasparri Maurizio', 9)],
 6: [('Meloni Giorgia', 1),
  ('Salvini Matteo', 2),
  ('Borghi Claudio', 3),
  ('Bagnai Alberto', 15)]}

### During campaign

In [16]:
# Set of community ids recorded for the 'during' period
during_period_mask = comms_to_consider['period'] == 'during'
during_polical_comms = comms_to_consider.loc[during_period_mask, 'community_id'].values[0]
during_polical_comms

{0, 1, 2, 4, 5}

In [17]:
# community id -> {node id: 1-based VoteRank position} for the 'during' graph
during_voterank = {}

# find the top-x representatives in each political community
during_top_reprs_by_comm = {}
top = 15

# Built once: set membership is O(1), whereas the ID list was previously
# scanned for every candidate node inside the loop.
representative_id_set = set(representative_IDs)

for comm in during_polical_comms:
    # Each community entry is read as (community id, member ids).
    community_entry = during_communities[comm]
    comm_id, users = community_entry[0], community_entry[1]

    print(f"Community {comm_id} has {len(users)} members")

    # VoteRank on the subgraph induced by the community members;
    # nx.voterank returns the nodes already ordered by rank.
    subgraph = during_graph.subgraph(users)
    ranked_nodes = [str(node) for node in nx.voterank(subgraph)]

    subgraph_voterank = {node: rank for rank, node in enumerate(ranked_nodes, start=1)}
    during_voterank[comm_id] = subgraph_voterank

    # Among the `top` best-ranked nodes keep only the representatives,
    # reported as (name, rank).
    top_users = [
        (representatives_df.loc[node].Name, rank)
        for rank, node in enumerate(ranked_nodes[:top], start=1)
        if node in representative_id_set
    ]

    during_top_reprs_by_comm[comm_id] = top_users

Community 0 has 2874 members
Community 1 has 478 members
Community 2 has 2568 members
Community 4 has 2700 members
Community 5 has 3942 members


In [18]:
# Display, per community, the representatives found among the top-ranked nodes (during period).
during_top_reprs_by_comm

{0: [('Conte Giuseppe', 1)],
 1: [],
 2: [('Meloni Giorgia', 1), ('Salvini Matteo', 2), ('Borghi Claudio', 4)],
 4: [('Letta Enrico', 2), ('Cottarelli Carlo', 10), ('Fratoianni Nicola', 15)],
 5: [('Calenda Carlo', 1), ('Renzi Matteo', 2), ('Marattin Luigi', 9)]}

### After campaign

In [19]:
# Set of community ids recorded for the 'after' period
after_period_mask = comms_to_consider['period'] == 'after'
after_polical_comms = comms_to_consider.loc[after_period_mask, 'community_id'].values[0]
after_polical_comms

{0, 1, 2, 4, 5, 6, 7, 9, 15}

In [20]:
# community id -> {node id: 1-based VoteRank position} for the 'after' graph
after_voterank = {}

# find the top-x representatives in each political community
after_top_reprs_by_comm = {}
top = 15

# Built once: set membership is O(1), whereas the ID list was previously
# scanned for every candidate node inside the loop.
representative_id_set = set(representative_IDs)

for comm in after_polical_comms:
    # Each community entry is read as (community id, member ids).
    community_entry = after_communities[comm]
    comm_id, users = community_entry[0], community_entry[1]

    print(f"Community {comm_id} has {len(users)} members")

    # VoteRank on the subgraph induced by the community members;
    # nx.voterank returns the nodes already ordered by rank.
    subgraph = after_graph.subgraph(users)
    ranked_nodes = [str(node) for node in nx.voterank(subgraph)]

    subgraph_voterank = {node: rank for rank, node in enumerate(ranked_nodes, start=1)}
    after_voterank[comm_id] = subgraph_voterank

    # Among the `top` best-ranked nodes keep only the representatives,
    # reported as (name, rank).
    top_users = [
        (representatives_df.loc[node].Name, rank)
        for rank, node in enumerate(ranked_nodes[:top], start=1)
        if node in representative_id_set
    ]

    after_top_reprs_by_comm[comm_id] = top_users

Community 0 has 1418 members
Community 1 has 1940 members
Community 2 has 2065 members
Community 4 has 36 members
Community 5 has 421 members
Community 6 has 2244 members
Community 7 has 3870 members
Community 9 has 27 members
Community 15 has 2435 members


In [21]:
# Display, per community, the representatives found among the top-ranked nodes (after period).
after_top_reprs_by_comm

{0: [],
 1: [('Letta Enrico', 5)],
 2: [('Conte Giuseppe', 1)],
 4: [],
 5: [('Fratoianni Nicola', 15)],
 6: [('Meloni Giorgia', 1), ('Tajani Antonio', 11)],
 7: [('Borghi Claudio', 5), ('Salvini Matteo', 7)],
 9: [],
 15: [('Calenda Carlo', 1), ('Renzi Matteo', 12), ('Marattin Luigi', 15)]}

### Comparison

In [22]:
# Comparison: show the 'before' result again.
before_top_reprs_by_comm

{0: [],
 1: [('Conte Giuseppe', 1)],
 2: [('Letta Enrico', 3), ('Boldrini Laura', 12)],
 3: [('Calenda Carlo', 1), ('Renzi Matteo', 3), ('Marattin Luigi', 5)],
 4: [('Berlusconi Silvio', 1),
  ('Tajani Antonio', 2),
  ("Mule' Giorgio", 6),
  ('Gasparri Maurizio', 9)],
 6: [('Meloni Giorgia', 1),
  ('Salvini Matteo', 2),
  ('Borghi Claudio', 3),
  ('Bagnai Alberto', 15)]}

In [23]:
# Comparison: show the 'during' result again.
during_top_reprs_by_comm

{0: [('Conte Giuseppe', 1)],
 1: [],
 2: [('Meloni Giorgia', 1), ('Salvini Matteo', 2), ('Borghi Claudio', 4)],
 4: [('Letta Enrico', 2), ('Cottarelli Carlo', 10), ('Fratoianni Nicola', 15)],
 5: [('Calenda Carlo', 1), ('Renzi Matteo', 2), ('Marattin Luigi', 9)]}

In [24]:
# Comparison: show the 'after' result again.
after_top_reprs_by_comm

{0: [],
 1: [('Letta Enrico', 5)],
 2: [('Conte Giuseppe', 1)],
 4: [],
 5: [('Fratoianni Nicola', 15)],
 6: [('Meloni Giorgia', 1), ('Tajani Antonio', 11)],
 7: [('Borghi Claudio', 5), ('Salvini Matteo', 7)],
 9: [],
 15: [('Calenda Carlo', 1), ('Renzi Matteo', 12), ('Marattin Luigi', 15)]}

In [25]:
repr_data = {'before': before_top_reprs_by_comm, 'during': during_top_reprs_by_comm, 'after': after_top_reprs_by_comm}

# One record per (period, representative). Each record carries only the
# voterank column of its own period; the other periods are filled in when
# the records are later grouped by id.
data_to_df = []

for period, data in repr_data.items():
    print(period)
    for comm_id, reprs in data.items():
        print(comm_id)
        # The loop variable is `repr_entry`, not `repr`: at notebook top level
        # a variable named `repr` rebinds the builtin repr() for all later cells.
        for repr_entry in reprs:
            print(repr_entry)

            repr_name, repr_rank = repr_entry
            # Reverse lookup by name: takes the first row whose Name matches.
            repr_id = representatives_df[representatives_df['Name'] == repr_name].index[0]

            new_row = {'ids': repr_id, 'name': repr_name, f'voterank_{period}': repr_rank}
            data_to_df.append(new_row)

        print("---")
    print("---------------------------------")



before
0
---
1
('Conte Giuseppe', 1)
---
2
('Letta Enrico', 3)
('Boldrini Laura', 12)
---
3
('Calenda Carlo', 1)
('Renzi Matteo', 3)
('Marattin Luigi', 5)
---
4
('Berlusconi Silvio', 1)
('Tajani Antonio', 2)
("Mule' Giorgio", 6)
('Gasparri Maurizio', 9)
---
6
('Meloni Giorgia', 1)
('Salvini Matteo', 2)
('Borghi Claudio', 3)
('Bagnai Alberto', 15)
---
---------------------------------
during
0
('Conte Giuseppe', 1)
---
1
---
2
('Meloni Giorgia', 1)
('Salvini Matteo', 2)
('Borghi Claudio', 4)
---
4
('Letta Enrico', 2)
('Cottarelli Carlo', 10)
('Fratoianni Nicola', 15)
---
5
('Calenda Carlo', 1)
('Renzi Matteo', 2)
('Marattin Luigi', 9)
---
---------------------------------
after
0
---
1
('Letta Enrico', 5)
---
2
('Conte Giuseppe', 1)
---
4
---
5
('Fratoianni Nicola', 15)
---
6
('Meloni Giorgia', 1)
('Tajani Antonio', 11)
---
7
('Borghi Claudio', 5)
('Salvini Matteo', 7)
---
9
---
15
('Calenda Carlo', 1)
('Renzi Matteo', 12)
('Marattin Luigi', 15)
---
---------------------------------


In [26]:
# Fixed column layout: identifiers first, then one voterank column per period.
voterank_columns = ['ids', 'name', 'voterank_before', 'voterank_during', 'voterank_after']
repr_voterank = pd.DataFrame(data_to_df, columns=voterank_columns)

In [27]:
# Collapse the per-period records into one row per representative; 'first'
# takes, for each column, the first non-null value within the group.
first_value_per_column = {
    'name': 'first',
    'voterank_before': 'first',
    'voterank_during': 'first',
    'voterank_after': 'first',
}
repr_voterank = repr_voterank.groupby('ids').agg(first_value_per_column).reset_index()

In [28]:
# Display the per-representative voterank table (one row per id, one column per period).
repr_voterank

Unnamed: 0,ids,name,voterank_before,voterank_during,voterank_after
0,130537001,Meloni Giorgia,1.0,1.0,1.0
1,18762875,Renzi Matteo,3.0,2.0,12.0
2,212364488,Mule' Giorgio,6.0,,
3,221902171,Boldrini Laura,12.0,,
4,2416067982,Calenda Carlo,1.0,1.0,1.0
5,270839361,Salvini Matteo,2.0,2.0,7.0
6,337767301,Borghi Claudio,3.0,4.0,5.0
7,413217587,Gasparri Maurizio,9.0,,
8,419622371,Letta Enrico,3.0,2.0,5.0
9,425686235,Fratoianni Nicola,,15.0,15.0
