In [2]:
import networkx as nx
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from multiprocessing.dummy import Pool as ThreadPool
import community as community_louvain
import builder

In [3]:
# Load the whole network from the project-local `builder` module.
# Later cells index each element of `users` as u[0] and u[1]['account'], and treat
# `interactions` as a DataFrame with 'source', 'target', 'total' and 'country' columns.
# NOTE(review): the exact return types are not visible here — confirm against `builder`.
users, interactions = builder.full_network()

In [28]:
# Load the per-user table from the project-local `builder` module.
# Later cells read its 'userid', 'country', 'follower_count', 'following_count',
# 'account_creation_date' and 'account_language' columns.
# NOTE(review): this cell carries execution count 28 although it sits near the top, and
# later cells overwrite `user_info` — re-run it before repeating the analysis.
user_info = builder.user_info()

In [5]:
# Core nodes are the entries of `users` whose attribute dict has account == 'removed'.
core = [node for node in users if node[1]['account'] == 'removed']
core_users = [node[0] for node in core]

# An interaction is part of the core only when both of its endpoints are core users.
source_in_core = interactions['source'].isin(core_users)
target_in_core = interactions['target'].isin(core_users)
core_interactions = interactions[source_in_core & target_in_core]

In [6]:
# Per-user interaction totals (summed over every row where the user is the source),
# joined onto the user table and indexed by userid.
#
# The column selection on the groupby must be a list: the bare-tuple form
# groupby(...)['a', 'b'] is rejected by pandas 2.x.
interaction_cols = ['retweets', 'replies', 'mentions', 'total']
core_totals = core_interactions.groupby('source')[interaction_cols].sum().reset_index()

# Built as one chain (no inplace mutation) so re-running the cell always starts from
# `user_info` and `core_interactions` and yields the same frame.
core_info = (
    user_info.merge(core_totals, left_on='userid', right_on='source')
    .drop(['source', 'total'], axis=1)
    .astype({'follower_count': float, 'following_count': float})
    .set_index('userid')
)

# Core Graph

In [None]:
# Users that appear as the source of at least one self-loop (source == target).
is_self_loop = core_interactions['source'] == core_interactions['target']
self_contact = core_interactions.loc[is_self_loop, 'source'].unique()

In [None]:
# Narrow `self_contact` to users whose ONLY interactions are self-loops, i.e. users that
# never appear as source or target of an interaction with somebody else.
#
# The set of "connected" users is computed once; previously the non-self-loop frame and
# its unique values were rebuilt for every candidate user.
non_self = core_interactions[core_interactions['source'] != core_interactions['target']]
connected_users = set(non_self['source']) | set(non_self['target'])
self_contact = [u for u in self_contact if u not in connected_users]

In [29]:
# Drop every interaction whose source is one of the self-contact-only users.
from_connected_user = ~core_interactions['source'].isin(self_contact)
core_interactions = core_interactions.loc[from_connected_user]

In [30]:
# Undirected graph over the core interactions; each edge stores its count under 'total'.
# NOTE(review): nx.Graph holds one edge per unordered pair, so if both (a, b) and (b, a)
# rows exist the later row's 'total' replaces the earlier one rather than being summed —
# confirm this is intended.
core_edges = core_interactions[['source', 'target', 'total']].values
undirected_core = nx.Graph()
undirected_core.add_edges_from(
    (src, dst, {'total': weight}) for src, dst, weight in core_edges
)

### Louvain Communities

In [32]:
# Detect communities on the core graph and tabulate them as (userid, community) rows.
# NOTE(review): `louvain` is neither defined nor imported in the visible cells (only
# `community as community_louvain` is imported, and it is never used) — on a fresh kernel
# this raises NameError. Restore the helper's definition above this cell.
part, mod = louvain(undirected_core)
partitions = (
    pd.Series(part)
    .rename_axis('userid')
    .reset_index(name='community')
)

In [33]:
# Restrict the user table to users kept in the graph, strip the profile columns that are
# not needed for the community breakdown, and attach each user's community id.
# NOTE(review): this overwrites `user_info`, so the cell cannot be re-run without first
# reloading the table (the dropped columns will be missing the second time).
profile_cols = ['follower_count', 'following_count', 'account_creation_date', 'account_language']
not_self_only = ~user_info['userid'].isin(self_contact)
user_info = (
    user_info.loc[not_self_only]
    .drop(profile_cols, axis=1)
    .merge(partitions, on='userid')
)

In [36]:
# Users per (community, country), pivoted so each country is a column; combinations with
# no users are filled with 0.
country_counts = (
    user_info.groupby(['community', 'country'])['userid']
    .count()
    .unstack('country')
    .fillna(0)
)
country_counts['total'] = country_counts['iran'] + country_counts['russia']

# Keep only communities with more than 5 members across the two countries.
community_info = country_counts[country_counts['total'] > 5]
communities = community_info.index.values

In [38]:
# Give each retained community a compact label 0..n-1, in the order of `communities`.
mapping = {community_id: label for label, community_id in enumerate(communities)}
# `com` is the next unused label; later cells use it for the catch-all groups.
com = len(communities)

In [39]:
# Members of the retained communities get their compact label from `mapping`; users whose
# community has no entry in `mapping` come out as NaN and are filled in below.
user_info['label'] = user_info['community'].map(mapping)

# Everyone outside the retained communities is lumped into two catch-all labels, one per
# country. The label ids are derived from len(communities) rather than by incrementing
# `com` in place, so re-running this cell no longer shifts the labels upward each time.
outside_retained = ~user_info['community'].isin(communities)
iran_label = len(communities)
russia_label = iran_label + 1
user_info.loc[outside_retained & (user_info['country'] == 'iran'), 'label'] = iran_label
user_info.loc[outside_retained & (user_info['country'] == 'russia'), 'label'] = russia_label

# Leave `com` at the value the original in-place increment produced on a first run.
com = russia_label

In [41]:
# userid -> label lookup used to colour the graph nodes.
#
# The mask is negated on purpose: the self-contact-only users were removed from both the
# interactions and `user_info` in earlier cells, so selecting rows that ARE in
# `self_contact` produced an empty mapping and the plot drew no nodes.
in_graph = ~user_info['userid'].isin(self_contact)
mapping = user_info.loc[in_graph].set_index('userid')['label'].to_dict()

In [43]:
# Draw the core graph with a Kamada-Kawai layout, colouring nodes by community label,
# and write the figure to kamada_kawai_communities.pdf.
#
# One explicit figure/axes pair is used for everything: the earlier
# plt.figure(...) + plt.figure(1) pair could address two different figures when other
# figures were already open in the kernel.
fig, ax = plt.subplots(figsize=(20, 20), dpi=80)
pos = nx.kamada_kawai_layout(undirected_core)
ec = nx.draw_networkx_edges(undirected_core, pos, alpha=0.2, ax=ax)
# `with_labels` is not a documented parameter of draw_networkx_nodes (it belongs to
# draw_networkx) and newer networkx versions reject unknown keywords, so it is omitted;
# this call never draws labels anyway.
nc = nx.draw_networkx_nodes(
    undirected_core,
    pos,
    nodelist=list(mapping.keys()),
    node_color=list(mapping.values()),
    node_size=10,
    cmap=plt.cm.jet,
    ax=ax,
)
ax.set_axis_off()
ax.set_title('Russian & Iranian Suspended Twitter Accounts')
fig.savefig('kamada_kawai_communities.pdf', bbox_inches="tight")
# Close this specific figure so repeated runs do not accumulate open figures.
plt.close(fig)

NameError: name 'graph' is not defined

# Full Graph

In [None]:
# Undirected graph over ALL interactions; each edge stores its count under 'total'.
# NOTE(review): as with any nx.Graph, a later row for the same unordered pair replaces
# the earlier edge's 'total' instead of adding to it — confirm this is intended.
all_edges = interactions[['source', 'target', 'total']].values
full_graph = nx.Graph()
full_graph.add_edges_from(
    (src, dst, {'total': weight}) for src, dst, weight in all_edges
)

In [None]:
# Run community detection on the full graph; the result is unpacked into `part` and `mod`.
# A later cell builds a (userid, community) table from `part` via pd.Series(part).
# NOTE(review): `louvain` is neither defined nor imported in the visible cells (only
# `community as community_louvain` is imported) — on a fresh kernel this raises NameError;
# confirm where the helper is supposed to come from.
part, mod = louvain(full_graph)

In [None]:
# Start the full-graph analysis from a freshly loaded user table.
#
# By this point `user_info` has already had these four columns dropped by the core-graph
# section and carries that section's 'community' / 'label' columns, so dropping in place
# raised KeyError on a top-to-bottom run and the later merge with `partitions` would have
# collided on 'community'. Reloading from `builder` makes this section self-contained.
user_info = builder.user_info().drop(
    ['follower_count', 'following_count', 'account_creation_date', 'account_language'],
    axis=1,
)

In [None]:
# Distinct (user, country) pairs for every user that appears as an interaction source.
interaction_info = (
    interactions[['source', 'country']]
    .drop_duplicates()
    .rename(columns={'source': 'userid'})
)

In [None]:
# Distinct (user, country) pairs for every user that appears as an interaction target.
# NOTE(review): 'country' is taken from the interaction row; presumably it describes the
# dataset the row came from rather than the target user — confirm.
i2 = (
    interactions[['target', 'country']]
    .drop_duplicates()
    .rename(columns={'target': 'userid'})
)

In [None]:
# Stack the source-side and target-side (userid, country) pairs and de-duplicate them.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
interaction_info = pd.concat([interaction_info, i2]).drop_duplicates()

In [None]:
# Extend the user table with every user seen in the interactions.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): drop_duplicates compares whole rows, so a user already present in
# `user_info` with extra non-null columns is not collapsed with the (userid, country) row
# added here — confirm whether de-duplicating on 'userid' alone is what is wanted.
user_info = pd.concat([user_info, interaction_info]).drop_duplicates()

In [None]:
# Tabulate the community assignment in `part` as (userid, community) rows.
partitions = (
    pd.Series(part)
    .rename_axis('userid')
    .reset_index(name='community')
)

In [None]:
# Attach each user's community id, then count users per (community, country) with one
# column per country; combinations with no users are filled with 0.
user_info = user_info.merge(partitions, on='userid')
country_counts = (
    user_info.groupby(['community', 'country'])['userid']
    .count()
    .unstack('country')
    .fillna(0)
)
country_counts['total'] = country_counts['iran'] + country_counts['russia']

# Keep only communities with more than 200 members across the two countries.
community_info = country_counts[country_counts['total'] > 200]
communities = community_info.index.values

In [None]:
# Give each retained community a compact label 0..n-1, in the order of `communities`.
mapping = {community_id: label for label, community_id in enumerate(communities)}
# `com` is the next unused label; later cells use it for the catch-all groups.
com = len(communities)

In [None]:
# Translate each user's community id into its compact label via `mapping`.
# Series.map with a dict yields NaN for communities that have no entry in `mapping`;
# the following cell assigns per-country catch-all labels to those users.
user_info['label'] = user_info['community'].map(mapping)

In [None]:
# Users outside the retained communities are lumped into two catch-all labels, one per
# country. The label ids are derived from len(communities) rather than by incrementing
# `com` in place, so re-running this cell no longer shifts the labels upward each time.
outside_retained = ~user_info['community'].isin(communities)
iran_label = len(communities)
russia_label = iran_label + 1
user_info.loc[outside_retained & (user_info['country'] == 'iran'), 'label'] = iran_label
user_info.loc[outside_retained & (user_info['country'] == 'russia'), 'label'] = russia_label

# Leave `com` at the value the original in-place increment produced on a first run.
com = russia_label

In [None]:
# userid -> label lookup for every user in the table.
# NOTE(review): this rebinds `mapping`, which until now held community -> label.
labels_by_user = user_info.set_index('userid')['label']
mapping = labels_by_user.to_dict()