In [None]:
import os
import sys

# Make the parent of the current working directory importable (presumably so
# the project-local `src` package imported in the next cell can be found).
# `sys` is imported explicitly instead of being reached through the
# incidental `os.sys` attribute, and the path is added only once so that
# re-running this cell does not keep growing `sys.path`.
project_root = os.path.dirname(os.path.abspath('.'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
import networkx as nx
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
from src import builder, network_analysis as na
# Apply the 'ggplot' style sheet to the figures created in this notebook.
plt.style.use('ggplot')

In [None]:
# Unpack the four objects returned by the project's network builder.  Only
# `users` and `interactions` are read by the cells visible in this notebook.
users, interactions, hashtags, tweets = builder.full_network()

In [None]:
# Per-account table.  Later cells rely on it having a 'userid' column as well
# as 'country', 'follower_count', 'following_count',
# 'account_creation_date' and 'account_language'.
user_info = builder.user_info()

In [None]:
# Keep the user entries whose attribute dict marks the account as 'removed',
# then restrict the interactions to rows where both ends are such accounts.
core = [node for node in users if node[1]['account'] == 'removed']
core_users = [node[0] for node in core]

source_in_core = interactions['source'].isin(core_users)
target_in_core = interactions['target'].isin(core_users)
core_interactions = interactions[source_in_core & target_in_core]

In [None]:
# Total outgoing activity per source account inside the core network.
# The columns are selected with a list (`[[...]]`): selecting several columns
# from a groupby with a bare tuple of names is no longer supported by pandas.
core_activity = (core_interactions
                 .groupby('source')
                 [['retweets', 'replies', 'mentions', 'total']]
                 .sum()
                 .reset_index())

# Attach the activity counts to the account table, drop the helper columns,
# make the follower/following counts numeric and index the result by user id.
# Built as one chain (no `inplace`) so the cell can be re-run safely.
core_info = (user_info
             .merge(core_activity, left_on='userid', right_on='source')
             .drop(['source', 'total'], axis=1)
             .astype({'follower_count': float, 'following_count': float})
             .set_index('userid'))

In [None]:
# Drop the notebook-level reference to the full interaction table now that
# the core subset has been extracted.
# NOTE(review): the cells under "Full Graph" further down read `interactions`
# again, so it has to be re-created before they can run on a fresh kernel.
del interactions

# Core Graph

In [None]:
# Split the sources by whether the row is a self-interaction
# (source == target) or an interaction with somebody else.
is_self_loop = core_interactions['source'] == core_interactions['target']
self_sources = core_interactions.loc[is_self_loop, 'source'].unique()
other_contact = core_interactions.loc[~is_self_loop, 'source'].unique()

# Accounts that only ever interact with themselves.  Membership is tested
# against a set (constant-time lookups) and the filter is applied once; the
# original repeated the identical filter on two consecutive lines.
other_contact_set = set(other_contact)
self_contact = [u for u in self_sources if u not in other_contact_set]

In [None]:
# Discard every row whose source is one of the self-only accounts.
sourced_by_self_only = core_interactions['source'].isin(self_contact)
core_interactions = core_interactions.loc[~sourced_by_self_only]

In [None]:
# Build an undirected graph from (source, target, total) triples.
# `weight='total'` stores the third value under the edge attribute 'total'.
# NOTE(review): confirm that downstream consumers read 'total' rather than
# the conventional 'weight' attribute.
core_edges = core_interactions[['source', 'target', 'total']].values
undirected_core = nx.Graph()
undirected_core.add_weighted_edges_from(core_edges, weight='total')

In [None]:
# Draw the core graph with a Kamada-Kawai layout and save it as a PNG.
# The figure handle is taken straight from `plt.figure(...)`; looking it up
# again with `plt.figure(1)` only returns the new canvas when no other figure
# is open.  `nx.draw_kamada_kawai` draws on the current axes and returns
# nothing, so its result is no longer bound to `pos`.
fig = plt.figure(figsize=(20, 20), dpi=80)
nx.draw_kamada_kawai(undirected_core, node_size=5, node_color='#ffffff')
plt.axis('off')
fig.savefig('core.png', bbox_inches="tight", transparent=True)
plt.close(fig)

### Louvain Communities

In [None]:
# Run the project's Louvain helper on the core graph.  `part` is turned into
# a two-column frame (its keys become 'userid', its values 'community');
# the second return value is kept as `mod`.
part, mod = na.louvain(undirected_core)
partitions = (pd.Series(part)
              .rename_axis('userid')
              .reset_index(name='community'))

In [None]:
# Remove the self-only accounts and the profile columns that are not needed
# for the community analysis, then attach each account's community id.
not_self_only = ~user_info['userid'].isin(self_contact)
profile_columns = ['follower_count', 'following_count',
                   'account_creation_date', 'account_language']
user_info = (user_info
             .loc[not_self_only]
             .drop(columns=profile_columns)
             .merge(partitions, on='userid'))

In [None]:
# Number of accounts per community, one column per country; combinations
# that do not occur are counted as 0.
community_info = (user_info
                  .groupby(['country', 'community'])['userid']
                  .count()
                  .unstack('country')
                  .fillna(0))
community_info['total'] = community_info[['iran', 'russia']].sum(axis=1)

In [None]:
# Histogram of community sizes before the small communities are merged.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 12))
community_info['total'].plot.hist(ax=ax)
ax.set_title('Distribution of Community Size - Before Consolidation')
ax.set_xlabel('Community Size')
fig.savefig('communities_before_consolidation.png', bbox_inches='tight')
plt.close(fig)

In [None]:
# Communities with more than 50 accounts keep an id of their own.
is_large = community_info['total'] > 50
communities = community_info.loc[is_large].index.values

In [None]:
# Number the large communities consecutively from 0; `com` ends up as the
# next unused id.
mapping = {community: idx for idx, community in enumerate(communities)}
com = len(communities)

In [None]:
# Accounts in a large community take that community's consecutive id.
user_info['label'] = user_info['community'].map(mapping)

# Accounts from the remaining communities are pooled per country: the
# Iranian ones get the next free id, the Russian ones the id after that.
in_small_community = ~user_info['community'].isin(communities)
from_iran = user_info['country'] == 'iran'
from_russia = user_info['country'] == 'russia'
user_info.loc[in_small_community & from_iran, 'label'] = com
com += 1
user_info.loc[in_small_community & from_russia, 'label'] = com

In [None]:
# Histogram of group sizes after the small communities have been merged into
# the per-country groups.  The axes are passed explicitly (as in the
# "before consolidation" cell) rather than relying on pyplot's implicit
# current axes, and the figure is closed by handle.
fig, ax = plt.subplots(1, 1, figsize=(15, 12))
label_sizes = user_info.groupby(['label'])['userid'].count()
label_sizes.plot(kind='hist', ax=ax)
ax.set_title('Distribution of Community Size - After Consolidation')
ax.set_xlabel('Community Size')
fig.savefig('communities_after_consolidation.png', bbox_inches='tight')
plt.close(fig)

In [None]:
# userid -> label lookup, then keep only the graph nodes that have an entry
# in it and take the induced subgraph.
label_by_user = user_info.set_index('userid')['label']
mapping = label_by_user.to_dict()
nodes = [node for node in undirected_core.nodes() if node in mapping]
undirected_plot = undirected_core.subgraph(nodes)

In [None]:
# Remember the current text colour, then switch text to white.
old_title = mpl.rcParams['text.color']
mpl.rcParams.update({'text.color': 'w'})

In [None]:
# Draw the core subgraph with nodes coloured by their consolidated label.
# The figure handle comes straight from `plt.figure(...)`; fetching
# "figure 1" afterwards only yields the new canvas when nothing else is open.
fig = plt.figure(figsize=(20, 20), dpi=80)
pos = nx.kamada_kawai_layout(undirected_plot)

# Colours are looked up node by node so that they line up with `nodelist`.
# `list(mapping.values())` follows the row order of `user_info`, not the
# node order of the graph, which would paint nodes with other nodes' labels.
node_labels = [mapping[node] for node in nodes]
drn = nx.draw_networkx_nodes(undirected_plot, pos=pos, nodelist=nodes,
                             node_color=node_labels, node_size=5,
                             cmap=plt.cm.tab20)
dre = nx.draw_networkx_edges(undirected_plot, pos=pos, alpha=0.2, edge_color='#ffffff')
plt.axis('off')
plt.title('Russian & Iranian Suspended Twitter Accounts Grouped by Community')
fig.savefig('core_communities.png', bbox_inches="tight", transparent=True)
plt.close(fig)

In [None]:
# Put back the text colour that was saved before the community plot.
mpl.rcParams.update({'text.color': old_title})

# Full Graph

In [None]:
# `interactions` was deleted once the core subset had been extracted, so the
# network is loaded again here; without this the cell fails with a NameError
# on a fresh "Restart & Run All".
users, interactions, hashtags, tweets = builder.full_network()

# Undirected graph over every interaction; `weight='total'` stores the third
# value of each triple under the edge attribute 'total'.
full_graph = nx.Graph()
full_graph.add_weighted_edges_from(interactions[['source','target','total']].values, weight='total')

In [None]:
# The Louvain helper lives in the `network_analysis` module imported as `na`;
# a bare `louvain` is not defined anywhere in this notebook.
part, mod = na.louvain(full_graph)

In [None]:
# Start again from the unmodified account table.  By this point `user_info`
# has already lost these four columns and gained the 'community' and 'label'
# columns of the core analysis, so dropping in place would raise a KeyError
# and the later merge with the new partitions would collide on 'community'.
user_info = (builder.user_info()
             .drop(['follower_count', 'following_count',
                    'account_creation_date', 'account_language'], axis=1))

In [None]:
# Distinct (source, country) pairs, with the source column renamed to
# 'userid'.
interaction_info = (interactions[['source', 'country']]
                    .drop_duplicates()
                    .rename(columns={'source': 'userid'}))

In [None]:
# Distinct (target, country) pairs, with the target column renamed to
# 'userid'.
i2 = (interactions[['target', 'country']]
      .drop_duplicates()
      .rename(columns={'target': 'userid'}))

In [None]:
# Stack the source-side and target-side pairs and de-duplicate.
# `pd.concat` replaces `DataFrame.append`, which pandas 2.0 removed.
interaction_info = pd.concat([interaction_info, i2]).drop_duplicates()

In [None]:
# Add the accounts seen in the interactions to the account table and
# de-duplicate.  `pd.concat` replaces `DataFrame.append`, which pandas 2.0
# removed.
user_info = pd.concat([user_info, interaction_info]).drop_duplicates()

In [None]:
# Turn the Louvain result into a two-column frame: its keys become 'userid'
# and its values 'community'.
partitions = (pd.Series(part)
              .rename_axis('userid')
              .reset_index(name='community'))

In [None]:
# Attach the community ids, count accounts per community and country
# (missing combinations count as 0), and keep only the communities with
# more than 200 accounts.
user_info = user_info.merge(partitions, on='userid')
community_info = (user_info
                  .groupby(['community', 'country'])['userid']
                  .count()
                  .unstack('country')
                  .fillna(0))
community_info['total'] = community_info[['iran', 'russia']].sum(axis=1)
community_info = community_info.loc[community_info['total'] > 200]
communities = community_info.index.values

In [None]:
# Number the retained communities consecutively from 0; `com` ends up as the
# next unused id.
mapping = {community: idx for idx, community in enumerate(communities)}
com = len(communities)

In [None]:
# Translate each account's community id into its consecutive label;
# communities missing from `mapping` yield NaN at this point.
consolidated_label = user_info['community'].map(mapping)
user_info['label'] = consolidated_label

In [None]:
# Accounts outside the retained communities are pooled per country: the
# Iranian ones get the next free id, the Russian ones the id after that.
in_small_community = ~user_info['community'].isin(communities)
from_iran = user_info['country'] == 'iran'
from_russia = user_info['country'] == 'russia'
user_info.loc[in_small_community & from_iran, 'label'] = com
com += 1
user_info.loc[in_small_community & from_russia, 'label'] = com

In [None]:
# userid -> label lookup for the full network.
label_by_user = user_info.set_index('userid')['label']
mapping = label_by_user.to_dict()