In [1]:
import os
import sys

# Make the parent of the current working directory importable so that the
# `from src import ...` statement in the next cell can resolve.
# `sys` is imported directly instead of going through the undocumented
# `os.sys` attribute, and the guard keeps re-runs of this cell from piling
# duplicate entries onto sys.path.
project_root = os.path.dirname(os.path.abspath('.'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import networkx as nx
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
from src import builder, network_analysis as na
plt.style.use('ggplot')

In [3]:
# Load the four pieces returned by builder.full_network().
# Cells below treat `users` as an iterable of (userid, attribute-dict) pairs
# and `interactions` as a DataFrame with at least 'source' and 'target'
# columns; `hashtags` and `tweets` are not used in the cells shown here.
users, interactions, hashtags, tweets = builder.full_network()

In [4]:
# Per-user metadata table. Cells below rely on it being a DataFrame with
# 'userid', 'country', 'follower_count', 'following_count',
# 'account_creation_date' and 'account_language' columns.
user_info = builder.user_info()

In [5]:
# "Core" users are the nodes whose attribute dict marks the account as removed.
core = [node for node in users if node[1]['account'] == 'removed']
core_users = [node[0] for node in core]

# Keep only interactions where both endpoints are core users.
source_is_core = interactions['source'].isin(core_users)
target_is_core = interactions['target'].isin(core_users)
core_interactions = interactions[source_is_core & target_is_core]

In [6]:
# Per-source totals of each interaction type among core users.
# The columns are selected with a *list* (double brackets): selecting several
# groupby columns with a bare tuple was deprecated and raises in pandas 2.x.
interaction_totals = (core_interactions
                      .groupby('source')
                      [['retweets', 'replies', 'mentions', 'total']]
                      .sum()
                      .reset_index())

# Attach the totals to the user metadata, drop the helper columns, make the
# follower/following counts numeric and index the result by user id.
# Written as one chain (no inplace=True) so the cell can be re-run safely.
core_info = (user_info
             .merge(interaction_totals, left_on='userid', right_on='source')
             .drop(columns=['source', 'total'])
             .astype({'follower_count': float, 'following_count': float})
             .set_index('userid'))

In [7]:
# Drop the reference to the full interactions frame; the cells below only
# work with the `core_interactions` subset.
del interactions

# Core Graph

In [8]:
# Split interaction sources into users that interacted with themselves and
# users that interacted with somebody else.
is_self_loop = core_interactions['source'] == core_interactions['target']
self_contact = core_interactions.loc[is_self_loop, 'source'].unique()
other_contact = core_interactions.loc[~is_self_loop, 'source'].unique()

# Keep only users whose outgoing interactions are exclusively self-directed.
# Membership is tested against a set (constant time) rather than scanning the
# NumPy array per user; the original ran this same filter twice in a row,
# which was a no-op the second time.
users_with_other_contact = set(other_contact)
self_contact = [u for u in self_contact if u not in users_with_other_contact]

In [9]:
# Remove every interaction whose source only ever interacted with itself.
from_self_only_user = core_interactions['source'].isin(self_contact)
core_interactions = core_interactions.loc[~from_self_only_user]

In [10]:
# Core nodes that still take part in at least one remaining interaction,
# either as source or as target. The set of participating ids is built once;
# the original recomputed both `.unique()` arrays for every node.
active_users = set(core_interactions['source']) | set(core_interactions['target'])
core_self = [u for u in core if u[0] in active_users]

In [11]:
# Build the undirected interaction graph over the remaining core users,
# weighting each edge by the 'total' interaction count.
# NOTE(review): nx.Graph keeps a single edge per node pair, so if the frame
# holds both (a, b) and (b, a) rows the later row's weight replaces the
# earlier one instead of being summed. Confirm this is intended.
undirected_core = nx.Graph()
undirected_core.add_nodes_from(core_self)
weighted_edges = core_interactions[['source', 'target', 'total']].to_numpy()
undirected_core.add_weighted_edges_from(weighted_edges, weight='total')

In [None]:
# Draw the core graph with a Kamada-Kawai layout and save it to disk.
# Hold on to the figure that was just created instead of re-fetching
# "figure 1": if an earlier figure is still open (e.g. after an interrupted
# cell) plt.figure(1) would switch to that one and drawing would land on the
# wrong canvas.
fig = plt.figure(figsize=(20, 20), dpi=80)
# draw_kamada_kawai draws onto the current axes and returns None, so there is
# nothing useful to bind its result to.
nx.draw_kamada_kawai(undirected_core, node_size=5)
plt.axis('off')
fig.savefig('../img/core.png', bbox_inches="tight")
plt.close(fig)

### Louvain Communities

In [12]:
# Run the project's Louvain helper and tabulate the node -> community
# assignment as a two-column frame ('userid', 'community').
part, mod = na.louvain(undirected_core)
partitions = (pd.Series(part)
              .rename_axis('userid')
              .reset_index(name='community'))

In [13]:
# Drop the self-only users and the metadata columns that are not needed for
# the community analysis, then attach each user's community id.
# NOTE: this rebinds `user_info`, so the cell is not safe to run twice
# without re-running the cell that loads user_info.
unused_columns = ['follower_count', 'following_count',
                  'account_creation_date', 'account_language']
not_self_only = ~user_info['userid'].isin(self_contact)
user_info = (user_info[not_self_only]
             .drop(columns=unused_columns)
             .merge(partitions, on='userid'))

In [14]:
# Count users per (community, country): one row per community, one column per
# country, with missing combinations filled in as 0.
community_info = (user_info
                  .groupby(['country', 'community'])['userid']
                  .count()
                  .unstack('country')
                  .fillna(0))
# Community size, counting the 'iran' and 'russia' columns only.
community_info['total'] = community_info['iran'].add(community_info['russia'])

In [19]:
# Histogram of community sizes before any consolidation.
fig, ax = plt.subplots(figsize=(15, 12))
community_info['total'].plot.hist(ax=ax)
ax.set_xlabel('Community Size')
fig.savefig('communities_before_consolidation.png', bbox_inches='tight')
plt.close(fig)

In [21]:
# Share of each community's members coming from each country.
community_sizes = community_info['total']
community_info['russian_percent'] = community_info['russia'].div(community_sizes)
community_info['iran_percent'] = community_info['iran'].div(community_sizes)

#### Consolidation

In [None]:
# Communities with more than 50 members keep their own label, numbered
# 0..k-1 in index order.
communities = community_info[community_info['total'] > 50].index.values
mapping = {community: label for label, community in enumerate(communities)}
user_info['label'] = user_info['community'].map(mapping)

# Everyone in a smaller community is pooled into one of two catch-all labels,
# one per country: k for 'iran', k + 1 for 'russia'.
in_small_community = ~user_info['community'].isin(communities)
com = len(communities)
user_info.loc[in_small_community & (user_info['country'] == 'iran'), 'label'] = com
com += 1
user_info.loc[in_small_community & (user_info['country'] == 'russia'), 'label'] = com

In [None]:
# Histogram of label sizes once small communities have been pooled.
fig, ax = plt.subplots(figsize=(15, 12))
label_sizes = user_info.groupby(['label'])['userid'].count()
label_sizes.plot.hist(ax=ax)
ax.set_title('Distribution of Community Size - After Consolidation')
ax.set_xlabel('Community Size')
fig.savefig('communities_after_consolidation.png', bbox_inches='tight')
plt.close(fig)

#### Without consolidation: skip the Consolidation cells above and resume here

In [30]:
# Run this cell only when consolidation was NOT applied; otherwise skip it.
# It overwrites any existing 'label' column with the raw community id, so a
# plain "Run All" discards the consolidated labels.
user_info = user_info.assign(label=user_info['community'])

In [31]:
# userid -> label lookup, and the graph nodes that have a label, in graph order.
mapping = user_info.set_index('userid')['label'].to_dict()
nodes = [node for node in undirected_core.nodes() if node in mapping]
undirected_plot = undirected_core.subgraph(nodes)

In [None]:
# Draw the labelled core graph, coloring each node by its community label.
# Keep the handle of the figure just created rather than re-fetching
# "figure 1", which may be a stale figure left open by an earlier cell.
fig = plt.figure(figsize=(20, 20), dpi=80)
pos = nx.kamada_kawai_layout(undirected_plot)
# Colors must follow the order of `nodelist`. `mapping.values()` is in
# user_info row order, not graph-node order, so the colors are looked up per
# node instead.
node_colors = [mapping[node] for node in nodes]
drn = nx.draw_networkx_nodes(undirected_plot, pos=pos, nodelist=nodes, node_color=node_colors, node_size=5, cmap=plt.cm.hsv)
dre = nx.draw_networkx_edges(undirected_plot, pos=pos, alpha=0.2)
plt.axis('off')
fig.savefig('../img/core_communities.png', bbox_inches="tight", transparent=True)
plt.close(fig)

In [36]:
# Ids of the communities with more than 100 members.
large_communities = community_info.index[community_info['total'] > 100].values

In [37]:
# `large_communities` holds *community* ids (the index of community_info), so
# users are selected on the 'community' column. Filtering on 'label' is only
# equivalent when no consolidation was applied; after consolidation the labels
# are renumbered and no longer comparable with community ids.
in_large_community = user_info['community'].isin(large_communities)
large_mapping = (user_info.loc[in_large_community]
                 .set_index('userid')['label']
                 .to_dict())
# Graph nodes that belong to a large community, in graph order.
large_nodes = [node for node in undirected_core.nodes() if node in large_mapping]
undirected_plot = undirected_core.subgraph(large_nodes)

In [38]:
# Draw only the large communities, coloring each node by its label.
# Keep the handle of the figure just created rather than re-fetching
# "figure 1", which may be a stale figure left open by an earlier cell.
fig = plt.figure(figsize=(20, 20), dpi=80)
pos = nx.kamada_kawai_layout(undirected_plot)
# Colors must follow the order of `nodelist`. `large_mapping.values()` is in
# user_info row order, not graph-node order, so the colors are looked up per
# node instead.
large_node_colors = [large_mapping[node] for node in large_nodes]
drn = nx.draw_networkx_nodes(undirected_plot, pos=pos, nodelist=large_nodes, node_color=large_node_colors, node_size=5, cmap=plt.cm.hsv)
dre = nx.draw_networkx_edges(undirected_plot, pos=pos, alpha=0.2)
plt.axis('off')
fig.savefig('../img/large_communities.png', bbox_inches="tight", transparent=True)
plt.close(fig)

KeyboardInterrupt: 

In [None]:
# Sanity check: show how many nodes were selected plus a small sample,
# instead of dumping the entire list into the notebook output.
len(large_nodes), large_nodes[:10]