# Generate data files to be ingested by the viz

In [50]:
import networkx as nx
from itertools import combinations
import json

In [95]:
# Load the processed user table (indexed by its `Id` column) and the
# transaction log.
# NOTE(review): `pd` is not imported in any visible cell -- confirm that
# `import pandas as pd` runs before this cell.
users = pd.read_csv('data/processed/users.csv').set_index('Id')
transactions = pd.read_csv('data/processed/transactions.csv')

# Parse the creation date and keep a short display form such as "Jan '17".
users['date_created'] = pd.to_datetime(users['date_created'])
users['date_created_str'] = users.date_created.dt.strftime("%b '%y")

# Per-user transaction counts, aligned on the user id index.  A user that
# never appears in a given direction gets no value from the groupby, which
# would leave NaN in the column; fill those with 0 so the exported JSON does
# not contain bare NaN tokens.  fillna keeps the column dtype unchanged.
users['num_from'] = transactions.groupby('from').size()
users['num_from'] = users['num_from'].fillna(0)
users['num_to'] = transactions.groupby('to').size()
users['num_to'] = users['num_to'].fillna(0)

transactions['created_time'] = pd.to_datetime(transactions['created_time'])

## Construct the graph

In [40]:
# Ids (index labels of `users`) of the rows whose `is_crawled` column holds.
crawled_users = users.query('is_crawled').index.tolist()

In [41]:
# Directed count matrix restricted to transactions where both ends are
# crawled users: rows are senders (`from`), columns are receivers (`to`),
# and sender/receiver pairs with no transactions are filled with 0.
both_crawled = (transactions['from'].isin(crawled_users)
                & transactions['to'].isin(crawled_users))
crawled_pairs = transactions.loc[both_crawled, ['from', 'to']]
adj_mat = crawled_pairs.pivot_table(index='from', columns='to', aggfunc=len)
adj_mat = adj_mat.fillna(0)

In [26]:
def count_transacts(x, y):
    """Return the number of transactions between ``x`` and ``y``, both ways.

    Reads the module-level ``adj_mat``, whose rows are senders and whose
    columns are receivers.  Each direction is looked up on its own: a user
    who never sent (no row) or never received (no column) contributes 0 for
    that direction only, instead of discarding the count found in the other
    direction.

    Returns 0 when neither direction is present in ``adj_mat``.
    """
    def _directed(sender, receiver):
        # A missing row or column label means no transactions that way.
        try:
            return adj_mat.loc[sender, receiver]
        except KeyError:
            return 0

    return _directed(x, y) + _directed(y, x)

In [100]:
%%time
# Undirected graph with one node per crawled user; an edge joins two users
# when count_transacts reports at least one transaction between them, and the
# edge weight is that count.
g = nx.Graph()
g.add_nodes_from(crawled_users)

for pair in combinations(crawled_users, 2):
    n_transacts = count_transacts(*pair)
    if not n_transacts > 0:
        continue
    g.add_edge(pair[0], pair[1], weight=n_transacts)

CPU times: user 11 s, sys: 93.6 ms, total: 11.1 s
Wall time: 11.2 s


In [35]:
def compute_size(n):
    """Return the display size of node ``n`` in the graph being exported.

    Reads the module-level ``user`` and ``special``, which the export loop
    assigns before building each graph.  The result is 10 when ``n`` equals
    ``user``, 8 when ``n`` is in ``special``, and 5 otherwise.
    """
    # `user` is tested first, so it wins even if it is also in `special`.
    if n == user:
        return 10
    return 8 if n in special else 5

def get_group(n):
    """Return the group number of node ``n`` in the graph being exported.

    Reads the module-level ``user`` and ``special``, which the export loop
    assigns before building each graph.  The result is 1 when ``n`` equals
    ``user``, 2 when ``n`` is in ``special``, and 3 otherwise.
    """
    # Evaluated left to right: the `user` test takes precedence.
    return 1 if n == user else (2 if n in special else 3)

In [36]:
def get_link_transactions(e):
    """Return the rows of ``transactions`` exchanged between the ends of ``e``.

    ``e`` is indexable; ``e[0]`` and ``e[1]`` are the two user ids.  A row is
    kept when one of them is its ``from`` value and the other its ``to``
    value, in either direction.  Reads the module-level ``transactions``.
    """
    first, second = e[0], e[1]
    senders = transactions['from']
    receivers = transactions['to']

    second_to_first = (receivers == first) & (senders == second)
    first_to_second = (receivers == second) & (senders == first)
    return transactions[second_to_first | first_to_second]

In [102]:
# Export one JSON graph file per crawled user for the viz.
# NOTE(review): `tqdm` and `pd` are used here but are not imported in any
# visible cell -- confirm they are in scope before running.


def _json_safe(value):
    """Return ``value`` in a form that ``json`` writes as valid JSON.

    Missing values (NaN / NaT / None) become ``None`` (JSON ``null``) instead
    of the bare ``NaN`` token, and numpy scalars are unwrapped to plain
    Python numbers.
    """
    if pd.isna(value):
        return None
    return value.item() if hasattr(value, 'item') else value


def _count(value):
    """Return a transaction count as an int, treating a missing count as 0."""
    return 0 if pd.isna(value) else int(value)


def _format_day(ts):
    """Format a timestamp like ``Jan 5, '17``.

    Built from ``ts.day`` rather than the ``%-d`` directive, which is a
    platform-specific extension and is rejected on some platforms.
    """
    return ts.strftime("%b ") + str(ts.day) + ts.strftime(", '%y")


def _node_record(n):
    """Build the JSON record for node ``n`` of the graph being exported."""
    return {
        'id': n,
        'group': get_group(n),
        'name': _json_safe(users.at[n, 'name']),
        'created_at': _json_safe(users.at[n, 'date_created_str']),
        'num_to': _count(users.at[n, 'num_to']),
        'num_from': _count(users.at[n, 'num_from']),
        'username': _json_safe(users.at[n, 'username']),
        'size': compute_size(n),
    }


def _link_record(e):
    """Build the JSON record for edge ``e``, including its transactions."""
    low, high = min(e[0], e[1]), max(e[0], e[1])
    return {
        # NOTE(review): plain concatenation is ambiguous (the pairs 1,23 and
        # 12,3 both give "123").  Kept unchanged because the consumer of
        # these files may rely on this id format -- confirm before changing.
        'id': str(low) + str(high),
        'source': e[0],
        'target': e[1],
        'value': _json_safe(count_transacts(*e)),
        'transactions': [{
            'id': i,
            'caption': _json_safe(row['message']),
            'created_time': _format_day(row['created_time']),
        } for i, row in get_link_transactions(e).iterrows()],
    }


for user in tqdm(crawled_users):
    # `user` and `special` must stay module-level names: get_group() and
    # compute_size() read them as globals.
    dft = transactions[(transactions['to'] == user) | (transactions['from'] == user)]
    special = set(dft['from']) | set(dft['to'])

    # Neighbourhood of `user` within two hops in the transaction graph.
    draw_g = nx.ego_graph(g, user, radius=2)

    d = {
        'nodes': [_node_record(n) for n in draw_g.nodes()],
        'links': [_link_record(e) for e in draw_g.edges()],
        'center': user,
    }

    # Serialize once and write the same text to both destinations.
    payload = json.dumps(d)

    # graph.json is rewritten on every pass, so it always holds the most
    # recently exported user's graph.
    with open('site/graph.json', 'w') as f:
        f.write(payload)

    with open('site/graphs/%s.json' % user, 'w') as f:
        f.write(payload)




KeyboardInterrupt: 

In [97]:
# Ad-hoc check: is user id 1858485 a node of the graph?
1858485 in g

False