In [2]:
%matplotlib inline
from __future__ import division

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np
from operator import truediv
from nxpd import draw
from collections import Counter
import itertools

In [36]:
G = nx.DiGraph()

In [37]:
def insert_node(name, email, learn, teach):
    G.add_node(name, email=email, learn=learn, teach=teach)
    
    for node in G.nodes(data=True):
        learn_other = node[1]['learn']
        teach_other = node[1]['teach']
        
        common = [l for l in learn if l in teach_other]
        w = len(common)
        G.add_edge(name, node[0], weight=w, common=common)
        
        common = [l for l in teach if l in learn_other]
        w = len(common)
        G.add_edge(node[0], name, weight=w, common=common)

In [38]:
# Retrieve data from Google Sheet and parse using pandas dataframe
data = pd.read_csv("https://raw.githubusercontent.com/benelson/collaboratr/master/collaboratr.csv")
data = data.replace(np.nan,' ', regex=True)
users = data['1. What is your name?']
emails = data['2. What is your email?']
teach = data['3. What can you help others with?']
learn = data['4. What do you want to collaborate on or learn about?']

## Decision Tree

In [39]:
def find_breakouts(teach, learn):
    """
    Find subjects which deserve breakouts depending 
    on student to teach ratios.
    """
    big_list = []
    for i in range(len(teach)):
        big_list.append(teach[i].split(';'))

    big_list1 = []
    for i in range(len(learn)):
        big_list1.append(learn[i].split(';'))
        
    all_teach = list((itertools.chain.from_iterable(big_list)))
    N_teach = Counter(all_teach).values()
    
    all_learn = list((itertools.chain.from_iterable(big_list1)))
    N_learn = Counter(all_learn).values()
    
    ratios =  map(truediv,N_learn,N_teach[:-1])
    
    breakout_pct = .2
    N = len(users)
    subjects = Counter(all_teach).keys()
    isBreakout = (np.array(ratios) > 2) & (np.array(N_learn > breakout_pct*N))
    
    breakouts = np.array(subjects)[list(np.where(isBreakout)[0])]
    
    return breakouts

In [40]:
breakouts = find_breakouts(teach, learn)
breakouts

array(['GAIA DRI', 'stochastic gradient', 'gaussian processes',
       'databases', 'HTML5, CSS, javascript,GitHub pages', 'autodiff'], 
      dtype='|S35')

## Graphs

In [41]:
for u,e,l,t in zip(users, emails, learn, teach):
    insert_node(u, email=e, learn=l.split(';'), teach=t.split(';'))

In [44]:
participants = G.nodes(data=True)

In [45]:
def assign_users():
    """
    This uses LOTS of nested loops. Work to be done here.
    """
    assign = {}
    i=0
    for p in participants:
        assign_p = {}
        for l in p[1]['learn']:
            if l in breakouts:
                assign_p[l] = 'breakout'
            else:
                edges = G.edges(p[0], data=True)
                for e in edges:
                    if (l in e[2]['common']) and (l!=''):
                        assign_p[l] = e[1]
                    else: 
                        pass
        assign[p[0]] = assign_p
        i+=1
        
    return assign

In [47]:
assign = assign_users()

In [48]:
G_new = nx.DiGraph()

In [49]:
G_new.add_nodes_from(participants)

In [50]:
for p in G_new.nodes():
    mappings = assign[p]
    for m in mappings.keys():
        G_new.add_edge(p, mappings[m], label=m)

In [51]:
pos=nx.spring_layout(G_new)
draw(G_new)

'/var/folders/pc/lq5slw9x3f92zfnsfvj0n82h0000gn/T/nx_Rv4jHu.png'