# Part II: Making the graph. 

In [8]:
import pandas as pd 
import scipy.spatial as sp 
import datetime as dttm

import random
import numpy as np 

import networkx as nx

import matplotlib.pyplot  as plt 
%pylab inline

import tqdm 
import os

# Distance cut-off used when making edges: two rows of a user's averaged data
# are joined by an edge when their entry in the pairwise distance matrix
# (built with sp.distance_matrix) is <= THRESHOLD.
THRESHOLD = 3
from random import choices

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Move the working directory up one level; the relative 'data...' paths used
# by the following cells are resolved from there.
# NOTE(review): this is relative to the current directory, so re-running the
# cell climbs one more level each time - run it exactly once per kernel.
os.chdir('..')

In [3]:
# Location of the merged CSV written out by dataPrep-I.
# Built with os.path.join so the separator matches the host OS; a literal
# backslash in the string only resolves on Windows and '\m' is an invalid
# escape sequence.
DATADIR = os.path.join('data', 'merged_raw.csv')
data = pd.read_csv(DATADIR)

In [4]:
# Split the merged frame by user: iterating over grpObj yields one
# (user_id, sub-frame) pair per user.
grpObj = data.groupby(by='user_id')

In [5]:
# Directory and file names for the graph data set.
# OUTDIR is built with os.path.join so it is valid on every OS (a literal
# backslash only works on Windows and '\g' is an invalid escape sequence).
# NOTE(review): the file names were originally tagged "input" although they
# sit next to OUTDIR - presumably they are inputs of a later stage; confirm.
OUTDIR = os.path.join('data', 'graph_data')
graph_ind = 'mhealth_graph_indicator.txt' # input
edge_list = 'mhealth_a.txt' # input
edge_weights = 'mhealth_edge_weights.txt' # input

In [6]:
def average_slice(df_, NUM_SAMPLE = 128):
    """Collapse consecutive windows of rows into one averaged row each.

    The frame is walked top to bottom in windows of ``NUM_SAMPLE`` rows (the
    last window may be shorter). For every window the first 23 columns are
    replaced by their column means, and the last 3 columns are copied from
    the first row of the window.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Rows to condense. Assumes the first 23 columns are numeric features
        and the last 3 are per-row labels/metadata - TODO confirm against the
        CSV written by dataPrep-I.
    NUM_SAMPLE : int, default 128
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        One row per window, indexed 0..n_windows-1. An empty input yields an
        empty frame with the same column layout.

    Raises
    ------
    ValueError
        If ``NUM_SAMPLE`` is smaller than 1.
    """
    if NUM_SAMPLE < 1:
        raise ValueError('NUM_SAMPLE must be a positive integer, got {0!r}'.format(NUM_SAMPLE))

    n_rows = df_.shape[0]
    if n_rows == 0:
        # pd.concat([]) raises on an empty list, so build the empty result
        # explicitly with the same columns a non-empty call would produce.
        return pd.concat([df_.iloc[:0, :23], df_.iloc[:0, -3:]], axis = 1).reset_index(drop = True)

    out = []
    for start in range(0, n_rows, NUM_SAMPLE):
        # iloc slicing clips at the end of the frame, so the final window is
        # simply shorter when n_rows is not a multiple of NUM_SAMPLE.
        window = df_.iloc[start:start + NUM_SAMPLE, :]
        averaged = window.iloc[:, :23].mean().to_frame().T
        # Both pieces carry index 0 so they line up side by side.
        first_row_meta = window.iloc[:1, -3:].reset_index(drop = True)
        out.append(pd.concat([averaged, first_row_meta], axis = 1))
    return pd.concat(out).reset_index(drop = True)

In [13]:
# For coloring: assign each activity code (1..12) its own color.
import matplotlib.colors as mcolors
# Random is imported by name because `%pylab inline` star-imports numpy names
# and may leave the bare name `random` pointing at something other than the
# standard-library module.
from random import Random

# Draw WITHOUT replacement from a seeded generator: sampling with replacement
# can hand the same color to several activities, and an unseeded draw changes
# the palette on every run.
COLOR_SEED = 0
cols = Random(COLOR_SEED).sample(list(mcolors.CSS4_COLORS.keys()), k = 15)

# Activity code -> color name; code i uses cols[i], the same lookup the graph
# cells perform when they index `cols` directly.
cols_dict = {}
for i in range(1, 13):
    cols_dict[i] = cols[i]

In [15]:
# Build one similarity graph per user and export all edges to a single file.
#
# For every user:
#   1. average the raw rows per activity in fixed-size windows (average_slice),
#   2. compute the pairwise distance matrix of the first 23 columns,
#   3. add one node per averaged row (ids 1..n) with features / label / color,
#   4. connect every pair of rows whose distance is <= THRESHOLD,
#   5. append the user's edges to the edge-list file.
#
# The output file is opened once, outside the loop: opening it with 'w' per
# user would keep only the last user's edges. Node ids are shifted by the
# number of nodes already written so ids from different users do not collide.
# NOTE(review): the offset scheme assumes the file is meant to hold every
# user's graph with globally unique node ids - confirm with the consumer.
EDGE_LIST_PATH = os.path.join('data', 'processed', 'edge_list.txt')
os.makedirs(os.path.dirname(EDGE_LIST_PATH), exist_ok = True)

user_data = {}
node_offset = 0  # total number of nodes written for the users handled so far
with open(EDGE_LIST_PATH, 'w') as f:
    for user, df in grpObj:
        averaged_data = (df
                         .groupby('encoded_activity')
                         .apply(average_slice)
                         .reset_index(drop = True))
        user_data[user] = averaged_data

        features = averaged_data.iloc[:, :23].values
        dist_mat = pd.DataFrame(sp.distance_matrix(features, features))
        dist_values = dist_mat.values

        G = nx.Graph()
        for i, row in averaged_data.iterrows():
            # iterrows may upcast the label to float; a list needs an int index.
            activity = int(row['encoded_activity'])
            G.add_node(i + 1,
                       features = row.iloc[:23],
                       label = row['encoded_activity'],
                       color = cols[activity])

        # Strict upper triangle: each unordered pair once, no self loops.
        # Row/column positions are 0-based, node ids are 1-based, hence the +1
        # on both endpoints (the nodes above were added as i + 1).
        close_pairs = np.triu(dist_values <= THRESHOLD, k = 1)
        for src, dst in zip(*np.nonzero(close_pairs)):
            G.add_edge(int(src) + 1, int(dst) + 1, weight = dist_values[src, dst])

        for src, dst in G.edges():
            f.write('{0},{1}\n'.format(src + node_offset, dst + node_offset))
        node_offset += G.number_of_nodes()

In [17]:
# Pairwise distances between the first 23 columns of every averaged row of
# user 1; entry (r, c) is the distance between row r and row c.
user1_features = user_data[1].iloc[:, :23].values
dist_mat = pd.DataFrame(sp.distance_matrix(user1_features, user1_features))

In [33]:
# Fresh, empty undirected graph; the following cells fill it with nodes and
# edges built from user_data[1]. Re-run this cell to reset the graph.
G = nx.Graph() 

In [34]:
# One node per averaged row of user 1. Node ids are 1-based (row position + 1)
# and carry the first 23 values, the activity label and its display color.
for i, row in user_data[1].iterrows():
    # iterrows may upcast the label to float, and a list cannot be indexed
    # with a float - cast before looking up the color.
    activity = int(row['encoded_activity'])
    G.add_node(i + 1,
               features = row.iloc[:23],
               label = row['encoded_activity'],
               color = cols[activity])

In [None]:
# Per-row color for user 1: look up every row's activity code in cols_dict.
# Codes that are not keys of cols_dict come out as NaN.
colors= user_data[1]['encoded_activity'].map(cols_dict)

In [35]:
# Inspect the pairwise distance matrix (the notebook shows a truncated view).
dist_mat

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,266,267,268,269,270,271,272,273,274,275
0,0.000000,0.182390,0.323675,0.211505,0.323665,0.359541,0.314119,0.333958,0.412323,0.324304,...,25.107771,15.165847,15.900592,15.810580,16.212688,14.578220,22.418102,18.626523,15.099651,22.606675
1,0.182390,0.000000,0.334771,0.209419,0.385910,0.352920,0.357649,0.377197,0.403036,0.342767,...,25.077286,15.178291,15.830610,15.819997,16.212160,14.553754,22.400886,18.582447,15.114004,22.501894
2,0.323675,0.334771,0.000000,0.308187,0.460354,0.518743,0.317782,0.455044,0.523361,0.414051,...,25.178835,15.153737,15.807030,15.819837,16.350635,14.561381,22.276077,18.636714,15.105849,22.570376
3,0.211505,0.209419,0.308187,0.000000,0.284808,0.339400,0.247000,0.272228,0.375860,0.335112,...,25.009753,15.168862,15.800612,15.790092,16.188484,14.552009,22.370875,18.518626,15.031507,22.540935
4,0.323665,0.385910,0.460354,0.284808,0.000000,0.254293,0.204699,0.186093,0.266680,0.329299,...,24.989382,15.125654,15.835608,15.720602,16.123645,14.476541,22.352610,18.558960,14.978712,22.515837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,14.578220,14.553754,14.561381,14.552009,14.476541,14.498568,14.479873,14.487515,14.506142,14.442429,...,24.712192,7.103811,11.865729,14.512103,10.538065,0.000000,23.117993,16.675657,10.828523,13.927381
272,22.418102,22.400886,22.276077,22.370875,22.352610,22.320658,22.319332,22.333747,22.266724,22.437582,...,36.309581,24.581053,19.421575,14.489669,27.617785,23.117993,0.000000,26.662848,22.067460,29.066623
273,18.626523,18.582447,18.636714,18.518626,18.558960,18.557153,18.590508,18.532456,18.564404,18.626442,...,25.181584,16.382500,10.151334,18.986114,14.117875,16.675657,26.662848,0.000000,15.311213,20.639164
274,15.099651,15.114004,15.105849,15.031507,14.978712,15.057527,15.003275,14.972322,15.040205,15.057563,...,20.500158,9.530795,12.753372,10.421323,9.825003,10.828523,22.067460,15.311213,0.000000,19.845040


In [36]:
# Connect every pair of rows whose distance is within THRESHOLD.
for src, distances in dist_mat.iterrows():
    # Look only at columns from the diagonal onwards, so each pair is
    # visited once; the diagonal entry (the row itself) comes first.
    from_diagonal = distances.iloc[src: ]
    within_reach = from_diagonal[from_diagonal <= THRESHOLD]
    neighbors = list(within_reach.index)
    print(neighbors)
    # Skip the leading entry (the row itself) and shift to 1-based node ids.
    for dst in neighbors[1: ]:
        print('adding edge from idx: {0} to neightbor {1}'.format(src+1, dst+1))
        G.add_edge(src + 1, dst + 1, weight = distances[dst])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
adding edge from idx: 1 to neightbor 2
adding edge from idx: 1 to neightbor 3
adding edge from idx: 1 to neightbor 4
adding edge from idx: 1 to neightbor 5
adding edge from idx: 1 to neightbor 6
adding edge from idx: 1 to neightbor 7
adding edge from idx: 1 to neightbor 8
adding edge from idx: 1 to neightbor 9
adding edge from idx: 1 to neightbor 10
adding edge from idx: 1 to neightbor 11
adding edge from idx: 1 to neightbor 12
adding edge from idx: 1 to neightbor 13
adding edge from idx: 1 to neightbor 14
adding edge from idx: 1 to neightbor 15
adding edge from idx: 1 to neightbor 16
adding edge from idx: 1 to neightbor 17
adding edge from idx: 1 to neightbor 18
adding edge from idx: 1 to neightbor 19
adding edge from idx: 1 to neightbor 20
adding edge from idx: 1 to neightbor 21
adding edge from idx: 1 to neightbor 22
adding edge from idx: 1 to neightbor 23
adding edge from idx: 1 to neightbor 24
[

In [None]:
# plt.figure(figsize=(75, 75))
# nx.draw_networkx(G, node_color = colors, pos= nx.spring_layout(G))
# plt.axis('off')
# plt.savefig('fig.png', dpi = 600)

In [45]:
# Print every edge of G twice: as stored ('u,v') and with the endpoints
# swapped ('v,u').
for line in nx.generate_edgelist(G, delimiter = ',', data = False):
    endpoints = line.split(',')
    print(line)
    print(','.join(reversed(endpoints)))

1,2
2,1
1,3
3,1
1,4
4,1
1,5
5,1
1,6
6,1
1,7
7,1
1,8
8,1
1,9
9,1
1,10
10,1
1,11
11,1
1,12
12,1
1,13
13,1
1,14
14,1
1,15
15,1
1,16
16,1
1,17
17,1
1,18
18,1
1,19
19,1
1,20
20,1
1,21
21,1
1,22
22,1
1,23
23,1
1,24
24,1
2,3
3,2
2,4
4,2
2,5
5,2
2,6
6,2
2,7
7,2
2,8
8,2
2,9
9,2
2,10
10,2
2,11
11,2
2,12
12,2
2,13
13,2
2,14
14,2
2,15
15,2
2,16
16,2
2,17
17,2
2,18
18,2
2,19
19,2
2,20
20,2
2,21
21,2
2,22
22,2
2,23
23,2
2,24
24,2
3,4
4,3
3,5
5,3
3,6
6,3
3,7
7,3
3,8
8,3
3,9
9,3
3,10
10,3
3,11
11,3
3,12
12,3
3,13
13,3
3,14
14,3
3,15
15,3
3,16
16,3
3,17
17,3
3,18
18,3
3,19
19,3
3,20
20,3
3,21
21,3
3,22
22,3
3,23
23,3
3,24
24,3
4,5
5,4
4,6
6,4
4,7
7,4
4,8
8,4
4,9
9,4
4,10
10,4
4,11
11,4
4,12
12,4
4,13
13,4
4,14
14,4
4,15
15,4
4,16
16,4
4,17
17,4
4,18
18,4
4,19
19,4
4,20
20,4
4,21
21,4
4,22
22,4
4,23
23,4
4,24
24,4
5,6
6,5
5,7
7,5
5,8
8,5
5,9
9,5
5,10
10,5
5,11
11,5
5,12
12,5
5,13
13,5
5,14
14,5
5,15
15,5
5,16
16,5
5,17
17,5
5,18
18,5
5,19
19,5
5,20
20,5
5,21
21,5
5,22
22,5
5,23
23,5
5,24
24,5
6,7
7,6
6,

In [44]:
# Swap the two endpoints of an edge-list line ('u,v' -> 'v,u').
# NOTE(review): `line` is not defined in this cell - it is whatever the most
# recently executed loop over nx.generate_edgelist left behind (hidden state).
','.join(line.split(',')[::-1])

'218,216'