In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Initial values for smoking behavior

Load the data describing each subject's smoking behavior (columns `t1`, `t2`, `t3`) and recode the raw values 1, 2, 3 as 0.1, 0.5, 0.9.

In [2]:
# Read the smoking table (first CSV column is the index) and recode the raw
# values 1 / 2 / 3 as 0.1 / 0.5 / 0.9, one substitution after the other.
tobacco = (
    pd.read_csv('raw_data/tobacco.csv', index_col=[0])
    .replace(1, 0.1)
    .replace(2, 0.5)
    .replace(3, 0.9)
)
tobacco.head(5)

Unnamed: 0,t1,t2,t3
s001,0.5,0.1,0.1
s002,0.1,0.1,0.1
s003,0.9,0.9,0.9
s004,0.1,0.1,0.1
s005,0.1,0.1,0.1


In [None]:
# Inspect the row of subject 's048'. `.loc` is the label-based indexer;
# the deprecated `.ix` indexer is no longer available in current pandas.
tobacco.loc['s048']

In [None]:
# Is 's048' among the rows whose 't1' value is missing?
's048' in tobacco.index[tobacco['t1'].isnull().values]

In [3]:
# Collect the ids of the rows with a missing value in t1, t2 or t3, column by
# column. An id missing in several columns appears once per column.
list_nans = []
for wave in ('t1', 't2', 't3'):
    missing = tobacco[wave].isnull()
    list_nans.extend(tobacco.index[missing.values])

In [None]:
# Spot-check: was 's048' collected as a row with missing values?
list_nans.count('s048') > 0

In [4]:
# Initial state of every subject: the 't1' value of the rows that have no
# missing value in any column.
status = tobacco.dropna()['t1']
status_temp = status.to_dict()
# Force plain Python floats for the values.
status_dict = {node: float(level) for node, level in status_temp.items()}

In [None]:
# Spot-check: the initial ('t1') value stored for subject 's002'.
status_dict['s002']

### Loading the networks

The first step is to read the raw data with the friendship levels and convert it to our model's format by recoding the raw values.

In [5]:
# Read the friendship matrix from 'friendship.1.csv'; the first CSV column
# becomes the row index, so rows and columns are both labelled by subject id.
friendship1 = pd.read_csv('raw_data/friendship.1.csv', index_col=[0])
# Preview the first rows.
friendship1.head()

Unnamed: 0,s001,s002,s003,s004,s005,s006,s007,s008,s009,s010,...,s151,s152,s153,s154,s155,s156,s157,s158,s159,s160
s001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10,10,10,10,0.0,10,10,10
s002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10,10,10,10,0.0,10,10,10
s003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10,10,10,10,0.0,10,10,10
s004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,10,10,10,10,0.0,10,10,10
s005,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,...,0.0,0.0,10,10,10,10,0.0,10,10,10


In [6]:
# Recode the raw friendship values: 1 -> 0.9, 2 -> 0.5, 0 -> 0.1, 10 -> 0.
# The substitutions run strictly in this order: 10 -> 0 has to come after
# 0 -> 0.1, otherwise the zeros it produces would be turned into 0.1 as well.
friendship1 = (
    friendship1
    .replace(1, 0.9)
    .replace(2, 0.5)
    .replace(0, 0.1)
    .replace(10, 0)
)
friendship1.head()

Unnamed: 0,s001,s002,s003,s004,s005,s006,s007,s008,s009,s010,...,s151,s152,s153,s154,s155,s156,s157,s158,s159,s160
s001,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,0.1,0.1,0,0,0,0,0.1,0,0,0
s002,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,0.1,0.1,0,0,0,0,0.1,0,0,0
s003,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,0.1,0.1,0,0,0,0,0.1,0,0,0
s004,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,...,0.1,0.1,0,0,0,0,0.1,0,0,0
s005,0.1,0.1,0.1,0.1,0.1,0.5,0.1,0.1,0.1,0.5,...,0.1,0.1,0,0,0,0,0.1,0,0,0


#### Cleaning up the list of nans

In [7]:
# Remove every subject with missing smoking data from the friendship matrix,
# first as a row, then as a column.
network = friendship1.drop(list_nans, axis=0)
network = network.drop(list_nans, axis=1)

In [8]:
# Any friendship entry that is still missing is stored as 0.
network = network.fillna(value=0)

In [9]:
#network.ix['s004']

In [None]:
# Type of the top-left entry of the matrix.
type(network.values[0, 0])

### Set up

Set the length of the simulation (number of time steps).

In [10]:
# Exclusive upper bound of the simulation loop, which iterates over
# range(1, time), i.e. time steps 1 .. time - 1.
time = 50

### Creating the network for gephi

Create a directed graph from the adjacency matrix so that it can later be exported as a GEXF file for Gephi.

In [11]:
import networkx as nx

In [12]:
# Independent float32 copy of the friendship matrix as a plain numpy array.
network_ = np.array(network.values, dtype=np.float32)
network_

array([[ 0.1       ,  0.1       ,  0.1       , ...,  0.1       ,
         0.1       ,  0.1       ],
       [ 0.1       ,  0.1       ,  0.1       , ...,  0.1       ,
         0.1       ,  0.1       ],
       [ 0.1       ,  0.1       ,  0.1       , ...,  0.1       ,
         0.1       ,  0.1       ],
       ..., 
       [ 0.1       ,  0.1       ,  0.1       , ...,  0.1       ,
         0.89999998,  0.1       ],
       [ 0.1       ,  0.1       ,  0.1       , ...,  0.89999998,
         0.1       ,  0.1       ],
       [ 0.1       ,  0.1       ,  0.1       , ...,  0.1       ,
         0.1       ,  0.1       ]], dtype=float32)

In [13]:
# Zero the diagonal of the matrix in place, i.e. each node's entry for itself.
np.fill_diagonal(network_, 0)
# Parenthesised form: same output on Python 2, and valid on Python 3.
print(network_)

[[ 0.          0.1         0.1        ...,  0.1         0.1         0.1       ]
 [ 0.1         0.          0.1        ...,  0.1         0.1         0.1       ]
 [ 0.1         0.1         0.         ...,  0.1         0.1         0.1       ]
 ..., 
 [ 0.1         0.1         0.1        ...,  0.          0.89999998  0.1       ]
 [ 0.1         0.1         0.1        ...,  0.89999998  0.          0.1       ]
 [ 0.1         0.1         0.1        ...,  0.1         0.1         0.        ]]


In [14]:
# The matrix is transposed before building the graph because, for our
# purposes, the origin and the target of every edge are inverted.
g = nx.from_numpy_matrix(network_.transpose(), create_using=nx.MultiDiGraph())

In [15]:
# Remember the node labels generated by the graph constructor so that the
# relabelling can be checked later on.
old_labels = list(g)

In [16]:
# Store every node's original label as attribute 'old_id' (for checking).
dict_attributes = dict(zip(old_labels, old_labels))

# One separate, initially empty dict per node: the node's 'activityTimeLine'.
dictlist = [dict() for _ in range(len(old_labels))]
dictActivityTimeLine = dict(zip(old_labels, dictlist))

# Keyword arguments are used on purpose: the positional order of `name` and
# `values` differs between networkx releases, keywords work with either.
nx.set_node_attributes(g, name='old_id', values=dict_attributes)
nx.set_node_attributes(g, name='activityTimeLine', values=dictActivityTimeLine)

#### Edges

In [None]:
# Read every edge together with its weight in a single pass over the graph
# (no per-edge lookup, and no indexing into dict.values(), which only works
# on Python 2).
edge_weights = [(x, y, data['weight']) for x, y, data in g.edges(data=True)]

# dictlist2: (source, target, {0: weight}) -- the weight recorded at time 0.
# NOTE: these tuples contain a dict, so they are not hashable and cannot be
# used as dictionary keys as they are.
dictlist2 = [(x, y, {0: weight}) for x, y, weight in edge_weights]
# weightslist: the bare weights, in the same edge order as dictlist2.
weightslist = [weight for _, _, weight in edge_weights]

In [None]:
# Map every edge to its weight timeline {time: weight}.
# The entries of dictlist2 are (source, target, {0: weight}) tuples; they hold
# a dict and are therefore unhashable, so using them as keys raises TypeError.
# The key is rebuilt as (source, target, 0), 0 being the edge key used when
# indexing this multigraph (g[u][v][0]).
# NOTE(review): assumes the attribute value should be the {0: weight}
# timeline rather than the bare weight -- confirm.
dictWeightTimeLine = dict(
    ((source, target, 0), timeline) for source, target, timeline in dictlist2
)

In [None]:
# Map every edge to its weight timeline {time: weight}.
# The entries of dictlist2 are (source, target, {0: weight}) tuples; they hold
# a dict and are therefore unhashable, so using them as keys raises TypeError.
# The key is rebuilt as (source, target, 0), 0 being the edge key used when
# indexing this multigraph (g[u][v][0]).
# NOTE(review): assumes the attribute value should be the {0: weight}
# timeline rather than the bare weight -- confirm.
dictWeightTimeLine = dict(
    ((source, target, 0), timeline) for source, target, timeline in dictlist2
)
# Show only a few entries instead of dumping one line per edge.
list(dictWeightTimeLine.items())[:5]

In [None]:
# Attach the per-edge timelines to the graph as edge attribute 'weightTimeLine'.
nx.set_edge_attributes(
    g,
    values=dictWeightTimeLine,
    name='weightTimeLine',
)

In [None]:
# Display the edges of the graph (the complete edge list, not a sample).
g.edges()

In [None]:
# Attributes of the node labelled 30.
# NOTE(review): assumes this runs before the nodes are relabelled to the
# string ids, while the integer labels still exist -- confirm.
g.node[30]

In [None]:
# Pair each generated node label with the subject id found at the same
# position in the columns of the friendship matrix.
relabel_dict = {
    old_label: subject_id
    for old_label, subject_id in zip(old_labels, network.columns.values)
}

In [None]:
# Rename the nodes to the subject ids; `g` now refers to the relabelled copy.
g = nx.relabel_nodes(g, mapping=relabel_dict, copy=True)

In [None]:
# Check that the integer label 1 is gone after relabelling (expected: False).
# A membership test is used because indexing a missing node raises KeyError,
# which would stop a "Run All" execution at this cell.
1 in g

In [None]:
# Attributes of the node now labelled 's003'.
g.node['s003']

In [None]:
# Attach the initial state (status_dict: subject id -> float) to the nodes as
# attribute 'state'. Keyword arguments are used on purpose: the positional
# order of `name` and `values` differs between networkx releases.
nx.set_node_attributes(g, name='state', values=status_dict)

In [None]:
# Record every node's initial state as entry 0 of its activity timeline.
for node, attributes in g.nodes(data=True):
    attributes['activityTimeLine'][0] = attributes['state']

In [None]:
# Check the Python type of the stored state, then show all attributes of the
# node. Parenthesised print: same output on Python 2, and valid on Python 3.
print(type(g.node['s003']['state']))
g.node['s003']

In [None]:
# Export the graph to 'graph_teen.gexf' in the current working directory.
# NOTE(review): the nodes carry a dict-valued attribute ('activityTimeLine');
# confirm that the GEXF writer accepts this attribute type.
nx.write_gexf(g, 'graph_teen.gexf')

## Updating the values of edges and states over time
Update the node states at every time step. The edge update (`edges_update`) is currently disabled, so only the states change.

In [None]:
#import edges_update as eu
import states_update as su

In [None]:
# Re-import states_update so that edits made to the module are picked up.
# `reload` is a builtin only on Python 2; fall back to importlib otherwise.
try:
    reload
except NameError:
    from importlib import reload
reload(su)

In [None]:
# Run the state update for t = 1 .. time - 1, starting from a copy of `g` so
# the initial graph is left untouched. The edge update is disabled.
# g_new is bound before the loop so that it exists even when the loop body
# never runs (time <= 1); in that case it is simply the copy of `g`.
g_new = g.copy()
for t in range(1, time):
    # Each step receives the graph returned by the previous step.
    g_new = su.states_update(g_new, t, 'logistic')
# Both names refer to the final graph.
g_temp = g_new

In [None]:
# One line per node: its state at every recorded time step.
plt.figure(figsize=(20, 10))
for node, attributes in g_new.nodes(data=True):
    # Sort by time step: dict iteration order is not guaranteed to be
    # chronological, and an unordered timeline would draw a zig-zag line.
    timeline = sorted(attributes['activityTimeLine'].items())
    if timeline:
        steps, states = zip(*timeline)
        plt.plot(steps, states)
plt.xlabel('time step')
plt.ylabel('state')
plt.title('State of every node over time')

In [None]:
# Attributes of node 's070' in the graph produced by the simulation loop.
g_new.node['s070']

In [None]:
# Weight of the edge whose source is 's004' and whose target is 's007'.
# The edge is addressed through its key 0, like the other edge lookups in
# this notebook; indexing dict.values() only works on Python 2.
g['s004']['s007'][0]['weight']

In [None]:
# Overwrite, in place, the weight of the edge 's070' -> 's088' (edge key 0).
# NOTE(review): 11.9 is far outside the 0 - 0.9 values assigned to the
# friendship matrix; this looks like a manual experiment -- confirm it should
# stay in the notebook.
g['s070']['s088'][0]['weight'] = 11.9

In [None]:
# Attribute dict of the edge 's070' -> 's055' (edge key 0).
g['s070']['s055'][0]

In [None]:
# Print "source target weight" for every edge whose weight exceeds 0.11.
for source, target, data in g.edges(data=True):
    weight = data['weight']
    if weight > 0.11:
        # %-formatting gives the same space-separated line on Python 2 and 3.
        print('%s %s %s' % (source, target, weight))