# StellarGraph

## Libraries

In [185]:
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx
import shapely as shp

## Network 

We use networkx to create our network.

In [186]:
df_tryout = gpd.read_file('../Datasets/ultimate_move_df.geojson')
df_tryout.head()

Unnamed: 0,shape_leng,shape__area,shape__length,O3_p932,O3_SOMO35,NO2_avg,weight_urb,weight_tr,NO2_avg_ut,O3_S10,...,121_150_m2,150_180_m2,above_180_m2,Income,srf_tot,srf_housin,pob_casa,pob_sale,name_celda,geometry
0,60389.288218,235584400.0,60389.288218,119.616222,8269.44141,6.193526,0.02547,0.01597,16.823389,25689.569904,...,140.0,15.0,55.0,9066.485226,1759992.0,827333.0,4868.0,1259.0,Abanilla,"POLYGON ((-1.05858 38.34556, -1.05858 38.34556..."
1,71793.715211,114928000.0,71793.715211,120.054658,8237.781754,6.520018,0.048772,0.014369,18.486018,25638.982336,...,0.0,0.0,50.0,8912.600076,2233992.0,1266719.0,11210.0,1754.0,Abarán,"POLYGON ((-1.19809 38.28787, -1.20045 38.28562..."
2,60256.648515,84220530.0,60256.648516,104.861964,3319.757143,4.628167,0.064077,0.034542,16.772488,19753.378534,...,260.0,60.0,95.0,11013.135965,1201722.0,763196.0,3149.0,2257.0,Abegondo,"POLYGON ((-8.27051 43.28381, -8.27050 43.27841..."
3,243007.987861,1270814000.0,243007.987861,119.047064,8139.331804,4.927379,0.004869,0.005272,24.036815,25490.301209,...,305.0,110.0,195.0,8783.054078,1853827.0,881982.0,3744.0,1292.0,Abenójar y otros municipios,"POLYGON ((-4.49641 38.99369, -4.48571 38.98113..."
4,23602.276713,20059990.0,23602.276713,120.82,6078.39,16.765111,0.412222,0.027119,31.8575,21904.122287,...,270.0,320.0,170.0,13371.511234,2090596.0,832182.0,9076.0,3413.0,Abrera,"POLYGON ((1.93249 41.53670, 1.93238 41.53643, ..."


In [187]:
df_tryout['tot_build_perc'] = df_tryout.srf_tot/df_tryout.shape__area
df_tryout['housing_perc'] = df_tryout.srf_housin/df_tryout.shape__area
df_tryout['shape__are'] = df_tryout.shape__area /1000000

df_tryout['pop_per_km2'] = df_tryout.tot_pop/df_tryout.shape__are
df_tryout['male_female_ratio'] = df_tryout.Male/df_tryout.Female
df_tryout['young_per_km2'] = df_tryout.sub_16_age / df_tryout.shape__are
df_tryout['middle_per_km2'] = df_tryout['16_to_64_age'] / df_tryout.shape__are
df_tryout['old_per_km2'] = df_tryout['64_more_age'] / df_tryout.shape__are

df_tryout = df_tryout.drop(['shape_leng','shape_leng','NO2_avg_ut'
                           ,'sub_16_age','16_to_64_age','64_more_age','weight_urb','weight_tr','Male',
                            'Female','srf_housin','srf_tot'], axis = 1)

df_tryout['less45m2_per_km2'] = (df_tryout['less_30_m2'] + df_tryout['30_45_m2'])/df_tryout.shape__are
df_tryout['46m2_50m2_per_km2'] = df_tryout['46_60_m2']/df_tryout.shape__are
df_tryout['61m2_90m2_per_km2'] = (df_tryout['61_75_m2'] + df_tryout['76_90_m2'] )/df_tryout.shape__are
df_tryout['more90m2_per_km2'] = ( df_tryout['91_105_m2']+df_tryout['106_120_m2'] +  df_tryout['121_150_m2'] + df_tryout['150_180_m2'] + df_tryout['above_180_m2'])/df_tryout.shape__area

df_tryout = df_tryout.drop(['less_30_m2' ,'30_45_m2','46_60_m2', '61_75_m2','76_90_m2','91_105_m2','106_120_m2','121_150_m2','150_180_m2','O3_p932', 'O3_SOMO35','O3_S10',
                            'above_180_m2','pob_casa','pob_sale','first_home', 'shape__are', 'pop_per_km2','tot_house','tot_pop','shape__area', 'shape__length','second_home','vacation_home'], axis = 1)

In [188]:
df_tryout.head()

Unnamed: 0,NO2_avg,Income,name_celda,geometry,tot_build_perc,housing_perc,male_female_ratio,young_per_km2,middle_per_km2,old_per_km2,less45m2_per_km2,46m2_50m2_per_km2,61m2_90m2_per_km2,more90m2_per_km2
0,6.193526,9066.485226,Abanilla,"POLYGON ((-1.05858 38.34556, -1.05858 38.34556...",0.007471,0.003512,1.025197,4.011302,16.957832,6.345922,0.0,0.0,3.33214,5e-06
1,6.520018,8912.600076,Abarán,"POLYGON ((-1.19809 38.28787, -1.20045 38.28562...",0.019438,0.011022,1.028571,18.9249,75.39506,19.577482,0.0,0.0,19.577482,8e-06
2,4.628167,11013.135965,Abegondo,"POLYGON ((-8.27051 43.28381, -8.27050 43.27841...",0.014269,0.009062,0.982578,7.777202,41.854405,18.047857,0.0,0.0,3.502709,9e-06
3,4.927379,8783.054078,Abenójar y otros municipios,"POLYGON ((-4.49641 38.99369, -4.48571 38.98113...",0.001459,0.000694,1.054645,0.444597,2.695122,1.306249,0.031476,0.043279,0.539024,1e-06
4,16.765111,13371.511234,Abrera,"POLYGON ((1.93249 41.53670, 1.93238 41.53643, ...",0.104217,0.041485,1.05309,116.400835,404.536522,67.54738,0.0,5.732803,94.217379,0.0001


#### Nodes

In [189]:
movements = gpd.read_file('../Datasets/movements.geojson')

In [190]:
nodes = movements.copy()
nodes = nodes[~nodes.nombre_celda_destino.isin(list(set(nodes['nombre_celda_destino']) - set(nodes['nombre_celda_origen'])))]

In [191]:
nodes_l = nodes.drop([ 'celda_destino', 'celda_origen', 'flujo', 'n_destino', 'nombre_celda_destino', 'p_pob_casa', 'pob_casa', 'pob_sale','p_pob_sale', 'pob_resid', 'geometry'], axis = 1)
nodes_l['geometry'] = movements.geometry.boundary.explode()[:,0]
nodes_l = nodes_l.drop_duplicates()
nodes_l.head()

Unnamed: 0,nombre_celda_origen,geometry
0,"Montesinos, Los y Algorfa",POINT (-0.77723 38.04169)
8,Sant Joan de Vilatorrada y otros municipios,POINT (1.74088 41.76983)
17,"Real de la Jara, El y otros municipios",POINT (-6.18708 37.77972)
19,Daimús y otros municipios,POINT (-0.14416 38.96415)
24,Formentera del Segura y otros municipios,POINT (-0.74723 38.09734)


In [192]:
df_merged = gpd.sjoin(df_tryout, nodes_l, how="inner", op='intersects')
df_merged = df_merged.drop(['name_celda', 'index_right', 'geometry'], axis = 1)
df_merged.head()

Unnamed: 0,NO2_avg,Income,tot_build_perc,housing_perc,male_female_ratio,young_per_km2,middle_per_km2,old_per_km2,less45m2_per_km2,46m2_50m2_per_km2,61m2_90m2_per_km2,more90m2_per_km2,nombre_celda_origen
0,6.193526,9066.485226,0.007471,0.003512,1.025197,4.011302,16.957832,6.345922,0.0,0.0,3.33214,5e-06,Abanilla
1,6.520018,8912.600076,0.019438,0.011022,1.028571,18.9249,75.39506,19.577482,0.0,0.0,19.577482,8e-06,Abarán
2,4.628167,11013.135965,0.014269,0.009062,0.982578,7.777202,41.854405,18.047857,0.0,0.0,3.502709,9e-06,Abegondo
3,4.927379,8783.054078,0.001459,0.000694,1.054645,0.444597,2.695122,1.306249,0.031476,0.043279,0.539024,1e-06,Abenójar y otros municipios
4,16.765111,13371.511234,0.104217,0.041485,1.05309,116.400835,404.536522,67.54738,0.0,5.732803,94.217379,0.0001,Abrera


#### Edges 

In [193]:
edge = nodes[['nombre_celda_origen', 'nombre_celda_destino','flujo']]

In [194]:
edge.columns = ['source','target','weight']
edge.head()

Unnamed: 0,source,target,weight
0,"Montesinos, Los y Algorfa",Formentera del Segura y otros municipios,107
1,"Montesinos, Los y Algorfa",Almoradí,463
2,"Montesinos, Los y Algorfa",Rojales,983
3,"Montesinos, Los y Algorfa",San Miguel de Salinas,213
4,"Montesinos, Los y Algorfa",Orihuela (distrito 05),167


### Graph

In [204]:
#Initialize the graph
G = nx.from_pandas_edgelist(edge, source='source', target= 'target', edge_attr= 'weight')

#See graph info
print('Graph Info:\n', nx.info(G))

Graph Info:
 Name: 
Type: Graph
Number of nodes: 3136
Number of edges: 16325
Average degree:  10.4114


In [205]:
node_attr = df_merged.set_index('nombre_celda_origen').to_dict('index')
nx.set_node_attributes(G, node_attr)

In [207]:
#Inspect the node features
print('\nGraph Edges weights: ', G['Rojales']['Montesinos, Los y Algorfa'])
print('\nGraph Nodes: ', G.nodes['Rojales'])


Graph Edges weights:  {'weight': 241}

Graph Nodes:  {'NO2_avg': 12.64, 'Income': 7342.00306065665, 'tot_build_perc': 0.10225941019706217, 'housing_perc': 0.07261906943101344, 'male_female_ratio': 1.0513698630136987, 'young_per_km2': 45.09258924582822, 'middle_per_km2': 394.74198085763334, 'old_per_km2': 213.82614900441123, 'less45m2_per_km2': 19.81892027336805, '46m2_50m2_per_km2': 13.636871747730309, '61m2_90m2_per_km2': 97.45817675711261, 'more90m2_per_km2': 0.0001596423119267628}


## Model

In [15]:
import os
import sys
import stellargraph as sg
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.layer import GCN

from tensorflow.keras import layers, optimizers, losses, metrics, Model
from sklearn import preprocessing, model_selection
from IPython.display import display, HTML
import matplotlib.pyplot as plt
%matplotlib inline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
https://colab.research.google.com/github/stellargraph/stellargraph/blob/master/demos/node-classification/gcn-node-classification.ipynb#scrollTo=0QaHvX5UL2ke

Unnamed: 0,source,target,weight
0,"Montesinos, Los y Algorfa",Formentera del Segura y otros municipios,107
1,"Montesinos, Los y Algorfa",Almoradí,463
2,"Montesinos, Los y Algorfa",Rojales,983
3,"Montesinos, Los y Algorfa",San Miguel de Salinas,213
4,"Montesinos, Los y Algorfa",Orihuela (distrito 05),167


In [60]:
edge[edge.source == 'Montesinos, Los y Algorfa']

Unnamed: 0,source,target,weight
0,"Montesinos, Los y Algorfa",Formentera del Segura y otros municipios,107
1,"Montesinos, Los y Algorfa",Almoradí,463
2,"Montesinos, Los y Algorfa",Rojales,983
3,"Montesinos, Los y Algorfa",San Miguel de Salinas,213
4,"Montesinos, Los y Algorfa",Orihuela (distrito 05),167
5,"Montesinos, Los y Algorfa",Torrevieja (distrito 01),294
6,"Montesinos, Los y Algorfa",Torrevieja (distrito 02),175
7,"Montesinos, Los y Algorfa",Torrevieja (distrito 03),105


In [61]:
edge[edge.target == 'Montesinos, Los y Algorfa']

Unnamed: 0,source,target,weight
591,Almoradí,"Montesinos, Los y Algorfa",110
2086,Rojales,"Montesinos, Los y Algorfa",241
4336,Torrevieja (distrito 01),"Montesinos, Los y Algorfa",143


In [55]:
nodes_index = df_merged.set_index('nombre_celda_origen')
nodes_index = pd.DataFrame(nodes_index)

In [81]:
a = list(nodes_index.index.values)

In [184]:
Gx = sg.StellarGraph.from_networkx(G, node_features = nodes_index) #, edge_weight_attr = "flujo")

ValueError: node_features['default']: expected feature node IDs to exactly match nodes in graph; found: missing from data ('source', 'target')

In [175]:
print(Gx.info())

StellarGraph: Undirected multigraph
 Nodes: 2622, Edges: 12289

 Node types:
  default: [2622]
    Edge types: default-default->default

 Edge types:
    default-default->default: [12289]
