In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy.spatial import distance
from tqdm import tqdm

import spektral
import matplotlib.pyplot as plt

from spektral.layers.convolutional import GraphSageConv, GCNConv
from spektral.layers.pooling import GlobalAttentionPool
from spektral import utils

import tensorflow as tf
# Set CPU as available physical device
#my_devices = tf.config.experimental.list_physical_devices(device_type='CPU')
#tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU')
#tf.config.set_visible_devices([], 'CPU')

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Flatten, Concatenate
from tensorflow.keras import backend as K
from tensorflow.keras.metrics import mean_squared_error
from tensorflow.keras.utils import multi_gpu_model
from tensorflow.keras.optimizers import Adam

In [2]:
# Directory that is listed below for the per-day graph sample archives (.npz).
# Earlier location: '/media/storage_3/abiricz/Mobilcell/TimeIntervalGraphs/'
source = '/media/Data_storage/Mobilcell/TimeIntervalGraphs_spektral/'
# Directory from which the tower location CSV is read.
source_pol = '/media/Data_storage/Mobilcell/DayPolygonData/'

# Alphabetically sorted directory listing, held as a NumPy string array.
files = np.array(sorted(os.listdir(source)))
files[:10]

array(['graphNN_linegraph_samples_series-3_target-1_20181203.npz',
       'graphNN_samples_series-3_target-1_20181203.npz',
       'graphNN_samples_series-3_target-1_20181204.npz',
       'graphNN_samples_series-3_target-1_20181205.npz',
       'graphNN_samples_series-3_target-1_20181206.npz',
       'graphNN_samples_series-3_target-1_20181207.npz',
       'graphNN_samples_series-3_target-1_20181210.npz',
       'graphNN_samples_series-3_target-1_20181211.npz',
       'graphNN_samples_series-3_target-1_20181212.npz',
       'graphNN_samples_series-3_target-1_20181213.npz'], dtype='<U56')

In [3]:
# Every archive name ends in '_<YYYYMMDD>.npz'. Take the last underscore-separated
# token of the extension-stripped name rather than a fixed token position: names
# such as 'graphNN_linegraph_samples_series-3_target-1_20181203.npz' carry an extra
# token, so position 4 would yield 'targ' instead of a date.
dates = np.unique([os.path.splitext(j)[0].rsplit('_', 1)[-1] for j in files])
dates

array(['20181203', '20181204', '20181205', '20181206', '20181207',
       '20181210', '20181211', '20181212', '20181213', '20181214',
       '20181217', '20181218', '20181219', '20181220', '20181221',
       '20181224', '20181225', '20181226', '20181227', '20181228',
       '20181231', '20190101', '20190102', '20190103', '20190104',
       '20190107', '20190108', '20190109', '20190110', '20190111',
       '20190114', '20190115', '20190116', '20190117', '20190118',
       '20190121', '20190122', '20190123', '20190124', '20190125',
       '20190128', '20190129', '20190130', '20190131', '20190201',
       '20190204', '20190205', '20190206', '20190207', '20190208',
       '20190211', '20190212', '20190213', '20190214', '20190215',
       '20190218', '20190219', '20190220', '20190221', '20190222',
       '20190225', '20190226', '20190227', '20190228', '20190301',
       '20190304', '20190305', '20190306', '20190307', '20190308'],
      dtype='<U8')

In [4]:
# Load the tower table and order its rows by tower_id.
tower_info = pd.read_csv(source_pol + 'fixed_merged-40_tower_locations.csv')  ## CHANGED
sort_idx = np.argsort(tower_info.tower_id.values)
# reset_index() without drop=True keeps the old row labels as a new leading
# 'index' column, which shifts the positional column slice below by one.
tower_info = tower_info.iloc[sort_idx].reset_index()

# De-duplicate the rows formed by columns 2 onward, then discard the first of
# those columns; what remains is used as the coordinate array downstream.
coords = np.unique(tower_info.iloc[:, 2:], axis=0)[:, 1:]
coords.shape

(8622, 2)

#### Adjacency matrix as decaying distance matrix

In [5]:
# Edge weight decays exponentially with the Euclidean tower-to-tower distance
# (decay scale 20000 in the units of `coords` - presumably metres, TODO confirm).
adj_mat = np.exp(-distance.cdist(coords, coords, 'euclidean') / 20000)
# Sparsify: keep weights of at least 1e-3 and zero out the weaker ones, i.e. pairs
# further apart than roughly 140 km. The previous mask `adj_mat < 1e-3` was
# inverted: it kept only the distant pairs and zeroed every nearby pair,
# including the diagonal.
adj_mat = adj_mat * (adj_mat >= 1e-3)

### Ideas:
- node prediction: 
    - use node features
    
    
- link prediction the same way using line graph

- try spektral edge conditioned conv / graph conv to predict node features 
    - stack 3 consecutive steps as node and edge feature sets

### Data loader 

In [6]:
# Load the sample archive of one day; `daynum` indexes into `dates`.
daynum = 0
loaded = np.load(source + 'graphNN_samples_series-3_target-1_' + dates[daynum] + '.npz')
nfs = loaded['nf']
ef_mats = loaded['ef_mat']
nf_targets = loaded['nf_target']
node_filt = loaded['filt']

# Restrict the tower adjacency to the nodes selected by this day's filter.
# NOTE: this overwrites the full matrix, so the cell cannot be re-run without
# first recomputing `adj_mat` from `coords`.
adj_mat = adj_mat[np.ix_(node_filt, node_filt)]
# The model is fed one adjacency matrix per sample: replicate along a new leading
# axis to match the number of samples in `nfs` (previously hard-coded as 21).
adj_mat = np.repeat(np.expand_dims(adj_mat, 0), nfs.shape[0], 0)

KeyboardInterrupt: 

In [None]:
# Shapes of the adjacency stack and of the arrays loaded from the archive
tuple(arr.shape for arr in (adj_mat, nfs, nf_targets, ef_mats))

### ML

In [None]:
# --- Graph dimensions, read off the loaded node-feature array ---
# (line-graph counterparts, currently disabled:
#  N_line = nf_train_line.shape[-2]; F_line = nf_train_line.shape[-1])
N, F = nfs.shape[-2:]              # nodes per graph, features per node

# --- Target dimensionality ---
n_out = nf_targets.shape[-2]

# --- Training hyper-parameters ---
learning_rate = 1e-3               # step size handed to the optimizer
epochs = 10                        # number of training epochs
batch_size = 1                     # batch size passed to model.fit
es_patience = 5                    # patience for early stopping

In [None]:
# Model inputs: per-graph node features and a dense adjacency matrix
nf_in = Input(shape=(N, F))
adj_in = Input(shape=(N, N))

# Three stacked graph convolutions (32 -> 32 -> 1 channels), all ReLU,
# each of them fed the same adjacency input.
gc0 = nf_in
for n_channels in (32, 32, 1):
    gc0 = GCNConv(n_channels, activation='relu')([gc0, adj_in])

# Flatten the output of the last convolution.
# Alternative read-out: outp = GlobalAttentionPool()(gc0)
outp = Flatten()(gc0)

In [None]:
# Build model: wire the two inputs to the flattened output
model = Model(inputs=[nf_in, adj_in], outputs=outp)
#model = multi_gpu_model(model, 2)
# `learning_rate` is the current keyword of tf.keras Adam; `lr` is only a
# deprecated alias for it.
optimizer = Adam(learning_rate=learning_rate, amsgrad=True, decay=1e-6)
model.compile(optimizer=optimizer, loss='mae')
model.summary()

In [None]:
# Train model
# Inputs are passed in the order of the model's input list: [nf_in, adj_in].
# A fraction of 0.1 of the samples is set aside as validation data.
history = model.fit(
    x=[nfs, adj_mat],
    y=nf_targets,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)