# Hotel-50k Graph Construction
In this notebook we will construct the two graphs for our supervised and unsupervised data for the Hotel-50k dataset.

In [1]:
import numpy as np
import pandas as pd
import torch as th
import dgl
import scipy
import networkx as nx

Using backend: pytorch


In [15]:
#load in our data - ~1mil rows per dataset
#neighbor matrices: one row per node, each row listing that node's neighbor ids
#(presumably row i belongs to node i, in the same order as node_names -- TODO confirm)
sup_neighbors = np.load('hotel50k_graph_data/neighbors_sup.npy')
unsup_neighbors = np.load('hotel50k_graph_data/neighbors_unsup.npy')
#relative image paths, one string per node
node_names = np.load('hotel50k_graph_data/train_images_name.npy')
#per-node feature vectors that are later attached to the graphs as ndata['features']
node_features = np.load('hotel50k_graph_data/db_vectors.npy')

## Graph Construction

In [3]:
#number of nodes (rows) and neighbors per node (columns), kept for ease of reference
#the sup and unsup matrices share these dimensions, so the unsup one is used for both
n, m = unsup_neighbors.shape[:2]
#node ids 0..n-1, reused as the DataFrame index when building each adjacency list
index = list(range(n))

### Unsupervised Graph

In [4]:
#neighbor table as a DataFrame: row i holds the neighbor ids of node i
#filter from 100 neighbors to 25 neighbors by keeping only the first 25 columns
unsup_df = pd.DataFrame(data = unsup_neighbors, index=index).iloc[:,0:25]

#adjacency list as a dict of lists --> {0:[76,23,90], ...}
#built row-wise from the underlying array: the previous .T.to_dict('list') first
#transposed the frame into one column per node (~1mil columns) and then iterated
#those columns one Series at a time, which is very slow and memory hungry
#keys and values are the same plain Python ints as before
unsup_adj_list = dict(zip(index, unsup_df.to_numpy().tolist()))

In [5]:
#build the unsupervised dgl graph, going through a directed networkx graph
#each adjacency-list entry node -> [neighbors] becomes directed edges node -> neighbor
unsup_dgl_graph = dgl.from_networkx(
    nx.from_dict_of_lists(unsup_adj_list, create_using=nx.DiGraph)
)

### Supervised Graph

In [6]:
#neighbor table as a DataFrame: row i holds the neighbor ids of node i
#filter from 100 neighbors to 25 neighbors by keeping only the first 25 columns
sup_df = pd.DataFrame(data = sup_neighbors, index=index).iloc[:,0:25]

#adjacency list as a dict of lists --> {0:[76,23,90], ...}
#built row-wise from the underlying array: the previous .T.to_dict('list') first
#transposed the frame into one column per node (~1mil columns) and then iterated
#those columns one Series at a time, which is very slow and memory hungry
#keys and values are the same plain Python ints as before (-1 entries included)
sup_adj_list = dict(zip(index, sup_df.to_numpy().tolist()))

In [7]:
#some classes do not have 25 neighbors and pad the row with -1 --> problem
#drop the -1 placeholders from every neighbor list in the adjacency list
#before constructing the graph, so they never show up as a node
sup_adj_list = {
    node: [nbr for nbr in nbrs if nbr != -1]
    for node, nbrs in sup_adj_list.items()
}

In [8]:
#build the supervised dgl graph, going through a directed networkx graph
#each adjacency-list entry node -> [neighbors] becomes directed edges node -> neighbor
sup_dgl_graph = dgl.from_networkx(
    nx.from_dict_of_lists(sup_adj_list, create_using=nx.DiGraph)
)

### Save Graphs

In [9]:
#save each graph object
#both graphs are written to a single binary file, unsup first and sup second
from dgl.data.utils import save_graphs
#label tensor has two entries for the two graphs
#(presumably 0 = unsup and 1 = sup, following the list order -- TODO confirm)
graph_labels = {"glabel": th.tensor([0,1])}
#NOTE(review): node features are only attached to the graphs in a later cell,
#so the graphs stored here carry structure only -- confirm this is intended
save_graphs("./data.bin", [unsup_dgl_graph, sup_dgl_graph], graph_labels)

In [11]:
#read the saved graphs back from disk to check the round trip
from dgl.data.utils import load_graphs
#glist receives the stored graphs, label_dict the labels saved alongside them
glist, label_dict = load_graphs("./data.bin")

In [12]:
#display the reloaded graphs to inspect their node/edge counts and data schemes
glist

[Graph(num_nodes=1085862, num_edges=27146550,
       ndata_schemes={}
       edata_schemes={}),
 Graph(num_nodes=1085862, num_edges=27146400,
       ndata_schemes={}
       edata_schemes={})]

### Add Features and Labels to Graphs

In [32]:
#attach the node features to both graphs as a tensor
#features extracted via MobileNet
#each graph receives its own tensor copy of node_features
for graph in (unsup_dgl_graph, sup_dgl_graph):
    graph.ndata['features'] = th.tensor(node_features)

In [38]:
#peek at the image path strings stored for the nodes
node_names

array(['82/87997/traffickcam/3908818.jpg',
       '82/87997/travel_website/6546706.jpg',
       '82/87997/travel_website/6546695.jpg', ...,
       '88/34589/travel_website/7311681.jpg',
       '88/34589/travel_website/7311687.jpg',
       '88/34589/travel_website/7311689.jpg'], dtype='<U37')

In [45]:
#extract node labels from image string
#the label is the leading path component, e.g. '82/87997/...' -> 82
temp_node_names = [int(path.split('/')[0]) for path in node_names.tolist()]

#distinct label values that occur across all nodes
set(temp_node_names)

{-1,
 0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92}

In [44]:
#put the extracted labels in an array and show the entries that are negative
temp = np.array(temp_node_names)
is_negative = temp < 0
temp[is_negative]

array([-1, -1, -1, ..., -1, -1, -1])

In [46]:
#show the original path strings again for comparison with the extracted labels
node_names

array(['82/87997/traffickcam/3908818.jpg',
       '82/87997/travel_website/6546706.jpg',
       '82/87997/travel_website/6546695.jpg', ...,
       '88/34589/travel_website/7311681.jpg',
       '88/34589/travel_website/7311687.jpg',
       '88/34589/travel_website/7311689.jpg'], dtype='<U37')