In [1]:
# ====================================================#
# Import Package & Setting Environment
# ====================================================#

import dgl as d
import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from utils import QoSGraphDataset,FeatureLookup
import logging
import numpy as np
from tqdm import *
import dgl.function as fn
log = logging.getLogger('log')
t.set_default_tensor_type(t.FloatTensor)

def Metrics(realVec, estiVec):
    realVec = np.array(realVec)
    estiVec = np.array(estiVec)

    absError = np.abs(estiVec - realVec)
    mae = np.mean(absError)
    nmae = mae / (np.sum(realVec) / absError.shape[0])
    rmse = np.linalg.norm(absError) / np.sqrt(absError.shape[0])
    relativeError = absError / realVec
    mre = np.percentile(relativeError, 50)
    npre = np.percentile(relativeError, 90)

    return np.array([mae, nmae, rmse, mre, npre])

# ====================================================#
# Experiment Setting
# ====================================================#
density = 0.025
# In[3]:


# ====================================================#
# Import Dataset & Create Feature Lookup
# ====================================================#

dataset = QoSGraphDataset('rtds.txt',type='rt')
#
trainsize = int(density * 339*5825)
testsize = len(dataset) - trainsize
trainset, testset = random_split(dataset, [trainsize, testsize])
testset, _ = random_split(testset, [100000, len(testset) - 100000])
# ====================================================#
# Create Graph
# ====================================================#
def get_predefine_graph(dataset):

    userg = d.DGLGraph()
    servg = d.DGLGraph()
    user_lookup = FeatureLookup()
    serv_lookup = FeatureLookup()
    for i in range(339):
        user_lookup.register('User',i)
    for j in range(5825):
        serv_lookup.register('Serv', j)
    for ure in dataset.data[:,3]:
        user_lookup.register('URE', ure)
    for uas in dataset.data[:, 4]:
        user_lookup.register('UAS', uas)
    for sre in dataset.data[:, 5]:
        serv_lookup.register('SRE', sre)
    for spr in dataset.data[:, 6]:
        serv_lookup.register('SPR', spr)
    for sas in dataset.data[:, 7]:
        serv_lookup.register('SAS', sas)

    print('Registered')
    userg.add_nodes(len(user_lookup))
    servg.add_nodes(len(serv_lookup))
    print('Nodes')

    for line in tqdm(dataset.data):
        uid, sid, val, ure, uas, sre, spr, sas = line

        ure = user_lookup.query_id(ure)
        if not userg.has_edges_between(uid,ure):
            userg.add_edges(uid, ure)

        uas = user_lookup.query_id(uas)
        if not userg.has_edges_between(uid,uas):
            userg.add_edges(uid, uas)

        sre = serv_lookup.query_id(sre)
        if not servg.has_edges_between(sid,sre):
            servg.add_edges(sid, sre)

        sas = serv_lookup.query_id(sas)
        if not servg.has_edges_between(sid,sas):
            servg.add_edges(sid, sas)

        spr = serv_lookup.query_id(spr)
        if not servg.has_edges_between(sid,spr):
            servg.add_edges(sid, spr)

    userg = d.add_self_loop(userg)
    userg = d.to_bidirected(userg)
    servg = d.add_self_loop(servg)
    servg = d.to_bidirected(servg)


def get_one_graph(dataset):
    extg = d.DGLGraph()
    extlookup = FeatureLookup()

    for i in range(339):
        extlookup.register('User',i)
    for j in range(5825):
        extlookup.register('Serv', j)
    for ure in dataset.data[:,2]:
        extlookup.register('RE', ure)
    for uas in dataset.data[:, 4]:
        extlookup.register('UAS', uas)

    for sre in dataset.data[:, 5]:
        extlookup.register('RE', sre)
    for spr in dataset.data[:, 6]:
        extlookup.register('SPR', spr)
    for sas in dataset.data[:, 7]:
        extlookup.register('SAS', sas)

    print('Registered')
    extg.add_nodes(len(extlookup))
    print('Nodes')

    for line in tqdm(dataset.data):
        uid, sid, val, ure, uas, sre, spr, sas = line
        uid = int(uid)
        sid = int(sid) + 339

        if not extg.has_edges_between(uid,sid):
            extg.add_edges(uid, sid)

        ure = extlookup.query_id(ure)
        if not extg.has_edges_between(uid,ure):
            extg.add_edges(uid, ure)

        uas = extlookup.query_id(uas)
        if not extg.has_edges_between(uid,uas):
            extg.add_edges(uid, uas)

        sre = extlookup.query_id(sre)
        if not extg.has_edges_between(sid,sre):
            extg.add_edges(sid, sre)

        sas = extlookup.query_id(sas)
        if not extg.has_edges_between(sid,sas):
            extg.add_edges(sid, sas)

        spr = extlookup.query_id(spr)
        if not extg.has_edges_between(sid,spr):
            extg.add_edges(sid, spr)

    extg = d.add_self_loop(extg)
    extg = d.to_bidirected(extg)

extg = d.DGLGraph()
extlookup = FeatureLookup()

for i in range(339):
    extlookup.register('User',i)
for j in range(5825):
    extlookup.register('Serv', j)
for ure in dataset.data[:,2]:
    extlookup.register('RE', ure)
for uas in dataset.data[:, 4]:
    extlookup.register('UAS', uas)

for sre in dataset.data[:, 5]:
    extlookup.register('RE', sre)
for spr in dataset.data[:, 6]:
    extlookup.register('SPR', spr)
for sas in dataset.data[:, 7]:
    extlookup.register('SAS', sas)

print('Registered')
extg.add_nodes(len(extlookup))
print('Nodes')

for line in tqdm(dataset.data):
    uid, sid, val, ure, uas, sre, spr, sas = line
    uid = int(uid)
    sid = int(sid) + 339

    if not extg.has_edges_between(uid,sid):
        extg.add_edges(uid, sid)

    ure = extlookup.query_id(ure)
    if not extg.has_edges_between(uid,ure):
        extg.add_edges(uid, ure)

    uas = extlookup.query_id(uas)
    if not extg.has_edges_between(uid,uas):
        extg.add_edges(uid, uas)

    sre = extlookup.query_id(sre)
    if not extg.has_edges_between(sid,sre):
        extg.add_edges(sid, sre)

    sas = extlookup.query_id(sas)
    if not extg.has_edges_between(sid,sas):
        extg.add_edges(sid, sas)

    spr = extlookup.query_id(spr)
    if not extg.has_edges_between(sid,spr):
        extg.add_edges(sid, spr)

extg = d.add_self_loop(extg)
extg = d.to_bidirected(extg)
# ====================================================#
# Create DataLoader
# ====================================================#
testset, _ = random_split(testset, [40000, len(testset) - 40000])
trainLoader = DataLoader(trainset, 256, shuffle=True,num_workers=6,pin_memory=True)
testLoader = DataLoader(testset, 256, shuffle=True,num_workers=6,pin_memory=True)
dataset.switch()

# ====================================================#
# Create Model
# ====================================================#
from dgl.nn import SAGEConv,GATConv

class NeuralCF(t.nn.Module):

    def __init__(self, input_size):
        super(NeuralCF, self).__init__()
        self.layers = t.nn.Sequential(
            t.nn.Linear(input_size, 128),
            t.nn.LayerNorm(128),
            t.nn.ReLU(),
            t.nn.Linear(128, 128),
            t.nn.LayerNorm(128),
            t.nn.ReLU(),
            t.nn.Linear(128, 1)
        )

    def forward(self, input):
        return self.layers(input)


class GraphSAGEConv(t.nn.Module):

    def __init__(self, graph, dim, order=3):
        super(GraphSAGEConv, self).__init__()
        self.order = order
        self.graph = graph
        self.embedding = t.nn.Parameter(t.Tensor(self.graph.number_of_nodes(), dim))
        t.nn.init.kaiming_normal_(self.embedding)
        self.graph.ndata['L0'] = self.embedding
        self.layers = t.nn.ModuleList([SAGEConv(dim, dim, aggregator_type='gcn') for _ in range(order)])
        #self.layers = t.nn.ModuleList([GATConv(dim, dim, num_heads=1) for _ in range(order)])
        self.norms = t.nn.ModuleList([t.nn.LayerNorm(dim) for _ in range(order)])
        self.acts = t.nn.ModuleList([t.nn.ReLU() for _ in range(order)])

    def forward(self,uid):
        g = self.graph
        feats = g.ndata['L0']
        for i, (layer, norm, act) in enumerate(zip(self.layers, self.norms, self.acts)):
            feats = layer(g,feats).squeeze()
            feats = norm(feats)
            feats = act(feats)
            g.ndata[f'L{i + 1}'] = feats
        embeds = g.ndata['L0'][uid]
        for i in range(self.order):
            embeds = t.cat((embeds, g.ndata[f'L{i + 1}'][uid]), dim=-1)

        return embeds

class GraphSAGEConvSG(t.nn.Module):

    def __init__(self, graph, dim, order=3):
        super(GraphSAGEConvSG, self).__init__()
        self.order = order
        self.graph = graph
        self.embedding = t.nn.Parameter(t.Tensor(self.graph.number_of_nodes(), dim))
        t.nn.init.kaiming_normal_(self.embedding)
        self.graph.ndata['L0'] = self.embedding
        self.layers = t.nn.ModuleList([SAGEConv(dim, dim, aggregator_type='gcn') for _ in range(order)])
        #self.layers = t.nn.ModuleList([GATConv(dim, dim, num_heads=1) for _ in range(order)])
        self.norms = t.nn.ModuleList([t.nn.LayerNorm(dim) for _ in range(order)])
        self.acts = t.nn.ModuleList([t.nn.ReLU() for _ in range(order)])

    def forward(self,uid,sid):
        g = self.graph
        feats = g.ndata['L0']
        for i, (layer, norm, act) in enumerate(zip(self.layers, self.norms, self.acts)):
            feats = layer(g,feats).squeeze()
            feats = norm(feats)
            feats = act(feats)
            g.ndata[f'L{i + 1}'] = feats
        user_embeds = g.ndata['L0'][uid]
        serv_embeds = g.ndata['L0'][sid+339]

        for i in range(self.order):
            user_embeds = t.cat((user_embeds, g.ndata[f'L{i + 1}'][uid]), dim=-1)
            serv_embeds = t.cat((serv_embeds, g.ndata[f'L{i + 1}'][sid+339]), dim=-1)

        return user_embeds,serv_embeds


class NeuGraphMF(t.nn.Module):

    def __init__(self, usergraph, servgraph, dim, order=3):
        super(NeuGraphMF, self).__init__()
        self.usergraph = usergraph
        self.servgraph = servgraph
        self.dim = dim
        self.order = order
        self.UserEmbedding = GraphSAGEConv(usergraph,dim, order)
        self.ServEmbedding = GraphSAGEConv(servgraph,dim, order)
        self.ncf = NeuralCF(2 * (order + 1) * dim)

    def forward(self, uid, sid):
        user_embeds = self.UserEmbedding(uid)
        serv_embeds = self.ServEmbedding(sid)
        #return t.sum(user_embeds * serv_embeds,-1).sigmoid()
        return self.ncf(t.cat((user_embeds, serv_embeds), dim=-1)).sigmoid()

class NeuGraphMF_SG(t.nn.Module):

    def __init__(self, graph, dim, order=3):
        super(NeuGraphMF_SG, self).__init__()
        self.graph = graph
        self.dim = dim
        self.order = order
        self.GlobalEmbedding = GraphSAGEConvSG(graph,dim, order)
        self.ncf = NeuralCF(2 * (order + 1) * dim)

    def forward(self, uid, sid):
        user_embeds, serv_embeds = self.GlobalEmbedding(uid,sid)
        #return t.sum(user_embeds * serv_embeds,-1).sigmoid()
        return self.ncf(t.cat((user_embeds, serv_embeds), dim=-1)).sigmoid()
cuda = t.cuda.is_available()
#model = NeuGraphMF(userg, servg, dim=32, order=2)
model = NeuGraphMF_SG(extg, dim=32, order=2)

if cuda:
    model = NeuGraphMF(userg, servg, dim=32, order=0).cuda()
    # model = PureMF(dim=64).cuda()

loss = t.nn.L1Loss().cuda()
lr = 1e-3
optimizer = t.optim.AdamW(model.parameters(),lr=lr)

for epoch in range(15):
    total_loss = 0
    if epoch % 5 == 0:
        lr /= 2
        optimizer = t.optim.AdamW(model.parameters(),lr=lr,weight_decay=1e-3)

    model.train()

    try:
        with tqdm(trainLoader) as dataiter:
            for batch in dataiter:
                optimizer.zero_grad()
                uid,sid,label = batch
                if cuda:
                    uid = uid.cuda()
                    sid = sid.cuda()
                    label = label.cuda()
                val = model(uid,sid)
                val = val.reshape(label.shape)
                trainloss = loss(val,label)
                trainloss.backward()
                optimizer.step()
                total_loss += trainloss
    except KeyboardInterrupt:
        dataiter.close()
        raise
    arr0 = np.zeros((5,))
    model.eval()
    try:
        with tqdm(testLoader) as dataiter:
            for batch in dataiter:
                uid,sid,label = batch
                if cuda:
                    uid = uid.cuda()
                    sid = sid.cuda()
                val = model(uid,sid)
                val = val.reshape(label.shape).detach().cpu().numpy()
                val,label = val * 19.9,label * 19.9
                label = label.detach().numpy()
                arr0 += Metrics(val,label)
    except KeyboardInterrupt:
        dataiter.close()
        raise
    arr0 /= len(testLoader)
    print(f'Epochs {epoch}: Loss={total_loss / len(trainLoader)}')
    print('Epochs %d: MAE:%.4f NMAE:%.4f RMSE:%.4f MRE:%.4f NPRE:%.4f' %(epoch,*arr0))


import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, init='pca', random_state=0) #使用TSNE对特征降到二维
result = tsne.fit_transform(userg.ndata['L0'][:339].detach().numpy()) #降维后的数据
plt.scatter(result[:,0],result[:,1])
plt.show()

# Attention Model


class SAGEConv(nn.Module):
    def __init__(self,
                 in_feats,
                 out_feats,
                 aggregator_type,
                 feat_drop=0.,
                 bias=True,
                 norm=None,
                 activation=None):
        super(SAGEConv, self).__init__()

        #self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
        self._out_feats = out_feats
        self._aggre_type = aggregator_type
        self.norm = norm
        self.feat_drop = nn.Dropout(feat_drop)
        self.activation = activation
        # aggregator type: mean/pool/lstm/gcn
        if aggregator_type == 'pool':
            self.fc_pool = nn.Linear(self._in_src_feats, self._in_src_feats)
        if aggregator_type == 'lstm':
            self.lstm = nn.LSTM(self._in_src_feats, self._in_src_feats, batch_first=True)
        if aggregator_type != 'gcn':
            self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=bias)
        self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=bias)

        self.attention = nn.MultiheadAttention(dim,num_heads=1)

        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        gain = nn.init.calculate_gain('relu')
        if self._aggre_type == 'pool':
            nn.init.xavier_uniform_(self.fc_pool.weight, gain=gain)
        if self._aggre_type == 'lstm':
            self.lstm.reset_parameters()
        if self._aggre_type != 'gcn':
            nn.init.xavier_uniform_(self.fc_self.weight, gain=gain)
        nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain)

    def _lstm_reducer(self, nodes):
        """LSTM reducer
        NOTE(zihao): lstm reducer with default schedule (degree bucketing)
        is slow, we could accelerate this with degree padding in the future.
        """
        m = nodes.mailbox['m'] # (B, L, D)
        batch_size = m.shape[0]
        h = (m.new_zeros((1, batch_size, self._in_src_feats)),
             m.new_zeros((1, batch_size, self._in_src_feats)))
        _, (rst, _) = self.lstm(m, h)
        return {'neigh': rst.squeeze(0)}

    def forward(self, graph, feat):
        r"""Compute GraphSAGE layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """
        graph = graph.local_var()

        if isinstance(feat, tuple):
            feat_src = self.feat_drop(feat[0])
            feat_dst = self.feat_drop(feat[1])
        else:
            feat_src = feat_dst = self.feat_drop(feat)

        h_self = feat_dst

        if self._aggre_type == 'mean':
            graph.srcdata['h'] = feat_src
            graph.update_all(fn.copy_src('h', 'm'), fn.mean('m', 'neigh'))
            h_neigh = graph.dstdata['neigh']
        elif self._aggre_type == 'gcn':
            #check_eq_shape(feat)
            graph.srcdata['h'] = feat_src
            graph.dstdata['h'] = feat_dst     # same as above if homogeneous
            graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'neigh'))
            # divide in_degrees
            degs = graph.in_degrees().to(feat_dst)
            h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) / (degs.unsqueeze(-1) + 1)
        elif self._aggre_type == 'pool':
            graph.srcdata['h'] = F.relu(self.fc_pool(feat_src))
            graph.update_all(fn.copy_src('h', 'm'), fn.max('m', 'neigh'))
            h_neigh = graph.dstdata['neigh']
        elif self._aggre_type == 'lstm':
            graph.srcdata['h'] = feat_src
            graph.update_all(fn.copy_src('h', 'm'), self._lstm_reducer)
            h_neigh = graph.dstdata['neigh']
        else:
            raise KeyError('Aggregator type {} not recognized.'.format(self._aggre_type))

        # GraphSAGE GCN does not require fc_self.
        if self._aggre_type == 'gcn':
            rst = self.fc_neigh(h_neigh)
        else:
            rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
        # activation
        if self.activation is not None:
            rst = self.activation(rst)
        # normalization
        if self.norm is not None:
            rst = self.norm(rst)
        return rst
extg.in_degrees()


Loading Dataset..



100%|██████████| 1873745/1873745 [00:02<00:00, 637746.88it/s]


Loading Finish..

Registered
Nodes


 38%|███▊      | 717167/1873745 [19:26<31:20, 614.88it/s]


KeyError: 'Hong Kong'