In [1]:
import torch

a = torch.randn(5, 3)
a

tensor([[-3.9737e-04, -7.6730e-01,  1.3382e+00],
        [ 5.6111e-01, -3.6587e-01,  9.8626e-01],
        [ 1.0307e+00, -5.0896e-01, -6.4211e-02],
        [ 6.6230e-01, -1.3097e+00,  4.5422e-02],
        [-6.0291e-01,  4.3131e-01,  1.2166e+00]])

In [3]:
b = torch.randn(3, 3)
b

tensor([[-0.6398, -1.3036,  0.2860],
        [-1.0804,  1.4608,  0.2246],
        [-1.0245,  0.1208, -0.6985]])

In [4]:
c = []
c.append(a)
c.append(b)
c

[tensor([[-3.9737e-04, -7.6730e-01,  1.3382e+00],
         [ 5.6111e-01, -3.6587e-01,  9.8626e-01],
         [ 1.0307e+00, -5.0896e-01, -6.4211e-02],
         [ 6.6230e-01, -1.3097e+00,  4.5422e-02],
         [-6.0291e-01,  4.3131e-01,  1.2166e+00]]),
 tensor([[-0.6398, -1.3036,  0.2860],
         [-1.0804,  1.4608,  0.2246],
         [-1.0245,  0.1208, -0.6985]])]

In [10]:
import numpy as np

np.vstack(c).shape

(8, 3)

In [12]:
import numpy as np

a = [0.1, 0.3, 0.6]
a

[0.1, 0.3, 0.6]

In [14]:
a = np.array(a)
a

array([0.1, 0.3, 0.6])

In [15]:
b = []
b.append([0.1,0.2])
b.append([0.3,0.5])
b

[[0.1, 0.2], [0.3, 0.5]]

In [16]:
b = np.array(b)
b

array([[0.1, 0.2],
       [0.3, 0.5]])

In [18]:
b.transpose()

array([[0.1, 0.3],
       [0.2, 0.5]])

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import math
import dgl
import dgl.nn as dglnn
from dgl.dataloading import DataLoader as DGLDataLoader, NeighborSampler, EdgeDataLoader, negative_sampler
import dgl.function as fn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
import argparse
import random
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import datetime
import time as clock

from sklearn.preprocessing import LabelEncoder

In [3]:
# random seed
seed = 2021
random.seed(seed)
torch.manual_seed(seed)
trainval_split = 0.125

In [5]:
def prep_time(t):
    t = t[:-2]  # 去除分钟
    weekday = datetime.datetime.strptime(t[:-2], '%Y%m%d').weekday()
    if weekday >= 5:
        weekday = '1'
    else:
        weekday = '0'
    return '{}_{}'.format(weekday, t[-2:])  # 取周末/工作日；小时

In [6]:
# device 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: {}".format(device))
if torch.cuda.device_count() > 0:
    torch.cuda.manual_seed_all(seed)

df_usage = pd.read_csv('../data/baseline_loc_time.txt', sep='\t')
df_usage['app_seq'] = df_usage['app_seq'].apply(ast.literal_eval)
df_usage['time_seq'] = df_usage['time_seq'].apply(ast.literal_eval)
df_usage['time'] = df_usage['time'].apply(lambda x: str(x))
# time的转换 [示例：0_13，周末为1，工作日为0；13表示13点]
df_usage['time'] = df_usage['time'].apply(lambda x: prep_time(x))

# encoder
user_encoder = LabelEncoder()
time_encoder = LabelEncoder()
app_encoder = LabelEncoder()
loc_encoder = LabelEncoder()

# 特征编码
user_encoder.fit(df_usage['user'].unique())
time_encoder.fit(df_usage['time'].unique())
all_apps = list(df_usage['app'].unique())
for app_seq in df_usage['app_seq']:
    all_apps.extend(app_seq)
app_encoder.fit(list(set(all_apps)))
loc_encoder.fit(df_usage['location'].unique())

stratify_seed = df_usage['user']
df_usage['user'] = user_encoder.transform(df_usage['user'])
df_usage['time'] = time_encoder.transform(df_usage['time'])
df_usage['app'] = app_encoder.transform(df_usage['app'])
df_usage['app_seq'] = df_usage['app_seq'].apply(lambda x: app_encoder.transform(x))
df_usage['location'] = loc_encoder.transform(df_usage['location'])

# 输入特征向量的维度
num_users = len(df_usage['user'].unique())
num_times = len(df_usage['time'].unique())
num_apps = len(app_encoder.classes_)
num_locs = len(df_usage['location'].unique())

print(num_users)
print(num_times)
print(num_locs)
print(num_apps)

Device: cuda
748
48
6291
1518


In [7]:
print(len(df_usage))

908770


In [None]:
# split: trian/val/test
trainval, test = train_test_split(df_usage, test_size=0.2, random_state=2021, stratify=df_usage['user'])
train, val = train_test_split(trainval, test_size=trainval_split, random_state=2021, stratify=trainval['user'])    


# 构建user-time-location-app异质图
edge_list = df_usage[['user', 'time', 'location', 'app']]

user_node = edge_list['user']
time_node = edge_list['time']
loc_node = edge_list['location']
app_node = edge_list['app']

hetero_graph = dgl.heterograph({
    ('user', 'click', 'app'): (user_node, app_node),
    ('app', 'clicked-by', 'user'): (app_node, user_node),
    ('location', 'open', 'app'): (loc_node, app_node),
    ('app', 'opened-by', 'location'): (app_node, loc_node),
})
user_node_feat = hetero_graph.nodes('user')
loc_node_feat = hetero_graph.nodes('location')
app_node_feat = hetero_graph.nodes('app')
hetero_graph.nodes['user'].data['feature'] = user_node_feat
hetero_graph.nodes['location'].data['feature'] = loc_node_feat
hetero_graph.nodes['app'].data['feature'] = app_node_feat

# 异质图转简单图
g = dgl.to_homogeneous(hetero_graph, ndata=['feature'])
g = dgl.add_self_loop(g)

In [121]:
list(test.groupby(['user', 'time']))

[((0, 8),
        user  time   app  location              app_seq               time_seq
  917      0     8   290      1432    [290, 2, 27, 290]   [4.0, 4.0, 4.0, 0.0]
  910      0     8   516      5179      [3, 20, 2, 445]   [1.0, 0.0, 0.0, 0.0]
  981      0     8   139      5232   [158, 223, 6, 242]   [1.0, 1.0, 1.0, 0.0]
  261      0     8   290      5179       [0, 290, 0, 2]   [6.0, 6.0, 5.0, 0.0]
  281      0     8   289      5179   [2, 445, 516, 241]   [0.0, 0.0, 0.0, 0.0]
  269      0     8   311      5179   [18, 23, 309, 314]   [0.0, 0.0, 0.0, 0.0]
  321      0     8   288      1242       [303, 3, 0, 3]   [1.0, 1.0, 1.0, 0.0]
  925      0     8   912      1432  [290, 74, 406, 474]   [0.0, 0.0, 0.0, 0.0]
  252      0     8     6      5232   [516, 289, 223, 6]   [0.0, 0.0, 0.0, 0.0]
  31       0     8   289      1181   [2, 445, 516, 241]   [0.0, 0.0, 0.0, 0.0]
  996      0     8   126      5232   [25, 138, 139, 20]   [0.0, 0.0, 0.0, 0.0]
  322      0     8     3      1242       [

In [114]:
test[:10]

Unnamed: 0,user,time,app,location,app_seq,time_seq
324759,520,6,20,5416,"[311, 314, 316, 18]","[0.0, 0.0, 0.0, 0.0]"
276396,482,18,388,4242,"[248, 388, 50, 388]","[4.0, 1.0, 1.0, 0.0]"
659336,698,9,311,6146,"[145, 18, 309, 316]","[0.0, 0.0, 0.0, 0.0]"
902536,742,17,3,250,"[307, 49, 581, 494]","[2.0, 2.0, 2.0, 0.0]"
246135,442,37,54,5778,"[52, 87, 307, 49]","[0.0, 0.0, 0.0, 0.0]"
483350,585,45,0,4999,"[7, 146, 424, 7]","[4.0, 4.0, 4.0, 0.0]"
367945,548,46,27,5901,"[43, 65, 7, 2]","[2.0, 1.0, 0.0, 0.0]"
446376,569,37,30,4117,"[7, 220, 30, 30]","[2.0, 2.0, 2.0, 0.0]"
425372,565,36,177,3943,"[255, 177, 757, 1106]","[1.0, 1.0, 0.0, 0.0]"
541771,623,16,544,4701,"[657, 544, 657, 525]","[1.0, 1.0, 0.0, 0.0]"


In [14]:
g

Graph(num_nodes=8557, num_edges=3643637,
      ndata_schemes={'feature': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)})

In [8]:
print(num_users)
print(num_times)
print(num_locs)
print(num_apps)

748
48
6291
1518


In [None]:
### [app, time, user]

In [18]:
print(g.ndata['feature'][:num_apps])

tensor([   0,    1,    2,  ..., 1515, 1516, 1517])


In [19]:
print(g.ndata['feature'][num_apps:num_apps+num_times])

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47])


In [11]:
print(g.ndata['feature'][num_apps+num_times:])

tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
         56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
         84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
         98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
        126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
        140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
        154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
        168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 1

In [73]:
import torch
import torch.nn as nn
class GraphTFIntHgDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        user = self.df.iloc[idx]['user']
        time = self.df.iloc[idx]['time']
        loc = self.df.iloc[idx]['location']
        app = self.df.iloc[idx]['app']
        app_seq = self.df.iloc[idx]['app_seq']
        return (torch.LongTensor([user]), torch.LongTensor([time]), torch.LongTensor([loc]), torch.LongTensor([app]), torch.LongTensor(app_seq))

class DotProductPredictor(nn.Module):
    def forward(self, graph, h):
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            return graph.edata['score']

class GraphTFIntHg(nn.Module):
    def __init__(self, n_users, n_times, n_locs, n_apps, hidden_dim, dim, seq_length, graph, device):
        super(GraphTFIntHg, self).__init__()

        #self.user_app_emb = nn.Linear(1, dim)
        self.utla_emb = nn.Embedding(n_users+n_times+n_locs+n_apps, dim)
        self.time_app_emb = nn.Embedding(n_times+n_apps, dim)
        self.user_emb = nn.Embedding(n_users, dim)
        self.time_emb = nn.Embedding(n_times, dim)
        self.loc_emb = nn.Embedding(n_locs, dim)
        self.app_emb = nn.Embedding(n_apps, dim)
        self.dim = dim
        self.seq_length = seq_length
        self.n_users = n_users
        self.n_times = n_times
        self.n_locs = n_locs
        self.n_apps = n_apps

        self.input_dim = self.dim * 4
        self.hidden_dim = hidden_dim
        #self.hidden_dim = self.input_dim
        self.input_layer = nn.Linear(self.input_dim, self.hidden_dim)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.hidden_dim, nhead=8, batch_first=True)
        #self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.hidden_dim, nhead=8, dim_feedforward=self.hidden_dim*3, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)
        self.time_linear = nn.Linear(self.seq_length, 1)
        self.classifier = nn.Linear(self.hidden_dim, n_apps)

        # graph section
        self.graph = graph.to(device)
        self.device = device
        self.layers = nn.ModuleList()
        # three-layer GraphSAGE-mean
        #self.layers.append(dglnn.GATConv(dim, dim, num_heads=8, allow_zero_in_degree=True))
        #self.layers.append(dglnn.SAGEConv(10, 10, 'mean'))
        #self.layers.append(dglnn.SAGEConv(10, 10, 'mean'))
        self.layers.append(dglnn.SAGEConv(dim, dim, 'mean'))
        self.layers.append(dglnn.SAGEConv(dim, dim, 'mean'))
        self.headmerge = nn.Linear(8, 1)
        self.linear = nn.Linear(10, dim)
        
        self.pred = DotProductPredictor()

    def construct_negative_graph(self, graph, k):
        src, dst = graph.edges()

        neg_src = src.repeat_interleave(k)
        neg_dst = torch.randint(0, graph.num_nodes(), (len(src) * k,)).to(self.device)
        return dgl.graph((neg_src, neg_dst), num_nodes=graph.num_nodes(), device=self.device)


    def graph_layer(self, nodes_idx_uniq):
        sg = dgl.node_subgraph(self.graph, nodes_idx_uniq)
        """
        print("--------sub graph---------")
        print(sg)
        print("--------------------------")
        """
        x = sg.srcdata['feature']

        h = self.utla_emb(x)

        for l, layer in enumerate(self.layers):
            h = layer(sg, h)
            #h = F.group_norm(h, 1)
            h = F.relu(h)

        neg_sg = self.construct_negative_graph(sg, k=5)
        pos_score = self.pred(sg, h)
        neg_score = self.pred(neg_sg, h)

        return h, pos_score, neg_score
    
    def hyperEageLoss(self, u_vec, t_vec, l_vec, a_seq_vec):
        u_vec = u_vec / u_vec.norm(dim=-1, keepdim=True)
        t_vec = t_vec / t_vec.norm(dim=-1, keepdim=True)
        l_vec = l_vec / l_vec.norm(dim=-1, keepdim=True)
        a_seq_vec = a_seq_vec / a_seq_vec.norm(dim=-1, keepdim=True)

        best_vec = u_vec + t_vec + l_vec + a_seq_vec

        hyper_score = self.cosineSim(u_vec, best_vec) + self.cosineSim(t_vec, best_vec) + self.cosineSim(l_vec, best_vec) + self.cosineSim(a_seq_vec, best_vec)

    def forward(self, users, times, locs, app_seq):
        # users [batch_size, 1]
        # times [batch_size, 1]
        # app_seq [batch_size, seq_length]
 
        # node_idx: [app, loc, time, user]
        # 获取所有node
        loc_node_idx = (self.n_apps + locs)
        time_node_idx = (self.n_apps + self.n_locs + times)
        user_node_idx = (self.n_apps + self.n_locs + self.n_times + users)
        app_nodes_idx = app_seq
        
        nodes_idx = torch.cat([user_node_idx, time_node_idx, loc_node_idx, app_nodes_idx], dim=1)
        print(nodes_idx.shape)

        nodes_idx = nodes_idx.reshape(-1)
        node_nums = nodes_idx.shape[0]  # 原始节点数目
        nodes_idx_uniq = nodes_idx.unique()

        h_g, pos_score, neg_score = self.graph_layer(nodes_idx_uniq)
        
        node_hid = h_g.shape[0]
        h_g = h_g.permute(1, 0)
        m = nn.Linear(node_hid, node_nums, device=self.device)
        h_g = m(h_g)
        h_g = h_g.permute(1, 0)

        h = self.utla_emb(nodes_idx)

        batch_size = users.size(0)
        h = h.reshape(batch_size, self.seq_length+3, self.dim)

        user_vector = h[:, 0:1, :]
        time_vector = h[:, 1:2, :]
        loc_vector = h[:, 2:3, :]
        app_seq_vector = h[:, 3:, :]

        

        """
        # [batch_size, seq_length, input_dim]
        input_vector = torch.cat([user_vector.repeat(1, self.seq_length, 1), time_vector.repeat(1, self.seq_length, 1), loc_vector.repeat(1, self.seq_length, 1), app_seq_vector], axis=2)

        
        # graph部分
        h_g = h_g.reshape(batch_size, self.seq_length+3, self.dim)
        user_vector_g = h_g[:, 0:1, :]
        time_vector_g = h_g[:, 1:2, :]
        loc_vector_g = h_g[:, 2:3, :]
        app_seq_vector_g = h_g[:, 3:, :]
        # [batch_size, seq_length, input_dim]
        input_vector_g = torch.cat([user_vector_g.repeat(1, self.seq_length, 1), time_vector.repeat(1, self.seq_length, 1), loc_vector.repeat(1, self.seq_length, 1), app_seq_vector_g], axis=2)

        x1 = self.input_layer(input_vector)
        xg = self.input_layer(input_vector_g)
        #x = input_vector
        x = self.transformer_encoder(x1)

        # 残差，加强graph sage的影响
        x = x + xg
        # 只考虑graph sage
        #x = xg
        # [B, T, H] -> [B, H, T] -> [B, H, 1]
        x = x.permute(0, 2, 1)
        x = self.time_linear(x)
        x = x.squeeze(2)
        
        #x = hidden_last.squeeze(0)
        return self.classifier(x), pos_score, neg_score
        """

In [76]:
def trainModel(name, mode, train, val, n_users, n_times, n_locs, n_apps, g, device):

    train_dataset = GraphTFIntHgDataset(train)   # (636139, 5)
    val_dataset = GraphTFIntHgDataset(val)       # (90877, 5)

    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)
    print('Model Training Started ...', clock.ctime())
    
    # model & optimizer
    model = GraphTFIntHg(n_users, n_times, n_locs, n_apps, 128, 50, 4, g, device)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    criterion = nn.CrossEntropyLoss()
    
    # train & val
    min_val_loss = np.inf
    wait = 0
    p_itr = 500
    loss_list = []
    for epoch in range(1):
        starttime = datetime.datetime.now()
        loss_sum, n = 0.0, 0
        model.train()
        for i, (user, time, loc, app, app_seq) in enumerate(train_loader):
            user = user.to(device)
            time = time.to(device)
            loc = loc.to(device)
            target = app.to(device)
            app_seq = app_seq.to(device)

            optimizer.zero_grad()
            scores, pos_score, neg_score = model(user, time, loc, app_seq) # [batch_size, num_apps]
            print(scores)
            print(pos_score)
            print(neg_score)
            break

In [39]:
MODELNAME="GraphTFIntHg"

In [77]:
trainModel(MODELNAME, 'train', train, val, num_users, num_times, num_locs, num_apps, g, device)

Model Training Started ... Sat Jan 28 17:26:40 2023
torch.Size([256, 7])
tensor([[-1.1149,  0.0734, -0.3659,  ...,  0.0364, -0.0880, -0.0453],
        [ 1.1394, -2.6206,  0.9374,  ...,  0.4893, -0.7246,  1.3873],
        [ 0.0530, -0.8164,  1.8786,  ..., -1.7048, -1.2715, -0.2885],
        ...,
        [-0.7070,  0.6386,  0.1207,  ..., -2.7060, -0.5422, -0.5834],
        [ 0.8775, -2.0123, -0.6946,  ..., -0.9615, -0.1584, -0.8044],
        [ 1.5589, -0.5905, -1.4956,  ...,  1.3281,  0.2325,  0.4997]],
       device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[75.8655],
        [75.8655],
        [75.8655],
        ...,
        [52.6835],
        [52.6835],
        [61.8915]], device='cuda:0', grad_fn=<GSDDMMBackward>)
tensor([[52.2565],
        [57.4721],
        [28.1887],
        ...,
        [36.4590],
        [43.6305],
        [22.7560]], device='cuda:0', grad_fn=<GSDDMMBackward>)


In [54]:
a = torch.tensor([[3,4], [1,2]], dtype=float)

In [56]:
b = (a * a).sum(axis=1)

In [57]:
b = pow(b, 1/2)

In [58]:
b

tensor([5.0000, 2.2361], dtype=torch.float64)

In [59]:
e = a / b
e

tensor([[0.6000, 1.7889],
        [0.2000, 0.8944]], dtype=torch.float64)

In [42]:
c = torch.tensor([1, 2])

In [45]:
c + e

tensor([[1.6000, 3.7889],
        [1.2000, 2.8944]])

In [61]:
n = a.norm(dim=-1, keepdim=True)
n

tensor([[5.0000],
        [2.2361]], dtype=torch.float64)

In [62]:
b

tensor([5.0000, 2.2361], dtype=torch.float64)

In [72]:
a/n

tensor([[0.6000, 0.8000],
        [0.4472, 0.8944]], dtype=torch.float64)

In [77]:
def cosineSim(vec1, vec2):
    norm1 = vec1 / vec1.norm(dim=-1, keepdim=True)
    norm2 = vec2 / vec2.norm(dim=-1, keepdim=True)
    return (norm1 * norm2).sum(dim=-1)

In [89]:
def hyperEageLoss(u_vec, t_vec, l_vec, a_seq_vec):
    u_vec = u_vec / u_vec.norm(dim=-1, keepdim=True)
    t_vec = t_vec / t_vec.norm(dim=-1, keepdim=True)
    l_vec = l_vec / l_vec.norm(dim=-1, keepdim=True)
    a_seq_vec = a_seq_vec / a_seq_vec.norm(dim=-1, keepdim=True)
    print(u_vec)
    print(t_vec)
    print(l_vec)
    print(a_seq_vec)
    best_vec = (u_vec + t_vec + l_vec + a_seq_vec) / 4
    print(best_vec)

    hyper_score = cosineSim(u_vec, best_vec) + cosineSim(t_vec, best_vec) + cosineSim(l_vec, best_vec) + cosineSim(a_seq_vec, best_vec)
    return hyper_score.sum() / 4

In [90]:
u = torch.tensor([1, 2], dtype=float)
t = torch.tensor([2, 3], dtype=float)
l = torch.tensor([3, 4], dtype=float)
a = torch.tensor([[4,5], [5,6], [6,7], [7,8]], dtype=float)
hyperEageLoss(u, t, l ,a)

tensor([0.4472, 0.8944], dtype=torch.float64)
tensor([0.5547, 0.8321], dtype=torch.float64)
tensor([0.6000, 0.8000], dtype=torch.float64)
tensor([[0.6247, 0.7809],
        [0.6402, 0.7682],
        [0.6508, 0.7593],
        [0.6585, 0.7526]], dtype=torch.float64)
tensor([[0.5567, 0.8268],
        [0.5605, 0.8237],
        [0.5632, 0.8214],
        [0.5651, 0.8198]], dtype=torch.float64)


tensor(3.9847, dtype=torch.float64)

In [88]:
torch.randint(0, 100, (2,2))

tensor([[84, 69],
        [33, 32]])

In [91]:
# 构建time-location-app异质图
edge_list = df_usage[['time', 'location', 'app']]

time_node = edge_list['time']
loc_node = edge_list['location']
app_node = edge_list['app']

hetero_graph = dgl.heterograph({
    ('time', 'use', 'app'): (time_node, app_node),
    ('app', 'used-by', 'time'): (app_node, time_node),
    ('time', 'visit', 'location'): (time_node, loc_node),
    ('location', 'visited-by', 'time'): (loc_node, time_node),
    ('location', 'open', 'app'): (loc_node, app_node),
    ('app', 'opened-by', 'location'): (app_node, loc_node)
})
time_node_feat = hetero_graph.nodes('time')
loc_node_feat = hetero_graph.nodes('location')
app_node_feat = hetero_graph.nodes('app')
hetero_graph.nodes['time'].data['feature'] = time_node_feat
hetero_graph.nodes['location'].data['feature'] = loc_node_feat
hetero_graph.nodes['app'].data['feature'] = app_node_feat

# 异质图转简单图
g = dgl.to_homogeneous(hetero_graph, ndata=['feature'])
g = dgl.add_self_loop(g)

In [92]:
g

Graph(num_nodes=7857, num_edges=5460477,
      ndata_schemes={'feature': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)})

In [93]:
print(num_users)
print(num_times)
print(num_locs)
print(num_apps)

748
48
6291
1518


In [None]:
### [app, loc, time]

In [96]:
print(g.ndata['feature'][:num_apps])

tensor([   0,    1,    2,  ..., 1515, 1516, 1517])


In [101]:
print(g.ndata['feature'][num_apps:num_apps+num_locs+2])

tensor([   0,    1,    2,  ..., 6290,    0,    1])


In [103]:
print(g.ndata['feature'][num_apps+num_locs:])

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47])


In [104]:
u = torch.tensor([1, 2], dtype=float)
t = torch.tensor([2, 3], dtype=float)
l = torch.tensor([3, 4], dtype=float)
a = torch.tensor([[4,5], [5,6], [6,7], [7,8]], dtype=float)

In [106]:
def cosineSim( vec1, vec2):
    norm1 = vec1 / vec1.norm(dim=-1, keepdim=True)
    norm2 = vec2 / vec2.norm(dim=-1, keepdim=True)
    return (norm1 * norm2).sum(dim=-1)

In [110]:
cosineSim(t,a)

tensor([0.9962, 0.9943, 0.9927, 0.9915], dtype=torch.float64)

In [108]:
cosineSim(t, (l + a) / 2)

tensor([0.9973, 0.9962, 0.9952, 0.9943], dtype=torch.float64)

In [112]:
cosineSim(a, (t + l) / 2).sum(-1)

tensor(3.9870, dtype=torch.float64)

In [125]:
num_apps

1518

In [126]:
torch.range(0, num_apps - 1, dtype=int)

  torch.range(0, num_apps - 1, dtype=int)


tensor([   0,    1,    2,  ..., 1515, 1516, 1517])

In [160]:
for name, group in test.groupby(['user', 'time']):
    print(name)
    target = torch.LongTensor(group['app'].to_list())
    app_seq = torch.Tensor(group['app_seq'].to_list()).reshape(-1, 4)
    time_diff_seq = torch.Tensor(group['time_seq'].to_list()).reshape(-1, 4)
    print('==================')
    #print(users)
    #print(times)
    print(target.shape)
    print(app_seq.shape)
    print(time_diff_seq)


(0, 8)
torch.Size([44])
torch.Size([44, 4])
tensor([[ 4.,  4.,  4.,  0.],
        [ 1.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 6.,  6.,  5.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 1.,  1.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 5.,  5.,  1.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 2.,  1.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 1.,  0.,  0.,  0.],
        [ 3.,  0.,  0.,  0.],
        [ 1.,  1.,  0.,  0.],
        [ 2.,  1.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 2.,  2.,  1.,  0.],
        [ 1.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 1.,  1.,  1.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 3.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0

KeyboardInterrupt: 

In [165]:
torch.exp(torch.zeros(3))

tensor([1., 1., 1.])

In [78]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import math
import dgl
import dgl.nn as dglnn
from dgl.dataloading import DataLoader as DGLDataLoader, NeighborSampler, EdgeDataLoader, negative_sampler
import dgl.function as fn

class GNNDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        user = self.df.iloc[idx]['user']
        time = self.df.iloc[idx]['time']
        loc = self.df.iloc[idx]['location']
        app = self.df.iloc[idx]['app']
        app_seq = self.df.iloc[idx]['app_seq']
        time_seq = self.df.iloc[idx]['time_seq']
        return (torch.LongTensor([user]), torch.LongTensor([time]), torch.LongTensor([loc]), torch.LongTensor([app]), torch.LongTensor(app_seq), torch.LongTensor(time_seq))


class GNN(nn.Module):
    def __init__(self, n_users, n_times, n_locs, n_apps, hidden_dim, dim, seq_length, graph, device):
        super(GNN, self).__init__()

        #self.user_app_emb = nn.Linear(1, dim)
        self.tla_emb = nn.Embedding(n_times+n_locs+n_apps, dim)
        self.user_emb = nn.Embedding(n_users, dim)
        self.time_emb = nn.Embedding(n_times, dim)
        self.loc_emb = nn.Embedding(n_locs, dim)
        self.app_emb = nn.Embedding(n_apps, dim)
        self.dim = dim
        self.seq_length = seq_length
        self.n_users = n_users
        self.n_times = n_times
        self.n_locs = n_locs
        self.n_apps = n_apps

        self.all_app_vector = torch.nan

        self.input_dim = self.dim * 4
        self.hidden_dim = hidden_dim
        #self.hidden_dim = self.input_dim

        # graph section
        self.graph = graph.to(device)
        self.device = device
        self.layers = nn.ModuleList()
        # three-layer GraphSAGE-mean
        #self.layers.append(dglnn.GATConv(dim, dim, num_heads=8, allow_zero_in_degree=True))
        #self.layers.append(dglnn.SAGEConv(10, 10, 'mean'))
        #self.layers.append(dglnn.SAGEConv(10, 10, 'mean'))
        self.layers.append(dglnn.SAGEConv(dim, dim, 'mean'))
        self.layers.append(dglnn.SAGEConv(dim, dim, 'mean'))
        self.headmerge = nn.Linear(8, 1)
        self.linear = nn.Linear(10, dim)
        

    def graph_layer(self, nodes_idx_uniq):
        sg = dgl.node_subgraph(self.graph, nodes_idx_uniq)
        """
        print("--------sub graph---------")
        print(sg)
        print("--------------------------")
        """
        x = sg.srcdata['feature']

        h = self.tla_emb(x)

        for l, layer in enumerate(self.layers):
            h = layer(sg, h)
            #h = F.group_norm(h, 1)
            h = F.relu(h)

        sg.srcdata['vec'] = h
        return h

    def cosineSim(self, vec1, vec2):
        norm1 = vec1 / vec1.norm(dim=-1, keepdim=True)
        norm2 = vec2 / vec2.norm(dim=-1, keepdim=True)
        return (norm1 * norm2).sum(dim=-1)

    def computeSim(self, h, batch_size):
        h = h.reshape(batch_size, self.seq_length+2, self.dim)
        time_vector = h[:, 0:1, :]
        loc_vector = h[:, 1:2, :]
        app_seq_vector = h[:, 2:, :]

        sim_t = self.cosineSim(time_vector, (loc_vector + app_seq_vector) / 2).sum(-1)
        sim_l = self.cosineSim(loc_vector, (time_vector + app_seq_vector) / 2).sum(-1)
        sim_a = self.cosineSim(app_seq_vector, (time_vector + loc_vector) / 2).sum(-1)
        loss = (sim_t + sim_l + sim_a)
        return loss


    def forward(self, users, times, locs, app_seq):
        # users [batch_size, 1]
        # times [batch_size, 1]
        # app_seq [batch_size, seq_length]

        # nodes_idx: [app, loc, time]
        # 获取所有node
        loc_node_idx = (self.n_apps + locs)
        time_node_idx = (self.n_apps + self.n_locs + times)
        app_nodes_idx = app_seq
        
        # time-loc-app图
        nodes_idx = torch.cat([time_node_idx, loc_node_idx, app_nodes_idx], dim=1)
        
        # print(nodes_idx.shape)
        nodes_idx = nodes_idx.reshape(-1)     
        
        batch_size = users.size(0)
        
        # 负采样
        neg_u = torch.randint(0, self.n_users, (batch_size, 1)).long().to(self.device)
        neg_t = torch.randint(0, self.n_times, (batch_size, 1)).long().to(self.device)
        neg_l = torch.randint(0, self.n_locs, (batch_size, 1)).long().to(self.device)
        neg_a = torch.randint(0, self.n_apps, (batch_size, self.seq_length)).long().to(self.device)
        # 获取所有node
        neg_loc_node_idx = (self.n_apps + neg_l)
        neg_time_node_idx = (self.n_apps + self.n_locs + neg_t)
        neg_app_nodes_idx = neg_a
        # time-loc-app负图
        neg_nodes_idx = torch.cat([neg_time_node_idx, neg_loc_node_idx, neg_app_nodes_idx], dim=1)
        neg_nodes_idx = neg_nodes_idx.reshape(-1)

        batch_size = users.size(0)

        # 正图
        pos_h = self.graph_layer(nodes_idx)
        pos_loss = self.computeSim(pos_h, batch_size)

        # 负图
        neg_h = self.graph_layer(neg_nodes_idx)
        neg_loss = self.computeSim(neg_h, batch_size)

        return pos_loss, neg_loss

    def appEmbedding(self):
        # app nodes [0, num_apps-1]
        all_app_nodes_idx = torch.range(0, self.n_apps - 1, dtype=int).to(self.device)
        self.all_app_vector = self.graph_layer(all_app_nodes_idx)
        
    def inference(self, times, locs, app_seq, time_diff_seq, beta):
        loc_node_idx = (self.n_apps + locs)
        time_node_idx = (self.n_apps + self.n_locs + times)
        app_seq_nodes_idx = app_seq

        
        # time-loc-app图
        nodes_idx = torch.cat([time_node_idx, loc_node_idx, app_seq_nodes_idx], dim=1)
        nodes_idx = nodes_idx.reshape(-1)
        pos_h = self.graph_layer(nodes_idx)

        batch_size = times.size(0)

        h = pos_h.reshape(batch_size, self.seq_length+2, self.dim)
        time_vector = h[:, 0:1, :]
        loc_vector = h[:, 1:2, :]
        app_seq_vector = h[:, 2:self.seq_length+2, :]


        minitues = 7
        time_diff = torch.exp(- time_diff_seq / minitues).unsqueeze(-1)
        loc_vec = loc_vector.repeat(1, self.seq_length, 1)
        loc_sum_vec = (time_diff * loc_vec).sum(dim=1)
        app_sum_vec = (time_diff * app_seq_vector).sum(dim=1)
        user_vec = beta * loc_sum_vec + (1 - beta) * app_sum_vec
        user_vec = user_vec.unsqueeze(1)
        print(user_vec.shape)
        print(self.all_app_vector.shape)
        scores = self.cosineSim(user_vec, self.all_app_vector)
        print(scores.shape)
        return scores


In [2]:
epoch = 1
batch_size = 32
dim = 50
seq_length = 4
hidden = 128
lr = 0.0005
seed = 2021
trainval_split = 0.125
patience = 5
alpha = 3
topk = 1
beta = 0.5

In [3]:
def prep_time(t):
    t = t[:-2]  # 去除分钟
    weekday = datetime.datetime.strptime(t[:-2], '%Y%m%d').weekday()
    if weekday >= 5:
        weekday = '1'
    else:
        weekday = '0'
    return '{}_{}'.format(weekday, t[-2:])  # 取周末/工作日；小时

In [4]:
def computeLoss(pos_loss, neg_loss):
    # 交叉熵损失
    pos_loss = torch.sigmoid(pos_loss)
    neg_loss = torch.sigmoid(-neg_loss)
    print(pos_loss)
    print(neg_loss)
    loss = (-torch.log(pos_loss) - torch.log(neg_loss)).sum()
    print(loss)
    return loss

In [5]:
def evaluateModel(model, criterion, data_loader, device):
    model.eval()
    l_sum, n = 0.0, 0
    with torch.no_grad():
        for user, time, loc, app, app_seq, time_seq in data_loader:
            user = user.to(device)
            time = time.to(device)
            loc = loc.to(device)
            target = app.to(device)
            app_seq = app_seq.to(device)

            pos_score, neg_score = model(user, time, loc, app_seq) # [batch_size, num_apps]
            l = computeLoss(pos_score, neg_score)
            #l = criterion(scores, target.view(-1))
            l_sum += l.item() * target.shape[0]
            n += target.shape[0]
        return l_sum / n

In [22]:
def predictModel(model, data_loader, device, beta):
    model.eval()
    Ks = [1, 5, 10]
    corrects = [0, 0, 0]
    with torch.no_grad():
        for user, time, loc, app, app_seq, time_seq in data_loader:
            user = user.to(device)
            time = time.to(device)
            loc = loc.to(device)
            target = app.to(device)
            app_seq = app_seq.to(device)
            time_seq = time_seq.to(device)
#===================================================================================================
            model.appEmbedding()
            scores = model.inference(time, loc, app_seq, time_seq, beta)
            for idx, k in enumerate(Ks):
                correct = torch.sum(torch.eq(torch.topk(scores, dim=1, k=k).indices, target)).item()
                corrects[idx] += correct
    return corrects

In [12]:
# random seed
random.seed(seed)
torch.manual_seed(seed)

# device 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: {}".format(device))
if torch.cuda.device_count() > 0:
    torch.cuda.manual_seed_all(seed)

df_usage = pd.read_csv('../data/baseline_loc_time.txt', sep='\t')
df_usage['app_seq'] = df_usage['app_seq'].apply(ast.literal_eval)
df_usage['time_seq'] = df_usage['time_seq'].apply(ast.literal_eval)
df_usage['time'] = df_usage['time'].apply(lambda x: str(x))
# time的转换 [示例：0_13，周末为1，工作日为0；13表示13点]
df_usage['time'] = df_usage['time'].apply(lambda x: prep_time(x))

Device: cuda


In [79]:
def trainModel(name, mode, train, val, n_users, n_times, n_locs, n_apps, g, device, batch_size, hidden, dim, seq_length, lr, beta, epoch, patience):

    train_dataset = GNNDataset(train)   # (636139, 5)
    val_dataset = GNNDataset(val)       # (90877, 5)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    print('Model Training Started ...', clock.ctime())
    
    # model & optimizer
    model = GNN(n_users, n_times, n_locs, n_apps, hidden, dim, seq_length, g, device)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # train & val
    min_val_loss = np.inf
    wait = 0
    p_itr = 500
    loss_list = []
    for epoch in range(epoch):
        starttime = datetime.datetime.now()
        loss_sum, n = 0.0, 0
        model.train()
        for i, (user, time, loc, app, app_seq, time_seq) in enumerate(train_loader):
            user = user.to(device)
            time = time.to(device)
            loc = loc.to(device)
            target = app.to(device)
            app_seq = app_seq.to(device)

            optimizer.zero_grad()
            pos_score, neg_score = model(user, time, loc, app_seq) # [batch_size, num_apps]
            # n类: scores表示C类的概率[c1,c2,...,cn]; target表示groundtruth的类下标
            loss = computeLoss(pos_score, neg_score)
            #loss = criterion(scores, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_sum += loss.item() * target.shape[0]
            # 便于观察训练进度
            n += target.shape[0]
            if (i+1) % p_itr == 0:
                print("[TRAIN] Epoch: {} / Iter: {} Loss - {}".format(epoch+1, i+1, loss_sum/n))
        train_loss = loss_sum / n
        val_loss = evaluateModel(model, criterion, val_loader, device)
        loss_list.append([train_loss, val_loss])
        if val_loss < min_val_loss:
            wait = 0
            min_val_loss = val_loss
        else:
            wait += 1
            if wait == patience:
                print('Early stopping at epoch: %d' % epoch)
                break
        endtime = datetime.datetime.now()
        epoch_time = (endtime - starttime).seconds
        print("epoch", epoch, "time used:", epoch_time, "seconds", "train loss:", train_loss, "validation loss:", val_loss)
        val_corrects = predictModel(model, val_loader, device, beta)
        val_accs = [x/len(val) for x in val_corrects]
        print("[Train] Val: - Acc: {:.5f} / {:.5f} / {:.5f}".format(val_accs[0], val_accs[1], val_accs[2]))
    print('Model Training Ended ...', clock.ctime())

In [14]:
# encoder
user_encoder = LabelEncoder()
time_encoder = LabelEncoder()
app_encoder = LabelEncoder()
loc_encoder = LabelEncoder()

# 特征编码
user_encoder.fit(df_usage['user'].unique())
time_encoder.fit(df_usage['time'].unique())
all_apps = list(df_usage['app'].unique())
for app_seq in df_usage['app_seq']:
    all_apps.extend(app_seq)
app_encoder.fit(list(set(all_apps)))
loc_encoder.fit(df_usage['location'].unique())

stratify_seed = df_usage['user']
df_usage['user'] = user_encoder.transform(df_usage['user'])
df_usage['time'] = time_encoder.transform(df_usage['time'])
df_usage['app'] = app_encoder.transform(df_usage['app'])
df_usage['app_seq'] = df_usage['app_seq'].apply(lambda x: app_encoder.transform(x))
df_usage['location'] = loc_encoder.transform(df_usage['location'])

# 输入特征向量的维度
num_users = len(df_usage['user'].unique())
num_times = len(df_usage['time'].unique())
num_apps = len(app_encoder.classes_)
num_locs = len(df_usage['location'].unique())

# split: trian/val/test
trainval, test = train_test_split(df_usage, test_size=0.2, random_state=2021, stratify=df_usage['user'])
train, val = train_test_split(trainval, test_size=trainval_split, random_state=2021, stratify=trainval['user'])    

In [15]:
val.head()

Unnamed: 0,user,time,app,location,app_seq,time_seq
696923,698,11,220,1183,"[8, 35, 295, 1143]","[0.0, 0.0, 0.0, 0.0]"
859572,709,16,0,5791,"[517, 56, 337, 0]","[5.0, 5.0, 5.0, 0.0]"
435624,565,22,55,3943,"[1038, 54, 636, 210]","[0.0, 0.0, 0.0, 0.0]"
835299,698,15,41,1430,"[532, 1480, 0, 2]","[0.0, 0.0, 0.0, 0.0]"
776558,698,44,187,1939,"[215, 146, 318, 1]","[1.0, 1.0, 0.0, 0.0]"


In [16]:
# 构建time-location-app异质图
edge_list = df_usage[['time', 'location', 'app']]

time_node = edge_list['time']
loc_node = edge_list['location']
app_node = edge_list['app']

hetero_graph = dgl.heterograph({
    ('time', 'use', 'app'): (time_node, app_node),
    ('app', 'used-by', 'time'): (app_node, time_node),
    ('time', 'visit', 'location'): (time_node, loc_node),
    ('location', 'visited-by', 'time'): (loc_node, time_node),
    ('location', 'open', 'app'): (loc_node, app_node),
    ('app', 'opened-by', 'location'): (app_node, loc_node)
})
time_node_feat = hetero_graph.nodes('time')
loc_node_feat = hetero_graph.nodes('location')
app_node_feat = hetero_graph.nodes('app')
hetero_graph.nodes['time'].data['feature'] = time_node_feat
hetero_graph.nodes['location'].data['feature'] = loc_node_feat
hetero_graph.nodes['app'].data['feature'] = app_node_feat

# 异质图转简单图
g = dgl.to_homogeneous(hetero_graph, ndata=['feature'])
g = dgl.add_self_loop(g)

print(g)

Graph(num_nodes=7857, num_edges=5460477,
      ndata_schemes={'feature': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)})


In [17]:
train = train[:batch_size*2]
val = val[:batch_size]

In [18]:
print(train.shape)
print(val.shape)

(64, 6)
(32, 6)


In [20]:
name = 'GNN'

In [80]:
trainModel(name, 'train', train, val, num_users, num_times, num_locs, num_apps, g, device, batch_size, hidden, dim, seq_length, lr, beta, epoch, patience)

Model Training Started ... Mon Feb  6 16:06:49 2023


  return (torch.LongTensor([user]), torch.LongTensor([time]), torch.LongTensor([loc]), torch.LongTensor([app]), torch.LongTensor(app_seq), torch.LongTensor(time_seq))


tensor([0.9997, 0.9997, 0.9999, 0.9996, 0.9992, 0.9993, 0.9996, 0.9998, 0.9992,
        0.9998, 0.9998, 0.9996, 0.9998, 0.9996, 0.9992, 0.9998, 0.9998, 0.9998,
        0.9999, 0.9998, 0.9998, 0.9998, 0.9998, 0.9984, 0.9994, 0.9997, 0.9994,
        0.9993, 0.9995, 0.9997, 0.9990, 0.9996], device='cuda:0',
       grad_fn=<SigmoidBackward0>)
tensor([7.6203e-04, 6.0613e-04, 2.3356e-04, 6.4941e-04, 1.4992e-04, 2.1130e-04,
        4.8931e-04, 5.2741e-04, 4.7514e-04, 3.5034e-04, 2.0265e-04, 3.6979e-04,
        1.5281e-04, 2.4270e-04, 2.9259e-04, 1.1973e-04, 2.1015e-04, 4.6145e-04,
        1.6867e-04, 2.8021e-04, 1.7371e-04, 5.7023e-04, 6.2819e-04, 6.2275e-04,
        8.6734e-04, 8.8227e-04, 2.3166e-04, 5.3182e-04, 2.4003e-04, 2.0136e-04,
        9.5006e-05, 2.6823e-04], device='cuda:0', grad_fn=<SigmoidBackward0>)
tensor(257.2538, device='cuda:0', grad_fn=<SumBackward0>)
tensor([0.9995, 0.9995, 0.9979, 0.9980, 0.9997, 0.9995, 0.9998, 0.9992, 0.9992,
        0.9998, 0.9998, 0.9997, 0.9996, 0.9

  all_app_nodes_idx = torch.range(0, self.n_apps - 1, dtype=int).to(self.device)


torch.Size([32, 1, 50])
torch.Size([1518, 50])
torch.Size([32, 1518])
[Train] Val: - Acc: 0.03125 / 0.03125 / 0.06250
Model Training Ended ... Mon Feb  6 16:07:01 2023


In [47]:
a = torch.Tensor([[0.1595, 0.1394, 0.2471, 0.1724, 0.3183, 0.3112, 0.3317, 0.3225, 0.3444, 0.3536],
[0.3929, 0.3702, 0.5601, 0.4533, 0.597, 0.6123, 0.6289, 0.6151, 0.6657, 0.6715],
[0.5103, 0.474, 0.6844, 0.5601, 0.6966, 0.7153, 0.7325, 0.7169, 0.7703, 0.7775]
])
b = torch.Tensor([0.3564, 0.6768, 0.7835])

In [50]:
b = b.reshape(3, -1)
b.shape

torch.Size([3, 1])

In [51]:
b = b.repeat(1, 10)

In [52]:
b.shape

torch.Size([3, 10])

In [48]:
a.shape

torch.Size([3, 10])

In [54]:
((b - a) / a)*100

tensor([[123.4483, 155.6671,  44.2331, 106.7286,  11.9698,  14.5244,   7.4465,
          10.5116,   3.4843,   0.7919],
        [ 72.2576,  82.8201,  20.8356,  49.3051,  13.3668,  10.5341,   7.6165,
          10.0309,   1.6674,   0.7893],
        [ 53.5371,  65.2954,  14.4798,  39.8857,  12.4749,   9.5345,   6.9625,
           9.2900,   1.7136,   0.7717]])

In [81]:
def cosineSim(vec1, vec2):
    norm1 = vec1 / vec1.norm(dim=-1, keepdim=True)
    norm2 = vec2 / vec2.norm(dim=-1, keepdim=True)
    return (norm1 * norm2).sum(dim=-1)

In [82]:
a = torch.Tensor([1, 2])
b = torch.Tensor([-1, -2])

In [83]:
cosineSim(a, b)

tensor(-1.0000)