In [1]:
import itertools
import os
import os.path as osp
import pickle
import urllib
from collections import namedtuple

import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.nn import Linear 

from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import normalized_mutual_info_score
# import random


%matplotlib inline


In [2]:
 def tensor_from_numpy(x, device):
    return torch.from_numpy(x).to(device)

In [3]:
# 定义模型
class HalfAutoEncoder(nn.Module):
    def __init__(self,linear5,linear6,linear7,linear8 ):
        super(HalfAutoEncoder,self).__init__()
         
        self.linear5= linear5
        self.linear6= linear6
        self.linear7= linear7
        self.linear8= linear8
    
    def forward(self, h4):
        self.h5  = F.relu(self.linear5( h4))
        self.h5 = F.normalize(self.h5)
        
        self.h6  = F.sigmoid(self.linear6( self.h5))
        self.h6 = F.normalize(self.h6)
        
        self.h7  = F.relu(self.linear7(self.h6))
        self.h7 = F.normalize(self.h7)
        
        self.h8  = self.linear8(self.h7)

        return  self.h8
 

In [4]:
# 超参数定义
LEARNING_RATE = 0
WEIGHT_DACAY = 0
EPOCHS = 1
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [21]:
feature_np = np.load("../model/feacture_np.npy")
 
tensor_x = tensor_from_numpy(feature_np , DEVICE)

model = torch.load( '../model/half_auto_encoder.pt', map_location=torch.device('cpu'))
type(model)

__main__.HalfAutoEncoder

In [6]:
node_feature = np.load('../data/X_np/X.npy')
node_feature2= np.zeros(shape=[850,1879],dtype=np.float32)
node_feature2[:,0:256]=node_feature[:, 0:256]

for i in range(0,850):
    for j in range(0,1623):
        try:
            node_feature2[i][j+256]=max(node_feature[i,256+5*j: 256+j*5+5]) 
        except:
            print(j)
            break
            
node_feature2/=node_feature2.sum(1, keepdims=True)  


In [7]:
tensor_y = tensor_from_numpy(node_feature2, DEVICE)

In [8]:
# 模型定义：Model, Loss, Optimizer

criterion=nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), 
                       lr=LEARNING_RATE, 
                       weight_decay=WEIGHT_DACAY)




In [9]:
tensor_train_mask=[False  for i in range(0,850)]
tensor_train_mask[1]=True


In [22]:
class ZeroOneEncoder():
    def __init__(self,model,tensor_x, tensor_y):
        self.model=model
        self.tensor_x=tensor_x
        self.tensor_mask=self.tensor_x
        self.tensor_y=tensor_y
        self.zero_one_feature=np.zeros(shape=tensor_x.shape)
        self.tensor_train_mask=[False  for i in range(0,850)]
        self.criterion=nn.MSELoss(reduction='mean')
    
    def run(self):
#         for i in range(0,tensor_x.shape[0]):
        for i in range(0,5):
            
            self.tensor_train_mask[i]=True # 一次只取一条数据出来计算loss
            
            logits = self.model(self.tensor_x)  # 前向传播
            train_mask_logits = logits[self.tensor_train_mask]   # 只选择训练节点进行监督
            loss = self.criterion(train_mask_logits, tensor_y[tensor_train_mask])  #计算每一条数据的loss
            
            # 对每一条数据的特征一个个进行mask,看看哪个特征被mask以后引起loss剧烈变化,就置为1, 否则置为0

            for j in range(0,self.tensor_x.shape[1]):
                 
                self.tensor_mask[i][j]=0 #对第i条数据的第j 个特征进行 mask
                logits = self.model(self.tensor_mask)  # 前向传播
                train_mask_logits = logits[self.tensor_train_mask]   # 只选择训练节点进行监督
                mask_loss = self.criterion(train_mask_logits, tensor_y[tensor_train_mask])   #计算每一条数据的loss
                print( 'ok: ',abs(mask_loss.item()-loss.item())/loss.item() )
                if  abs(mask_loss.item()-loss.item())/loss.item() >6e-5:
#                     print( 'ok: ',mask_loss.item()-loss.item() )
                    self.zero_one_feature[i][j]=1
                
                self.tensor_mask[i][j]=self.tensor_x[i][j] #把被mask的地方还原回来
                
            self.tensor_train_mask[i]=False
            print("=====================")
            
        return self.zero_one_feature
    

In [23]:
zero_one_encoder=ZeroOneEncoder(model,tensor_x, tensor_y)

In [24]:
zero_one_feature=zero_one_encoder.run()

ok:  1.3198854999328808e-05
ok:  8.78173818421252e-05
ok:  0.00014428748306084447
ok:  2.4372885652169674e-05
ok:  5.9844808462865845e-05
ok:  4.6495966474908304e-05
ok:  6.141967184346758e-05
ok:  5.107056962808476e-05
ok:  1.5898620794646065e-05
ok:  6.824407982607509e-06
ok:  2.3847931191969098e-05
ok:  2.7672599402001878e-05
ok:  1.3198854999328808e-05
ok:  8.819234931369704e-05
ok:  0.0001442124895665301
ok:  2.4372885652169674e-05
ok:  5.9844808462865845e-05
ok:  4.702092093510888e-05
ok:  6.186963280935379e-05
ok:  5.107056962808476e-05
ok:  1.5898620794646065e-05
ok:  6.824407982607509e-06
ok:  2.3847931191969098e-05
ok:  2.7672599402001878e-05
ok:  1.312386150501444e-05
ok:  8.819234931369704e-05
ok:  0.0001442124895665301
ok:  2.444787914648404e-05
ok:  6.014478244012332e-05
ok:  4.657095996922267e-05
ok:  6.194462630366816e-05
ok:  5.099557613377039e-05
ok:  1.5973614288960432e-05
ok:  6.824407982607509e-06
ok:  2.3847931191969098e-05
ok:  2.7672599402001878e-05
ok:  1.33488

In [17]:
zero_one_feature[0:5]

array([[0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [None]:
feature_np.shape
from scipy.spatial import distance
a = (1, 2, 3)
b = (4, 5, 6)
dst = distance.euclidean(a, b)

In [None]:
np_adj=np.zeros(shape=[850,850])
for i in range(0,850):
    for j in range(i+1,850):
        np_adj[i][j]=distance.euclidean(feature_np[i], feature_np[j])

In [None]:
        
for i in range(1,850):
    for j in range(0,i):
        np_adj[i][j]=np_adj[j][i]
        

In [None]:
            
np_adj/=np_adj.sum(1, keepdims=True) 

In [None]:
import markov_clustering as mc
 
 
 
matrix =sp.csr_matrix(np_adj)
 


In [None]:
result = mc.run_mcl(matrix,inflation=10)           # run MCL with default parameters
clusters = mc.get_clusters(result)    # get clusters

In [None]:
clusters

In [None]:
for inflation in [i / 10 for i in range(15, 26)]:
    result = mc.run_mcl(matrix, inflation=inflation)
    clusters = mc.get_clusters(result)
    Q = mc.modularity(matrix=result, clusters=clusters)
    print("inflation:", inflation, "modularity:", Q)

In [None]:
numnodes = 3

# generate random positions as a dictionary where the key is the node id and the value
# is a tuple containing 2D coordinates
positions = {i:(random.random() * 2 - 1, random.random() * 2 - 1) for i in range(numnodes)}

# use networkx to generate the graph
network = nx.random_geometric_graph(numnodes, 0.3, pos=positions)

# then get the adjacency matrix (in sparse form)
matrix = nx.to_scipy_sparse_matrix(network)
matrix.toarray()