In [1]:
import argparse
import copy
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from torch.nn.parameter import Parameter
from torch.optim import Adam

import process
import utils
from evaluation import eva
from model import GAT

In [2]:
# Load the first entry returned for "Citeseer" and preprocess it in one step.
dataset = utils.data_preprocessing(utils.get_dataset("Citeseer")[0])

In [3]:
# Display the `adj_label` attribute of the preprocessed Citeseer dataset.
dataset.adj_label

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])

In [4]:
# Build the custom dataset object; the constructor prints its own progress
# (adjacency matrix, feature extraction, scaling). `myPretrain` reads this
# notebook-level `mydata`, so this cell must run before the pretraining cells.
mydata = process.myDataSet()

generating adjacent matrix...


100%|██████████| 9183/9183 [00:00<00:00, 34867.95it/s]


loading data...
getting features - 1st step...
getting features - 2nd step...


100%|██████████| 19/19 [00:11<00:00,  1.63it/s]


scaling...
--------DONE--------


In [7]:
def pretrain(args, device, dataset):
    """Pretrain the GAT autoencoder by reconstructing the adjacency labels.

    Every epoch performs one full-batch forward pass, minimises the binary
    cross-entropy between the predicted adjacency ``A_pred`` and
    ``dataset.adj_label``, and every fifth epoch writes the model's
    ``state_dict`` to ``./pretrain/`` (the directory is created if missing).

    Args:
        args: Namespace-like object providing ``input_dim``, ``hidden_size``,
            ``embedding_size``, ``alpha``, ``lr``, ``weight_decay``,
            ``max_epoch`` and ``name``.
        device: Device the model and all tensors are moved to.
        dataset: Object exposing ``adj`` and ``adj_label`` (anything with a
            ``.to(device)`` method) and ``x`` (tensor or array-like features).

    Returns:
        The trained ``GAT`` model. Earlier versions returned ``None``, so
        callers that ignore the result are unaffected.
    """
    model = GAT(
        num_features=args.input_dim,
        hidden_size=args.hidden_size,
        embedding_size=args.embedding_size,
        alpha=args.alpha,
    ).to(device)
    print(model)
    optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Graph inputs: adjacency, reconstruction target, and the matrix M that
    # `process.get_M` derives from the adjacency.
    adj = dataset.adj.to(device)
    adj_label = dataset.adj_label.to(device)
    M = process.get_M(adj).to(device)

    # `as_tensor` accepts tensors and array-likes alike and yields float32,
    # matching what the legacy `torch.Tensor(...)` constructor produced.
    x = torch.as_tensor(dataset.x, dtype=torch.float32).to(device)

    # NOTE: the per-epoch KMeans / `eva` evaluation that used to be commented
    # out here needs ground-truth labels (`dataset.y`) and `args.n_clusters`;
    # it is not run during pretraining.

    # Make sure the checkpoint directory exists before the first save;
    # otherwise `torch.save` fails on a fresh checkout.
    checkpoint_dir = "./pretrain"
    os.makedirs(checkpoint_dir, exist_ok=True)

    # The model never leaves training mode, so set it once up front.
    model.train()
    for epoch in range(args.max_epoch):
        A_pred, _ = model(x, adj, M)
        loss = F.binary_cross_entropy(A_pred.view(-1), adj_label.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 5 == 0:
            print('----------epoch = %d----------' % (epoch + 1))
            # The file name keeps the 0-based epoch index (the message above
            # is 1-based) so previously written checkpoint paths stay valid.
            torch.save(
                model.state_dict(),
                f"{checkpoint_dir}/predaegc_{args.name}_{epoch}.pkl",
            )

    return model

def myPretrain(dataset=None):
    """Assemble the pretraining hyperparameters and run ``pretrain``.

    Args:
        dataset: Object whose ``x.shape[1]`` gives the input feature size and
            which is handed to ``pretrain``. When omitted, the notebook-level
            ``mydata`` is used, so existing ``myPretrain()`` calls keep
            working; passing it explicitly avoids relying on hidden kernel
            state.

    Raises:
        NameError: If ``dataset`` is omitted and ``mydata`` has not been
            defined yet.
    """
    if dataset is None:
        # Fall back to the notebook-level variable built by an earlier cell.
        dataset = mydata

    args = argparse.Namespace()
    args.name = 'MaoXuanlin'
    args.input_dim = dataset.x.shape[1]
    args.hidden_size = 128
    args.embedding_size = 16
    args.alpha = 0.2
    args.max_epoch = 100
    args.lr = 0.001
    # The KMeans evaluation snippet in this notebook reads `args.n_clusters`;
    # `n_cluster` is kept as well for any code that used the old name.
    args.n_clusters = 6
    args.n_cluster = args.n_clusters
    args.weight_decay = 5e-3
    args.cuda = torch.cuda.is_available()
    print("use cuda: {}".format(args.cuda))
    device = torch.device("cuda" if args.cuda else "cpu")
    print(args)
    pretrain(args, device, dataset)
    

In [8]:
# Run pretraining with the hyperparameters set inside `myPretrain`;
# checkpoints are written under ./pretrain/ every fifth epoch.
myPretrain()

use cuda: False
Namespace(alpha=0.2, cuda=False, embedding_size=16, hidden_size=128, input_dim=304, lr=0.001, max_epoch=100, n_cluster=6, name='MaoXuanlin', weight_decay=0.005)
GAT(
  (conv1): GATLayer (304 -> 128)
  (conv2): GATLayer (128 -> 16)
)
----------epoch =	0----------
----------epoch =	5----------
----------epoch =	10----------
----------epoch =	15----------
----------epoch =	20----------
----------epoch =	25----------
----------epoch =	30----------
----------epoch =	35----------
----------epoch =	40----------
----------epoch =	45----------
----------epoch =	50----------
----------epoch =	55----------
----------epoch =	60----------
----------epoch =	65----------
----------epoch =	70----------
----------epoch =	75----------
----------epoch =	80----------
----------epoch =	85----------
----------epoch =	90----------
----------epoch =	95----------


In [5]:
# Display the feature matrix of the custom dataset.
# NOTE(review): this cell's execution count is lower than the pretraining
# cells above it — re-run the notebook top to bottom to confirm the order.
mydata.x

tensor([[-0.2980, -2.1505, -1.5564,  ..., -0.0228, -0.0250, -0.0290],
        [ 0.5931, -1.4245,  0.4911,  ..., -0.0228, -0.0250, -0.0290],
        [ 0.5027, -0.2298, -0.5326,  ..., -0.0228, -0.0250, -0.0290],
        ...,
        [ 0.8150,  1.2486,  0.4911,  ..., -0.0228, -0.0250, -0.0290],
        [ 0.5027, -0.1793,  0.4911,  ..., -0.0228, -0.0250, -0.0290],
        [-0.4152, -2.5522,  0.4911,  ..., -0.0228, -0.0250, -0.0290]])

In [6]:
# Display the feature matrix of the preprocessed Citeseer dataset
# (presumably for comparison with `mydata.x` — confirm intent).
dataset.x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])