In [2]:
# Install DGL 0.3 from a local wheel (CPython 3.7, manylinux1 x86_64) located in the current working directory.
# NOTE(review): the wheel path is relative, yet the notebook only calls os.chdir in a later cell —
# confirm the wheels are present in the kernel's starting directory on a fresh runtime.
!pip install dgl-0.3-cp37-cp37m-manylinux1_x86_64.whl

Processing ./dgl-0.3-cp37-cp37m-manylinux1_x86_64.whl
Installing collected packages: dgl
Successfully installed dgl-0.3


In [3]:
# Remove the preinstalled networkx (-y skips the confirmation prompt) before installing the 2.5 wheel.
!pip uninstall -y networkx

Uninstalling networkx-2.5.1:
  Successfully uninstalled networkx-2.5.1


In [4]:
# Install networkx 2.5 from a local wheel in the current working directory.
!pip install networkx-2.5-py3-none-any.whl

Processing ./networkx-2.5-py3-none-any.whl
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: networkx
Successfully installed networkx-2.5


In [5]:
# Remove the preinstalled imgaug, which pip reported as incompatible with albumentations 0.1.12.
!pip uninstall -y imgaug

Uninstalling imgaug-0.2.9:
  Successfully uninstalled imgaug-0.2.9


In [6]:
# Install imgaug 0.2.7 from a local wheel.
# NOTE(review): albumentations 0.1.12 requires imgaug>=0.2.5,<0.2.7, so 0.2.7 still violates the
# constraint (pip reports the same conflict when this cell runs). A 0.2.5 or 0.2.6 wheel would be
# needed to actually satisfy it — confirm whether albumentations is used at all in this project.
!pip install imgaug-0.2.7-py3-none-any.whl

Processing ./imgaug-0.2.7-py3-none-any.whl
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.7 which is incompatible.[0m
Installing collected packages: imgaug
Successfully installed imgaug-0.2.7


In [7]:
# Install tensorboardX 2.1 from a local wheel; the training cell uses its SummaryWriter for logging.
!pip install tensorboardX-2.1-py2.py3-none-any.whl

Processing ./tensorboardX-2.1-py2.py3-none-any.whl
Installing collected packages: tensorboardX
Successfully installed tensorboardX-2.1


In [8]:
# Mount Google Drive inside the Colab VM so that files under /content/drive become accessible.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
import os
# Select the project directory and make it the working directory for the rest of the notebook.
# The commented-out assignments are alternative locations; only one `path` should be active.
#path="/content/drive/MyDrive"
path="/content/drive/MyDrive/Others"
#path="/content/"
os.chdir(path)
# NOTE(review): the listing below is not the last expression of the cell, so its result is
# discarded and nothing is displayed.
os.listdir(path)
# Run each helper module once as a standalone script.
# NOTE(review): presumably a smoke check that the modules execute in this environment — confirm;
# the modules are imported normally in the next cell regardless.
!python utils.py
!python utils_data.py
!python utils_layers.py

In [10]:
import argparse
import json
import os
import time

import dgl.init
import numpy as np
import tensorboardX
import torch as th
import torch.nn.functional as F

import utils_data
from utils_layers import GATNet

In [21]:
# Hyperparameters and run settings for the training cell, kept in a plain dict that stands in
# for command-line arguments.
# NOTE(review): 'num_epochs_max' is not read by the training cell shown in this notebook, which
# loops over a fixed range(500) — confirm which of the two is intended.
args = dict(
    # Run identification and data selection.
    model='GAT_TwoLayers',
    dataset='squirrel',
    # Network shape and regularisation.
    num_hidden=48,
    num_heads_layer_one=8,
    num_heads_layer_two=1,
    dropout_rate=0.5,
    # Optimiser settings (weight decay is configured per layer).
    learning_rate=0.05,
    weight_decay_layer_one=5e-06,
    weight_decay_layer_two=5e-06,
    # Training length / patience.
    num_epochs_patience=100,
    num_epochs_max=5000,
    run_id=0,
    dataset_split='splits/squirrel_split_0.6_0.2_0.npz',
    # Learning-rate scheduler settings.
    learning_rate_decay_patience=50,
    learning_rate_decay_factor=0.8,
)
# Reference command line (note it targets cora with num_hidden 8, unlike the values above):
#python train_GAT.py --dataset cora --num_hidden 8 --num_heads_layer_one 8 --num_heads_layer_two 1 --weight_decay_layer_one 5e-06 --weight_decay_layer_two 5e-06 --learning_rate 0.05 --dropout_rate 0.5 --run_id 0 --dataset_split splits/cora_split_0.6_0.2_0.npz

In [None]:
import pandas as pd

# Train the GAT model configured by `args`, log per-epoch metrics to TensorBoard and to a CSV
# file, then restore the best validation checkpoint and evaluate it on the test split.
if __name__ == '__main__':
    metric_columns = ['Train Loss', 'Val Loss', 'Train Acc', 'Val Acc']
    # `record` always exists as a DataFrame; the per-epoch rows are collected in a plain list and
    # converted once, because growing a DataFrame row by row is quadratic and DataFrame.append
    # was removed in pandas 2.0.
    record = pd.DataFrame(columns=metric_columns)
    epoch_records = []

    # 'jknet' selects the loader's alternative splitting mode (no split file, 0.6 / 0.2 passed
    # instead — presumably train/validation fractions, confirm in utils_data.load_data);
    # otherwise the split is read from the file named in args['dataset_split'].
    if args['dataset_split'] == 'jknet':
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args['dataset'], None, 0.6, 0.2)
    else:
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args['dataset'], args['dataset_split'], None, None)

    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    net = GATNet(num_input_features=num_features, num_output_classes=num_labels, num_hidden=args['num_hidden'],
                 dropout_rate=args['dropout_rate'], num_heads_layer_one=args['num_heads_layer_one'],
                 num_heads_layer_two=args['num_heads_layer_two'])

    # Separate parameter groups so each GAT layer gets its own weight decay.
    optimizer = th.optim.Adam([{'params': net.gat1.parameters(), 'weight_decay': args['weight_decay_layer_one']},
                               {'params': net.gat2.parameters(), 'weight_decay': args['weight_decay_layer_two']}],
                              lr=args['learning_rate'])
    learning_rate_scheduler = th.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        factor=args['learning_rate_decay_factor'],
        patience=args['learning_rate_decay_patience'])
    writer = tensorboardX.SummaryWriter(logdir=f'runs/{args["model"]}_{args["run_id"]}')

    features = features.to("cpu")
    labels = labels.to("cpu")
    train_mask = train_mask.to("cpu")
    val_mask = val_mask.to("cpu")
    test_mask = test_mask.to("cpu")

    # Adapted from https://github.com/PetarV-/GAT/blob/master/execute_cora.py
    patience = args['num_epochs_patience']
    vlss_mn = np.inf
    vacc_mx = 0.0
    vacc_early_model = None
    vlss_early_model = None
    state_dict_early_model = None
    curr_step = 0

    # Adapted from https://docs.dgl.ai/tutorials/models/1_gnn/1_gcn.html
    dur = []
    test_time = 0.0
    try:
        # NOTE(review): the epoch count is fixed at 500 and args['num_epochs_max'] is not used.
        for epoch in range(500):
            t0 = time.time()

            # One full-batch training step.
            net.train()
            train_logits = net(g, features)
            train_logp = F.log_softmax(train_logits, 1)
            train_loss = F.nll_loss(train_logp[train_mask], labels[train_mask])
            train_pred = train_logp.argmax(dim=1)
            train_acc = th.eq(train_pred[train_mask], labels[train_mask]).float().mean().item()

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            train_loss_value = train_loss.item()

            # Validation pass with the updated weights.
            net.eval()
            with th.no_grad():
                val_logits = net(g, features)
                val_logp = F.log_softmax(val_logits, 1)
                val_loss = F.nll_loss(val_logp[val_mask], labels[val_mask]).item()
                val_pred = val_logp.argmax(dim=1)
                val_acc = th.eq(val_pred[val_mask], labels[val_mask]).float().mean().item()

            learning_rate_scheduler.step(val_loss)

            dur.append(time.time() - t0)
            mean_epoch_time = sum(dur) / len(dur)

            print(
                "Epoch {:05d} | Train Loss {:.4f} | Train Acc {:.4f} | Val Loss {:.4f} | Val Acc {:.4f} | Time(s) {:.4f}".format(
                    epoch, train_loss_value, train_acc, val_loss, val_acc, mean_epoch_time))

            writer.add_scalar('Train Loss', train_loss_value, epoch)
            writer.add_scalar('Val Loss', val_loss, epoch)
            writer.add_scalar('Train Acc', train_acc, epoch)
            writer.add_scalar('Val Acc', val_acc, epoch)

            # NOTE(review): this accumulates the *running mean* epoch time (train + validation),
            # not the elapsed wall time, and the value ends up in the CSV file name. Kept as is
            # so existing result files stay comparable — confirm whether sum(dur) was intended.
            test_time += mean_epoch_time
            epoch_records.append({'Train Loss': train_loss_value, 'Val Loss': val_loss,
                                  'Train Acc': train_acc, 'Val Acc': val_acc})

            # Adapted from https://github.com/PetarV-/GAT/blob/master/execute_cora.py
            if val_acc >= vacc_mx or val_loss <= vlss_mn:
                if val_acc >= vacc_mx and val_loss <= vlss_mn:
                    vacc_early_model = val_acc
                    vlss_early_model = val_loss
                    # state_dict() returns references to the live parameter tensors, which later
                    # optimizer steps overwrite in place; clone them so the snapshot really is
                    # the model at this epoch.
                    state_dict_early_model = {name: tensor.detach().clone()
                                              for name, tensor in net.state_dict().items()}
                vacc_mx = np.max((val_acc, vacc_mx))
                vlss_mn = np.min((val_loss, vlss_mn))
                curr_step = 0
            else:
                curr_step += 1
                if curr_step == patience:
                    # Early stopping is deliberately disabled so that every run records the same
                    # number of epochs; report the event once instead of printing a blank line
                    # on every subsequent epoch.
                    print('Patience of {} epochs exhausted at epoch {}; continuing (early stopping disabled).'.format(
                        patience, epoch))
                    # break
    finally:
        # Runs on interruption as well: keep whatever was recorded and flush the TensorBoard events.
        record = pd.DataFrame(epoch_records, columns=metric_columns)
        writer.close()

    # to_csv does not create missing directories.
    os.makedirs('Test_Result', exist_ok=True)
    record.to_csv('Test_Result/test_{model}_{dataset}_{rec_time}.csv'.format(model=args["model"], dataset=args["dataset"], rec_time=test_time))
    print(test_time)

    # Restore the best validation checkpoint. It can only be missing if no epoch ever satisfied
    # both criteria (e.g. the validation loss was NaN from the start); in that case the final
    # weights are evaluated instead of crashing on load_state_dict(None).
    if state_dict_early_model is not None:
        net.load_state_dict(state_dict_early_model)
    net.eval()
    with th.no_grad():
        test_logits = net(g, features)
        test_logp = F.log_softmax(test_logits, 1)
        test_loss = F.nll_loss(test_logp[test_mask], labels[test_mask]).item()
        test_pred = test_logp.argmax(dim=1)
        test_acc = th.eq(test_pred[test_mask], labels[test_mask]).float().mean().item()
        test_hidden_features = net.gat1(g, features).cpu().numpy()

        final_train_pred = test_pred[train_mask].cpu().numpy()
        final_val_pred = test_pred[val_mask].cpu().numpy()
        final_test_pred = test_pred[test_mask].cpu().numpy()

    print("Test Loss {:.4f} | Test Acc {:.4f}".format(test_loss, test_acc))

    # Disabled result export, kept for reference. It has been adjusted to the dict-based `args`
    # (the original used dir(args) and attribute access, which do not work on a dict).
    #
    # results_dict = dict(args)
    # results_dict['test_loss'] = test_loss
    # results_dict['test_acc'] = test_acc
    # results_dict['actual_epochs'] = 1 + epoch
    # results_dict['val_acc_max'] = vacc_mx
    # results_dict['val_loss_min'] = vlss_mn
    # results_dict['total_time'] = sum(dur)
    #
    # with open(os.path.join('runs', f'{args["model"]}_{args["run_id"]}_results.txt'), 'w') as outfile:
    #     outfile.write(json.dumps(results_dict) + '\n')
    # np.savez_compressed(os.path.join('runs', f'{args["model"]}_{args["run_id"]}_hidden_features.npz'),
    #                     hidden_features=test_hidden_features)
    # np.savez_compressed(os.path.join('runs', f'{args["model"]}_{args["run_id"]}_final_train_predictions.npz'),
    #                     final_train_predictions=final_train_pred)
    # np.savez_compressed(os.path.join('runs', f'{args["model"]}_{args["run_id"]}_final_val_predictions.npz'),
    #                     final_val_predictions=final_val_pred)
    # np.savez_compressed(os.path.join('runs', f'{args["model"]}_{args["run_id"]}_final_test_predictions.npz'),
    #                     final_test_predictions=final_test_pred)

test1
test4


  r_inv = np.power(rowsum, -1).flatten()


test5
<class 'scipy.sparse.csr.csr_matrix'>
DGLGraph(num_nodes=5201, num_edges=222134,
         ndata_schemes={}
         edata_schemes={})
<class 'dgl.graph.DGLGraph'>
<class 'dgl.graph.DGLGraph'>
test_end
<class 'dgl.graph.DGLGraph'>
Epoch 00000 | Train Loss 1.8425 | Train Acc 0.2007 | Val Loss 4.4573 | Val Acc 0.2007 | Time(s) 74.1971
Epoch 00001 | Train Loss 6.0624 | Train Acc 0.1907 | Val Loss 2.0964 | Val Acc 0.2007 | Time(s) 76.1317
Epoch 00002 | Train Loss 2.3845 | Train Acc 0.1975 | Val Loss 2.8878 | Val Acc 0.1947 | Time(s) 74.7912
Epoch 00003 | Train Loss 3.5641 | Train Acc 0.2095 | Val Loss 2.2910 | Val Acc 0.1947 | Time(s) 74.0009
Epoch 00004 | Train Loss 2.5014 | Train Acc 0.2059 | Val Loss 2.0392 | Val Acc 0.2043 | Time(s) 73.1615
Epoch 00005 | Train Loss 2.3897 | Train Acc 0.2192 | Val Loss 1.9173 | Val Acc 0.2019 | Time(s) 72.2272
Epoch 00006 | Train Loss 2.2861 | Train Acc 0.2344 | Val Loss 2.0084 | Val Acc 0.1929 | Time(s) 72.1651
Epoch 00007 | Train Loss 2.4484 | Tr