## Configuration

In [1]:
%load_ext autoreload
%autoreload 2
from semilearn import get_config
from semilearn.datasets import get_fracture
from semilearn.datasets import WeightedDistributedSampler
from semilearn.core.utils import get_data_loader
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, roc_curve, auc, classification_report
from semilearn.nets.fusionnet import GCN, MulitDropoutGCN, load_adj
import torch
from torch import nn
from torch import optim
from semilearn.core.criterions import CELoss

# Experiment settings; get_config() turns this mapping into the `args` object
# that the remaining cells read attributes from.
config = dict(
    # --- algorithm / backbone ---
    algorithm='fullysupervised',
    net='GCN',
    use_pretrain=False,
    pretrain_path='https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth',

    # --- optimization ---
    epoch=100,
    num_train_iter=1500,
    # num_eval_iter=300,
    # num_log_iter=100,
    # NOTE(review): the training cell reads args.num_eval_iter; with the key
    # disabled here the value presumably comes from get_config's defaults — confirm.
    optim='AdamW',
    lr=5e-4,
    layer_decay=0.5,
    batch_size=16,
    eval_batch_size=16,

    # --- dataset ---
    data_dir='E:/Project/dataset',
    # train_sampler='RandomSampler',

    # --- algorithm specific ---
    hard_label=True,
    uratio=1,
    ulb_loss_ratio=1.0,

    # --- device ---
    gpu=0,
    world_size=1,
    distributed=False,
    num_workers=2,
)
args = get_config(config)

  from .autonotebook import tqdm as notebook_tqdm


## Dataset

In [2]:
# Build the labeled / unlabeled / evaluation splits; no separate test split is used.
lb_dset, ulb_dset, eval_dset = get_fracture(
    args=args,
    alg='fullysupervised',
    include_lb_to_ulb=True,
)
test_dset = None
dataset_dict = dict(train_lb=lb_dset, train_ulb=ulb_dset, eval=eval_dset, test=test_dset)

# Report split sizes (the unlabeled split may be absent).
lb_dest_len = len(lb_dset)
ulb_dest_len = 0 if ulb_dset is None else len(ulb_dset)
print("unlabeled data number: {}, labeled data number {}".format(ulb_dest_len, lb_dest_len))

# Weighted sampler over the labeled split. Each pass draws
# batch_size * num_train_iter // epoch samples, so the configured number of
# epochs together yields about num_train_iter batches.
# NOTE(review): the recorded run of this cell printed the sizes above and then
# raised "AttributeError: 'Subset' object has no attribute 'targets'"; the
# failing call is one of the statements below — confirm which and what it expects.
sample = WeightedDistributedSampler(
    weights=[1.51, 2.86],
    dataset=lb_dset,
    num_replicas=1,
    rank=0,
    num_samples=args.batch_size * args.num_train_iter // args.epoch,
)

loader_dict = {}
loader_dict.update(
    train_lb=get_data_loader(
        args,
        lb_dset,
        args.batch_size,
        data_sampler=sample,  # args.train_sampler,
        num_iters=args.num_train_iter,
        num_epochs=args.epoch,
        num_workers=args.num_workers,
    )
)
loader_dict.update(
    eval=get_data_loader(
        args,
        eval_dset,
        args.eval_batch_size,
        data_sampler=None,  # evaluation must not use a sampler
        num_workers=args.num_workers,
        drop_last=False,
    )
)

DATASET: data inited
DATASET: data inited
DATASET: data inited
DATASET: data inited
Dataset lb: 240
Dataset ulb: 959
unlabeled data number: 959, labeled data number 240


AttributeError: 'Subset' object has no attribute 'targets'

In [None]:
def evaluate(model,loader_dict,adj):
    """Evaluate ``model`` on ``loader_dict['eval']`` and print summary metrics.

    Prints the sample-weighted mean cross-entropy loss, accuracy, balanced
    accuracy, ROC AUC (positive label 1, scored by the class-1 probability),
    a classification report and a row-normalised confusion matrix.

    The model is switched to eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards, so calling this from a
    training loop does not silently disable dropout for later steps.

    Args:
        model: network invoked as ``model(x=..., adj=...)``; its output is
            averaged over dim 1 before classification (presumably the node
            axis of a ``(batch, nodes, classes)`` tensor — confirm).
        loader_dict: mapping holding the evaluation loader under ``'eval'``;
            each batch is a dict providing ``'x_lb_t'`` and ``'y_lb'``.
        adj: adjacency tensor passed to the model on every batch.

    Raises:
        ValueError: if the evaluation loader yields no samples.
    """
    # Run on the device the model lives on; fall back to the previously
    # hard-coded "cuda:0" for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda:0")

    eval_loader = loader_dict['eval']
    adj = adj.to(device)  # moved once instead of once per batch
    total_loss = 0.0
    total_num = 0
    y_true = []
    y_pred = []
    y_score = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data in eval_loader:
                y = data['y_lb'].to(device)

                num_batch = y.shape[0]
                total_num += num_batch

                logits = model(x=data['x_lb_t'].to(device), adj=adj)
                mean_logits = torch.mean(logits, dim=1)

                # cross_entropy applies log-softmax itself, so it must be fed
                # raw logits rather than probabilities.
                loss = F.cross_entropy(mean_logits, y, reduction='mean', ignore_index=-1)
                probs = torch.softmax(mean_logits, dim=1)

                y_true.extend(y.cpu().tolist())
                y_pred.extend(torch.argmax(probs, dim=-1).cpu().tolist())
                # Continuous score for ROC: probability of the positive class (label 1).
                y_score.extend(probs[:, 1].cpu().tolist())
                total_loss += loss.item() * num_batch
    finally:
        # Restore whatever mode the caller had the model in.
        model.train(was_training)

    if total_num == 0:
        raise ValueError("evaluate: loader_dict['eval'] produced no samples")

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_score = np.array(y_score)

    print('loss:', total_loss / total_num)
    top1 = accuracy_score(y_true, y_pred)
    print('acc:',top1)
    balanced_top1 = balanced_accuracy_score(y_true, y_pred)
    print('balanced_acc:',balanced_top1)

    # ROC needs continuous scores; hard 0/1 predictions collapse the curve to a single point.
    fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=1)
    auc_value = auc(fpr, tpr)
    print('AUC:',auc_value)
    report = classification_report(y_true, y_pred)
    print('classification_report:\n',report)

    cf_mat = confusion_matrix(y_true, y_pred, normalize='true')
    print('confusion matrix:\n' + np.array_str(cf_mat))


In [None]:
class GCNLoss(torch.nn.Module):
    """Cross-entropy loss on GCN outputs averaged over dim 1.

    The network output is reduced with a mean over dim 1 (presumably the node
    axis of a ``(batch, nodes, classes)`` tensor — confirm) and the resulting
    per-sample logits are passed to ``CELoss`` with mean reduction.
    """
    def __init__(self):
        super(GCNLoss, self).__init__()
        self.ce_loss = CELoss()

    def forward(self, logits, targets):
        """Return the mean cross-entropy between averaged ``logits`` and ``targets``."""
        mean_logits = torch.mean(logits, dim=1)
        # Pass raw logits: CELoss is expected to apply log-softmax itself (as
        # cross-entropy-on-logits losses do — confirm against
        # semilearn.core.criterions). Applying softmax first would normalise
        # twice and flatten the loss and its gradients.
        return self.ce_loss(mean_logits,targets,reduction='mean')

# Two-class GCN with one input feature per node and hidden sizes [2, 4].
net = GCN(
    nfeat=1,
    nclass=2,
    nhid=[2, 4],
    dropout=0.2,
)
net.train()

# NOTE(review): the config dict names 'AdamW' as the optimizer, but plain Adam
# is constructed here — confirm which one is intended.
opt = optim.Adam(params=net.parameters(), lr=args.lr)

# Adjacency for a 52-node graph, read from "adj.cites" in the working directory.
adj = load_adj(path="adj.cites", node_num=52)

In [None]:
# Baseline metrics of the untrained network; Module.to() returns the module
# itself, so the device move can be chained into the call.
evaluate(net.to("cuda:0"), loader_dict, adj)

In [None]:
# Supervised training loop: runs at most args.num_train_iter optimizer steps
# spread over at most args.epoch passes of the labeled loader, evaluating
# every args.num_eval_iter steps.
it = 0  # number of optimizer steps completed so far
gcnloss = GCNLoss()
net.to("cuda:0")
# An earlier evaluate() call may have left the network in eval mode; training
# must run in train mode so dropout is active.
net.train()
adj_cuda = adj.to("cuda:0")  # loop-invariant, moved once
for epoch in range(0, args.epoch):
    print(f"epoch:{epoch}")
    # prevent the training iterations from exceeding args.num_train_iter
    if it >= args.num_train_iter:
        break

    for data_lb in loader_dict['train_lb']:
        # prevent the training iterations from exceeding args.num_train_iter
        if it >= args.num_train_iter:
            break
        # Clear gradients from the previous step; without this, backward()
        # keeps accumulating into .grad and every update uses the running sum.
        opt.zero_grad()
        out = net(x=data_lb['x_lb_t'].to("cuda:0"), adj=adj_cuda)
        train_loss = gcnloss(out, data_lb['y_lb'].to("cuda:0"))
        train_loss.backward()
        opt.step()
        # Count the step before the eval check so that exactly
        # args.num_train_iter steps run and evaluation happens after every
        # args.num_eval_iter completed steps.
        it += 1
        if it%args.num_eval_iter==0:
            evaluate(net, loader_dict, adj)
            # evaluate() switches the network to eval mode; switch back.
            net.train()