## In this notebook, we are going to show you the embeddings before and after training,
## including the initial embed(before graph embed), graph embed(before MLP) and MLP embed(before output)

### You can also run tSNE_citation.py under the same folder 
### If you want to test on graph datasets, you can also run tSNE_tudataset.py 
### Ideally you do not need to add command line parameters since the program will generate results on given test cases. 

In [22]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os.path as osp
import statistics
import matplotlib.colors as colors

import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
import torch.nn.functional as F

from optimal_R import option, all_possible_concatenation
from graph_property import G_property, binning
from model.GNN import Net, debug_MLP
from utils import max_len_arr

def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model(data)[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()

def test():
    model.eval()
    logits, accs = model(data), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs

In [38]:
path = osp.join('/home/jiaqing/桌面/Fea2Fea/data/')
test_case = [(2, 1),(1, 3)]

dataset_name = ['Cora', 'PubMed', 'Citeseer']
for dataset in dataset_name:
    d_name = dataset
    dataset = Planetoid(path, name = dataset, transform=T.NormalizeFeatures())
    data = dataset[0]
    path = r'/home/jiaqing/桌面/Fea2Fea/Result/Planetoid/'
    name = path + d_name + '_property.txt'
    property_file = pd.read_csv(name, sep = '\t')
    for (j, i) in test_case:
        print(i,j)
        # find optimal graph embedding method according to each
        # input graph feature and output graph feature
        tmp_txt = pd.read_csv(path + d_name + '_optimal_method.txt', sep = '\t', header = None) # array
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        embedding = 0
        best_val_acc = test_acc = 0
        t = 0
        train_accu_plot = []
        epoch_plot = []
        print(tmp_txt)
        print(tmp_txt[1][2])
        # take the optimal embedding method as graph embedding
        model = Net(embedding=tmp_txt[i][j]).to(device) if tmp_txt[i][j] != 'MLP' else debug_MLP().to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.03, weight_decay=5e-4)

        property_i = np.array(property_file.iloc[:,[i]])
        data.x = torch.tensor(property_i).float()

        property_j = np.array(property_file.iloc[:,[j]])
        tmp = binning(property_j, k = 6, data_len = len(data.y))
        data.y = binning(property_j, k = 6, data_len = len(data.y))
        data =  data.to(device)
        for epoch in range(1, 3000):   
            train()
            train_acc, val_acc, tmp_test_acc = test()
            #train_accu_plot.append(train_acc)
            #epoch_plot.append(epoch)
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                test_acc = tmp_test_acc
                embedding = model.latent
                t = 0
            t = t + 1
            if t > 400:
                break   
            log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
            print(log.format(epoch, train_acc, best_val_acc, test_acc))

        nb_classes = 6
        confusion_matrix = torch.zeros(nb_classes,nb_classes)
        pre_comb = torch.tensor([])
        real_comb = torch.tensor([])

        '''
        #----- print macro-f1 score
        with torch.no_grad():
            logits, accs = model(), []
            for _, mask in data('test_mask'):
                pred = logits[mask].max(1)[1]
                pre_comb = torch.cat((pre_comb, pred), 0)
                real_comb = torch.cat((real_comb, data.y[mask]), 0)

                #print(pred)
                #print(data.y[mask])
                for i in range(len(pred)):
                    confusion_matrix[pred[i]][data.y[mask][i]] = confusion_matrix[pred[i]][data.y[mask][i]]+1
            print(confusion_matrix)#
            print(f1_score(pre_comb.numpy(), real_comb.numpy(), average='macro'))
        '''

        # draw tSNE pictures here:
        x = embedding.detach().numpy()
        #y = np.array(property_j)
        X_tsne = TSNE(n_components=2,random_state=33).fit_transform(x)
        plt.figure(figsize=(6, 6))
        ax = plt.subplot(1,1,1,)

        values = range(6)
        cNorm  = colors.Normalize(vmin=0, vmax=values[-1])
        scaMap = plt.cm.ScalarMappable(norm = cNorm  ,cmap = "coolwarm")

        for k in range(6):  
            colorval = scaMap.to_rgba(values[k])
            ax.scatter(X_tsne[np.where(tmp.numpy() == k), 0], X_tsne[np.where(tmp.numpy() == k), 1] ,label = k, s =3, color = colorval)


        handles,labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc='upper right',fontsize = 7)
        plt.xlabel("tSNE 1",fontsize = 12)
        plt.ylabel("tSNE 2", fontsize = 12)
        plt.tick_params(labelsize=12)
        name2 = r'/home/jiaqing/桌面/FASG_KDD/Result/tSNE/'
        plt.savefig(name2 + str(d_name)+"_"+ str(i)+ "to" + str(j) +"_tSNE.eps", dpi = 800, format = 'eps')
        #plt.show()
        #plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=tmp.numpy(), cmap = "rainbow")
        #plt.legend()




1 2
      0    1     2     3    4
0  SAGE  GIN   GIN   GCN  GIN
1   GIN  MLP  SAGE  SAGE  GIN
2   GIN  GIN  SAGE   GIN  GCN
3   GCN  GIN   GIN   GIN  GIN
4   GIN  GIN   GIN   GIN  MLP
GIN
Epoch: 001, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 002, Train: 0.4167, Val: 0.6860, Test: 0.6580
Epoch: 003, Train: 0.3917, Val: 0.6860, Test: 0.6580
Epoch: 004, Train: 0.5417, Val: 0.6860, Test: 0.6580
Epoch: 005, Train: 0.6333, Val: 0.6860, Test: 0.6580
Epoch: 006, Train: 0.6333, Val: 0.6860, Test: 0.6580
Epoch: 007, Train: 0.6417, Val: 0.6860, Test: 0.6580
Epoch: 008, Train: 0.6583, Val: 0.6860, Test: 0.6580
Epoch: 009, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 010, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 011, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 012, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 013, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 014, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 015, Train: 0.6667, Val: 0.6860, Test: 0.6580
Epoch: 016, Train

Epoch: 160, Train: 0.7500, Val: 0.7420, Test: 0.7230
Epoch: 161, Train: 0.7417, Val: 0.7420, Test: 0.7230
Epoch: 162, Train: 0.7417, Val: 0.7420, Test: 0.7230
Epoch: 163, Train: 0.7333, Val: 0.7420, Test: 0.7230
Epoch: 164, Train: 0.7250, Val: 0.7420, Test: 0.7230
Epoch: 165, Train: 0.7167, Val: 0.7420, Test: 0.7230
Epoch: 166, Train: 0.7417, Val: 0.7420, Test: 0.7230
Epoch: 167, Train: 0.7333, Val: 0.7420, Test: 0.7230
Epoch: 168, Train: 0.7333, Val: 0.7420, Test: 0.7230
Epoch: 169, Train: 0.7167, Val: 0.7420, Test: 0.7230
Epoch: 170, Train: 0.7083, Val: 0.7420, Test: 0.7230
Epoch: 171, Train: 0.7083, Val: 0.7420, Test: 0.7230
Epoch: 172, Train: 0.7083, Val: 0.7420, Test: 0.7230
Epoch: 173, Train: 0.7167, Val: 0.7420, Test: 0.7230
Epoch: 174, Train: 0.7417, Val: 0.7420, Test: 0.7230
Epoch: 175, Train: 0.7417, Val: 0.7420, Test: 0.7230
Epoch: 176, Train: 0.7333, Val: 0.7420, Test: 0.7230
Epoch: 177, Train: 0.7167, Val: 0.7420, Test: 0.7230
Epoch: 178, Train: 0.7000, Val: 0.7420, Test: 

Epoch: 332, Train: 0.7417, Val: 0.7440, Test: 0.6930
Epoch: 333, Train: 0.7583, Val: 0.7440, Test: 0.6930
Epoch: 334, Train: 0.7500, Val: 0.7440, Test: 0.6930
Epoch: 335, Train: 0.7417, Val: 0.7440, Test: 0.6930
Epoch: 336, Train: 0.7500, Val: 0.7440, Test: 0.6930
Epoch: 337, Train: 0.7500, Val: 0.7440, Test: 0.6930
Epoch: 338, Train: 0.7333, Val: 0.7440, Test: 0.6930
Epoch: 339, Train: 0.7500, Val: 0.7440, Test: 0.6930
Epoch: 340, Train: 0.7417, Val: 0.7440, Test: 0.6930
Epoch: 341, Train: 0.7083, Val: 0.7440, Test: 0.6930
Epoch: 342, Train: 0.7250, Val: 0.7440, Test: 0.6930
Epoch: 343, Train: 0.7667, Val: 0.7440, Test: 0.6930
Epoch: 344, Train: 0.7333, Val: 0.7440, Test: 0.6930
Epoch: 345, Train: 0.7583, Val: 0.7440, Test: 0.6930
Epoch: 346, Train: 0.7667, Val: 0.7440, Test: 0.6930
Epoch: 347, Train: 0.7667, Val: 0.7440, Test: 0.6930
Epoch: 348, Train: 0.7583, Val: 0.7440, Test: 0.6930
Epoch: 349, Train: 0.7500, Val: 0.7440, Test: 0.6930
Epoch: 350, Train: 0.7417, Val: 0.7440, Test: 

Epoch: 499, Train: 0.7583, Val: 0.7460, Test: 0.7120
Epoch: 500, Train: 0.7833, Val: 0.7460, Test: 0.7120
Epoch: 501, Train: 0.7833, Val: 0.7460, Test: 0.7120
Epoch: 502, Train: 0.8000, Val: 0.7460, Test: 0.7120
Epoch: 503, Train: 0.8000, Val: 0.7460, Test: 0.7120
Epoch: 504, Train: 0.8000, Val: 0.7460, Test: 0.7120
Epoch: 505, Train: 0.8000, Val: 0.7460, Test: 0.7120
Epoch: 506, Train: 0.8083, Val: 0.7460, Test: 0.7120
Epoch: 507, Train: 0.7667, Val: 0.7460, Test: 0.7120
Epoch: 508, Train: 0.7750, Val: 0.7460, Test: 0.7120
Epoch: 509, Train: 0.8000, Val: 0.7460, Test: 0.7120
Epoch: 510, Train: 0.7500, Val: 0.7460, Test: 0.7120
Epoch: 511, Train: 0.7500, Val: 0.7460, Test: 0.7120
Epoch: 512, Train: 0.7500, Val: 0.7460, Test: 0.7120
Epoch: 513, Train: 0.7417, Val: 0.7460, Test: 0.7120
Epoch: 514, Train: 0.7500, Val: 0.7460, Test: 0.7120
Epoch: 515, Train: 0.7417, Val: 0.7460, Test: 0.7120
Epoch: 516, Train: 0.7583, Val: 0.7460, Test: 0.7120
Epoch: 517, Train: 0.7583, Val: 0.7460, Test: 

Epoch: 664, Train: 0.7750, Val: 0.7460, Test: 0.7120
Epoch: 665, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 666, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 667, Train: 0.7833, Val: 0.7460, Test: 0.7120
Epoch: 668, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 669, Train: 0.8083, Val: 0.7460, Test: 0.7120
Epoch: 670, Train: 0.8000, Val: 0.7460, Test: 0.7120
Epoch: 671, Train: 0.7833, Val: 0.7460, Test: 0.7120
Epoch: 672, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 673, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 674, Train: 0.7833, Val: 0.7460, Test: 0.7120
Epoch: 675, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 676, Train: 0.8083, Val: 0.7460, Test: 0.7120
Epoch: 677, Train: 0.7917, Val: 0.7460, Test: 0.7120
Epoch: 678, Train: 0.7750, Val: 0.7460, Test: 0.7120
Epoch: 679, Train: 0.7667, Val: 0.7460, Test: 0.7120
Epoch: 680, Train: 0.7833, Val: 0.7460, Test: 0.7120
Epoch: 681, Train: 0.7750, Val: 0.7460, Test: 0.7120
Epoch: 682, Train: 0.7917, Val: 0.7460, Test: 

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.