In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch_geometric
import copy
from sklearn.metrics import mean_squared_error

### data

In [None]:
# Name of the network to train on. It selects the edge list
# ../result/network/<network_name>.tsv and the output directory
# ../result/model/<network_name>/ where the trained models are saved.
network_name = "DAGMA_DAG"

In [None]:
# Factor applied to every target value before training.
# NOTE(review): the reason for scaling by 1000 is not visible here -- confirm and document.
TARGET_SCALE = 1000


def _read_tsv(path):
    """Read a header-less, tab-separated file and return its values as a NumPy array."""
    return pd.read_csv(path, sep="\t", header=None).values


# Features and targets of the three pre-computed splits.
X_train = _read_tsv("../result/data/X_train")
X_valid = _read_tsv("../result/data/X_valid")
X_test = _read_tsv("../result/data/X_test")
Y_train = _read_tsv("../result/data/Y_train").reshape(-1) * TARGET_SCALE
Y_valid = _read_tsv("../result/data/Y_valid").reshape(-1) * TARGET_SCALE
Y_test = _read_tsv("../result/data/Y_test").reshape(-1) * TARGET_SCALE

# All samples are stacked into a single node-feature matrix (train, then valid,
# then test); the boolean masks below recover each split from the stacked rows.
X = np.concatenate([X_train, X_valid, X_test])
Y = np.concatenate([Y_train, Y_valid, Y_test])

# Label every stacked row with the index of its split. Comparing against the
# label always yields a boolean mask, even when one of the splits is empty
# (concatenating Python lists would silently upcast to float in that case).
split_id = np.repeat(np.arange(3), [len(X_train), len(X_valid), len(X_test)])
train_mask = split_id == 0
valid_mask = split_id == 1
test_mask = split_id == 2

# read network
# The file holds one edge per row; PyG expects the transposed layout as a
# contiguous int64 tensor, so enforce both explicitly instead of relying on
# whatever dtype/strides pandas happened to produce.
edge_index = torch.tensor(
    _read_tsv("../result/network/%s.tsv" % network_name).T, dtype=torch.long
).contiguous()

# pyg data format
data = torch_geometric.data.Data(x=torch.tensor(X).float(), edge_index=edge_index, y=torch.tensor(Y).float())
data.train_mask = torch.tensor(train_mask)
data.valid_mask = torch.tensor(valid_mask)
data.test_mask = torch.tensor(test_mask)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

### model structure

In [None]:
from torch_geometric.nn import SGConv

class GCN(torch.nn.Module):
    """Graph convolution followed by a two-layer fully connected regression head.

    Despite the class name, the graph layer is ``SGConv`` with ``K=2``
    (see the PyG documentation for the meaning of ``K``).

    Args:
        GCN_size1: Output width of the graph convolution layer.
        GCN_size2: Width of the hidden fully connected layer.
        in_channels: Number of input features per node. Defaults to 1969,
            the value that used to be hard-coded, so existing callers and
            previously saved state dicts keep working unchanged.
    """

    def __init__(self, GCN_size1, GCN_size2, in_channels=1969):
        super().__init__()
        self.GCN_size1 = GCN_size1
        self.GCN_size2 = GCN_size2
        self.in_channels = in_channels

        # Attribute names are part of the saved state_dict keys; do not rename.
        self.conv1 = SGConv(self.in_channels, self.GCN_size1, K=2)
        self.fc1 = torch.nn.Linear(self.GCN_size1, self.GCN_size2)
        self.fc2 = torch.nn.Linear(self.GCN_size2, 1)

    def forward(self, data):
        """Compute one prediction per node.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            A tuple ``(prediction, x1)`` where ``prediction`` is the output of
            the final linear layer (last dimension of size 1) and ``x1`` is the
            raw graph-convolution output before the tanh activation.
        """
        x, edge_index = data.x, data.edge_index

        x1 = self.conv1(x, edge_index)
        x = torch.tanh(x1)
        x = self.fc1(x)
        x = torch.tanh(x)
        x = self.fc2(x)
        return x, x1

In [None]:
# Hyper-parameter grid: [first layer width, second layer width] pairs.
# Only combinations whose second layer is strictly narrower than the first
# are kept, ordered by decreasing first width and increasing second width.
para_list = [
    [first_width, second_width]
    for first_width in (512, 256, 128)
    for second_width in (8, 16, 32, 64, 128, 256)
    if second_width < first_width
]

In [None]:
# Training configuration shared by the hyper-parameter search and the final fit.
N_RUNS = 10            # independent runs with different random initialisation
N_EPOCHS = 200         # full-batch epochs per training
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 5e-4


def _new_model_and_optimizer(para):
    """Create a fresh GCN for the [size1, size2] pair `para` and its Adam optimizer."""
    model = GCN(para[0], para[1]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    return model, optimizer


def _train_step(model, optimizer, graph, fit_mask, eval_mask):
    """Run one full-batch optimisation step and score the same forward pass.

    The MSE on `fit_mask` is back-propagated; the MSE on `eval_mask` is
    computed from the same predictions, i.e. from the weights *before* this
    step's update.

    Returns:
        (fit_loss, eval_loss) as 0-d NumPy arrays detached from autograd.
    """
    optimizer.zero_grad()
    out, _ = model(graph)
    loss = F.mse_loss(out[fit_mask].view(-1), graph.y[fit_mask])
    loss.backward()
    optimizer.step()

    with torch.no_grad():
        eval_mse = F.mse_loss(out[eval_mask].view(-1), graph.y[eval_mask])
    return loss.detach().cpu().numpy(), eval_mse.detach().cpu().numpy()


os.makedirs("../result/model/%s" % network_name, exist_ok=True)

es_test_loss_list = []  # best test loss of each run
model_count = 0

# The final fit uses train and valid samples together; the mask never changes.
final_fit_mask = data.train_mask | data.valid_mask

### runs with different init
for seed in range(N_RUNS):
    ### set random init seed ###
    torch_geometric.seed.seed_everything(100 + seed)

    ### hyper-parameter search: train on train split, score on valid split ###
    valid_loss_list = []

    for para in para_list:
        model, optimizer = _new_model_and_optimizer(para)
        model.train()

        # Start from +inf rather than an arbitrary large number: the targets
        # are scaled up, so a fixed sentinel could be smaller than every loss.
        es_valid_loss = float("inf")
        for epoch in range(N_EPOCHS):
            _, valid_loss = _train_step(model, optimizer, data, data.train_mask, data.valid_mask)
            if valid_loss < es_valid_loss:
                es_valid_loss = valid_loss

        # the loss for the current para
        valid_loss_list.append(es_valid_loss)

    # use the para with smallest loss
    best_para = para_list[np.argmin(valid_loss_list)]

    ### final fit: train on train+valid, score on test ###
    model, optimizer = _new_model_and_optimizer(best_para)
    model.train()

    # Both lists hold detached 0-d NumPy arrays, so no autograd graph or
    # device tensor is kept alive across epochs.
    train_loss_list = []
    test_loss_list = []

    es_test_loss = float("inf")
    best_model = None  # reset per run so a stale model is never saved

    for epoch in range(N_EPOCHS):
        train_loss, test_loss = _train_step(model, optimizer, data, final_fit_mask, data.test_mask)
        train_loss_list.append(train_loss)
        test_loss_list.append(test_loss)

        # Keep the epoch with the smallest *test* loss.
        # NOTE(review): selecting the checkpoint on the test split leaks test
        # information into the reported score -- confirm this is intended.
        # NOTE(review): the snapshot is taken after optimizer.step(), whereas
        # test_loss was computed before it, so the saved weights are one
        # update ahead of the ones that produced es_test_loss.
        if test_loss < es_test_loss:
            es_test_loss = test_loss
            best_model = copy.deepcopy(model)

    if best_model is None:
        raise RuntimeError(
            "test loss never improved on +inf for seed %d (non-finite loss?); no model to save" % seed
        )

    # save model and loss for the run
    es_test_loss_list.append(es_test_loss)
    torch.save(best_model.state_dict(), "../result/model/%s/model_%s.%s_%s.state_dict" % (network_name, model_count, best_para[0], best_para[1]))
    model_count += 1

In [None]:
# Mean and standard deviation (np.std default, i.e. population std) of the
# best test loss recorded in each run.
np.mean(es_test_loss_list), np.std(es_test_loss_list)