<a href="https://colab.research.google.com/github/eisbetterthanpi/hypergraph/blob/main/hgnn_list.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title dgl data
!pip install dgl

# "co-cite" relationship: hyperedge includes all the other papers it cited, as well as the paper itself
# then incidence mat = incidence mat + id
import torch
from dgl.data import CoraGraphDataset # https://github.com/dmlc/dgl/blob/master/python/dgl/data/citation_graph.py

def load_data():
    """Load Cora and build the "co-cite" hypergraph incidence matrix.

    Every paper gets one hyperedge containing the papers linked to it by a
    graph edge plus the paper itself, i.e. ``H = A + I`` with ``A`` built from
    ``graph.edges()``.

    Returns:
        H: sparse COO incidence matrix, square with one row/column per node
            ([2708, 2708] for Cora according to the original notes).
        X: node features ``graph.ndata["feat"]`` (bag of words).
        Y: node labels ``graph.ndata["label"]``.
        num_classes: ``dataset.num_classes``.
        train_mask, val_mask, test_mask: the dataset's split masks.
    """
    dataset = CoraGraphDataset()
    graph = dataset[0]

    X = graph.ndata["feat"]  # [2708, 1433] num papers, len bag of words
    Y = graph.ndata["label"]  # [2708], classification 0-6
    num_nodes = X.shape[0]

    indices = torch.stack(graph.edges())
    # Give the size explicitly: when it is inferred from the largest index the
    # matrix silently shrinks if the highest-numbered nodes have no edges.
    H = torch.sparse_coo_tensor(
        indices=indices,
        values=torch.ones(indices.shape[1]),
        size=(num_nodes, num_nodes),
    ).coalesce()
    # Sparse identity (named `eye` so the builtin `id` is not shadowed).
    eye = torch.sparse.spdiags(torch.ones(num_nodes), torch.tensor(0), H.shape)
    # Each vertex gets its own hyperedge containing its neighbours and itself.
    H = (eye + H).coalesce()

    train_mask = graph.ndata["train_mask"]
    val_mask = graph.ndata["val_mask"]
    test_mask = graph.ndata["test_mask"]
    return H, X, Y, dataset.num_classes, train_mask, val_mask, test_mask

# Load the dataset once at module level; later cells read these globals.
H, X, Y, num_classes, train_mask, val_mask, test_mask = load_data()
# print(H.shape, X.shape, Y.shape) # [2708, 2708], [2708, 1433], [2708]

n_v, n_e = H.shape
# edge list H(E)={e1,e2,e3}={{A,D},{D,E},{A,B,C}}
elst = [[] for _ in range(n_e)]
# incidence list {A:{e1,e3}, B:{e3}, C:{e3}, D:{e1,e2}, E:{e2}}
ilst = [[] for _ in range(n_v)]
# NOTE(review): `a` is the row index and `b` the column index of each stored
# entry of H; elst is indexed by the row index here. This only lines up with
# "rows = vertices, columns = hyperedges" when H is symmetric - confirm.
for a, b in zip(*H.indices().tolist()):
    elst[a].append(b)
    ilst[b].append(a)


In [None]:
# @title hg conv and hg attn
import torch
import torch.nn as nn
import torch.nn.functional as F

@torch.no_grad()
def hypergraph_laplacian(H):
    """Return the normalised propagation matrix D_v^-1/2 H B D_e^-1 H^T D_v^-1/2.

    Args:
        H: incidence matrix of shape (num_verts, num_edges); the notebook
            passes a sparse COO tensor.

    Returns:
        The (num_verts, num_verts) product of the sparse factors. It is
        computed under ``torch.no_grad`` and therefore carries no gradient.
    """
    N, M = H.shape
    d_V = H.sum(1).to_dense()  # node deg
    d_E = H.sum(0).to_dense()  # edge deg
    # A zero degree would become inf after the negative power and turn stored
    # zeros of H into NaN (0 * inf). Mapping it to inf first makes the inverse 0.
    d_V = d_V.masked_fill(d_V == 0, float('inf'))
    d_E = d_E.masked_fill(d_E == 0, float('inf'))
    D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5, torch.tensor(0), (N, N))  # torch.diag(d_V**-0.5)
    D_e_inv = torch.sparse.spdiags(d_E**-1, torch.tensor(0), (M, M))  # torch.diag(d_E**-1)
    B = torch.sparse.spdiags(torch.ones(M), torch.tensor(0), (M, M))  # torch.eye(M): identity hyperedge weights
    return D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt  # Laplacian

class HGNN(nn.Module): # https://github.com/dmlc/dgl/blob/master/notebooks/sparse/hgnn.ipynb
    """Two-layer hypergraph convolution network with a fixed propagation matrix.

    Args:
        H: incidence matrix used once, at construction, to build the
            propagation matrix via ``hypergraph_laplacian``.
        in_size: input feature width.
        out_size: output width.
        hidden_dims: hidden layer width.
    """

    def __init__(self, H, in_size, out_size, hidden_dims=16):
        super().__init__()
        self.W1 = nn.Linear(in_size, hidden_dims)
        self.W2 = nn.Linear(hidden_dims, out_size)
        self.dropout = nn.Dropout(0.5)
        # Register as a buffer so `model.to(device)` moves the matrix together
        # with the weights; non-persistent keeps the state_dict keys unchanged.
        self.register_buffer("L", hypergraph_laplacian(H), persistent=False)

    def forward(self, H, X):
        """Apply both layers to X; ``H`` is accepted but unused (``self.L`` is precomputed)."""
        X = self.L @ self.W1(self.dropout(X)) # like emb then weighted sum
        X = F.relu(X)
        X = self.L @ self.W2(self.dropout(X))
        return X

# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf
class HypergraphAttention(nn.Module): # https://github.com/dmlc/dgl/blob/master/examples/sparse/hypergraphatt.py
    """Hypergraph attention layer.

    Incidence weights are replaced by a row-wise softmax of learned
    vertex/hyperedge similarities, then features are propagated with
    D_v^-1/2 H_att D_e^-1 H_att^T D_v^-1/2.

    The propagation is computed here with differentiable sparse @ dense
    products. Routing it through the ``no_grad``-decorated
    ``hypergraph_laplacian`` detached the attention weights, so ``self.a``
    never received a gradient.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        self.P = nn.Linear(in_size, out_size)
        self.a = nn.Linear(2 * out_size, 1)

    def forward(self, H, X, X_edges): # H [2708, 2708] n_vert,n_edge ; X n_vert,vembdim
        """Return propagated vertex embeddings of shape (n_vert, out_size).

        Args:
            H: coalesced sparse COO incidence matrix (n_vert, n_edge).
            X: vertex features, indexed by the row indices of H.
            X_edges: hyperedge features, indexed by the column indices of H.
        """
        Z = self.P(X) # emb verts [n_vert,out_size]
        Z_edges = self.P(X_edges) # emb edges
        rows, cols = H.indices()
        sim = self.a(torch.cat([Z[rows], Z_edges[cols]], 1)) # vertemb,edgeemb(=vertemb)
        sim = F.leaky_relu(sim, 0.2).squeeze(1)
        # Explicit size: otherwise the shape is inferred from the largest index.
        H_att = torch.sparse_coo_tensor(indices=H.indices(), values=sim, size=H.shape).coalesce()
        H_att = torch.sparse.softmax(H_att, 1).coalesce()
        return self._propagate(H_att, Z)

    @staticmethod
    def _propagate(H_att, Z):
        """Compute D_v^-1/2 H_att D_e^-1 H_att^T D_v^-1/2 Z, keeping gradients."""
        N, M = H_att.shape
        H_att_t = H_att.t().coalesce()
        d_V = torch.sparse.mm(H_att, Z.new_ones(M, 1))    # [N, 1] vertex degrees
        d_E = torch.sparse.mm(H_att_t, Z.new_ones(N, 1))  # [M, 1] hyperedge degrees
        # Empty rows/columns: inf degree makes the inverse exactly 0 (no NaN).
        dv_invsqrt = d_V.masked_fill(d_V == 0, float('inf')).pow(-0.5)
        de_inv = d_E.masked_fill(d_E == 0, float('inf')).pow(-1)
        edge_feat = de_inv * torch.sparse.mm(H_att_t, dv_invsqrt * Z)  # [M, out_size]
        return dv_invsqrt * torch.sparse.mm(H_att, edge_feat)          # [N, out_size]

class Net(nn.Module):
    """Two stacked HypergraphAttention layers with an ELU in between."""

    def __init__(self, in_size, out_size, hidden_size=16):
        super().__init__()
        self.layer1 = HypergraphAttention(in_size, hidden_size)
        self.layer2 = HypergraphAttention(hidden_size, out_size)

    def forward(self, H, X):
        # The same tensor serves as vertex and hyperedge features in both layers.
        hidden = F.elu(self.layer1(H, X, X))  # [n_vert, hidden_size]
        return self.layer2(H, hidden, hidden)  # [n_vert, out_size]



In [None]:
# @title models down
import torch
import torch.nn as nn
import torch.nn.functional as F
# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf

@torch.no_grad
def hypergraph_laplacian(H):
    N,M = H.shape # num_verts, num_edges
    d_V = H.sum(1).to_dense() # node deg
    d_E = H.sum(0).to_dense() # edge deg
    d_V[d_V==0] = float('inf')
    d_E[d_E==0] = float('inf')
    D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N)) # torch.diag(d_V**-0.5)
    D_e_inv = torch.sparse.spdiags(d_E**-1,torch.tensor(0),(M,M)) # torch.diag(d_E**-1)
    B = torch.sparse.spdiags(torch.ones(M),torch.tensor(0),(M,M)) # torch.eye(M) # B is id, dim n_edges
    return D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt # Laplacian

class HGNN(nn.Module): # https://github.com/dmlc/dgl/blob/master/notebooks/sparse/hgnn.ipynb
    """Three linear hypergraph-convolution layers with no activation in between.

    The propagation matrix is rebuilt from ``adjdrop(H)`` before every layer.
    ``self.L`` is still computed at construction but is not read by ``forward``.
    """

    def __init__(self, H, in_size, out_size, hidden_dims=16):
        super().__init__()
        self.W1 = nn.Linear(in_size, hidden_dims)
        self.W2 = nn.Linear(hidden_dims, hidden_dims)
        self.W3 = nn.Linear(hidden_dims, out_size)
        self.dropout = nn.Dropout(0.) # og 0.5
        self.L = hypergraph_laplacian(H)
        # Other rates tried in the experiment log below: AdjDropout(0.3), SparseDropout(0.3)
        self.adjdrop = AdjDropout(0)

    def forward(self, H, X):
        for linear in (self.W1, self.W2, self.W3):
            # Fresh hyperedge dropout and renormalisation for each layer.
            propagation = hypergraph_laplacian(self.adjdrop(H))
            X = propagation @ linear(self.dropout(X))
        return X

# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf
class HypergraphAttention(nn.Module): # https://github.com/dmlc/dgl/blob/master/examples/sparse/hypergraphatt.py
    """Hypergraph attention layer (ReLU similarity variant).

    Vertex embeddings are used for both ends of every incidence, so
    ``X_edges`` is accepted for interface compatibility but not read.

    The propagation D_v^-1/2 H_att D_e^-1 H_att^T D_v^-1/2 is computed with
    differentiable sparse @ dense products. Routing it through the
    ``no_grad``-decorated ``hypergraph_laplacian`` detached the attention
    weights, so ``self.a`` never received a gradient.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        self.P = nn.Linear(in_size, out_size)
        self.a = nn.Linear(2 * out_size, 1) # og

    def forward(self, H, X, X_edges): # H [2708, 2708] n_vert,n_edge ; X n_vert,vembdim
        """Return propagated vertex embeddings of shape (n_vert, out_size).

        Args:
            H: coalesced sparse COO incidence matrix (n_vert, n_edge).
            X: vertex features, indexed by both index rows of H.
            X_edges: unused.
        """
        Z = self.P(X) # emb verts [n_vert,out_size]
        rows, cols = H.indices()
        sim = self.a(torch.cat([Z[rows], Z[cols]], 1)) # vertemb,edgeemb(=vertemb)
        sim = F.relu(sim).squeeze(1) # og used leaky_relu(sim, 0.2)
        # Explicit size: otherwise the shape is inferred from the largest index.
        H_att = torch.sparse_coo_tensor(indices=H.indices(), values=sim, size=H.shape).coalesce()
        H_att = torch.sparse.softmax(H_att, 1).coalesce()
        return self._propagate(H_att, Z)

    @staticmethod
    def _propagate(H_att, Z):
        """Compute D_v^-1/2 H_att D_e^-1 H_att^T D_v^-1/2 Z, keeping gradients."""
        N, M = H_att.shape
        H_att_t = H_att.t().coalesce()
        d_V = torch.sparse.mm(H_att, Z.new_ones(M, 1))    # [N, 1] vertex degrees
        d_E = torch.sparse.mm(H_att_t, Z.new_ones(N, 1))  # [M, 1] hyperedge degrees
        # Empty rows/columns: inf degree makes the inverse exactly 0 (no NaN).
        dv_invsqrt = d_V.masked_fill(d_V == 0, float('inf')).pow(-0.5)
        de_inv = d_E.masked_fill(d_E == 0, float('inf')).pow(-1)
        edge_feat = de_inv * torch.sparse.mm(H_att_t, dv_invsqrt * Z)  # [M, out_size]
        return dv_invsqrt * torch.sparse.mm(H_att, edge_feat)          # [N, out_size]

class Net(nn.Module):
    """Two stacked HypergraphAttention layers with a ReLU in between."""

    def __init__(self, in_size, out_size, hidden_size=16):
        super().__init__()
        self.layer1 = HypergraphAttention(in_size, hidden_size)
        self.layer2 = HypergraphAttention(hidden_size, out_size)

    def forward(self, H, X):
        # og used F.elu between the layers
        hidden = F.relu(self.layer1(H, X, X))  # [n_vert, hidden_size]
        return self.layer2(H, hidden, hidden)  # [n_vert, out_size]

# hg conv: one input per feature column of X, one output per class
in_features = X.shape[1]
model = HGNN(H, in_features, num_classes)

# hgconv addrop0.1 200 test loss: 0.88029, Val acc: 78.60, Test acc: 80.90 # 200 test loss: 0.88171, Val acc: 78.20, Test acc: 79.70 # 200 test loss: 0.86425, Val acc: 78.80, Test acc: 79.80
# hgconv addrop0.3 200 test loss: 0.98834, Val acc: 78.80, Test acc: 79.50 # 200 test loss: 0.95182, Val acc: 79.20, Test acc: 79.80 # 200 test loss: 0.98931, Val acc: 76.40, Test acc: 78.00
# hgconv addrop0.5 200 test loss: 1.18654, Val acc: 59.60, Test acc: 58.80 # 200 test loss: 1.16958, Val acc: 77.40, Test acc: 78.30 # 200 test loss: 1.15784, Val acc: 75.20, Test acc: 76.20
# hgconv addrop0.7 200 test loss: 1.67695, Val acc: 46.20, Test acc: 44.30

# hgconv normaddrop0.3 1000 test loss: 0.92683, Val acc: 79.40, Test acc: 79.40

# sparsedrop

# hgconv1 noact 200 train loss: 1.85343, test loss: 1.88946, Test acc: 50.60
# hgconv2 noact200 train loss: 0.86805, test loss: 1.17643, Test acc: 80.20 tensor(1.1822) 80.145
# hgconv3 noact 165 train loss: 0.28337, test loss: 0.80078, Test acc: 75.30 ; 200 train loss: 0.31582, test loss: 0.83998, Test acc: 73.70





In [None]:
# @title theory
@torch.no_grad()
def hypergraph_laplacian(H):
    """Return the normalised propagation matrix D_v^-1/2 H B D_e^-1 H^T D_v^-1/2.

    Args:
        H: incidence matrix of shape (num_verts, num_edges); the notebook
            passes a sparse COO tensor.

    Returns:
        The (num_verts, num_verts) product of the sparse factors, computed
        under ``torch.no_grad`` (no gradient flows through it).
    """
    N, M = H.shape # num_verts, num_edges
    d_V = H.sum(1).to_dense() # node deg
    d_E = H.sum(0).to_dense() # edge deg
    # A zero degree would become inf after the negative power and turn stored
    # zeros of H into NaN (0 * inf). Mapping it to inf first makes the inverse 0.
    d_V = d_V.masked_fill(d_V == 0, float('inf'))
    d_E = d_E.masked_fill(d_E == 0, float('inf'))
    D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5, torch.tensor(0), (N, N)) # torch.diag(d_V**-0.5)
    D_e_inv = torch.sparse.spdiags(d_E**-1, torch.tensor(0), (M, M)) # torch.diag(d_E**-1)
    B = torch.sparse.spdiags(torch.ones(M), torch.tensor(0), (M, M)) # torch.eye(M): identity hyperedge weights
    return D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt # Laplacian


# @torch.no_grad
# def hypergraph_laplacian1(H, B):
# Scratch experiment: rebuild the normalisation factors of the hypergraph
# Laplacian at module level (the body of the commented-out
# hypergraph_laplacian1 above) and apply them to random embeddings.
N,M = H.shape # num_verts, num_edges
d_V = H.sum(1).to_dense() # node deg
d_E = H.sum(0).to_dense() # edge deg
D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N)) # torch.diag(d_V**-0.5)
D_v_inv = torch.sparse.spdiags(d_V**-1,torch.tensor(0),(N,N)) # torch.diag(d_V**-1)
D_e_inv = torch.sparse.spdiags(d_E**-1,torch.tensor(0),(M,M)) # torch.diag(d_E**-1)
D_e_invsqrt = torch.sparse.spdiags(d_E**-0.5,torch.tensor(0),(M,M)) # torch.diag(d_E**-0.5)
B = torch.sparse.spdiags(torch.ones(M),torch.tensor(0),(M,M)) # torch.eye(M) # B is id, dim n_edges
# return D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt # Laplacian
# return H @ B @ D_e_inv @ H.T @ D_v_inv

# hl=hypergraph_laplacian(H)
# hl1=hypergraph_laplacian1(H)
# print(hl.to_dense()[:5,:5])
# print(hl1.to_dense()[:5,:5])

# Random vertex / hyperedge embeddings used to exercise the products below.
n_v, n_e = H.shape
d_model=16
vemb=torch.rand(n_v,d_model)
eemb=torch.rand(n_e,d_model)
# B = torch.sparse.spdiags(eemb,torch.tensor(0),(M,M,d_model)) # torch.diag(d_E**-1)


                    # [n_vert,n_edge] @ [num_edge, d_model]
# vemb1 = (D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt @ self.fv(vemb))
# Same chain as hypergraph_laplacian, applied directly to vemb.
out = D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt @ vemb
print(out.to_dense()[:5,:5])

# # out = D_v_invsqrt @ H @ eemb @ D_e_inv @ H.T @ D_v_invsqrt @ vemb
                    # [n_vert,n_edge] @ [num_edge, d_model]
# Variant with D_e^-1 split into two D_e^-1/2 factors around B.
vemb1 = D_v_invsqrt @ H @ D_e_invsqrt @ B @ D_e_invsqrt @ H.T @ D_v_invsqrt @ vemb
                    # [num_edge,n_edge] @ [n_vert, d_model]
# NOTE(review): `V` is not defined anywhere in the visible code, so this line
# presumably raises NameError as written - confirm where V should come from.
eemb1 = D_e_invsqrt @ H.T @ D_v_invsqrt @ V @ D_v_invsqrt @ H @ D_e_invsqrt @ eemb
# NOTE(review): this prints `out` again rather than vemb1 / eemb1 - confirm intent.
print(out.to_dense()[:5,:5])


vmsg = self.fv(vemb) # vmsg = self.fv(torch.cat((vemb, semsg), 1))
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
emsg = self.fw(torch.cat((eemb, svmsg), 1))
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ emsg

vemb1 = self.gv(torch.cat((vemb, semsg), 1))
eemb1 = self.gw(torch.cat((eemb, svmsg), 1))


vmsg = self.fv(torch.cat((vemb, semsg), 1)) # vmsg = self.fv(vemb)
svmsg # H.T @ D_v_invsqrt
emsg = self.fw(torch.cat((eemb, svmsg), 1))
semsg # D_v_invsqrt @ H @ D_e_inv



# vemb1 = D_v_inv @ H @ W @ D_e_inv @ H.T @ lin(vemb) # D^1 H W B^-1 HT X(l) P
# eemb1 = D_e_inv @ H.T @ V @ D_v_inv @ H @ lin(eemb)
        # svmsg = D_e_inv @ self.adjdrop(H).T @ vmsg # [num_edge, d_model]
        # emsg = svmsg + self.fw(torch.cat((eemb, svmsg), 1))
        # semsg = D_v_inv @ self.adjdrop(H) @ emsg





vmsg = self.fv(vemb) # vmsg = self.fv(torch.cat((vemb, semsg), 1))
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
eemb1 = self.gw(torch.cat((eemb, svmsg), 1))
emsg = self.fw(torch.cat((eemb, svmsg), 1))
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ emsg
vemb1 = self.gv(torch.cat((vemb, semsg), 1))


vmsg = self.fv(vemb) # vmsg = self.fv(torch.cat((vemb, semsg), 1))
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
eemb1 = self.gw(torch.cat((eemb, svmsg), 1))
emsg = svmsg + self.fw(torch.cat((eemb, svmsg), 1))
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ emsg
vemb1 = self.gv(torch.cat((vemb, semsg), 1))




############### is eemb1, symmetrical for eemb

# Vert msg = fv(vert emb) , Sum edge msgs
# Edge emb1 = gw(edge emb, Sum Vert msgs)
# Edge msg = fw(edge emb1?, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)

svmsg = H.T @ fv(vemb) # fv(torch.cat((vemb, semsg), 1)))
eemb1 = self.gw(torch.cat((eemb, svmsg), 1))
semsg = H @ self.fw(torch.cat((eemb1, svmsg), 1))
vemb1 = self.gv(torch.cat((vemb, semsg), 1))

vemb1 = D_v_invsqrt @ H @ D_e_invsqrt @ B @ D_e_invsqrt @ H.T @ D_v_invsqrt @ lin(vemb)
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ (fv(vemb)) # in->in
eemb1 = self.gw(torch.cat((eemb, svmsg), 1)) # in+in-> in
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ (svmsg+self.fw(torch.cat((eemb1, svmsg), 1))) # in+in->in
vemb1 = semsg+self.gv(torch.cat((vemb, semsg), 1)) # in+in->out


svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ (vemb+self.fv(torch.cat((vemb, semsg), 1))) # in->out
# svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ fv(vemb) # in->out
eemb1 = svmsg+self.gw(torch.cat((eemb, svmsg), 1))
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ (eemb1+self.fw(torch.cat((eemb1, svmsg), 1))) # in+in->in
vemb1 = semsg+self.gv(torch.cat((vemb, semsg), 1)) # in+in->out



# res should be
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ vemb+fv(torch.cat((vemb, semsg), 1))) # in+->out
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ fv(torch.cat((vemb, relu(semsg)), 1))) # in+in->out
# svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ fv(vemb) # in->out
eemb1 = svmsg+self.fwgw(torch.cat((eemb, svmsg), 1))
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ eemb1+fwgw(torch.cat((eemb1, svmsg), 1)) # in+in->in
vemb1 = semsg+self.gv(torch.cat((vemb, semsg), 1)) # in+in->out


# dont res, create B then mul
                    # [n_vert,n_edge] @ [num_edge, d_model]
vemb1 = D_v_invsqrt @ H @ D_e_invsqrt @ B @ D_e_invsqrt @ H.T @ D_v_invsqrt @ vemb
                    # [num_edge,n_edge] @ [n_vert, d_model]
eemb1 = D_e_invsqrt @ H.T @ D_v_invsqrt @ V @ D_v_invsqrt @ H @ D_e_invsqrt @ eemb

        # svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ self.fv(torch.cat((vemb, semsg), 1)) # in->out
        svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ self.fv(torch.cat((vemb, relu(semsg)), 1)) # in+in->out
        eemb1 = self.gw(torch.cat((eemb, svmsg), 1)) # in+out->out
        semsg = D_v_invsqrt @ H @ D_e_invsqrt @ self.fw(torch.cat((eemb1, svmsg), 1)) # out+out->out
        vemb1 = self.gv(torch.cat((vemb, semsg), 1)) # in+out->out


vmsg = self.fv(torch.cat((vemb, semsg), 1)) # in+in->out
vmsg = self.fv(vemb) # in+in->out
V = torch.sparse.spdiags(vlin(vmsg)+1, torch.tensor(0),(M,M))
vvmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ vmsg # in
svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ V @ eemsg # in ~vemb
eemb1 = self.gw(torch.cat((eemb, svmsg), 1)) # in+in->out
emsg = self.fw(torch.cat((eemb1, svmsg), 1)) # out+in->in
B = torch.sparse.spdiags(elin(emsg)+1, torch.tensor(0),(M,M)) # torch.ones(M)
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ B @ vvmsg # out
eemsg = D_v_invsqrt @ H @ D_e_invsqrt @ emsg # out
vemb1 = self.gv(torch.cat((vemb, semsg), 1)) # in+in->out




svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ fv(vemb) # in->out
semsg = D_v_invsqrt @ H @ D_e_invsqrt @ (svmsg+self.fwgw(torch.cat((eemb1, svmsg), 1))) # in+in->in
# B = self.fwgw(torch.cat((eemb1, svmsg), 1)) # in+in->in
cidx, mask = get_idx(H.T) # [n_cols, num_idx]
svmsg=vmsg[cidx] # [n_cols, num_idx, d_model]
mask=mask.unsqueeze(1).unsqueeze(2) # [n_cols, 1, 1, num_idx]
emsg = self.fw(eemb, svmsg, svmsg, mask) # [n_cols, 1, d_model/1]
B = torch.sparse.spdiags(emsg, torch.tensor(0),(M,M)) # torch.diag(d_E**-1)
vemb1 = semsg+self.gv(torch.cat((vemb, semsg), 1)) # in+in->out



In [None]:
# @title adj, inc drop, transform
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
device = "cuda" if torch.cuda.is_available() else "cpu"

class AdjDropout(nn.Module): # adjacency dropout
    """Randomly remove whole hyperedges (columns of H) during training.

    Args:
        p: probability of dropping each hyperedge.
    """

    def __init__(self, p=0.7):
        super().__init__()
        self.p = p

    def forward(self, H): # randomly remove hyperedges
        """Return H with a random subset of columns zeroed; H itself in eval mode."""
        if not self.training: # apply AdjDropout only during training
            return H
        n_v, n_e = H.shape
        # Draw the mask on H's device so the layer also works for GPU tensors.
        keep = (torch.rand(n_e, device=H.device) >= self.p).float() # 1->keep, throw p
        return H * keep.expand(n_v, n_e) # randomly zero out cols(aka hyperedges)


class AdjDropout(nn.Module): # adjacency dropout
    """Randomly remove whole hyperedges (columns of H) during training.

    Kept columns are multiplied by ``column_sum / kept_column_sum``.

    NOTE(review): that ratio is 1 for every kept column with a non-zero sum
    and 0 otherwise, so the result currently equals plain column masking. If
    inverted-dropout rescaling (1 / (1 - p)) was intended, confirm and change.

    Args:
        p: probability of dropping each hyperedge.
    """

    def __init__(self, p=0.7):
        super().__init__()
        self.p = p

    def forward(self, H): # randomly remove hyperedges
        """Return H with a random subset of columns zeroed; H itself in eval mode."""
        if not self.training: # apply AdjDropout only during training
            return H
        n_v, n_e = H.shape
        # Draw the mask on H's device so the layer also works for GPU tensors.
        keep = (torch.rand(n_e, device=H.device) >= self.p).float() # 1->keep, throw p
        col_sum = H.sum(0)
        if col_sum.is_sparse:
            col_sum = col_sum.to_dense()
        # Column mass that survives the mask (0 for dropped columns).
        kept_sum = col_sum * keep
        kept_sum = kept_sum.masked_fill(kept_sum == 0, float('inf'))
        # Elementwise ratio; equivalent to the former `tt @ diag(1 / ttrm)`
        # without materialising a dense n_e x n_e matrix.
        norm = col_sum * kept_sum.pow(-1)
        scale = keep * norm
        return H * scale.expand(n_v, n_e)


class SparseDropout(torch.nn.Module): # https://discuss.pytorch.org/t/implementation-of-dropout-for-sparse-input/47720/2
    """Dropout over the stored values of a sparse COO tensor.

    Each stored entry is kept with probability ``1 - p`` and kept values are
    scaled by ``1 / (1 - p)``. The input is returned unchanged in eval mode,
    like the other dropout layers in this file.

    Args:
        p: probability of dropping each stored entry.
    """

    def __init__(self, p=0.5):
        super().__init__()
        self.kprob = 1 - p # probability of keeping

    def forward(self, x):
        """Return a sparse tensor of the same shape as ``x`` with entries dropped."""
        if not self.training:
            return x
        x = x.coalesce()
        values = x.values()
        keep = torch.rand(values.shape[0], device=values.device) < self.kprob
        rc = x.indices()[:, keep]
        # With kprob == 0 nothing is kept, so this divides an empty tensor.
        val = values[keep] / self.kprob
        # Pass the size explicitly so dropping the highest-index entries cannot
        # shrink the result (torch.sparse.FloatTensor inferred it and is deprecated).
        return torch.sparse_coo_tensor(rc, val, x.shape)


class IncDropout(nn.Module):
    """Randomly zero individual incidences (stored values of H) during training.

    Args:
        p: probability of zeroing each stored value.
    """

    def __init__(self, p=0.7):
        super().__init__()
        self.p = p

    def forward(self, H): # randomly set incidence to 0
        """Return H with a random subset of values zeroed; H itself in eval mode.

        ``H`` must be a coalesced sparse COO tensor (``H.values()`` is used).
        """
        if not self.training:
            return H
        Hval = H.values()
        # Draw the mask on the values' device so GPU tensors work as well.
        mask = (torch.rand(len(Hval), device=Hval.device) >= self.p).float() # 1->keep, throw p
        # Keep H's shape explicitly; inferring it from the indices could shrink it.
        return torch.sparse_coo_tensor(
            indices=H.indices(), values=Hval * mask, size=H.shape,
        ).coalesce() # randomly zero out values(ie remove verts from hyperedges)


class TrainTransform(object):
    """Produce two separately augmented views of an incidence matrix."""

    @staticmethod
    def _branch():
        # One augmentation pipeline: incidence dropout, then hyperedge dropout.
        return nn.Sequential(IncDropout(0.5), AdjDropout(0.5))

    def __init__(self):
        self.transform = self._branch()
        self.transform_prime = self._branch()

    def __call__(self, H):
        view_a = self.transform(H)
        view_b = self.transform_prime(H)
        return view_a, view_b


trs = TrainTransform()

# print(H.to_dense()[:5,:5])
# print(trs(H)[1].to_dense()[:5,:5])


In [None]:
# @title HMPNN me H vicreg
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert ebd) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

def off_diagonal(x):
    """Return the off-diagonal entries of a square matrix as a flat tensor (row-major order)."""
    rows, cols = x.shape
    assert rows == cols
    # Dropping the last element and re-viewing as (n-1, n+1) lines every
    # diagonal entry up in column 0, which is then sliced away.
    shifted = x.flatten()[:-1].view(rows - 1, rows + 1)
    without_diag = shifted[:, 1:]
    return without_diag.flatten()

class ff(nn.Module):
    """Message/update function: a single linear layer wrapped in ``nn.Sequential``."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # Variants tried earlier appended nn.Sigmoid() and/or nn.Dropout(p=0.5).
        layers = [nn.Linear(in_dim, out_dim)]
        self.lin = nn.Sequential(*layers)

    def forward(self, x):
        out = self.lin(x)
        return out

class gg(nn.Module):
    """Parameter-free update function: an element-wise sigmoid.

    ``in_dim`` and ``out_dim`` are accepted so the class can stand in for
    ``ff``, but they are not used.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # Alternatives tried earlier: Linear + Sigmoid (+ Dropout), or nn.ReLU().
        stages = (nn.Sigmoid(),)
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        out = self.lin(x)
        return out

class MsgPass(nn.Module):
    """One round of vertex -> hyperedge -> vertex message passing.

    Args:
        d_model: embedding width of vertices and hyperedges.
        first: if True, return sigmoid-activated vertex and hyperedge
            embeddings; otherwise return the raw aggregated edge messages and
            pass the hyperedge embeddings through unchanged.
    """

    def __init__(self, d_model, first=False):
        super().__init__()
        self.fv = ff(2*d_model, d_model)
        self.fw = ff(2*d_model, d_model)
        # gv / gw are created (and kept in the state dict) but forward does not use them.
        self.gv = ff(2*d_model, d_model)
        self.gw = ff(2*d_model, d_model)
        # 0.7 in the paper: "Adjacency dropout must be applied in neighborhood
        # creation steps of Equations 3 through 5"
        self.adjdrop = AdjDropout(0)
        self.first = first

    def forward(self, H, vemb, eemb, semsg=None):
        """Run one message-passing round.

        Args:
            H: incidence matrix (n_vert, n_edge).
            vemb: vertex embeddings (n_vert, d_model).
            eemb: hyperedge embeddings (n_edge, d_model).
            semsg: aggregated edge messages from the previous round; ``vemb``
                is used when omitted.

        Returns:
            ``(vemb1, eemb1, semsg)``: new vertex embeddings, new hyperedge
            embeddings and the aggregated edge messages of this round.
        """
        N, M = H.shape
        d_V = H.sum(1).to_dense() # node deg
        d_E = H.sum(0).to_dense() # edge deg
        # Zero degrees -> inf so that their inverse square root is 0.
        d_V[d_V == 0] = float('inf')
        d_E[d_E == 0] = float('inf')
        D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5, torch.tensor(0), (N, N))
        D_e_invsqrt = torch.sparse.spdiags(d_E**-0.5, torch.tensor(0), (M, M))

        # `semsg == None` on a tensor is a comparison, not an identity check.
        if semsg is None:
            semsg = vemb
        vmsg = vemb + self.fv(torch.cat((vemb, semsg), 1))
        svmsg = D_e_invsqrt @ self.adjdrop(H).T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
        emsg = svmsg + self.fw(torch.cat((eemb, svmsg), 1))
        semsg = D_v_invsqrt @ self.adjdrop(H) @ D_e_invsqrt @ emsg

        if self.first:
            return torch.sigmoid(semsg), torch.sigmoid(svmsg), semsg
        return semsg, eemb, semsg

class HMPNN(nn.Module):
    """Hypergraph message-passing network with a VICReg self-supervised loss.

    Args:
        in_dim: input feature width.
        d_model: embedding width.
        out_dim: number of classifier outputs.
        sim_coeff: weight of the VICReg invariance term (lambda).
        std_coeff: weight of the VICReg variance term (mu).
        cov_coeff: weight of the VICReg covariance term (nu).
    """

    def __init__(self, in_dim, d_model, out_dim, sim_coeff=25.0, std_coeff=25.0, cov_coeff=1.0):
        super().__init__()
        self.venc = nn.Linear(in_dim, d_model)
        self.eenc = nn.Linear(in_dim, d_model)
        self.msgpass = MsgPass(d_model, first=True)
        self.msgpass2 = MsgPass(d_model)
        # Expander head applied to the embeddings before the VICReg loss.
        exp_dim = 4
        f = [d_model, exp_dim, exp_dim, exp_dim]
        self.exp = nn.Sequential(
            nn.Linear(f[0], f[1]), nn.BatchNorm1d(f[1]), nn.ReLU(),
            nn.Linear(f[1], f[2]), nn.BatchNorm1d(f[2]), nn.ReLU(),
            nn.Linear(f[-2], f[-1], bias=False)
            )
        self.sim_coeff = sim_coeff
        self.std_coeff = std_coeff
        self.cov_coeff = cov_coeff
        self.classifier = nn.Linear(d_model, out_dim)
        self.drop = nn.Dropout(0.5)

    def forward(self, H, X, classify=True):
        """Embed the vertices of H; return class logits unless ``classify`` is false.

        X is used as both the vertex and the hyperedge input features.
        """
        vemb = eemb = X
        vemb, eemb = self.drop(vemb), self.drop(eemb)
        vemb, eemb = self.venc(vemb), self.eenc(eemb)
        vemb1, eemb1, semsg = self.msgpass(H, vemb, eemb)
        vemb, eemb = vemb+vemb1, eemb+eemb1
        vemb1, eemb1, semsg = self.msgpass2(H, vemb, eemb, semsg=semsg)
        vemb, eemb = vemb+vemb1, eemb+eemb1
        if not classify:
            return vemb
        return self.classifier(vemb)

    # https://arxiv.org/pdf/2105.04906.pdf
    def vicreg(self, x, y): # https://github.com/facebookresearch/vicreg/blob/main/main_vicreg.py
        """VICReg loss between two (batch, features) views.

        The three terms are weighted by the coefficients given to the
        constructor. This method used to overwrite them with 5.0 / 10.0 / 1.0
        on every call and then read undefined bare names (NameError); pass
        those values to the constructor to reproduce that setting.
        """
        # invariance loss
        repr_loss = F.mse_loss(x, y) # s(Z, Z')

        x = x - x.mean(dim=0)
        y = y - y.mean(dim=0)

        # variance loss
        std_x = torch.sqrt(x.var(dim=0) + 0.0001) # eps = 0.0001
        std_y = torch.sqrt(y.var(dim=0) + 0.0001)
        std_loss = torch.mean(F.relu(1 - std_x)) / 2 + torch.mean(F.relu(1 - std_y)) / 2

        batch_size, num_features = x.shape

        # covariance loss
        cov_x = (x.T @ x) / (batch_size - 1) # C(Z)
        cov_y = (y.T @ y) / (batch_size - 1)
        # Out-of-place pow: off_diagonal may return a view of the covariance matrix.
        cov_loss = off_diagonal(cov_x).pow(2).sum().div(num_features)\
         + off_diagonal(cov_y).pow(2).sum().div(num_features) # c(Z)

        sim_term = self.sim_coeff * repr_loss
        std_term = self.std_coeff * std_loss
        cov_term = self.cov_coeff * cov_loss
        loss = (sim_term + std_term + cov_term)
        print("in vicreg ", sim_term.item(), std_term.item(), cov_term.item())
        return loss

    def loss(self, H1, H2, X):
        """VICReg loss between the expanded embeddings of two augmented incidence matrices."""
        sx, sy = self.forward(H1, X, classify=False), self.forward(H2, X, classify=False)
        vx, vy = self.exp(sx), self.exp(sy)
        loss = self.vicreg(vx, vy)
        return loss

    def classify(self, x):
        """Apply the linear classifier to embeddings ``x``."""
        return self.classifier(x)

# 2708 papers x 1433 bag-of-words features (per the original note)
num_v, in_dim = X.shape
num_classes = 7
# d_model = 16
model = HMPNN(in_dim, 16, num_classes)


In [None]:
# @title HMPNN H optuna
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert emb) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

# Vert msg = fv(vert emb) , Sum edge msgs
# Edge emb1 = gw(edge emb, Sum Vert msgs)
# Edge msg = fw(edge emb1?, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)

class ff(nn.Module): # "The choice for messaging and updating functions includes identity function, linear and non-linear functions, or MLP."
    """Dropout -> Linear -> activation.

    ``act`` selects the activation by index: 0 identity (empty Sequential),
    1 ReLU, 2 Sigmoid, 3 Tanh.
    """

    def __init__(self, in_dim, out_dim, drop=0.5, act=2):
        super().__init__()
        self.drop = nn.Dropout(p=drop)
        self.lin = nn.Linear(in_dim, out_dim)
        activations = (nn.Sequential, nn.ReLU, nn.Sigmoid, nn.Tanh)
        self.act = activations[act]()

    def forward(self, x):
        dropped = self.drop(x)
        projected = self.lin(dropped)
        return self.act(projected)

# "Implementation Details: Our model uses two layers of HMPNN with sigmoid
# activation and a hidden representation of size 2. We use sum as the message
# aggregation functions, with adjacency matrix dropout with rate 0.7, as well as
# dropout with rate 0.5 for vertex and hyperedge representation."


class MsgPass(nn.Module):
    """One HMPNN layer: vertex messages -> hyperedges -> vertices.

    Args:
        in_dim: width of the incoming vertex / hyperedge embeddings.
        out_dim: width of the produced messages and embeddings.
        drop: four dropout rates, for fv1, gw, fw and gv (in that order).
        adjdrop: hyperedge dropout rate used when building neighbourhoods.
        act: four activation indices for fv1, gw, fw and gv (see ``ff``).
        agg: two aggregation modes, vertex->edge then edge->vertex:
            0 sum, 1 mean, anything else symmetric degree normalisation.
    """

    def __init__(self, in_dim, out_dim, drop=[0.5]*4, adjdrop=0.7, act=[2,2,2,2], agg=[0,0]):
        super().__init__()
        self.fv1 = ff(in_dim, out_dim, drop[0], act[0])
        self.gw = ff(in_dim+out_dim, out_dim, drop[1], act[1])
        self.fw = ff(in_dim+out_dim, out_dim, drop[2], act[2])
        self.gv = ff(in_dim+out_dim, out_dim, drop[3], act[3])
        self.adjdrop = AdjDropout(adjdrop) # 0.7 "Adjacency dropout must be applied in neighborhood creation steps of Equations 3 through 5"
        # Copy so the shared default list (or the caller's list) is never aliased.
        self.agg = list(agg)
        self.drop = nn.Dropout(0.5)

    def forward(self, H, vemb, eemb, semsg=None):
        """Run one layer.

        Args:
            H: incidence matrix (n_vert, n_edge).
            vemb: vertex embeddings (n_vert, in_dim).
            eemb: hyperedge embeddings (n_edge, in_dim).
            semsg: accepted for interface compatibility; not read.

        Returns:
            ``(vemb1, eemb1, semsg)``: new vertex embeddings, new hyperedge
            embeddings and the aggregated hyperedge messages.
        """
        N, M = H.shape
        d_V = H.sum(1).to_dense() # node deg
        d_E = H.sum(0).to_dense() # edge deg
        # Zero degrees -> inf so their inverse powers are 0 instead of inf
        # (same guard as the other MsgPass / hypergraph_laplacian variants).
        d_V = d_V.masked_fill(d_V == 0, float('inf'))
        d_E = d_E.masked_fill(d_E == 0, float('inf'))

        def diag(values, size):
            # Sparse diagonal matrix; built only for the aggregation mode in use.
            return torch.sparse.spdiags(values, torch.tensor(0), (size, size))

        vemb, eemb = self.drop(vemb), self.drop(eemb)
        vmsg = self.fv1(vemb) # in->out

        # "The choice for aggregation functions includes mean aggregation, sum aggregation and concatenation."
        if self.agg[0] == 0:
            svmsg = self.adjdrop(H).T @ vmsg # sum [num_edge, d_model]
        elif self.agg[0] == 1:
            svmsg = diag(d_E**-1, M) @ self.adjdrop(H).T @ vmsg # mean [num_edge, d_model]
        else:
            svmsg = diag(d_E**-0.5, M) @ self.adjdrop(H).T @ diag(d_V**-0.5, N) @ vmsg # normalised [num_edge, d_model]

        emsg = self.fw(torch.cat((eemb, svmsg), 1)) # in+out->out

        # A second, separately drawn adjacency dropout is used for this direction.
        if self.agg[1] == 0:
            semsg = self.adjdrop(H) @ emsg # sum
        elif self.agg[1] == 1:
            semsg = diag(d_V**-1, N) @ self.adjdrop(H) @ emsg # mean
        else:
            semsg = diag(d_V**-0.5, N) @ self.adjdrop(H) @ diag(d_E**-0.5, M) @ emsg # normalised

        vemb1 = self.gv(torch.cat((vemb, semsg), 1)) # in+out->out
        eemb1 = self.gw(torch.cat((eemb, svmsg), 1)) # in+out->out
        return vemb1, eemb1, semsg

# d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.8, 0.2, 0.5, 0.0, 0.4, 0.7, 0.4, 0.2, 0.5, 0.2]

class HMPNN(nn.Module):
    """Two stacked `MsgPass` layers mapping input features to `out_dim` scores.

    Args:
        in_dim: width of the input features in ``X``.
        d_model: output width of the first layer.
        out_dim: output width of the second layer (returned as-is by forward).
        drop: eight dropout rates, four per layer; ``None`` means all zeros.
        adjdrop: adjacency-dropout rate given to both layers; ``None`` -> 0.2.
        act: eight activation selectors, four per layer; ``None`` -> all 0.
        agg: four aggregation selectors, two per layer; ``None`` -> all 1.

    The ``None`` fallbacks are exactly the values this constructor previously
    hard-coded over its arguments, so ``HMPNN(in_dim, d_model, out_dim)``
    builds the same model as before, while explicitly passed ``act``,
    ``adjdrop`` and ``agg`` are now honoured instead of being overwritten.
    """
    def __init__(self, in_dim, d_model, out_dim, drop=None, adjdrop=None, act=None, agg=None):
        super(HMPNN, self).__init__()
        drop = [0.0]*(2*4) if drop is None else list(drop)
        # other act configs tried: [0,1,2,1,0,1,2,0], [0,2,0,2,0,0,0,0], [0,1,2,1,3,1,2,0]
        act = [0]*(2*4) if act is None else list(act)
        agg = [1]*(2*2) if agg is None else list(agg)
        if adjdrop is None: adjdrop = 0.2
        self.msgpass = MsgPass(in_dim, d_model, drop[:4], adjdrop, act[:4], agg[0:2])
        self.msgpass2 = MsgPass(d_model, out_dim, drop[4:8], adjdrop, act[4:8], agg[2:4])

    def forward(self, H, X):
        """Run both layers on incidence matrix H and features X; return the final vertex embeddings."""
        vemb = eemb = X # vertices and hyperedges both start from the same features
        vemb, eemb, semsg = self.msgpass(H, vemb, eemb)
        vemb, eemb, semsg = self.msgpass2(H, vemb, eemb,semsg)
        return vemb


# Feature matrix is [num vertices, feature width] (2708 x 1433 here).
num_v, in_dim = X.shape
num_classes = 7
model = HMPNN(in_dim, 16, num_classes)


# lamb1000 2000 test loss: 500.65387, Val acc: 52.00, Test acc: 50.70
# lamb0 2000 test loss: 1.31980, Val acc: 61.00, Test acc: 60.00

# eemb1 act2 200 test loss: 1.18689, Val acc: 56.00, Test acc: 61.40
# eemb1 invsqrt act2..0 200 test loss: 1.25483, Val acc: 36.80, Test acc: 37.60
# eemb1 sum act2..0 200 test loss: 0.01902, Val acc: 61.80, Test acc: 66.80, 200 test loss: 1.27333, Val acc: 51.40, Test acc: 49.60
# eemb1 mean act2 200 test loss: 1.27075, Val acc: 45.20, Test acc: 43.10 # 200 test loss: 1.21710, Val acc: 64.40, Test acc: 62.40 # 200 test loss: 1.27201, Val acc: 39.40, Test acc: 41.00
# eemb1 invsqrt act2 200 test loss: 1.22294, Val acc: 46.80, Test acc: 48.50 # 200 test loss: 1.22492, Val acc: 43.80, Test acc: 44.70 # 200 test loss: 1.19101, Val acc: 48.80, Test acc: 54.40


# 200 test loss: 1.19794, Val acc: 65.60, Test acc: 63.80
# act[-1]=0 200 test loss: 0.00939, Val acc: 66.40, Test acc: 68.00
# act=[0,1,2,1,0,1,2,0] 200 test loss: 0.00030, Val acc: 72.40, Test acc: 71.70
# noweightdk 200 test loss: 0.00011, Val acc: 70.40, Test acc: 69.00
# +relu 200 test loss: 0.00088, Val acc: 76.20, Test acc: 74.20 # 200 test loss: 0.00079, Val acc: 74.20, Test acc: 75.30
# +res 200 test loss: 0.00038, Val acc: 73.40, Test acc: 73.80 # 200 test loss: 0.00050, Val acc: 72.40, Test acc: 75.00 #200 test loss: 0.00131, Val acc: 75.40, Test acc: 73.70



# fv(torch.cat((vemb, semsg 200 test loss: 0.00029, Val acc: 72.20, Test acc: 72.20 # 200 test loss: 0.00030, Val acc: 75.20, Test acc: 74.80
# fv1fv 200 test loss: 0.00060, Val acc: 73.80, Test acc: 72.70 # 200 test loss: 0.00056, Val acc: 72.00, Test acc: 73.60

# act=[0,1,2,1,3,1,2,0]
# drop=[0.8,0.6,0.8,0.6,0.5,0.3,0.5,0.8]
# adjdrop=0.2
# fv1fv wdk3e-6 200 test loss: 0.84940, Val acc: 74.20, Test acc: 74.80 74-77
# fv1fv wdk3e-3 1000 test loss: 1.34266, Val acc: 67.80, Test acc: 65.70-70


# [I 2024-02-11 17:00:44,155] Trial 97 finished with value: 80.0 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.6, 'drop7': 0.8, 'adjdrop': 0.3}
# [I 2024-02-11 17:01:14,663] Trial 98 finished with value: 80.1 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.4, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.3}
# [I 2024-02-11 16:57:37,381] Trial 91 finished with value: 80.7 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
# [I 2024-02-11 16:42:04,810] Trial 62 finished with value: 80.8 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.6, 'drop4': 0.5, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
# [I 2024-02-11 16:37:12,770] Trial 53 finished with value: 80.1 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.2}
# [I 2024-02-11 16:33:42,626] Trial 46 finished with value: 79.3 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.8, 'adjdrop': 0.2}
# [I 2024-02-11 16:28:10,495] Trial 35 finished with value: 80.3 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.3, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
# [I 2024-02-11 16:10:44,544] Trial 0 finished with value: 80.8 and parameters: {'drop0': 0.1, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.1, 'drop4': 0.5, 'drop5': 0.4, 'drop6': 0.8, 'drop7': 0.6, 'adjdrop': 0.6}
# [0.8,0.6,0.8,0.6,0.5,0.3,0.5,0.8],0.2 # 200 test loss: 0.21115, Val acc: 78.20, Test acc: 78.50

# [0.8,0.6,0.8,0.6,0.5,0.3,0.5],0.3

# emsg = svmsg+self.fw(torch.cat((eemb, svmsg), 1)) 3000-4000 epochs ~61, best 65-68
# emsg = self.fw(torch.cat((eemb, svmsg), 1)) 2000 test loss: 1.69581, Val acc: 56.60, Test acc: 55.00


# noact adjdrop0.2 200 train loss: 0.31042, test loss: 1.67238, Test acc: 78.30 tensor(1.7788) 76.66
# noact adjdrop0.7 200 train loss: 0.71443, test loss: 6.25643, Test acc: 72.90 tensor(7.0832) 65.93000000000002
# 200 train loss: 0.96941, test loss: 2.82683, Test acc: 67.30 tensor(2.9238) 63.92499999999999
# noact adjdrop0 agg1 200 train loss: 0.09955, test loss: 0.68146, Test acc: 79.60tensor(0.6848) 79.79999999999998
# noact adjdrop0.2 agg1 lower
# noact adjdrop0 agg1 dmodel2 200 train loss: 0.52758, test loss: 1.35109, Test acc: 69.80 tensor(1.2519) 68.09
# noact adjdrop0 agg1 dmodel16 200 train loss: 0.18712, test loss: 1.27762, Test acc: 76.90 tensor(1.3000) 73.865


In [None]:
# @title HMPNN me reg
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert emb) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class ff(nn.Module):
    """Dropout with rate `drop` followed by one linear layer (no activation)."""
    def __init__(self, in_dim, out_dim, drop=0.5):
        super().__init__()
        dropout = nn.Dropout(p=drop)
        affine = nn.Linear(in_dim, out_dim)
        # kept as a Sequential so parameters stay named lin.1.weight / lin.1.bias
        self.lin = nn.Sequential(dropout, affine)

    def forward(self, x):
        y = self.lin(x)
        return y

class MsgPass(nn.Module):
    """One vertex -> hyperedge -> vertex message-passing round with residual updates.

    Args:
        d_model: embedding width shared by vertices and hyperedges.
        drop: four dropout rates for ``fv``, ``fw``, ``gv``, ``gw`` (in that
            order); ``None`` means ``[0.5]*4``.
        adjdrop: rate handed to ``AdjDropout``.
        order: stored on the module; not read by ``forward``.
    """
    def __init__(self, d_model, drop=None, adjdrop=0.0, order=False):
        super(MsgPass, self).__init__()
        if drop is None: drop = [0.5]*4 # fresh list per instance instead of a shared mutable default
        self.fv = ff(2*d_model, d_model, drop[0])
        self.fw = ff(2*d_model, d_model, drop[1])
        self.gv = ff(2*d_model, d_model, drop[2])
        self.gw = ff(2*d_model, d_model, drop[3])
        # "Adjacency dropout must be applied in neighborhood creation steps of Equations 3 through 5"
        # NOTE(review): forward() multiplies by H directly and never calls self.adjdrop - confirm intended.
        self.adjdrop = AdjDropout(adjdrop)
        self.order=order

    def forward(self, H, vemb, eemb, semsg=None):
        """Compute updated vertex and hyperedge embeddings.

        Args:
            H: sparse incidence matrix, rows = vertices, columns = hyperedges.
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per hyperedge.
            semsg: aggregated edge->vertex messages from a previous layer;
                when ``None`` the vertex embeddings are used in its place.

        Returns:
            ``(vemb1, eemb1, semsg)``: new vertex embeddings, new hyperedge
            embeddings and the aggregated edge->vertex messages of this round.
        """
        N,M = H.shape
        d_V = H.sum(1).to_dense() # node deg
        d_E = H.sum(0).to_dense() # edge deg
        # diagonal D^-1/2 matrices for symmetric degree normalisation
        D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N))
        D_e_invsqrt = torch.sparse.spdiags(d_E**-0.5,torch.tensor(0),(M,M))

        # identity test: `== None` on a tensor goes through tensor comparison machinery
        if semsg is None: semsg = vemb

        # variant with adjacency dropout and separate message tensors:
        # vmsg = vemb + self.fv(torch.cat((vemb, semsg), 1))
        # svmsg = D_e_invsqrt @ self.adjdrop(H).T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
        # emsg = svmsg + self.fw(torch.cat((eemb, svmsg), 1))
        # semsg = D_v_invsqrt @ self.adjdrop(H) @ D_e_invsqrt @ emsg
        # vemb1 = semsg + self.gv(torch.cat((vemb, semsg), 1))
        # eemb1 = svmsg + self.gw(torch.cat((eemb, svmsg), 1))

        # edge messages are built from the already-updated eemb1, making the round symmetric in eemb too
        svmsg = D_e_invsqrt @ H.T @ D_v_invsqrt @ (vemb+self.fv(torch.cat((vemb, semsg), 1))) # vertex msgs summed per edge
        eemb1 = svmsg+self.gw(torch.cat((eemb, svmsg), 1)) # residual hyperedge update
        semsg = D_v_invsqrt @ H @ D_e_invsqrt @ (eemb1+self.fw(torch.cat((eemb1, svmsg), 1))) # edge msgs summed per vertex
        vemb1 = semsg+self.gv(torch.cat((vemb, semsg), 1)) # residual vertex update

        return vemb1, eemb1, semsg

# d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.8, 0.2, 0.5, 0.0, 0.4, 0.7, 0.4, 0.2, 0.5, 0.2]

class HMPNN(nn.Module):
    """Encoders, two `MsgPass` rounds and a linear classifier over vertices.

    `drop` is indexed as: [0] input dropout, [1:5] first round,
    [5:9] second round, [-1] dropout before the classifier.
    """
    def __init__(self, in_dim, d_model, out_dim, drop=[0.0]*(2+4*2), adjdrop=0.0):
        super().__init__()
        self.venc = nn.Linear(in_dim, d_model)
        self.eenc = nn.Linear(in_dim, d_model)
        first_rates, second_rates = drop[1:5], drop[5:9]
        self.msgpass = MsgPass(d_model, first_rates, adjdrop)
        self.msgpass2 = MsgPass(d_model, second_rates, adjdrop)
        self.classifier = nn.Linear(d_model, out_dim)
        self.drop0 = nn.Dropout(drop[0])
        self.drop3 = nn.Dropout(drop[-1])

    def forward(self, H, X):
        """Return class scores per vertex for incidence matrix H and features X."""
        # vertices and hyperedges start from the same features, each with its own dropout mask
        vert_in = self.drop0(X)
        edge_in = self.drop0(X)
        vemb = self.venc(vert_in)
        eemb = self.eenc(edge_in)
        vemb, eemb, semsg = self.msgpass(H, vemb, eemb)
        vemb_out, _, _ = self.msgpass2(H, vemb, eemb, semsg=semsg)
        logits = self.classifier(self.drop3(vemb_out))
        return logits
        # return vemb1


# Feature matrix is [num vertices, feature width] (2708 x 1433 here).
num_v, in_dim = X.shape
num_classes = 7
model = HMPNN(in_dim, 16, num_classes)

# dont end with relu
# end with sig ok? # 200 test loss: 1.52798, Val acc: 79.40, Test acc: 80.80 # 200 test loss: 1.52735, Val acc: 78.00, Test acc: 80.20
# cls sig 200 test loss: 0.03824, Val acc: 77.20, Test acc: 78.60 # 200 test loss: 0.04305, Val acc: 76.60, Test acc: 78.20
# cls sig no_ff_first 200 test loss: 0.11298, Val acc: 79.20, Test acc: 81.30
# nocls sig no_ff_first 200 test loss: 0.28197, Val acc: 77.80, Test acc: 80.50
# msgpass1 cls sig no_ff_first 200 test loss: 0.11226, Val acc: 79.00, Test acc: 79.50 # 200 test loss: 0.03364, Val acc: 73.20, Test acc: 75.00 # 200 test loss: 0.10996, Val acc: 76.20, Test acc: 78.00
# msgpass2 cls sig no_ff_first 200 test loss: 0.02670, Val acc: 78.00, Test acc: 78.50 # 200 test loss: 0.03555, Val acc: 74.40, Test acc: 75.70 # 200 test loss: 0.03364, Val acc: 73.20, Test acc: 75.00
# msgpass2 dropcls sig no_ff_first 200 test loss: 0.12531, Val acc: 73.80, Test acc: 73.40 # 200 test loss: 0.22392, Val acc: 77.00, Test acc: 76.90
# first 200 test loss: 0.24009, Val acc: 78.20, Test acc: 80.50


# 200 test loss: 291.74536, Val acc: 77.60, Test acc: 79.50 # 200 test loss: 315.39655, Val acc: 78.80, Test acc: 81.00 # 200 test loss: 320.45880, Val acc: 77.80, Test acc: 78.60
# [0.8, 0.2, 0.5, 0.0, 0.4, 0.7, 0.4, 0.2, 0.5, 0.2]
# [0.8, 0.2, 0.5, 0.1, 0.8, 0.8, 0.5, 0.0, 0.8, 0.3]
# [0.8, 0.6, 0.3, 0.3, 0.2, 0.7, 0.3, 0.2, 0.5, 0.6]
# [0.8, 0.0, 0.2, 0.0, 0.4, 0.8, 0.8, 0.2, 0.6, 0.3]
# [0.8, 0.5, 0.0, 0.5, 0.2, 0.4, 0.8, 0.5, 0.8, 0.3]
# [0.8, 0.2, 0.5, 0.1, 0.8, 0.8, 0.5, 0.0, 0.8, 0.3]

# 0.8, 0.4, 0.4, 0.2, 0.4, 0.7, 0.5, 0.2, 0.6, 0.3 #me mean # 200 test loss: 331.67874, Val acc: 79.20, Test acc: 79.90 # 200 test loss: 325.93512, Val acc: 76.20, Test acc: 77.30 # 200 test loss: 329.18042, Val acc: 78.80, Test acc: 78.80
# 0.8, 0.2, 0.5, 0.1, 0.3, 0.7, 0.6, 0.2, 0.7, 0.3 #me median # 200 test loss: 315.00992, Val acc: 76.20, Test acc: 77.50 # 200 test loss: 331.16852, Val acc: 76.60, Test acc: 79.40 # 200 test loss: 321.40860, Val acc: 77.20, Test acc: 78.60
# 0.8, 0.5, 0.5, 0.1, 0.3, 0.7, 0.6, 0.2, 0.7, 0.3 #me median






In [None]:
# @title copy attn+
import torch
import torch.nn as nn
import torch.nn.functional as F
# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf

@torch.no_grad() # called form: the bare `@torch.no_grad` spelling is only accepted by recent PyTorch releases
def hypergraph_laplacian(H):
    """Return D_v^-1/2 @ H @ D_e^-1 @ H.T @ D_v^-1/2 for a sparse incidence matrix.

    Args:
        H: sparse incidence matrix of shape [num_verts, num_edges].

    Returns:
        A [num_verts, num_verts] matrix. It is computed with gradient
        tracking disabled, so it carries no autograd history even when the
        values of ``H`` do.
    """
    N,M = H.shape # num_verts, num_edges
    d_V = H.sum(1).to_dense() # node deg
    d_E = H.sum(0).to_dense() # edge deg
    D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N)) # torch.diag(d_V**-0.5)
    D_e_inv = torch.sparse.spdiags(d_E**-1,torch.tensor(0),(M,M)) # torch.diag(d_E**-1)
    # The per-edge weight matrix B was the identity, so that factor is left out of the product.
    return D_v_invsqrt @ H @ D_e_inv @ H.T @ D_v_invsqrt # Laplacian

# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf
class HypergraphAttention(nn.Module): # https://github.com/dmlc/dgl/blob/master/examples/sparse/hypergraphatt.py
    """Hypergraph attention layer using dot-product scores between vertices and hyperedges.

    Args:
        in_size: width of the incoming vertex / hyperedge embeddings.
        out_size: width of the projected embeddings and of the output.
    """
    def __init__(self, in_size, out_size):
        super().__init__()
        # P and a are not read by forward(); they stay registered so the
        # parameter set and initialisation order are unchanged.
        self.P = nn.Linear(in_size, out_size)
        self.a = nn.Linear(2 * out_size, 1) # og
        self.q = nn.Linear(in_size, out_size)#, bias=False)
        self.k = nn.Linear(in_size, out_size)#, bias=False)
        self.drop = nn.Dropout(0.5)

    def forward(self, H, vemb, eemb): # H [2708, 2708] n_vert,n_edge ; X n_vert,vembdim
        """Return attention-weighted propagated vertex features.

        Args:
            H: coalesced sparse incidence matrix [n_vert, n_edge]; only its
                sparsity pattern is used, the values are replaced by scores.
            vemb: vertex embeddings (dropout is applied to these only).
            eemb: hyperedge embeddings.

        Returns:
            ``hypergraph_laplacian(H_att) @ q(vemb)`` with shape [n_vert, out_size].
        """
        vemb=self.drop(vemb)
        vvec, evec = self.q(vemb), self.k(eemb)
        rows, cols = H.indices()
        Q, K = vvec[rows], evec[cols]
        # one dot product per stored entry of H; flatten() keeps a 1-D result
        # even when H has a single entry (squeeze() would yield a 0-d tensor)
        sim = (Q.unsqueeze(1) @ K.unsqueeze(2)).flatten()
        sim = F.relu(sim)
        # pass the size explicitly so an empty trailing row/column of H cannot shrink the matrix
        H_att = torch.sparse_coo_tensor(indices=H.indices(), values=sim, size=H.shape).coalesce()
        H_att = torch.sparse.softmax(H_att,1) # normalise scores over each vertex's hyperedges
        # NOTE(review): hypergraph_laplacian is wrapped in torch.no_grad in this file, so no
        # gradient appears to flow back through H_att (self.k would receive none) - confirm intended.
        return hypergraph_laplacian(H_att) @ vvec # [n_vert, n_vert] @ [n_vert, out_size]

class Net(nn.Module):
    """Two `HypergraphAttention` layers with a ReLU between them."""
    def __init__(self, in_size, out_size, d_model=16):
        super().__init__()
        self.layer1 = HypergraphAttention(in_size, d_model)
        self.layer2 = HypergraphAttention(d_model, out_size)

    def forward(self, H, X):
        # the same tensor is used as both vertex and hyperedge embeddings in each layer
        hidden = F.relu(self.layer1(H, X, X)) # [n_vert, d_model]; og used elu
        scores = self.layer2(H, hidden, hidden) # [n_vert, out_size]
        return scores

# hypergraph-attention model sized from the feature width and class count
model = Net(in_size=X.shape[1], out_size=num_classes)

# ogatt relu 200 test loss: 0.04677, Val acc: 78.40, Test acc: 79.60, 200 test loss: 0.04678, Val acc: 78.00, Test acc: 79.20 # 200 test loss: 0.03514, Val acc: 78.80, Test acc: 78.20
# ogatt relu drop0.5 200 test loss: 0.12809, Val acc: 77.60, Test acc: 79.70 # 200 test loss: 0.12146, Val acc: 78.80, Test acc: 80.50 # 200 test loss: 0.15017, Val acc: 78.20, Test acc: 80.90

# Q@V 200 test loss: 0.03010, Val acc: 77.20, Test acc: 79.40 # 200 test loss: 0.03118, Val acc: 77.80, Test acc: 78.00 # 200 test loss: 0.03437, Val acc: 76.60, Test acc: 77.70
# Q@V biasT 200 test loss: 0.04530, Val acc: 78.20, Test acc: 78.70 # 200 test loss: 0.04741, Val acc: 76.00, Test acc: 77.20
# Q@V biasT drop0.5 200 test loss: 0.13624, Val acc: 77.40, Test acc: 80.30 # 200 test loss: 0.10379, Val acc: 78.00, Test acc: 80.90 # 200 test loss: 0.09133, Val acc: 78.00, Test acc: 78.60









In [None]:
# @title no act
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert emb) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class ff(nn.Module):
    """Dropout (p=0.5) followed by one linear layer, with no activation."""
    def __init__(self, in_dim, out_dim):
        super().__init__()
        dropout = nn.Dropout(p=0.5)
        affine = nn.Linear(in_dim, out_dim)
        # kept as a Sequential so parameters stay named lin.1.weight / lin.1.bias
        self.lin = nn.Sequential(dropout, affine)

    def forward(self, x):
        y = self.lin(x)
        return y

class gg(nn.Module):
    """Update function: currently just an element-wise sigmoid.

    `in_dim` and `out_dim` are accepted for signature parity with `ff`
    but are not used; the module has no parameters.
    """
    def __init__(self, in_dim, out_dim):
        super().__init__()
        squash = nn.Sigmoid() # alternatives tried: ReLU, Linear+Sigmoid, Dropout+Linear+Sigmoid
        self.lin = nn.Sequential(squash)

    def forward(self, x):
        y = self.lin(x)
        return y

class MsgPass(nn.Module):
    """Simplified round: project vertices, propagate to hyperedges and back.

    With `first=False` the update functions `gv`/`gw` are empty Sequentials
    (identity); with `first=True` they are `gg` modules.
    """
    def __init__(self, in_dim, out_dim, first=False):
        super().__init__()
        self.fv = ff(in_dim, out_dim)
        make_update = (lambda: gg(in_dim, out_dim)) if first else nn.Sequential
        self.gv = make_update()
        self.gw = make_update()
        self.first = first

    def forward(self, H, vemb, eemb, semsg=None):
        """Return (vertex embeddings, hyperedge embeddings, edge->vertex messages).

        `eemb` and the incoming `semsg` are accepted but not used here.
        """
        n_vert, n_edge = H.shape
        vert_deg = H.sum(1).to_dense()
        edge_deg = H.sum(0).to_dense()
        zero_offset = torch.tensor(0) # main diagonal
        Dv = torch.sparse.spdiags(vert_deg**-0.5, zero_offset, (n_vert, n_vert))
        De = torch.sparse.spdiags(edge_deg**-0.5, zero_offset, (n_edge, n_edge))

        # degree-normalised propagation operators
        to_edges = De @ H.T @ Dv
        to_verts = Dv @ H @ De

        svmsg = to_edges @ self.fv(vemb) # [num_edge, d_model]
        semsg = to_verts @ svmsg

        return self.gv(semsg), self.gw(svmsg), semsg


class HMPNN(nn.Module):
    """Three stacked `MsgPass` layers: in_dim -> d_model -> d_model -> out_dim."""
    def __init__(self, in_dim, d_model, out_dim):
        super().__init__()
        self.msgpass = MsgPass(in_dim, d_model)
        self.msgpass1 = MsgPass(d_model, d_model)
        self.msgpass2 = MsgPass(d_model, out_dim)

    def forward(self, H, X):
        """Return the last layer's vertex embeddings."""
        # vertices and hyperedges start from the same features; no messages yet
        vemb, eemb, semsg = X, X, None
        for layer in (self.msgpass, self.msgpass1, self.msgpass2):
            vemb, eemb, semsg = layer(H, vemb, eemb, semsg=semsg)
        return vemb


# Feature matrix is [num vertices, feature width] (2708 x 1433 here).
num_v, in_dim = X.shape
num_classes = 7
model = HMPNN(in_dim, 16, num_classes)
# 200 test loss: 0.07347, Val acc: 80.00, Test acc: 79.70

# dont end with relu
# end with sig ok? # 200 test loss: 1.52798, Val acc: 79.40, Test acc: 80.80 # 200 test loss: 1.52735, Val acc: 78.00, Test acc: 80.20
# cls sig 200 test loss: 0.03824, Val acc: 77.20, Test acc: 78.60 # 200 test loss: 0.04305, Val acc: 76.60, Test acc: 78.20
# cls sig no_ff_first 200 test loss: 0.11298, Val acc: 79.20, Test acc: 81.30
# nocls sig no_ff_first 200 test loss: 0.28197, Val acc: 77.80, Test acc: 80.50
# msgpass1 cls sig no_ff_first 200 test loss: 0.11226, Val acc: 79.00, Test acc: 79.50 # 200 test loss: 0.03364, Val acc: 73.20, Test acc: 75.00 # 200 test loss: 0.10996, Val acc: 76.20, Test acc: 78.00
# msgpass2 cls sig no_ff_first 200 test loss: 0.02670, Val acc: 78.00, Test acc: 78.50 # 200 test loss: 0.03555, Val acc: 74.40, Test acc: 75.70 # 200 test loss: 0.03364, Val acc: 73.20, Test acc: 75.00
# msgpass2 dropcls sig no_ff_first 200 test loss: 0.12531, Val acc: 73.80, Test acc: 73.40 # 200 test loss: 0.22392, Val acc: 77.00, Test acc: 76.90
# first 200 test loss: 0.24009, Val acc: 78.20, Test acc: 80.50



# relu 200 train loss: 0.96820, test loss: 1.22426, Test acc: 80.10 tensor(1.2307) 79.41
# sig 200 train loss: 0.87654, test loss: 1.19463, Test acc: 79.40 tensor(1.1846) 80.14500000000001 ; 200 train loss: 0.89456, test loss: 1.19230, Test acc: 79.80 tensor(1.1833) 80.48

# fv(cat) drop0.5 200 train loss: 0.47794, test loss: 0.85524, Test acc: 81.50 tensor(0.8528) 81.27
# fv(vemb) drop0.5
# fv(vemb) drop0 200 train loss: 0.94737, test loss: 1.19208, Test acc: 81.00 tensor(1.2006) 79.925 200 train loss: 0.85702, test loss: 1.16306, Test acc: 79.20
# fv(cat) 200 train loss: 0.44876, test loss: 0.84343, Test acc: 81.00 tensor(0.8442) 81.14


In [None]:
# @title HMPNN copy
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert emb) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class ff(nn.Module):
    """Dropout (p=0.5) followed by one linear layer, with no activation."""
    def __init__(self, in_dim, out_dim):
        super().__init__()
        dropout = nn.Dropout(p=0.5)
        affine = nn.Linear(in_dim, out_dim)
        # kept as a Sequential so parameters stay named lin.1.weight / lin.1.bias
        self.lin = nn.Sequential(dropout, affine)

    def forward(self, x):
        y = self.lin(x)
        return y

class gg(nn.Module):
    """Update function: currently just an element-wise sigmoid.

    `in_dim` and `out_dim` are accepted for signature parity with `ff`
    but are not used; the module has no parameters.
    """
    def __init__(self, in_dim, out_dim):
        super().__init__()
        # Paper note: regular dropout can follow a batch normalization right before the
        # updating functions in Equations 3 and 5, as part of the corresponding g functions.
        squash = nn.Sigmoid() # alternatives tried: ReLU, (BatchNorm1d+)Dropout+Linear+Sigmoid
        self.lin = nn.Sequential(squash)

    def forward(self, x):
        y = self.lin(x)
        return y

class MsgPass(nn.Module):
    """Simplified round: project vertices, propagate to hyperedges and back.

    `fw` is constructed but not used by `forward`. With `first=False` the
    update functions `gv`/`gw` are empty Sequentials (identity); with
    `first=True` they are `gg` modules.
    """
    def __init__(self, in_dim, out_dim, first=False):
        super().__init__()
        self.fv = ff(in_dim, out_dim)
        self.fw = ff(in_dim, out_dim)
        make_update = (lambda: gg(in_dim, out_dim)) if first else nn.Sequential
        self.gv = make_update()
        self.gw = make_update()
        self.first = first

    def forward(self, H, vemb, eemb, semsg=None):
        """Return (vertex embeddings, hyperedge embeddings, edge->vertex messages).

        `eemb` and the incoming `semsg` are accepted but not used here.
        """
        n_vert, n_edge = H.shape
        vert_deg = H.sum(1).to_dense()
        edge_deg = H.sum(0).to_dense()
        zero_offset = torch.tensor(0) # main diagonal
        Dv = torch.sparse.spdiags(vert_deg**-0.5, zero_offset, (n_vert, n_vert))
        De = torch.sparse.spdiags(edge_deg**-0.5, zero_offset, (n_edge, n_edge))

        # degree-normalised propagation operators
        to_edges = De @ H.T @ Dv
        to_verts = Dv @ H @ De

        svmsg = to_edges @ self.fv(vemb) # [num_edge, d_model]
        # the edge message is the aggregated vertex message itself (no fw applied)
        semsg = to_verts @ svmsg

        return self.gv(semsg), self.gw(svmsg), semsg


class HMPNN(nn.Module):
    """Two stacked `MsgPass` layers: in_dim -> d_model -> out_dim."""
    def __init__(self, in_dim, d_model, out_dim):
        super().__init__()
        self.msgpass = MsgPass(in_dim, d_model)
        self.msgpass2 = MsgPass(d_model, out_dim)

    def forward(self, H, X):
        """Return the last layer's vertex embeddings."""
        # vertices and hyperedges start from the same features; no messages yet
        vemb, eemb, semsg = X, X, None
        for layer in (self.msgpass, self.msgpass2):
            vemb, eemb, semsg = layer(H, vemb, eemb, semsg=semsg)
        return vemb


# Feature matrix is [num vertices, feature width] (2708 x 1433 here).
num_v, in_dim = X.shape
num_classes = 7
model = HMPNN(in_dim, 16, num_classes)
# 200 test loss: 0.07347, Val acc: 80.00, Test acc: 79.70

# dont end with relu
# end with sig ok? # 200 test loss: 1.52798, Val acc: 79.40, Test acc: 80.80 # 200 test loss: 1.52735, Val acc: 78.00, Test acc: 80.20
# cls sig 200 test loss: 0.03824, Val acc: 77.20, Test acc: 78.60 # 200 test loss: 0.04305, Val acc: 76.60, Test acc: 78.20
# cls sig no_ff_first 200 test loss: 0.11298, Val acc: 79.20, Test acc: 81.30
# nocls sig no_ff_first 200 test loss: 0.28197, Val acc: 77.80, Test acc: 80.50
# msgpass1 cls sig no_ff_first 200 test loss: 0.11226, Val acc: 79.00, Test acc: 79.50 # 200 test loss: 0.03364, Val acc: 73.20, Test acc: 75.00 # 200 test loss: 0.10996, Val acc: 76.20, Test acc: 78.00
# msgpass2 cls sig no_ff_first 200 test loss: 0.02670, Val acc: 78.00, Test acc: 78.50 # 200 test loss: 0.03555, Val acc: 74.40, Test acc: 75.70 # 200 test loss: 0.03364, Val acc: 73.20, Test acc: 75.00
# msgpass2 dropcls sig no_ff_first 200 test loss: 0.12531, Val acc: 73.80, Test acc: 73.40 # 200 test loss: 0.22392, Val acc: 77.00, Test acc: 76.90
# first 200 test loss: 0.24009, Val acc: 78.20, Test acc: 80.50



# relu 200 train loss: 0.96820, test loss: 1.22426, Test acc: 80.10 tensor(1.2307) 79.41
# sig 200 train loss: 0.87654, test loss: 1.19463, Test acc: 79.40 tensor(1.1846) 80.14500000000001 ; 200 train loss: 0.89456, test loss: 1.19230, Test acc: 79.80 tensor(1.1833) 80.48



In [None]:
# @title train/ eval
import torch
import torch.nn.functional as F
# total element count over all parameters of the current `model`
num_params = sum(map(lambda param: param.numel(), model.parameters()))

def train(model, optimizer, H, X, Y, train_mask, lamb=None):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: module called as ``model(H, X)``, returning one row of class
            scores per row of ``X``.
        optimizer: optimizer over ``model``'s parameters.
        H: incidence matrix, passed through to the model.
        X: input features.
        Y: integer class labels.
        train_mask: index/mask selecting the rows used for the loss.
        lamb: optional L1 strength. When truthy, ``lamb`` times the mean
            absolute parameter value is added to the loss.

    Returns:
        The loss of this step (including the L1 term, if any) as a Python float.
    """
    model.train()
    Y_hat = model(H, X)
    loss = F.cross_entropy(Y_hat[train_mask], Y[train_mask]) # loss_fn = nn.CrossEntropyLoss()
    if lamb:
        params = list(model.parameters())
        # Count from the model actually being trained rather than a module-level global.
        n_params = sum(p.numel() for p in params)
        # Summing the tensors keeps the penalty in the autograd graph; wrapping
        # them in torch.tensor([...]) copies the values and yields no gradient.
        l1_norm = sum(p.abs().sum() for p in params)
        loss = loss + lamb/n_params * l1_norm
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

def victrain(model, optimizer, H, X):
    """Run one optimisation step on `model.loss` and return the loss value.

    The pair returned by `trs(H)` and the features `X` are passed to
    `model.loss`; the result is back-propagated and the optimizer stepped.
    """
    model.train()
    view_a, view_b = trs(H)
    objective = model.loss(view_a, view_b, X)
    optimizer.zero_grad()
    objective.backward()
    optimizer.step()
    return objective.item()

def accuracy(yhat, y):
    """Return the percentage (0-100) of positions where `yhat` equals `y`."""
    n_correct = (yhat == y).type(torch.float).sum().item()
    return 100*n_correct/y.shape[0]

def evaluate(model, H, X, Y, val_mask, test_mask):
    """Score the model on the test split.

    Puts the model in eval mode, runs it without gradient tracking and
    returns ``(test_loss, test_acc)``: the cross-entropy as a tensor and the
    accuracy in percent. `val_mask` is accepted but currently unused.
    """
    model.eval()
    with torch.no_grad():
        scores = model(H, X) # model(X)
    test_scores, test_labels = scores[test_mask], Y[test_mask]
    test_acc = accuracy(test_scores.argmax(1), test_labels)
    test_loss = F.cross_entropy(test_scores, test_labels) # loss_fn = nn.CrossEntropyLoss()
    return test_loss, test_acc



In [None]:
# @title run

# model = HGNN(H, X.shape[1], num_classes) # hg conv
# model = Net(X.shape[1], num_classes) # hg att
# model = HMPNN(num_classes, vembdim, eembdim) # hg msg pass
# model = Network(in_channels=X.shape[1], hidden_channels=8, out_channels=num_classes, n_layers=2, task_level="node")
# model=HMPNN(X.shape[1],16,num_classes)
# drop=[0.8, 0.2, 0.5, 0.0, 0.4, 0.7, 0.4, 0.2, 0.5, 0.2] # og 81%
# drop=[0.8, 0.4, 0.4, 0.2, 0.4, 0.7, 0.5, 0.2, 0.6, 0.3] # me mean?
# drop=[0.8, 0.2, 0.5, 0.1, 0.3, 0.7, 0.6, 0.2, 0.7, 0.3] # me median

# drop=[0.8,0.6,0.8,0.6,0.5,0.3,0.5,0.8]
# adjdrop=0.2

# model=HMPNN(X.shape[1],16,num_classes, drop=drop, adjdrop=adjdrop)
# act= [1, 0, 2, 2, 0, 1, 0, 2]
# # agg= [1, 2, 2, 1]
# agg= [1]*4
# model=HMPNN(X.shape[1],16,num_classes,act=act,agg=agg)

import time
start = time.time()

# Optimiser variants tried so far (the active one is the uncommented line below):
# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-2, betas=(0.9, 0.999), eps=1e-08, weight_decay=3e-6) # vicreg1e-4
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # 0.001 # og
# optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) # 0.001 # og
# optimizer.param_groups[0]['lr']=0.01
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=3e-3) # lr 0.001 # og ,weight_decay=3e-6

# Per-epoch history of the test metrics; `model` must already have been built in an earlier cell.
testloss, testacc = [], []
for epoch in range(200):
    # Alternative training / evaluation routines:
    # for _ in range(7):
    #     loss = victrain(model, optimizer, H, X)
    # loss = train(model, optimizer, H, X, Y, train_mask)
    # loss = ctrain(model, optimizer, H, X, Y, train_mask)
    # val_acc, test_acc = evaluate(model, H, X, Y, val_mask, test_mask)
    # val_acc, test_acc = vicevaluate(model, H, X, Y, val_mask, test_mask)
    loss = train(model, optimizer, H, X, Y, train_mask, lamb=0)
    test_loss, test_acc = evaluate(model, H, X, Y, val_mask, test_mask)
    testloss.append(test_loss)
    testacc.append(test_acc)
    # print(f"{epoch+1} train loss: {loss:.5f}, Val acc: {val_acc:.2f}, Test acc: {test_acc:.2f}")
    print(f"{epoch+1} train loss: {loss:.5f}, test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}")

end = time.time()
print("time: ",end - start)
# Smooth the final numbers by averaging the last k epochs.
k=20
atest_loss, atest_acc = (sum(history[-k:]) / k for history in (testloss, testacc))
print(atest_loss, atest_acc)

# dhg
# HGNN 200epoch 14sec test loss: 0.15217, Val acc: 0.79600, Test acc: 0.79600
# HGNN drop0 200 test loss: 0.06214, Val acc: 78.00, Test acc: 79.50 # 200 test loss: 0.06138, Val acc: 79.00, Test acc: 79.80 # 200 test loss: 0.04484, Val acc: 77.40, Test acc: 78.10

# attn lr=0.01 200epoch 27 sec test loss: 0.02389, Val acc: 0.77400, Test acc: 0.78900
# attn Ponce 200 test loss: 0.04812, Val acc: 77.20, Test acc: 76.70

# hmpnn relu 200 epoch test loss: 0.00001, Val acc: 0.53600, Test acc: 0.51900
# hmpnn sigmoid 200 epoch test loss: 0.00885, Val acc: 0.55000, Test acc: 0.52200
# 2hmpnn sigmoid 200 epoch test loss: 1.94591, Val acc: 0.31600, Test acc: 0.31900  Val acc: 0.11400, Test acc: 0.10300
# 2hmpnn 2lin sigmoid 200 epoch test loss: 1.94591, Val acc: 0.11400, Test acc: 0.10300
# 2hmpnn 2lin sigmoid noadjdrop 200 epoch test loss: 1.94591, Val acc: 0.16200, Test acc: 0.14900 Val acc: 0.05800, Test acc: 0.06400
# 2hmpnn 2lin sigmoid noadjdrop res 200 epoch test loss: 0.01424, Val acc: 0.37400, Test acc: 0.39700
# 2hmpnn 2lin sigmoid noadjdrop nodrop res 200 epoch 5m47s test loss: 0.08539, Val acc: 0.31000, Test acc: 0.36100
# 2hmpnn sigmoid noadjdrop nodrop res 200 epoch 11m6s test loss: 0.00635, Val acc: 0.55200, Test acc: 0.53700
# 3hmpnn sigmoid noadjdrop nodrop res 200 epoch test loss: 0.00638, Val acc: 0.54200, Test acc: 0.52900

# request
# HGNN 200epoch 12sec test loss: 0.15115, Val acc: 0.26200, Test acc: 0.23200
# attn 200epoch test loss: 0.00675, Val acc: 0.25000, Test acc: 0.22800
# hmpnn relu 7 epoch test loss: 0.00779, Val acc: 0.53800, Test acc: 0.47500
# hmpnn relu 200 epoch test loss: 0.00000, Val acc: 0.43400, Test acc: 0.41000
# hmpnn 2lin relu 200 epoch test loss: 0.02837, Val acc: 0.44000, Test acc: 0.38000
# hmpnn sigmoid test loss: 0.00064, Val acc: 0.53000, Test acc: 0.45100
# 2hmpnn sigmoid test loss: 1.81603, Val acc: 0.35000, Test acc: 0.29500
# 2hmpnn relu test loss: 0.00000, Val acc: 0.42800, Test acc: 0.45100
# 2hmpnn 2lin relu 200 epoch test loss: 1.81581, Val acc: 0.35000, Test acc: 0.29500
# 2hmpnn 2lin sigmoid 200 epoch test loss: 1.81585, Val acc: 0.35000, Test acc: 0.29500

# @ 3hpnn 1lin relu 400 test loss: 0.04027, Val acc: 0.28, Test acc: 0.28
# @ 3hpnn 2lin relu 200 test loss: 0.09888, Val acc: 0.34, Test acc: 0.36 400 test loss: 0.00260, Val acc: 0.42, Test acc: 0.40
# @ 3hpnn 2lin sigmoid 200 test loss: 0.45359, Val acc: 31.40, Test acc: 28.40
# @ 2hpnn 2lin sigmoid 200 test loss: 0.20277, Val acc: 32.20, Test acc: 32.00
# @ hpnn 2lin sigmoid 200 test loss: 0.09099, Val acc: 42.20, Test acc: 41.50
# @ hpnn 2lin relu 200 test loss: 0.00447, Val acc: 33.60, Test acc: 38.80
# @ hpnn lin relu 200 test loss: 0.15811, Val acc: 33.80, Test acc: 33.50; 400 test loss: 0.01975, Val acc: 36.40, Test acc: 35.00
# @ hpnn lin sigmoid 200 test loss: 0.47640, Val acc: 33.80, Test acc: 33.70; 400test loss: 0.09173, Val acc: 33.60, Test acc: 34.50
# @ hpnn 2lin relu hdim8 200 test loss: 0.00264, Val acc: 46.80, Test acc: 46.70
# @ 3hpnn 2lin relu hdim8 200 test loss: 0.00477, Val acc: 28.00, Test acc: 27.10; 400 test loss: 0.00045, Val acc: 31.00, Test acc: 29.80
# @ hpnn3 2lin relu hdim8 200 test loss: 0.00310, Val acc: 49.20, Test acc: 50.00
# @ hpnn5 2lin relu hdim8 200 test loss: 0.00337, Val acc: 46.20, Test acc: 46.70

# @ hpnn2 2lin relu hdim2 drop0.5 adjdrop0.7 200 test loss: 0.53219, Val acc: 36.00, Test acc: 38.80 ; test loss: 0.21715, Val acc: 38.20, Test acc: 40.40 ;
# @ hpnn2 2lin relu hdim2 adjdrop 200 test loss: 0.01526, Val acc: 33.20, Test acc: 32.70
# @ hpnn2 2lin relu hdim2 adjdrop drop 200 test loss: 0.36543, Val acc: 30.40, Test acc: 35.20 ; test loss: 0.18227, Val acc: 32.80, Test acc: 36.20
# @ hpnn2 2lin sig hdim2 adjdrop drop 200 test loss: 0.28160, Val acc: 31.60, Test acc: 34.60 ; test loss: 0.10077, Val acc: 33.80, Test acc: 38.00
# @ hpnn2 lin sig hdim2 adjdrop drop 200 test loss: 0.77846, Val acc: 28.20, Test acc: 30.00 ; test loss: 0.17251, Val acc: 32.20, Test acc: 33.20
# @ hpnn2 lin sig hdim16 adjdrop drop 200 test loss: 0.00632, Val acc: 50.00, Test acc: 50.70
# @ hpnn2 lin sig hdim16 drop adjdrop 200 test loss: 0.00569, Val acc: 55.20, Test acc: 57.70
# hpnn2 lin sig hdim16 drop0.5 noadjdrop eemb=eenc(vemb) res12 100 test loss: 0.77957, Val acc: 64.40 starts decreasing, Test acc: 63.20
# hpnn2 lin sig hdim16 drop0.5 adjdrop0.7123 eemb=eenc(vemb) res12 200 test loss: 0.38732, Val acc: 64.00 starts decreasing, Test acc: 62.50
# hpnn2 lin sig hdim16 drop0.5 adjdrop0.7 eemb=eenc(vemb) res12 200 test loss: 0.39054, Val acc: 60.40, Test acc: 57.50
# follow noadjdrop 200 test loss: 0.00675, Val acc: 63.60, Test acc: 62.60
# follow noadjdrop normalise 200 test loss: 0.00582, Val acc: 63.40, Test acc: 62.60
# follow linbiasF noadjdrop normalise 200 test loss: 0.00923, Val acc: 61.80, Test acc: 58.70
# res1 nope
# hpnn3 res123 follow noadjdrop normalise 800? test loss: 0.00241, Val acc: 65.20, Test acc: 63.20
# hpnn2 res12 follow noadjdrop 200 test loss: 0.00748, Val acc: 65.60, Test acc: 65.20
# hpnn2 almostcopy noadjdrop 200 test loss: 0.00376, Val acc: 72.40, Test acc: 74.80

# @ hpnn2 lin aggsig hdim16 drop adjdrop 200 test loss: 0.00634, Val acc: 54.80, Test acc: 56.70
# nores test loss: 0.10421, Val acc: 44.00, Test acc: 42.50
# @ hpnn2 lin sigagg hdim16 drop adjdrop 200 test loss: 0.00952, Val acc: 48.00, Test acc: 47.20

# @ hpnn2 lin sigagg hdim16 drop adjdrop relu vembX 100 test loss: 0.00001, Val acc: 50.00, Test acc: 50.20
# @ hpnn2 lin sigagg hdim16 drop adjdrop sig vembX 100 test loss: 0.00071, Val acc: 52.20, Test acc: 50.40

#
# @ hpnn2 lin ggBDLS ffLBDS hdim16 drop adjdrop345 100 test loss: 0.01240, Val acc: 45.60, Test acc: 44.60
# @ hpnn2 lin ggBDLS ffLBDS hdim16 drop adjdrop 100 test loss: 0.03899, Val acc: 40.60, Test acc: 42.50
# @ hpnn2 lin ggffBLSD hdim16 drop adjdrop 100 test loss: 0.00002, Val acc: 28.80, Test acc: 31.10
# @ hpnn2 lin ggffBLSD hdim16 drop0.1 adjdrop0 100 test loss: 0.00000, Val acc: 30.60, Test acc: 31.30
# @ hpnn2 lin hdim16 drop0.1 adjdrop0 100 test loss: 1.87673, Val acc: 12.80, Test acc: 12.10
# @ hpnn2 lin hdim16 drop0.1 adjdrop0.7 100 test loss: 0.92645, Val acc: 36.60, Test acc: 37.60
# @ hpnn2 lin hdim16 drop0.5 adjdrop0.7 100 test loss: 1.09457, Val acc: 42.00, Test acc: 37.10
# adj train~40

# 100 test loss: 0.00412, Val acc: 36.60, Test acc: 39.30

# 10,10,1 noadjdrop 1000 test loss: 6.19388, Val acc: 24.20, Test acc: 25.90
# 15,15,1 trs 0.3 1000 test loss: 0.00289, Val acc: 43.40, Test acc: 39.30
# 15,15,1 trs 0.5 137 test loss: 0.06801, Val acc: 35.00, Test acc: 41.60
# 15,15,1 trs 0.7 357 test loss: 0.00001, Val acc: 28.60, Test acc: 31.00
# 15,15,1 trs 0.1 154 test loss: 0.08117, Val acc: 34.40, Test acc: 35.60

# eemb=vemb 1000 test loss: 0.07657, Val acc: 50.80, Test acc: 54.90

# mha d_model2 828 test loss: 0.00944, Val acc: 27.20, Test acc: 24.90
# mha d_model2 vicreg 26 test loss: 1.94425, Val acc: 7.40, Test acc: 9.90
# mha d_model16 vicreg exp32 54 test loss: 0.00315, Val acc: 48.40, Test acc: 47.30 in vicreg  0.0 0.06320717930793762 1.4348915815353394
# mha d_model16 96 test loss: 0.03089, Val acc: 48.80, Test acc: 48.70 # 276 test loss: 0.00349, Val acc: 47.60, Test acc: 49.20

# mha d_model16 vicreg10,10,1 exp32 2708, 169, 1433
# mha d_model16 nores2 158 test loss: 0.00906, Val acc: 50.20, Test acc: 50.70
# mha d_model16 nores 219 test loss: 0.00544, Val acc: 49.60, Test acc: 49.60
# mha d_model16 nores embbiasT 112 test loss: 0.02248, Val acc: 50.20, Test acc: 48.70

# mha d_model128 nores 18 test loss: 0.45603, Val acc: 51.80, Test acc: 51.20 37 test loss: 0.00388, Val acc: 50.80, Test acc: 51.40

# mha drop0.5 adam1e-2 101 test loss: 0.02396, Val acc: 51.80, Test acc: 50.00
# mhadrop0.5 178 test loss: 1.94971, Val acc: 31.60, Test acc: 31.90

# 2lin drop0.5 adjdrop0.7123 200 test loss: 1.05092, Val acc: 17.80, Test acc: 20.70
# 2lin drop0.5 adjdrop0.7 200 test loss: 0.95048, Val acc: 33.80, Test acc: 28.60
# 2lin gelu drop0.5 adjdrop0.7 200 test loss: 1.03655, Val acc: 19.60, Test acc: 21.10
# 2lin sig drop0.5 adjdrop0.7 200 test loss: 1.43882, Val acc: 16.40 huge variation, Test acc: 17.10

# lin sig 200 drop0.5 adjdrop0.7123 test loss: 1.86198, Val acc: 56.60, Test acc: 60.90
# better than batchnorm ,adjdrop0.7, 2lin
# 3lin sig nodrop noadjdrop 600? test loss: 0.08037, Val acc: 26.00, Test acc: 27.30

# # 3lin sig nodrop noadjdrop in vicreg  0.08287973701953888 0.6470489501953125 1.871908187866211
# test acc:  100.0
# 591 test loss: 0.00912, Val acc: 15.80, Test acc: 17.20
# 5,5,1in vicreg  0.007232982665300369 0.007199370302259922 0.4727526307106018
# test acc:  100.0
# 100 test loss: 0.00160, Val acc: 20.00, Test acc: 19.40

# lin D_e_inv@emsg 100 test loss: 2.38527, Val acc: 36.80, Test acc: 33.40

# res 200 test loss: 0.00703, Val acc: 57.60, Test acc: 54.30
# resfgvw 200 test loss: 0.00358, Val acc: 72.60, Test acc: 74.80
# resfgvw encbiasT 200 test loss: 0.00470, Val acc: 76.40, Test acc: 76.10
# resfgvw encbiasT nodrop 200 test loss: 0.00243, Val acc: 77.40, Test acc: 75.40
# resfgvw semsg=zeros encbiasT nodrop 200 test loss: 0.00329, Val acc: 76.80, Test acc: 76.50
# resfgvw semsg=zeros encbiasT nodrop adjdrop0.7 nope 50+?
# resfgvw semsg=zeros encbiasT nodrop adjdrop0.712 200 test loss: 0.00292, Val acc: 71.20, Test acc: 74.30
# resfgvw semsg=zeros encbiasT nodrop adjdrop0.312 200 test loss: 0.00302, Val acc: 74.60, Test acc: 77.30
# resfgvw 2lin semsg=zeros encbiasT nodrop noadjdrop 200 test loss: 0.00020, Val acc: 74.20, Test acc: 74.80
# resfgvw hmpnn3 1lin semsg=zeros encbiasT nodrop noadjdrop 200 test loss: 0.00543, Val acc: 76.80, Test acc: 77.70 79at49epochs
# inv 200 test loss: 0.00271, Val acc: 54.00, Test acc: 54.40

# resfgvw ls semsg=vemb 200 test loss: 0.00239, Val acc: 78.00, Test acc: 75.80 again 200 test loss: 0.00183, Val acc: 76.60, Test acc: 77.10
# lbsd 200 test loss: 0.00846, Val acc: 67.00, Test acc: 65.60
# lbds 200 test loss: 0.00780, Val acc: 67.00, Test acc: 68.30
# lds 200 test loss: 0.00686, Val acc: 74.00, Test acc: 73.60
# dls 200 test loss: 0.00442, Val acc: 76.60, Test acc: 77.90
# lsd 200 test loss: 0.00501, Val acc: 76.20, Test acc: 75.00
# bls 200 test loss: 0.00167, Val acc: 67.20, Test acc: 68.10
# lsl 200 test loss: 0.00024, Val acc: 73.40, Test acc: 74.30
# lsl hdim*2 200 test loss: 0.00059, Val acc: 74.20, Test acc: 75.90
# lsls hdim*1 200 test loss: 0.00447, Val acc: 76.80, Test acc: 77.80

# xavier_uniform 200 test loss: 0.00451, Val acc: 74.00, Test acc: 74.10 200 test loss: 0.00697, Val acc: 75.80, Test acc: 77.00 200 test loss: 0.00655, Val acc: 77.20, Test acc: 77.10
# xavier_normal 200 test loss: 0.00638, Val acc: 77.20, Test acc: 77.40 200 test loss: 0.00653, Val acc: 77.60, Test acc: 78.90 200 test loss: 0.00394, Val acc: 75.80, Test acc: 76.10
# xavier_normal semsg=zeros 200 test loss: 0.00778, Val acc: 77.40, Test acc: 77.90 200 test loss: 0.00777, Val acc: 74.20, Test acc: 75.90
# xavier_normal semsg=vemb 200 test loss: 0.00669, Val acc: 78.00, Test acc: 77.30 200 test loss: 0.00502, Val acc: 75.20, Test acc: 75.70
# lr
# lg
# le
# ll-r


# 1vic10,10,1 200 test loss: 0.42472, Val acc: 70.80, Test acc: 74.00
# 1vic5,10,1 200 test loss: 0.40446, Val acc: 74.40, Test acc: 75.50 # in vicreg  0.0 0.018776636570692062 0.3856853246688843
# 7vic5,10,1 trs0.1 153 test loss: 0.00000, Val acc: 75.20, Test acc: 75.40 #in vicreg  0.0 0.0 1.8084348596403288e-07
# 7vic5,10,1 trs0.3 200 test loss: 0.00000, Val acc: 72.20, Test acc: 77.00 # in vicreg  0.0 0.0 1.730809202626915e-07
# 7vic5,10,1 trs0.5 200 test loss: 0.00000, Val acc: 71.40, Test acc: 74.10 # in vicreg  0.0 0.0 5.946303360815364e-08
# hpnn3 7vic5,10,1 trs0.5 200 test loss: 0.00000, Val acc: 78.60, Test acc: 79.90 # in vicreg  0.0 0.0 1.24092821351951e-07 # in vicreg  0.0 0.0 6.852277e-08 200 test loss: 0.00000, Val acc: 74.60, Test acc: 76.10
# hpnn3 7vic5,10,1 trs0.5 exp256 200 test loss: 6.25101, Val acc: 72.00, Test acc: 72.60 # in vicreg  0.0 5.1014814376831055 1.1495264768600464
# hpnn2 7vic5,10,1 trs0.5 exp16 200 test loss: 0.00000, Val acc: 75.40, Test acc: 76.50 # in vicreg  0.0 0.0 5.740866981795989e-08
# 200 test loss: 0.00000, Val acc: 76.20, Test acc: 76.30 # in vicreg  0.0 0.0 4.3843346730909616e-08
# hpnn2 7vic5,10,1 trs0.5 dmodel4 exp16 200 test loss: 0.00000, Val acc: 53.00, Test acc: 55.60 in vicreg  0.0 0.0 1.5833e-06 # 200 test loss: 0.00000, Val acc: 58.40, Test acc: 59.80 in vicreg  0.0 0.0 3.21431e-08
# hpnn2 7vic5,10,1 trs0.5 dmodel4 exp4 200 test loss: 0.00000, Val acc: 70.20, Test acc: 73.30 in vicreg  0.0 0.0 2.415e-09 # 200 test loss: 0.00000, Val acc: 71.80, Test acc: 73.70 in vicreg  0.0 0.0 9.7722e-10
# hpnn2 7vic5,10,1 trs0.5 dmodel8 exp4 200 test loss: 0.00000, Val acc: 74.00, Test acc: 74.80 in vicreg  0.0 0.0 1.0572726694135781e-08
# hpnn2 7vic5,10,1 trs0.5 dmodel8 exp8 200 test loss: 0.00000, Val acc: 72.00, Test acc: 74.60 in vicreg  0.0 0.0 2.139416288571283e-08
# hpnn2 7vic5,10,1 trs0.5 dmodel16 exp4 200 test loss: 0.00131, Val acc: 70.40, Test acc: 75.70 in vicreg  0.00130597 0.0 7.97829e-06 200 test loss: 0.00101, Val acc: 70.00, Test acc: 74.40 in vicreg  0.0009512 0.0 5.53794e-05

# dmodel8 200 test loss: 0.00837, Val acc: 74.40, Test acc: 75.00
# dmodel4 200 test loss: 0.03467, Val acc: 67.60, Test acc: 69.60
# dmodel2 200 test loss: 0.43079, Val acc: 51.40, Test acc: 52.80 200 test loss: 0.06613, Val acc: 59.00, Test acc: 58.70 200 test loss: 0.02644, Val acc: 59.40, Test acc: 58.80


# ff:ls gg:bdls 200 test loss: 0.00420, Val acc: 72.20, Test acc: 71.60 # 200 test loss: 0.00699, Val acc: 66.60, Test acc: 68.00 # 200 test loss: 0.00781, Val acc: 68.60, Test acc: 71.40
# ff:ls gg:dls 200 test loss: 0.00438, Val acc: 73.40, Test acc: 74.10 # 200 test loss: 0.00319, Val acc: 73.80, Test acc: 74.50 # 200 test loss: 0.00408, Val acc: 74.20, Test acc: 74.80
# ff:ls gg:ls 200 test loss: 0.00231, Val acc: 76.40, Test acc: 75.70 # 200 test loss: 0.00291, Val acc: 76.80, Test acc: 78.50 # 200 test loss: 0.00370, Val acc: 74.20, Test acc: 75.30
# ff:ls gg:lr 200 test loss: 0.00139, Val acc: 74.00, Test acc: 75.70 # 200 test loss: 0.00033, Val acc: 75.00, Test acc: 73.00 # 200 test loss: 0.00034, Val acc: 73.80, Test acc: 74.70
# ff:ls gg:l(lr) 200 test loss: 0.00022, Val acc: 75.20, Test acc: 75.40 # 200 test loss: 0.00024, Val acc: 67.40, Test acc: 68.70 # 200 test loss: 0.00042, Val acc: 73.60, Test acc: 75.40
# ff:ls gg:le 200 test loss: 0.00020, Val acc: 71.20, Test acc: 71.40 # 200 test loss: 0.00073, Val acc: 72.80, Test acc: 70.60 # 200 test loss: 0.00073, Val acc: 72.20, Test acc: 69.10
# ff:ls gg:lg 200 test loss: 0.00062, Val acc: 73.40, Test acc: 73.60 # 200 test loss: 0.00057, Val acc: 74.80, Test acc: 72.90 # 200 test loss: 0.00044, Val acc: 69.40, Test acc: 70.50
# ff:ls gg:lt 200 test loss: 0.00094, Val acc: 73.80, Test acc: 74.70 # 200 test loss: 0.00076, Val acc: 75.20, Test acc: 73.10 # 200 test loss: 0.00051, Val acc: 77.80, Test acc: 76.40

# ff:l gg:ls 200 test loss: 0.00001, Val acc: 76.40, Test acc: 76.30 # 200 test loss: 0.00001, Val acc: 74.20, Test acc: 73.40 # 200 test loss: 0.00001, Val acc: 73.20, Test acc: 73.40



# ff:bl gg:ls 200 test loss: 0.00002, Val acc: 73.60, Test acc: 77.90 # 200 test loss: 0.00002, Val acc: 74.80, Test acc: 75.40 # 200 test loss: 0.00001, Val acc: 74.60, Test acc: 76.50
# weight decay 200 test loss: 0.00003, Val acc: 75.60, Test acc: 76.90

# ff:bls gg:lr 200 test loss: 0.00175, Val acc: 70.40, Test acc: 72.20, 200 test loss: 0.00077, Val acc: 68.40, Test acc: 69.40
# ff:bdls gg:lr 200 test loss: 0.00126, Val acc: 70.40, Test acc: 71.00 # 200 test loss: 0.00247, Val acc: 71.20, Test acc: 73.20 # 200 test loss: 0.00401, Val acc: 72.80, Test acc: 74.70
# ff:dls gg:lr 200 test loss: 0.00089, Val acc: 73.60, Test acc: 75.90 # 200 test loss: 0.00055, Val acc: 72.80, Test acc: 71.70 # 200 test loss: 0.00089, Val acc: 75.00, Test acc: 75.70
# ff:dlt gg:lr 200 test loss: 0.00004, Val acc: 69.80, Test acc: 70.20 # 200 test loss: 0.00002, Val acc: 74.40, Test acc: 74.70 # 200 test loss: 0.00001, Val acc: 73.00, Test acc: 75.80
# ff:l gg:lr 200 test loss: 0.00000, Val acc: 69.80, Test acc: 70.60 # 200 test loss: 0.00000, Val acc: 69.40, Test acc: 67.00 # 200 test loss: 0.00000, Val acc: 75.80, Test acc: 76.30

# ff:lt gg:lr 200 test loss: 0.00002, Val acc: 75.20, Test acc: 75.10 # 200 test loss: 0.00001, Val acc: 67.60, Test acc: 68.80 # 200 test loss: 0.00002, Val acc: 69.20, Test acc: 68.80
# ff:ls xnorm gg:lr 200 test loss: 0.00108, Val acc: 76.20, Test acc: 75.30 # 200 test loss: 0.00186, Val acc: 73.20, Test acc: 75.00 # 200 test loss: 0.00125, Val acc: 74.20, Test acc: 72.90
# ff:ls xunif gg:lr 200 test loss: 0.00059, Val acc: 74.00, Test acc: 75.80 # 200 test loss: 0.00114, Val acc: 76.00, Test acc: 75.40 # 200 test loss: 0.00127, Val acc: 74.60, Test acc: 72.10
# ff:ls xnorm gg:lr heunif # 200 test loss: 0.00037, Val acc: 73.20, Test acc: 69.10 # 200 test loss: 0.00083, Val acc: 74.00, Test acc: 72.70 # 200 test loss: 0.00204, Val acc: 75.40, Test acc: 75.10
# ff:ls xnorm gg:lr henorm # 200 test loss: 0.00140, Val acc: 76.20, Test acc: 74.90 # 200 test loss: 0.00151, Val acc: 73.20, Test acc: 74.60 # 200 test loss: 0.00047, Val acc: 72.20, Test acc: 71.80


# ff:ls gg:ls dropemb 200 test loss: 0.00402, Val acc: 75.40, Test acc: 77.80
# ff:dls gg:dls dropemb 200 test loss: 0.01175, Val acc: 76.80, Test acc: 76.70 # 200 test loss: 0.00596, Val acc: 76.80, Test acc: 78.00 # 200 test loss: 0.00933, Val acc: 76.40, Test acc: 76.50
# ff:dls gg:dls dropemb adjdrop0.7 200 test loss: 0.02268, Val acc: 72.20, Test acc: 72.80 # 200 test loss: 0.00145, Val acc: 70.60, Test acc: 71.10
# ff:dls gg:dls dropemb adjdrop0.312 200 test loss: 0.02186, Val acc: 74.80, Test acc: 76.40 # 200 test loss: 0.02064, Val acc: 78.20, Test acc: 77.70 200 test loss: 0.00061, Val acc: 77.20, Test acc: 79.50
# ff:dls gg:dls dropemb adjdrop0.512 200 test loss: 0.02568, Val acc: 76.40, Test acc: 77.50 200 test loss: 0.00190, Val acc: 76.40, Test acc: 76.60 # 200 test loss: 0.01694, Val acc: 75.40, Test acc: 77.10
# ff:dls gg:dls dropemb adjdrop0.712 200 test loss: 0.03414, Val acc: 76.20, Test acc: 76.50 # 200 test loss: 0.05957, Val acc: 77.80, Test acc: 78.80 # 200 test loss: 0.02279, Val acc: 76.40, Test acc: 77.90
# ff:dls gg:dls dropemb adjdrop0.712 D^-1 200 test loss: 0.03110, Val acc: 76.40, Test acc: 76.60
# ff:dls gg:dls dropemb adjdrop0.5 200 test loss: 0.02577, Val acc: 72.60, Test acc: 72.70
# ff:dls gg:dls dropemb adjdrop0.3 200 test loss: 0.00988, Val acc: 73.40, Test acc: 74.50

# copy resv 200 test loss: 0.00460, Val acc: 77.20, Test acc: 78.70
# copy 200 test loss: 0.00454, Val acc: 77.20, Test acc: 77.90 # 200 test loss: 0.00210, Val acc: 77.20, Test acc: 75.80 # 200 test loss: 0.00373, Val acc: 73.40, Test acc: 75.60
# copy hard 200 test loss: 0.06467, Val acc: 79.80, Test acc: 80.50

# optuna agg, low max66




1 train loss: 1.95799, test loss: 1.94263, Test acc: 31.90
2 train loss: 1.95073, test loss: 1.93990, Test acc: 31.90
3 train loss: 1.94529, test loss: 1.93866, Test acc: 34.80
4 train loss: 1.94153, test loss: 1.93888, Test acc: 13.00
5 train loss: 1.93935, test loss: 1.94031, Test acc: 13.00
6 train loss: 1.93843, test loss: 1.94235, Test acc: 13.00
7 train loss: 1.93812, test loss: 1.94414, Test acc: 13.00
8 train loss: 1.93778, test loss: 1.94514, Test acc: 13.40
9 train loss: 1.93711, test loss: 1.94531, Test acc: 15.50
10 train loss: 1.93616, test loss: 1.94489, Test acc: 14.90
11 train loss: 1.93511, test loss: 1.94406, Test acc: 14.90
12 train loss: 1.93405, test loss: 1.94293, Test acc: 14.90
13 train loss: 1.93300, test loss: 1.94146, Test acc: 16.60
14 train loss: 1.93187, test loss: 1.93960, Test acc: 21.20
15 train loss: 1.93051, test loss: 1.93724, Test acc: 22.20
16 train loss: 1.92876, test loss: 1.93429, Test acc: 22.30
17 train loss: 1.92645, test loss: 1.93071, Test 

In [None]:
# @title run optuna
# !pip install optuna
import optuna

def objective(trial):
    """Optuna objective: train a fresh HMPNN and return its late-epoch test accuracy.

    Args:
        trial: the optuna trial used to sample ``adjdrop`` (0.0-0.8 in steps
            of 0.1) and four ``agg`` codes (integers 0-2).

    Returns:
        Mean test accuracy in percent over the last ``k`` epochs. Higher is
        better, so a study running this objective should maximise it.
    """
    n_epochs = 200
    k = 20  # number of trailing epochs averaged into the returned score

    # Fixed defaults; the trial suggestions below override the ones being searched.
    drop=[0.0]*(2*4)
    adjdrop=0.0
    act= [2, 2, 0, 1, 1, 2, 2, 0]
    agg= [0, 1, 2, 2]
    # lr = 10**-(trial.suggest_int("lr", 2,8)/2)
    # weight_decay = 10**-(trial.suggest_int("weight_decay", 2,10)/2)
    # drop = [trial.suggest_float("drop"+str(i), 0.0, 0.8, step=0.1) for i in range(10)]
    adjdrop = trial.suggest_float("adjdrop", 0.0, 0.8, step=0.1)
    # d_model = 2**trial.suggest_int("d_model", 1,5)
    # act = [trial.suggest_int("act"+str(i), 0,2) for i in range(2*4)]
    agg = [trial.suggest_int("agg"+str(i), 0,2) for i in range(2*2)]
    d_model=16
    # lamb = 10**trial.suggest_int("lamb", 2,4)
    # print(f"d_model: {d_model:.1f}, lamb: {lamb:.1f}, adjdrop: {adjdrop:.1f}, drop: {drop}, lr: {lr}, weight_decay: {weight_decay}")
    # print(f"adjdrop: {adjdrop:.1f}, drop: {drop}")
    print(f"act: {act}, agg: {agg}")

    # NOTE(review): `act` and `agg` are printed but not passed to the model, so the
    # sampled `agg` values do not influence the score as written -- confirm whether
    # the commented constructor below is the one that should be active.
    # model=HMPNN(X.shape[1],d_model,num_classes, drop, adjdrop, act, agg)
    model=HMPNN(X.shape[1],d_model,num_classes, drop, adjdrop)

    # optimizer = torch.optim.AdamW(model.parameters(), lr=1e-2, betas=(0.9, 0.999), eps=1e-08, weight_decay=3e-6) # vicreg1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # 0.001 # og
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) # lr 0.001 # og ,weight_decay=3e-6
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=weight_decay) # lr 0.001 # og ,weight_decay=3e-6

    testacc = []
    for epoch in range(n_epochs):
        # for _ in range(7):
            # loss = victrain(model, optimizer, H, X)
        # loss = train(model, optimizer, H, X, Y, train_mask, lamb)
        loss = train(model, optimizer, H, X, Y, train_mask)
        test_loss, test_acc = evaluate(model, H, X, Y, val_mask, test_mask)
        # loss = ctrain(model, optimizer, H, X, Y, train_mask)
        # val_acc, test_acc = vicevaluate(model, H, X, Y, val_mask, test_mask)
        testacc.append(test_acc)
        # Report once, on the final epoch. The previous `epoch==399` check could never
        # fire within 200 epochs and referred to a `val_acc` that is not computed here.
        if epoch == n_epochs - 1:
            print(f"{epoch+1} train loss: {loss:.5f}, test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}")

    # Divide by the real tail length so the mean stays correct if n_epochs < k.
    tail = testacc[-k:]
    atest_acc = sum(tail) / len(tail)
    print("atest_acc",atest_acc,"max testacc",max(testacc))
    return atest_acc

# https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/003_efficient_optimization_algorithms.html
# optuna.logging.set_verbosity(70)
# sampler = optuna.samplers.NSGAIISampler() # https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
# # sampler = optuna.samplers.MOTPESampler()
# pruner = optuna.pruners.MedianPruner()
# study = optuna.create_study(direction="minimize", sampler=sampler, pruner=pruner)

# objective() returns a test accuracy, so the study has to maximise it.
# optuna.create_study() without arguments minimises, which would steer the
# search towards the worst-performing configuration.
study = optuna.create_study(direction="maximize")

study.optimize(objective, n_trials=100)
print(study.best_params)



In [None]:
# https://matplotlib.org/stable/plot_types/index.html
import matplotlib.pyplot as plt

# Draw one heat-map figure (with colour bar) per parameter tensor of `model`.
for name, param in model.named_parameters(): # for param in model.parameters():
    print(name, param.shape)
    if param.dim() == 1:
        # imshow wants a 2-D array, so a vector is shown as a single row
        param = param.unsqueeze(0)
    Z = param.detach().numpy()
    fig, ax = plt.subplots()
    pos = ax.imshow(Z)
    fig.colorbar(pos)
    plt.show()


In [None]:
# https://gist.github.com/eljost/2c4e1af652ef02b2989da341c5569af7
# from nn_plot.ipynb
import matplotlib.pyplot as plt
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import scipy.stats as st

# np.random.seed(1)
def func(x):
    """Return one uniform random value in [0, 1) per row of ``x``.

    The values stored in ``x`` are ignored; only ``x.shape[0]`` is read.
    The shape of the generated array is printed before it is returned.
    """
    # print(x.shape)
    # x= np.sum(x**2, axis=-1)
    samples = np.random.rand(x.shape[0])
    print(samples.shape)
    return samples

# Fit a Gaussian process to a handful of random 2-D points and plot one
# sample drawn from its posterior as a 3-D surface with a contour projection.
# NOTE: this rebinds the notebook-level names X and Y.
res = 50  # grid points per axis; also the upper bound of the sampled square
num_pts=15
X = res * np.random.rand(num_pts, 2)
# Y = func(X)
Y = np.random.rand(num_pts)
# print(X);print(Y)

lim = 1
# lin = np.linspace(-lim, lim, res)
lin = np.linspace(0, res, res)
x1, x2 = np.meshgrid(lin, lin)
# Every grid node as one (x1, x2) row, ready for gp.predict.
xx = np.column_stack((x1.ravel(), x2.ravel()))

kernel = RBF()
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
gp.fit(X, Y)
# print("Learned kernel", gp.kernel_)
y_mean, y_cov = gp.predict(xx, return_cov=True)

# A single joint draw over all grid nodes from the posterior N(y_mean, y_cov).
posteriors = st.multivariate_normal.rvs(mean=y_mean, cov=y_cov, size=1)
Z = posteriors.reshape(-1, res)

ax = plt.figure().add_subplot(projection='3d')
# ax.plot_surface(x1, x2, Z)
ax.plot_surface(x1, x2, Z, cmap='rainbow', alpha=0.7)
# ax.plot_surface(x1, x2, posteriors.reshape(-1, res))

# Contours are projected onto the plane z = -1, the floor of the z-range set below.
ax.contour(x1, x2, Z, zdir='z', offset=-1, cmap='coolwarm') # https://matplotlib.org/stable/gallery/mplot3d/contour3d_3.html#sphx-glr-gallery-mplot3d-contour3d-3-py
# ax.set(xlim=(0, 50), ylim=(0, 50), zlim=(-0.4, 0.5))#, xlabel='X', ylabel='Y', zlabel='Z')
ax.set(xlim=(0, 50), ylim=(0, 50), zlim=(-1, 2))#, xlabel='X', ylabel='Y', zlabel='Z')

# ax.scatter3D(X[:, 0], X[:, 1],Y, c=zdata, cmap='Greens');
# ax.scatter3D(X[:, 0], X[:, 1],Y, cmap='Greens');

plt.show()



#### Hypergraph Neural Network (HGNN) Layer

The [HGNN layer](https://arxiv.org/pdf/1809.09401.pdf) is defined as:

$$f(X^{(l)}, H; W^{(l)}) = \sigma(L X^{(l)} W^{(l)})$$

$$L = D_v^{-1/2} H B D_e^{-1} H^\top D_v^{-1/2}$$

where

* $H \in \mathbb{R}^{N \times M}$ is the incidence matrix of hypergraph with $N$ nodes and $M$ hyperedges.
* $D_v \in \mathbb{R}^{N \times N}$ is a diagonal matrix representing node degrees, whose $i$-th diagonal element is $\sum_{j=1}^M H_{ij}$.
* $D_e \in \mathbb{R}^{M \times M}$ is a diagonal matrix representing hyperedge degrees, whose $j$-th diagonal element is $\sum_{i=1}^N H_{ij}$.
* $B \in \mathbb{R}^{M \times M}$ is a diagonal matrix representing the hyperedge weights, whose $j$-th diagonal element is the weight of $j$-th hyperedge.  In our example, $B$ is an identity matrix.

In [None]:
# @title test
# https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/hgnn.ipynb
# https://github.com/dmlc/dgl/blob/master/notebooks/sparse/hgnn.ipynb
# https://github.com/dmlc/dgl/blob/master/examples/sparse/hgnn.py
import torch

# Toy example: row 0 lists vertex ids, row 1 the hyperedge each of them belongs to.
cite=torch.Tensor([[0, 1, 2, 2, 2, 2, 3, 4, 5, 5, 5, 5, 6, 7, 7, 8, 8, 9, 9, 10],
                    [0, 0, 0, 1, 3, 4, 2, 1, 0, 2, 3, 4, 2, 1, 3, 1, 3, 2, 4, 4]])
# coalesce() merges duplicate coordinates by summing their values
# (an uncoalesced tensor may hold the same index several times).
H = torch.sparse_coo_tensor(indices=cite, values=torch.ones(cite.size(1))).coalesce()
# vert _ is in hyperedge _
print(H.to_dense()) # cols: hyperedges ; rows: verts

# print(H) # indices = [[x1,x2,...], [y1,y2,y3,...]]
# print(H.to_sparse_csr()) # crow_indices=[row1 got ? elements, row2... , ... ] , col_indices= col idx # https://stackoverflow.com/questions/52299420/scipy-csr-matrix-understand-indptr
# print(H.to_sparse_csc()) # ccol_indices = [start count num elements in col], row_indices = row ind
# print(H.to_dense().to_sparse_bsr())
# print(H.to_sparse_bsc())

csr=H.to_sparse_csr()
# Differences of consecutive crow_indices are the number of stored entries per row;
# splitting col_indices by those sizes yields, per vertex, the hyperedges it is in.
# import numpy as np
# ss=np.split(csr.col_indices(), csr.crow_indices())[1:-1]
row_sizes = torch.diff(csr.crow_indices()).tolist()
ss = torch.split(csr.col_indices(), row_sizes)
print(ss)

# Pad the ragged per-vertex lists into one rectangular tensor; -1 marks padding.
from torch.nn.utils.rnn import pad_sequence
pp = pad_sequence(ss, batch_first=True, padding_value=-1)
print(pp)
mask = pp.lt(0)  # True exactly at the padded slots
print(mask)
# node_degrees = H.sum(1)
# print("Node degrees", node_degrees)
# hyperedge_degrees = H.sum(0)
# print("Hyperedge degrees", hyperedge_degrees.values())


# vmsg=torch.rand(11,2)
# svmsg=torch.stack([torch.sum(vmsg[v.to_dense().to(torch.bool)],0) for v in H.T]) # given e, get all vmsgs then aggregate
# # print(svmsg)



In [None]:
# @title requests data
import os
import requests
import tarfile # os, sys,

url = 'https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz'
# Fetch the archive only when no local copy exists. The previous version wrote
# `response.content` while the request itself was commented out, which fails
# with NameError on a fresh kernel.
if not os.path.exists("cora.tgz"):
    response = requests.get(url)
    response.raise_for_status()  # do not save an HTTP error page as the archive
    with open("cora.tgz", "wb") as archive:
        archive.write(response.content)

# NOTE(review): extraction goes to /content while later cells read the relative
# path "cora/..." -- this presumably relies on the working directory being /content (Colab); confirm.
with tarfile.open('cora.tgz', 'r') as tar:
    tar.extractall('/content')

import torch

# cora.content: one tab-separated record per paper:
# paper id, bag of words bool, category 0-6 # all str
# category: Case_Based, Genetic_Algorithms, Neural_Networks, Probabilistic_Methods, Reinforcement_Learning, Rule_Learning, Theory
category = {'Case_Based':0, 'Genetic_Algorithms':1, 'Neural_Networks':2, 'Probabilistic_Methods':3, 'Reinforcement_Learning':4, 'Rule_Learning':5, 'Theory':6} # cora

# Read through a context manager so the handle is closed, and drop empty rows
# explicitly instead of assuming the file ends with exactly one newline.
with open("cora/cora.content", "r") as content:
    # print(content.read(10000))
    rlst = [row for row in content.read().splitlines() if row]

pid = [] # paper id
bow = [] # bag of words
cls = [] # classes
for r in rlst:
    rr=r.split('\t')
    pid.append(int(rr[0]))
    bow.append(list(map(float, rr[1:-1]))) # must be float
    cls.append(category[rr[-1]])
pid=torch.tensor(pid)
X=torch.tensor(bow)
Y=torch.tensor(cls)
num_classes=7

# https://stellargraph.readthedocs.io/en/v1.0.0rc1/demos/node-classification/gcn/gcn-cora-node-classification-example.html
# The Cora dataset consists of 2708 scientific publications
# classified into one of seven classes.
# The citation network consists of 5429 links
# Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary.
# The dictionary consists of 1433 unique words

# cora.cites: one tab-separated pair of paper ids per line.
# Read through a context manager so the handle is closed; empty rows are skipped.
with open("cora/cora.cites", "r") as cites: # cite relation
    clst = [row for row in cites.read().splitlines() if row]
cite = [] #
for c in clst:
    cc=c.split('\t')
    cite.append([int(cc[0]),int(cc[1])])
cite=torch.tensor(cite) # [num_links, 2]

# Re-index every paper by its ROW in cora.content, so that node i of H is the
# paper described by X[i] and Y[i]. Numbering by torch.unique(pid) would order
# the papers by sorted id instead, which matches the feature rows only if the
# content file happens to be sorted by paper id.
ukeys = pid.clone()  # original paper ids in file order (clone: pid is rewritten in place below)
uvals = torch.arange(len(ukeys))
udict = dict(zip(ukeys.tolist(), uvals.tolist())) # assign new id to each paper
pid = pid.apply_(udict.get)
cite = cite.apply_(udict.get)

num_v = len(pid)
H = torch.sparse_coo_tensor(indices=cite.T, values=torch.ones(cite.shape[0]), size=(num_v, num_v)).coalesce() # size=(2708, 2708), nnz=5429, layout=torch.sparse_coo
# Sparse identity; named `eye` so the builtin `id` is not shadowed.
eye = torch.sparse.spdiags(torch.ones(H.shape[0]),torch.tensor(0),H.shape)
H = (eye + H).coalesce() # each vert got its hyperedge, contain all cited and itself, [2708, 2708], incidence matrix, |V| hyperedges


# Boolean split masks over the node rows: first 140 train, next 500 validation, last 1000 test.
train_mask, val_mask, test_mask = torch.zeros(3, num_v, dtype=torch.bool)
train_mask[:140], val_mask[140:640], test_mask[-1000:] = True, True, True # cora mask
# print(len(train_mask))
# print(train_mask)
# H, X, Y, num_classes, train_mask, val_mask, test_mask = load_data()

# print(train_mask, val_mask, test_mask)
# print(sum(train_mask), sum(val_mask), sum(test_mask)) # 140), (500), (1000)
# print(sum(test_mask[-1000:]))
# print(len(test_mask)) # 2708
# print(train_mask[140])
# [:140], [140:640], [-1000:]

# print(H.shape, X.shape, Y.shape) # [2708, 2708], [2708, 1433], [2708]

# @title edge/ incidence list

# edic = dict((id, [id]) for id in pid.tolist()) # edge list H(E)={e1,e2,e3}={{A,D},{D,E},{A,B,C}}
# idic = dict((id, [id]) for id in pid.tolist()) # incidence list {A:{e1,e3}, B:{e3}, C:{e3}, D:{e1,e2}, E:{e2}}
# Entry i starts with i itself, mirroring the identity that is added to H.
# Both structures are ragged (one variable-length list per node), so they stay
# plain Python lists: torch.tensor() raises ValueError on rows of unequal length.
elst = [[v] for v in range(num_v)] # edge list H(E)={e1,e2,e3}={{A,D},{D,E},{A,B,C}}
ilst = [[v] for v in range(num_v)] # incidence list {A:{e1,e3}, B:{e3}, C:{e3}, D:{e1,e2}, E:{e2}}
for a,b in cite.tolist():
    elst[a].append(b)
    ilst[b].append(a)
# print(elst)
# print(ilst)


In [None]:
# @title gpt HMPNN
import torch.nn.functional as F

class HMPNNLayer(nn.Module):
    """Two-stage linear/ReLU/dropout layer followed by batch normalisation.

    Both linear maps are built as ``input_dim -> output_dim``; ``fc_w`` is applied
    to a tensor derived from the output of ``fc_v``.
    NOTE(review): that only lines up when input_dim == output_dim - confirm intent.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc_v = nn.Linear(input_dim, output_dim)
        self.fc_w = nn.Linear(input_dim, output_dim)
        self.batch_norm = nn.BatchNorm1d(output_dim)

    def forward(self, X_v, W_e, M_v):
        """Return the batch-normalised aggregate computed from ``X_v``, ``W_e`` and ``M_v``."""
        # Stage 1: transform M_v, weight W_e with it, reduce over dim 1.
        vertex_msg = F.dropout(F.relu(self.fc_v(M_v)), p=0.5, training=self.training)
        edge_state = (W_e * vertex_msg).sum(dim=1)

        # Stage 2: transform the reduced tensor, tile it X_v.size(1) times along a new last axis,
        # weight by X_v and reduce over dim 1 again.
        edge_state = F.dropout(F.relu(self.fc_w(edge_state)), p=0.5, training=self.training)
        tiled = edge_state.unsqueeze(2).repeat(1, 1, X_v.size(1))
        pooled = (tiled * X_v).sum(dim=1)

        return self.batch_norm(pooled)

class HMPNN(nn.Module):
    """Stack of two ``HMPNNLayer`` modules followed by a sigmoid."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.layer1 = HMPNNLayer(input_dim, hidden_dim)
        self.layer2 = HMPNNLayer(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X_v, W_e, M_v):
        """Run both layers (the first layer's output replaces ``M_v`` in the second) and squash."""
        hidden = self.layer1(X_v, W_e, M_v)
        logits = self.layer2(X_v, W_e, hidden)
        return self.sigmoid(logits)

# Example usage
input_dim = 64  # Adjust based on your input data
hidden_dim = 32
output_dim = 1  # Assuming binary classification (re-enabled: the constructor call below needs it)
model = HMPNN(input_dim, hidden_dim, output_dim)
# Disabled: the HMPNN defined in this cell takes exactly (input_dim, hidden_dim, output_dim), and
# vembdim / eembdim / vmsgdim / emsgdim are not defined anywhere in this cell.
# model=HMPNN(num_classes, vembdim, eembdim, vmsgdim, emsgdim)



import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import BatchNorm1d, Dropout

class HMPNNLayer(nn.Module):
    """Linear -> BatchNorm1d -> ReLU -> Dropout block."""

    def __init__(self, in_features, out_features, dropout_rate):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.batch_norm = BatchNorm1d(out_features)
        self.dropout = Dropout(p=dropout_rate)

    def forward(self, x):
        """Apply the four stages in order and return the result."""
        normed = self.batch_norm(self.linear(x))
        return self.dropout(F.relu(normed))

class HMPNN(nn.Module):
    """Single round of vertex -> hyperedge -> vertex message passing with a residual sigmoid update."""

    def __init__(self, input_size, hidden_size, output_size, dropout_rate_v, dropout_rate_e):
        super().__init__()
        self.fv = HMPNNLayer(input_size, hidden_size, dropout_rate_v)
        self.fw = HMPNNLayer(hidden_size, output_size, dropout_rate_e)

    def forward(self, vemb, H):
        """Return ``sigmoid(vemb + H.T @ fw(dropout(H) @ fv(vemb)))``.

        Dropout with p=0.7 is applied to ``H`` only for the first product and only
        in training mode; the second product uses the untouched ``H``.
        """
        vertex_msg = self.fv(vemb)
        dropped_H = F.dropout(H, p=0.7, training=self.training)
        edge_msg = self.fw(torch.matmul(dropped_H, vertex_msg))
        updated = vemb + torch.matmul(H.T, edge_msg)
        return F.sigmoid(updated)

# Example Usage:
input_size = 64  # Input feature size for vertices
hidden_size = 32  # Hidden layer size
output_size = 1  # Output size (for binary classification, for example)
dropout_rate_v = 0.5  # Dropout rate for vertices
dropout_rate_e = 0.5  # Dropout rate for hyperedges
batch_size = 8  # Rows in the dummy batch; it was not defined in this cell, so the dummy data raised NameError

# Instantiate the HMPNN model
hmpnn_model = HMPNN(input_size, hidden_size, output_size, dropout_rate_v, dropout_rate_e)

# Dummy data: random vertex features and a random square matrix standing in for the incidence structure
vemb = torch.randn((batch_size, input_size))
hyperedge_adjacency_matrix = torch.randn((batch_size, batch_size))

# Forward pass
output = hmpnn_model(vemb, hyperedge_adjacency_matrix)




In [None]:
# @title pyt-team/TopoModelX data
!pip install torch_geometric

import torch
import torch_geometric.datasets as geom_datasets
from sklearn.metrics import accuracy_score
import numpy as np


# Fix the RNG seeds so runs are repeatable.
torch.manual_seed(0)
np.random.seed(0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = geom_datasets.Planetoid(root="tmp/", name="cora")[0]

# Sparse matrix with a 1 at every (row, col) pair listed in edge_index.
incidence_1 = torch.sparse_coo_tensor(dataset["edge_index"], torch.ones(dataset["edge_index"].shape[1]), dtype=torch.long)
# Keep the incidence matrix on the same device as the features and the model; its indices are
# used to index feature tensors, which fails when the two live on different devices.
incidence_1 = incidence_1.to(device)
dataset = dataset.to(device)

x_0s = dataset["x"]
y = dataset["y"]
# print(incidence_1.shape, x_0s.shape, y.shape) # [2708, 2708], [2708, 1433], [2708]


In [None]:
# @title pyt-team/TopoModelX hmpnn
# https://arxiv.org/pdf/2203.16995.pdf
# https://github.com/pyt-team/TopoModelX/tree/main/topomodelx/nn/hypergraph
# https://github.com/pyt-team/TopoModelX/blob/main/topomodelx/nn/hypergraph/hmpnn.py
# https://github.com/pyt-team/TopoModelX/blob/main/tutorials/hypergraph/hmpnn_train.ipynb
import torch
from torch import nn
from torch.nn import functional as F

# https://github.com/pyt-team/TopoModelX/blob/main/topomodelx/utils/scatter.py

def broadcast(src, other, dim):
    """Expand ``src`` to the shape of ``other``.

    A 1-D ``src`` is first given ``dim`` leading singleton axes (negative ``dim``
    counts from the end of ``other``); trailing singleton axes are then appended
    until the ranks match, and the result is expanded to ``other.size()``.
    """
    axis = dim if dim >= 0 else other.dim() + dim
    if src.dim() == 1 and axis > 0:
        src = src[(None,) * axis]  # prepend `axis` singleton dimensions
    missing = other.dim() - src.dim()
    if missing > 0:
        src = src[(Ellipsis,) + (None,) * missing]  # append trailing singleton dimensions
    return src.expand(other.size())


def scatter_sum(src, index, dim = -1, out = None, dim_size = None,):
    """Sum the entries of ``src`` into ``out`` at the positions given by ``index`` along ``dim``.

    When ``out`` is None a zero tensor is allocated with ``src``'s shape, except that
    axis ``dim`` has length ``dim_size`` if given, 0 if ``index`` is empty, and
    ``index.max() + 1`` otherwise. A supplied ``out`` is updated in place and returned.
    """
    index = broadcast(index, src, dim)
    if out is not None:
        return out.scatter_add_(dim, index, src)

    shape = list(src.size())
    if dim_size is not None:
        shape[dim] = dim_size
    elif index.numel() == 0:
        shape[dim] = 0
    else:
        shape[dim] = int(index.max()) + 1
    result = torch.zeros(shape, dtype=src.dtype, device=src.device)
    return result.scatter_add_(dim, index, src)


def scatter_add(src, index, dim = -1, out = None, dim_size = None,):
    """Alias of ``scatter_sum``: forwards every argument unchanged."""
    return scatter_sum(src, index, dim=dim, out=out, dim_size=dim_size)

def scatter_mean(src, index, dim = -1, out = None, dim_size = None,):
    """Average the entries of ``src`` that share an ``index`` value along ``dim``.

    The sums come from ``scatter_sum``; they are divided in place by the number of
    contributions per slot (slots with no contribution divide by 1). Floating-point
    outputs use true division, other dtypes use floor division.
    """
    out = scatter_sum(src, index, dim, out, dim_size)
    dim_size = out.size(dim)

    # Axis of `index` along which to count: `dim` made non-negative, clipped to index's last axis.
    count_dim = dim + src.dim() if dim < 0 else dim
    count_dim = min(count_dim, index.dim() - 1)

    ones = torch.ones(index.size(), dtype=src.dtype, device=src.device)
    count = scatter_sum(ones, index, count_dim, None, dim_size)
    count.masked_fill_(count < 1, 1)  # avoid dividing by zero for untouched slots
    count = broadcast(count, out, dim)

    if out.is_floating_point():
        out.true_divide_(count)
    else:
        out.div_(count, rounding_mode="floor")
    return out


# Lookup table from aggregation name to implementation ("add" is an alias of "sum").
SCATTER_DICT = dict(sum=scatter_sum, mean=scatter_mean, add=scatter_sum)


def scatter(scatter: str):
    """Return the scatter function registered under the name ``scatter``.

    Raises ValueError for anything that is not a string key of ``SCATTER_DICT``
    (callables are rejected too, despite the wording of the message).
    """
    if not isinstance(scatter, str) or scatter not in SCATTER_DICT:
        raise ValueError(f"scatter must be callable or string: {list(SCATTER_DICT.keys())}")
    return SCATTER_DICT[scatter]


import math

# https://github.com/pyt-team/TopoModelX/blob/main/topomodelx/base/message_passing.py
class MessagePassing(torch.nn.Module):
    """Base class for sparse message passing: message -> (optional attention) -> aggregate.

    aggr_func: key understood by ``scatter`` ("sum", "mean" or "add").
    att: whether ``forward`` multiplies the neighborhood values by attention scores.
    initialization: "uniform", "xavier_uniform" or "xavier_normal".
    initialization_gain: gain passed to the xavier initialisers.

    ``self.weight`` (and ``self.att_weight`` when ``att`` is set) are not created here;
    ``reset_parameters`` and ``attention`` read them, so a subclass has to provide them.
    """

    def __init__(self, aggr_func = "sum", att = False, initialization = "xavier_uniform", initialization_gain = 1.414,):
        super().__init__()
        self.aggr_func = aggr_func
        self.att = att
        self.initialization = initialization
        self.initialization_gain = initialization_gain

    def reset_parameters(self):
        """Re-initialise ``weight`` (and ``att_weight`` if attention is on) with the configured scheme."""
        scheme = self.initialization
        if scheme == "uniform":
            if self.weight is not None:
                bound = 1.0 / math.sqrt(self.weight.size(1))
                self.weight.data.uniform_(-bound, bound)
            if self.att:
                bound = 1.0 / math.sqrt(self.att_weight.size(1))
                self.att_weight.data.uniform_(-bound, bound)
        elif scheme in ("xavier_uniform", "xavier_normal"):
            if scheme == "xavier_uniform":
                init_fn = torch.nn.init.xavier_uniform_
            else:
                init_fn = torch.nn.init.xavier_normal_
            if self.weight is not None:
                init_fn(self.weight, gain=self.initialization_gain)
            if self.att:
                # the attention vector is viewed as a column so the 2-D initialiser applies
                init_fn(self.att_weight.view(-1, 1), gain=self.initialization_gain)
        else:
            raise ValueError(f"Initialization {self.initialization} not recognized.")

    def message(self, x_source, x_target=None):
        """Default message: the source features, unchanged."""
        return x_source

    def attention(self, x_source, x_target=None):
        """Return ELU([x_source[j] || x_target[i]] @ att_weight) per stored entry.

        Uses the indices cached by ``forward``; the target side falls back to
        ``x_source`` when ``x_target`` is None.
        """
        target_features = x_source if x_target is None else x_target
        per_message = torch.cat(
            [x_source[self.source_index_j], target_features[self.target_index_i]],
            dim=1,
        )
        return torch.nn.functional.elu(torch.matmul(per_message, self.att_weight))

    def aggregate(self, x_message):
        """Reduce the per-entry messages onto ``target_index_i`` along dim 0."""
        reducer = scatter(self.aggr_func)
        return reducer(x_message, self.target_index_i, 0)

    def forward(self, x_source, neighborhood, x_target=None):
        """Send messages along the stored entries of the sparse ``neighborhood`` and aggregate them."""
        neighborhood = neighborhood.coalesce()
        # row indices are the targets, column indices the sources
        self.target_index_i, self.source_index_j = neighborhood.indices()
        weights = neighborhood.values()

        x_message = self.message(x_source=x_source, x_target=x_target)
        x_message = x_message.index_select(-2, self.source_index_j)

        if self.att:
            scores = self.attention(x_source=x_source, x_target=x_target)
            weights = torch.multiply(weights, scores)

        return self.aggregate(weights.view(-1, 1) * x_message)



class _AdjacencyDropoutMixin:
    def apply_dropout(self, neighborhood, dropout_rate):
        neighborhood = neighborhood.coalesce()
        return torch.sparse_coo_tensor(neighborhood.indices(), F.dropout(neighborhood.values().to(torch.float), dropout_rate), neighborhood.size(),).coalesce()


class _NodeToHyperedgeMessenger(MessagePassing, _AdjacencyDropoutMixin):
    """Computes per-row messages with ``messaging_func`` and aggregates them by column index."""

    def __init__(self, messaging_func, adjacency_dropout = 0.7, aggr_func = "sum",):
        super().__init__(aggr_func)
        self.messaging_func = messaging_func
        self.adjacency_dropout = adjacency_dropout

    def message(self, x_source):
        """Apply the messaging function to the source features."""
        return self.messaging_func(x_source)

    def forward(self, x_source, neighborhood):
        """Return ``(aggregated messages, raw messages)``.

        Only the indices of the dropped-out neighborhood are read here; its values
        are not used when gathering or aggregating.
        """
        dropped = self.apply_dropout(neighborhood, self.adjacency_dropout)
        row_idx, col_idx = dropped.indices()
        self.target_index_i = col_idx  # aggregate() scatters onto the column index
        x_message = self.message(x_source)
        gathered = x_message.index_select(-2, row_idx)
        return self.aggregate(gathered), x_message


class _HyperedgeToNodeMessenger(MessagePassing, _AdjacencyDropoutMixin):
    """Builds messages from ``x_source`` plus aggregated node messages and aggregates them by row index."""

    def __init__(self, messaging_func, adjacency_dropout = 0.7, aggr_func = "sum",):
        super().__init__(aggr_func)
        self.messaging_func = messaging_func
        self.adjacency_dropout = adjacency_dropout

    def message(self, x_source, neighborhood, node_messages):
        """Aggregate ``node_messages`` by column index and combine them with ``x_source``."""
        dropped = self.apply_dropout(neighborhood, self.adjacency_dropout)
        row_idx, col_idx = dropped.indices()
        reducer = scatter(self.aggr_func)
        node_messages_aggregated = reducer(node_messages.index_select(-2, row_idx), col_idx, 0)
        return self.messaging_func(x_source, node_messages_aggregated)

    def forward(self, x_source, neighborhood, node_messages):
        """Return the messages aggregated onto the row index.

        ``apply_dropout`` is called a second time here, independently of the call made
        inside ``message``; in both places only the resulting indices are read.
        """
        x_message = self.message(x_source, neighborhood, node_messages)
        dropped = self.apply_dropout(neighborhood, self.adjacency_dropout)
        self.target_index_i, col_idx = dropped.indices()
        return self.aggregate(x_message.index_select(-2, col_idx))

class _DefaultHyperedgeToNodeMessagingFunc(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.linear = nn.Linear(2 * in_channels, in_channels)
    def forward(self, x_1, m_0): return F.sigmoid(self.linear(torch.cat((x_1, m_0), dim=1)))

class _DefaultUpdatingFunc(nn.Module):
    def __init__(self, in_channels): super().__init__()
    def forward(self, x, m): return F.sigmoid(x + m)


class HMPNNLayer(nn.Module):
    """One round of message passing between level-0 (x_0) and level-1 (x_1) features.

    in_channels: feature width of both levels.
    node_to_hyperedge_messaging_func: module applied to x_0 to build messages
        (default: Linear followed by Sigmoid).
    hyperedge_to_node_messaging_func: module combining x_1 with aggregated messages
        (default: ``_DefaultHyperedgeToNodeMessagingFunc``).
    adjacency_dropout / aggr_func: forwarded to both messengers.
    updating_dropout: probability handed to the Bernoulli distribution used by
        ``apply_regular_dropout``.
    updating_func: combines normalised features with aggregated messages
        (default: ``_DefaultUpdatingFunc``).
    """

    def __init__(self, in_channels, node_to_hyperedge_messaging_func=None, hyperedge_to_node_messaging_func=None, adjacency_dropout = 0.7, aggr_func = "sum", updating_dropout = 0.5, updating_func=None,):
        super().__init__()
        node_fn = node_to_hyperedge_messaging_func
        if node_fn is None:
            node_fn = nn.Sequential(nn.Linear(in_channels, in_channels), nn.Sigmoid())
        self.node_to_hyperedge_messenger = _NodeToHyperedgeMessenger(node_fn, adjacency_dropout, aggr_func)

        edge_fn = hyperedge_to_node_messaging_func
        if edge_fn is None:
            edge_fn = _DefaultHyperedgeToNodeMessagingFunc(in_channels)
        self.hyperedge_to_node_messenger = _HyperedgeToNodeMessenger(edge_fn, adjacency_dropout, aggr_func)

        self.node_batchnorm = nn.BatchNorm1d(in_channels)
        self.hyperedge_batchnorm = nn.BatchNorm1d(in_channels)
        self.dropout = torch.distributions.Bernoulli(updating_dropout)

        self.updating_func = _DefaultUpdatingFunc(in_channels) if updating_func is None else updating_func

    def apply_regular_dropout(self, x):
        """In training mode, scale ``x`` in place by a sampled Bernoulli mask; otherwise return it untouched.

        Each row's factor is ``mask * (2 * d - mask.sum(dim=1)) / d`` with ``d = x.size(0)``.
        """
        if not self.training:
            return x
        mask = self.dropout.sample(x.shape).to(dtype=torch.float, device=x.device)
        n_rows = x.size(0)
        row_factor = (2 * n_rows - mask.sum(dim=1)).view(-1, 1)
        x *= mask * row_factor / n_rows
        return x

    def forward(self, x_0, x_1, incidence_1):
        """Return the updated ``(x_0, x_1)`` pair."""
        node_messages_aggregated, node_messages = self.node_to_hyperedge_messenger(x_0, incidence_1)
        hyperedge_messages_aggregated = self.hyperedge_to_node_messenger(x_1, incidence_1, node_messages)

        # Each level: batch-norm -> dropout -> combine with the messages arriving from the other level.
        new_x_0 = self.updating_func(
            self.apply_regular_dropout(self.node_batchnorm(x_0)),
            hyperedge_messages_aggregated,
        )
        new_x_1 = self.updating_func(
            self.apply_regular_dropout(self.hyperedge_batchnorm(x_1)),
            node_messages_aggregated,
        )
        return new_x_0, new_x_1


class HMPNN(torch.nn.Module):
    """Linear encoders for both feature levels followed by ``n_layers`` HMPNN layers."""

    def __init__(self, in_channels, hidden_channels, n_layers=2, adjacency_dropout_rate=0.7, regular_dropout_rate=0.5,):
        super().__init__()
        self.linear_node = torch.nn.Linear(in_channels, hidden_channels)
        self.linear_edge = torch.nn.Linear(in_channels, hidden_channels)
        stack = []
        for _ in range(n_layers):
            stack.append(
                HMPNNLayer(
                    hidden_channels,
                    adjacency_dropout=adjacency_dropout_rate,
                    updating_dropout=regular_dropout_rate,
                )
            )
        self.layers = torch.nn.ModuleList(stack)

    def forward(self, x_0, x_1, incidence_1):
        """Encode both inputs, run every layer in order and return the final ``(x_0, x_1)``."""
        x_0, x_1 = self.linear_node(x_0), self.linear_edge(x_1)
        for layer in self.layers:
            x_0, x_1 = layer(x_0, x_1, incidence_1)
        return x_0, x_1


class Network(torch.nn.Module):
    """HMPNN backbone plus a linear read-out.

    task_level: "graph" max-pools the x_0 output over dim 0 before the read-out;
        any other value applies the read-out to every row.
    kwargs: forwarded to ``HMPNN``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, task_level="graph", **kwargs): # task_level: "graph" or "node".
        super().__init__()
        self.base_model = HMPNN(in_channels=in_channels, hidden_channels=hidden_channels, **kwargs)
        self.linear = torch.nn.Linear(hidden_channels, out_channels)
        self.out_pool = task_level == "graph"

    # def forward(self, x_0, x_1, incidence_1):
    def forward(self, incidence_1, x_0):
        """Run the backbone with all-zero x_1 features (same shape as ``x_0``) and apply the read-out."""
        x_1 = torch.zeros_like(x_0)
        x_0, x_1 = self.base_model(x_0, x_1, incidence_1)
        if self.out_pool is True:
            pooled = x_0.max(dim=0).values
        else:
            pooled = x_0
        return self.linear(pooled)

# Base model hyperparameters
# in_channels = x_0s.shape[1]
in_channels = X.size(1)  # input feature width, read from X
hidden_channels = 128
n_layers = 1

# Readout hyperparameters
out_channels = 7 #torch.unique(y).shape[0]
if out_channels == 1:
    task_level = "graph"
else:
    task_level = "node"

# Build the network, then move it to the selected device.
model = Network(
    in_channels=in_channels,
    hidden_channels=hidden_channels,
    out_channels=out_channels,
    n_layers=n_layers,
    task_level=task_level,
)
model = model.to(device)
# print(in_channels, hidden_channels, out_channels, n_layers, task_level) # 1433, 128, 7, 1, node



In [None]:
# @title pyt-team/TopoModelX run

# Optimise the network with Adam on the cross-entropy of the training rows.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

train_mask = dataset["train_mask"]
val_mask = dataset["val_mask"]
test_mask = dataset["test_mask"]

torch.manual_seed(0)


initial_x_1 = torch.zeros_like(x_0s)  # only needed by the older 3-argument call kept in comments below
for epoch in range(50):
    # ---- training step ----
    model.train()
    optimizer.zero_grad()
    # y_hat = model(x_0s, initial_x_1, incidence_1)
    y_hat = model(incidence_1, x_0s)
    loss = loss_fn(y_hat[train_mask], y[train_mask])
    loss.backward()
    optimizer.step()

    train_loss = loss.item()
    y_pred = y_hat.argmax(dim=-1)
    train_acc = accuracy_score(y[train_mask].cpu(), y_pred[train_mask].cpu())

    # ---- evaluation: no gradients needed, so skip building the autograd graph ----
    model.eval()
    with torch.no_grad():
        # y_hat = model(x_0s, initial_x_1, incidence_1)
        y_hat = model(incidence_1, x_0s)
        val_loss = loss_fn(y_hat[val_mask], y[val_mask]).item()
        test_loss = loss_fn(y_hat[test_mask], y[test_mask]).item()
    y_pred = y_hat.argmax(dim=-1)  # one argmax serves both the validation and the test split
    # val_acc = accuracy_score(y[val_mask].cpu(), y_pred[val_mask].cpu())
    test_acc = accuracy_score(y[test_mask].cpu(), y_pred[test_mask].cpu())
    print(f"{epoch + 1} train loss: {train_loss:.4f} test loss: {test_loss:.4f} test acc: {test_acc:.2f}") # val loss: {val_loss:.4f} val acc: {val_acc:.2f}

# 26 train loss: 0.0012 test loss: 1.3559 test acc: 0.64
# 28 train loss: 0.0000 test loss: 2.6673 test acc: 0.61
# 23 train loss: 0.0000 test loss: 3.4005 test acc: 0.60



In [None]:
# @title copy attn mha
import torch
import torch.nn as nn
import torch.nn.functional as F
# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf



@torch.no_grad()  # called form: the bare `@torch.no_grad` spelling is only accepted by newer PyTorch releases
def hypergraph_laplacian(H):
    """Return ``D_v^-1/2 @ H @ B @ D_e^-1 @ H.T @ D_v^-1/2`` for a sparse incidence matrix ``H``.

    H: sparse [num_verts, num_edges] matrix.
    D_v / D_e are the diagonal row-sum / column-sum matrices of ``H`` and ``B`` is
    the identity of size num_edges.

    The whole computation runs under ``torch.no_grad()``, so the result carries no
    gradient back to ``H``.
    """
    N,M = H.shape # num_verts, num_edges
    d_V = H.sum(1).to_dense() # node deg
    d_E = H.sum(0).to_dense() # edge deg
    D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N)) # torch.diag(d_V**-0.5)
    D_e_inv = torch.sparse.spdiags(d_E**-1,torch.tensor(0),(M,M)) # torch.diag(d_E**-1)
    B = torch.sparse.spdiags(torch.ones(M),torch.tensor(0),(M,M)) # torch.eye(M) # B is id, dim n_edges
    return D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt # Laplacian

# Hypergraph Convolution and Hypergraph Attention https://arxiv.org/pdf/1901.08150.pdf
class HypergraphAttention(nn.Module): # https://github.com/dmlc/dgl/blob/master/examples/sparse/hypergraphatt.py
    """Work-in-progress hypergraph attention layer mixing two attention variants.

    NOTE(review): ``forward`` cannot run as written. It reads ``V``, ``self.d_model``,
    ``sim`` and ``vvec``, none of which is assigned on any live (non-commented) line
    of this class.
    """
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.P = nn.Linear(in_dim, out_dim)
        self.a = nn.Linear(2 * out_dim, 1) # og
        # These two projections are replaced by the d_model -> d_model versions assigned further down.
        self.q = nn.Linear(in_dim, out_dim)#, bias=False)
        self.k = nn.Linear(in_dim, out_dim)#, bias=False)
        self.drop = nn.Dropout(0.5)
        # self.lin = nn.Linear(d_model, 1)

        # Multi-head attention pieces; d_model is only a local name here (no self.d_model is stored).
        d_model=in_dim
        n_heads=2
        self.n_heads = n_heads
        self.head_dim = d_model // n_heads
        self.q = nn.Linear(d_model, d_model, bias=False)
        self.k = nn.Linear(d_model, d_model, bias=False)
        # self.v = nn.Linear(d_model, d_model, bias=False)
        # self.lin = nn.Linear(d_model, d_model)
        self.lin = nn.Linear(d_model, out_dim)
        # self.drop = nn.Dropout(dropout)
        # self.scale = torch.sqrt(torch.tensor((self.head_dim,), dtype=torch.float, device=device))
        # Plain tensor attribute (not a registered buffer), created on the default device.
        self.scale = torch.sqrt(torch.tensor((self.head_dim,), dtype=torch.float))

    def forward(self, H, vemb, eemb): # H [2708, 2708] n_vert,n_edge ; X n_vert,vembdim
        # Variant 1 (disabled): additive attention over concatenated projected embeddings.
        # vemb=self.drop(vemb)
        # vvec = self.P(vemb) # emb verts [n_vert,out_dim]
        # sim = self.a(torch.cat([vvec[H.indices()[0]], vvec[H.indices()[1]]], 1)) #  vertemb,edgeemb(=vertemb)

        # Variant 2 (disabled): dot product between per-entry query and key vectors.
        # vvec, evec = self.q(vemb), self.k(eemb)
        # Q, K = vvec[H.indices()[0]], evec[H.indices()[1]]
        # sim= Q.unsqueeze(1) @ K.unsqueeze(2)


        # Variant 3 (live): multi-head scaled dot-product attention.
        batch_size = vemb.shape[0]
        Q = self.q(vemb).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        K = self.k(eemb).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        # V = self.v(value).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        attn = Q @ K.transpose(2, 3) / self.scale # attn = torch.matmul(Q, K.transpose(2, 3)) / self.scale
        # if mask is not None:
        #     attn = attn.masked_fill(mask == 0, -1e10)
        attention = torch.softmax(attn, dim=-1)
        # NOTE(review): V is undefined here - its only assignment is the commented-out line above.
        x = self.drop(attention) @ V # x = torch.matmul(self.drop(attention), V)
        # NOTE(review): self.d_model is never set in __init__.
        x = x.transpose(1, 2).reshape(batch_size, -1, self.d_model)
        x = self.lin(x)


        # sim = F.leaky_relu(sim, 0.2).squeeze(1) # og[13264]
        # NOTE(review): sim and vvec are only assigned in the disabled variants above; x is never used below.
        sim = F.relu(sim.squeeze())
        # Put the scores at the stored positions of H, then softmax over dim 1.
        H_att = torch.sparse_coo_tensor(indices=H.indices(), values=sim,).coalesce()
        H_att = torch.sparse.softmax(H_att,1) # [2708, 2708]
        return hypergraph_laplacian(H_att) @ vvec # [2708, 2708], [2708, hidden_size/out_dim]

class Net(nn.Module):
    """Two ``HypergraphAttention`` layers with a ReLU in between."""

    def __init__(self, in_dim, out_dim, d_model=16):
        super().__init__()
        self.layer1 = HypergraphAttention(in_dim, d_model)
        self.layer2 = HypergraphAttention(d_model, out_dim)

    def forward(self, H, X):
        """Each layer receives its input twice: as the vertex and as the edge embedding."""
        hidden = F.relu(self.layer1(H, X, X)) # [n_vert, hidden_size]; og:elu
        return self.layer2(H, hidden, hidden) # [n_vert, out_dim]

# Build the hypergraph-attention classifier: input width taken from X, one output per class.
model = Net(X.shape[1], num_classes) # hg att

# ogatt relu 200 test loss: 0.04677, Val acc: 78.40, Test acc: 79.60, 200 test loss: 0.04678, Val acc: 78.00, Test acc: 79.20 # 200 test loss: 0.03514, Val acc: 78.80, Test acc: 78.20
# ogatt relu drop0.5 200 test loss: 0.12809, Val acc: 77.60, Test acc: 79.70 # 200 test loss: 0.12146, Val acc: 78.80, Test acc: 80.50 # 200 test loss: 0.15017, Val acc: 78.20, Test acc: 80.90

# Q@V 200 test loss: 0.03010, Val acc: 77.20, Test acc: 79.40 # 200 test loss: 0.03118, Val acc: 77.80, Test acc: 78.00 # 200 test loss: 0.03437, Val acc: 76.60, Test acc: 77.70
# Q@V biasT 200 test loss: 0.04530, Val acc: 78.20, Test acc: 78.70 # 200 test loss: 0.04741, Val acc: 76.00, Test acc: 77.20
# Q@V biasT drop0.5 200 test loss: 0.13624, Val acc: 77.40, Test acc: 80.30 # 200 test loss: 0.10379, Val acc: 78.00, Test acc: 80.90 # 200 test loss: 0.09133, Val acc: 78.00, Test acc: 78.60









In [None]:
# @title HMPNN me H attn
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert ebd) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)


class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention with an output projection to ``out_dim``.

    d_model: width of query/key/value inputs; split evenly across ``n_heads``.
    n_heads: number of heads (``head_dim = d_model // n_heads``).
    out_dim: width of the final linear projection.
    dropout: dropout probability applied to the attention weights.
    """
    # def __init__(self, d_model, n_heads, dropout=0):
    def __init__(self, d_model, n_heads, out_dim=1, dropout=0):
        super().__init__()
        self.d_model = d_model
        self.n_heads = n_heads
        self.head_dim = d_model // n_heads
        self.q = nn.Linear(d_model, d_model, bias=False)
        self.k = nn.Linear(d_model, d_model, bias=False)
        self.v = nn.Linear(d_model, d_model, bias=False)
        # self.lin = nn.Linear(d_model, d_model)
        self.lin = nn.Linear(d_model, out_dim)
        self.drop = nn.Dropout(dropout)
        # Registered as a non-persistent buffer so it follows .to()/.cuda() with the module and
        # stays out of state_dict; previously it was pinned to a module-global `device`.
        scale = torch.sqrt(torch.tensor((self.head_dim,), dtype=torch.float))
        self.register_buffer("scale", scale, persistent=False)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key``/``value`` and project the result.

        Inputs are reshaped to [batch, seq, n_heads, head_dim], with batch taken from
        ``query.shape[0]``. Positions where ``mask == 0`` are filled with -1e10 before
        the softmax, which effectively removes them.
        Returns a tensor of shape [batch, query_len, out_dim].
        """
        batch_size = query.shape[0]
        Q = self.q(query).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        K = self.k(key).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        V = self.v(value).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        attn = Q @ K.transpose(2, 3) / self.scale # attn = torch.matmul(Q, K.transpose(2, 3)) / self.scale
        if mask is not None:
            attn = attn.masked_fill(mask == 0, -1e10)
        attention = torch.softmax(attn, dim=-1)
        x = self.drop(attention) @ V # x = torch.matmul(self.drop(attention), V)
        # merge the heads back into one d_model-wide vector per query position
        x = x.transpose(1, 2).reshape(batch_size, -1, self.d_model)
        x = self.lin(x)
        return x#, attention

from torch.nn.utils.rnn import pad_sequence
def get_idx(H): # get index of non zero entries for each row
    """Return ``(sidx, mask)`` for a sparse matrix ``H``.

    sidx: [n_rows, max_row_nnz] column indices of the stored entries of each row,
        right-padded with -1.
    mask: boolean tensor of the same shape, True exactly at the padded positions.
    """
    csr = H.to_sparse_csr()
    row_lengths = torch.diff(csr.crow_indices()).tolist()  # stored entries per row
    per_row = torch.split(csr.col_indices(), row_lengths) # https://stackoverflow.com/a/44536294/13359815
    sidx = pad_sequence(per_row, batch_first=True, padding_value=-1)
    return sidx, sidx < 0 # [n_rows, num_idx]

class ff(nn.Module):
    """Dropout(0.5) -> Linear(in_dim, out_dim) -> Sigmoid."""
    # def __init__(self, in_dim, hid_dim, out_dim):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        stages = [
            nn.Dropout(p=0.5),
            nn.Linear(in_dim, out_dim),
            nn.Sigmoid(), # ReLU Sigmoid
        ]
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the three stages to ``x``."""
        return self.lin(x)

class MsgPass(nn.Module):
    """Work-in-progress message-passing layer; three alternative ``forward`` bodies are stacked below.

    NOTE(review): this class does not parse as written - the first ``def forward`` line has
    no body. If that line were removed, the last ``forward`` definition would be the one in
    effect, since later definitions in a class body replace earlier ones.
    """
    def __init__(self, d_model):
        super(MsgPass, self).__init__()
        drop=0.
        # All four functions are single-head attention modules that take (query, key, value[, mask]).
        self.fv = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        self.fw = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        self.gv = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        self.gw = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        # self.adjdrop = AdjDropout(0.7) # 0.7 "Adjacency dropout must be applied in neighborhood creation steps of Equations 3 through 5"
        # AdjDropout is not defined in this cell; presumably it comes from another cell - verify.
        self.adjdrop = AdjDropout(0)
        self.drop = nn.Dropout(0.5)


    # NOTE(review): dangling signature with no body (syntax error); superseded by the next line.
    def forward(self, H, X, X_edges): # H [2708, 2708] n_vert,n_edge ; X n_vert,vembdim
    def forward(self, H, vemb, eemb, emsg=None):
        # NOTE(review): the next lines use X, self.P and self.a, none of which exists in this
        # class or signature, and return early so everything after them is unreachable.
        Z = self.P(X) # emb verts [n_vert,out_size]
        sim = self.a(torch.cat([Z[H.indices()[0]], Z[H.indices()[1]]], 1)) #  vertemb,edgeemb(=vertemb)
        sim = F.relu(sim).squeeze(1) # me
        H_att = torch.sparse_coo_tensor(indices=H.indices(), values=sim,).coalesce()
        H_att = torch.sparse.softmax(H_att,1) # [2708, 2708]
        return hypergraph_laplacian(H_att) @ Z # [2708, 2708], [2708, hidden_size/out_size]
        Z = F.relu(Z)



        # Attention-based variant: gather each row's/column's neighbours via get_idx and attend over them.
        # NOTE(review): get_idx marks padded slots with True, while MultiHeadAttention removes the
        # positions where mask == 0 - the two conventions look inverted relative to each other; confirm.
        # NOTE(review): this variant ends without a return statement.
        if emsg==None: emsg=vemb
        ridx, mask = get_idx(H) # [n_rows, num_idx]
        semsg=emsg[ridx] # [n_rows, num_idx, d_model]
        mask=mask.unsqueeze(1).unsqueeze(2) # [n_rows, 1, 1, num_idx]
        vmsg = self.fv(vemb, semsg, semsg, mask) # [n_rows, 1, d_model/1]

        # vmsg = self.drop(vmsg)
        cidx, mask = get_idx(H.T) # [n_cols, num_idx]
        svmsg=vmsg[cidx] # [n_cols, num_idx, d_model]
        mask=mask.unsqueeze(1).unsqueeze(2) # [n_cols, 1, 1, num_idx]
        emsg = self.fw(eemb, svmsg, svmsg, mask) # [n_cols, 1, d_model]

        # emsg = self.drop(emsg)
        ridx, mask = get_idx(H) # [n_rows, num_idx]
        semsg=emsg[ridx] # [n_rows, num_idx, d_model]
        mask=mask.unsqueeze(1).unsqueeze(2) # [n_rows, 1, 1, num_idx]
        vemb1 = self.gv(vemb, semsg, semsg, mask) # [n_rows, 1, d_model]

        # vmsg = self.drop(vmsg)
        cidx, mask = get_idx(H.T) # [n_cols, num_idx]
        svmsg=vmsg[cidx] # [n_cols, num_idx, d_model]
        mask=mask.unsqueeze(1).unsqueeze(2) # [n_cols, 1, 1, num_idx]
        eemb1 = self.gw(eemb, svmsg, svmsg, mask) # [n_cols, 1, d_model]
        eemb1=eemb1.squeeze()
        vemb1=vemb1.squeeze()



    # Degree-normalised variant.
    # NOTE(review): it calls self.fv / self.fw / self.gv / self.gw with a single argument, whereas
    # __init__ above builds them as MultiHeadAttention modules taking (query, key, value).
    def forward(self, H, vemb, eemb, semsg=None):
        N,M = H.shape
        d_V = H.sum(1).to_dense() # node deg
        d_E = H.sum(0).to_dense() # edge deg
        D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N))
        D_e_invsqrt = torch.sparse.spdiags(d_E**-0.5,torch.tensor(0),(M,M))

        # NOTE(review): this overwrites a supplied semsg and leaves None untouched (test looks
        # inverted); semsg is reassigned below before any live line reads it.
        if semsg != None: semsg = vemb

        # vmsg = vemb + self.fv(torch.cat((vemb, semsg), 1))
        vmsg = self.fv(vemb)

        svmsg = D_e_invsqrt @ self.adjdrop(H).T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
        emsg = svmsg + self.fw(torch.cat((eemb, svmsg), 1))
        # emsg = svmsg
        semsg = D_v_invsqrt @ self.adjdrop(H) @ D_e_invsqrt @ emsg

        # vemb1 = semsg + self.gv(torch.cat((vemb, semsg), 1))
        vemb1 = self.gv(semsg)
        # eemb1 = svmsg + self.gw(torch.cat((eemb, svmsg), 1))
        eemb1 = self.gw(svmsg)
        return vemb1, eemb1, semsg

class HMPNN(nn.Module):
    """Encode the vertex features twice (vertex / edge encoder), run two ``MsgPass`` rounds, classify.

    in_dim: input feature width.
    d_model: width of the vertex and edge embeddings.
    out_dim: number of outputs of the final linear classifier.
    """
    def __init__(self, in_dim, d_model, out_dim):
        super().__init__()
        self.venc = nn.Linear(in_dim, d_model, bias=False)
        self.eenc = nn.Linear(in_dim, d_model, bias=False)
        self.msgpass = MsgPass(d_model)
        self.msgpass2 = MsgPass(d_model)

        self.classifier = nn.Linear(d_model, out_dim)

    def forward(self, H, vemb):
        """Return the classifier output computed from the vertex embeddings after both rounds."""
        # Both embeddings start from the same input features, through separate encoders.
        # vemb = self.venc(vemb)
        vemb, eemb = self.venc(vemb), self.eenc(vemb)
        # eemb = torch.zeros(len(elst),self.eembdim)
        # eemb = vemb
        vemb, eemb, emsg = self.msgpass(H, vemb, eemb)
        # vemb, eemb = vemb+vemb1, eemb+eemb1
        # The carried message is passed positionally: MsgPass.forward spells its 4th parameter
        # both `emsg` and `semsg` in this cell, and the former `emsg=` keyword raised TypeError
        # against the `semsg` spelling.
        vemb, eemb, emsg = self.msgpass2(H, vemb, eemb, emsg)
        # vemb, eemb = vemb+vemb1, eemb+eemb1
        # return vemb
        return self.classifier(vemb)


# Dataset dimensions read from the feature matrix.
num_v, vdim = X.shape[0], X.shape[1]
# print("num_v,vembdim",num_v,vembdim) # 2708, 1433
num_classes = 7

# Embedding width 16, one output per class.
model = HMPNN(vdim, 16, num_classes)


In [None]:
# @title HMPNN copy+
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert ebd) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class ff(nn.Module):
    """Messaging function: Dropout(0.5) followed by Linear(in_dim, out_dim), no activation."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # nn.Linear(in_dim, out_dim), nn.Sigmoid() # ReLU GELU Sigmoid Tanh
        stages = [nn.Dropout(p=0.5), nn.Linear(in_dim, out_dim)] #, nn.Sigmoid() # me
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        """Apply dropout then the linear map."""
        return self.lin(x)

class gg(nn.Module):
    """Updating function; currently just a ReLU (``in_dim`` / ``out_dim`` are accepted but unused)."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # Alternatives tried earlier:
        # nn.Linear(in_dim, out_dim), nn.Sigmoid(), #nn.Dropout(p=0.5), # ReLU LeakyReLU ELU GELU Sigmoid Tanh
        # nn.BatchNorm1d(in_dim), nn.Dropout(p=0.5), nn.Linear(in_dim, out_dim), nn.Sigmoid(), # Regular dropout can follow a batch normalization right before updating functions in Equations 3 and 5, as part of the corresponding g functions.
        # nn.Linear(in_dim, out_dim), nn.BatchNorm1d(in_dim), nn.Dropout(p=0.5), nn.Sigmoid(), # Graph design space
        # nn.Dropout(p=0.5), nn.Linear(in_dim, out_dim), nn.Sigmoid() #
        # nn.Sigmoid()
        activation = nn.ReLU()
        self.lin = nn.Sequential(activation)

    def forward(self, x):
        """Apply the activation stack to ``x``."""
        return self.lin(x)

class MsgPass(nn.Module):
    """One hypergraph message-passing layer with degree-normalised aggregation.

    Vert msg  = fv(vert emb)
    Edge msg  = agg(vert msgs) + fw(edge emb, agg(vert msgs))
    Vert emb1 = gv(agg(edge msgs))
    Edge emb1 = gw(agg(vert msgs))

    Args:
        d_model: width shared by vertex/hyperedge embeddings and messages.
        order: layer position. ``0`` (which the default ``False`` also equals)
            replaces ``fv`` with the identity; ``-1`` replaces ``gv`` and
            ``gw`` with the identity.
    """
    def __init__(self, d_model, order=False):
        super(MsgPass, self).__init__()
        self.fv = ff(1*d_model, d_model)
        self.fw = ff(2*d_model, d_model)
        self.gv = gg(1*d_model, d_model)
        self.gw = gg(1*d_model, d_model)
        if order==0: # first MsgPass layer: vertex embeddings are sent as messages unchanged
            self.fv = nn.Sequential()
        if order==-1: # last MsgPass layer: aggregated messages are returned without an update function
            self.gv = nn.Sequential()
            self.gw = nn.Sequential()
        self.adjdrop = AdjDropout(0.7) # 0.7 "Adjacency dropout must be applied in neighborhood creation steps of Equations 3 through 5"
        self.order=order

    def forward(self, H, vemb, eemb, semsg=None):
        """Run one round of vertex -> hyperedge -> vertex message passing.

        Args:
            H: incidence matrix; ``.sum(dim).to_dense()`` is called on it, so
                presumably a sparse [num_vert, num_edge] tensor (confirm with caller).
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per hyperedge.
            semsg: accepted for call compatibility only. The previous code
                rebound it to ``vemb`` when given but never read it before
                overwriting it, so that dead branch has been removed.

        Returns:
            ``(vemb1, eemb1, semsg)``: updated vertex embeddings, updated
            hyperedge embeddings, and the aggregated hyperedge messages per vertex.
        """
        N,M = H.shape
        d_V = H.sum(1).to_dense() # node deg
        d_E = H.sum(0).to_dense() # edge deg
        # Degrees come from the un-dropped H; a zero degree would give inf here.
        D_v_invsqrt = torch.sparse.spdiags(d_V**-0.5,torch.tensor(0),(N,N))
        D_e_invsqrt = torch.sparse.spdiags(d_E**-0.5,torch.tensor(0),(M,M))

        vmsg = self.fv(vemb)

        # Each direction draws its own adjacency-dropout mask.
        svmsg = D_e_invsqrt @ self.adjdrop(H).T @ D_v_invsqrt @ vmsg # [num_edge, d_model]
        emsg = svmsg + self.fw(torch.cat((eemb, svmsg), 1)) # residual around fw
        semsg = D_v_invsqrt @ self.adjdrop(H) @ D_e_invsqrt @ emsg

        vemb1 = self.gv(semsg)
        eemb1 = self.gw(svmsg)
        return vemb1, eemb1, semsg


class HMPNN(nn.Module):
    """Two stacked MsgPass layers on top of linear vertex/hyperedge encoders.

    ``out_dim`` is accepted but not used by this class: ``forward`` returns
    the vertex embeddings produced by the second message-passing layer.
    """

    def __init__(self, in_dim, d_model, out_dim):
        super().__init__()
        self.venc = nn.Linear(in_dim, d_model)
        self.eenc = nn.Linear(in_dim, d_model)
        self.msgpass = MsgPass(d_model, order=0)    # first layer
        self.msgpass2 = MsgPass(d_model, order=-1)  # last layer
        self.drop = nn.Dropout(0.5)

    def forward(self, H, X):
        """Encode ``X`` twice (vertex and hyperedge view) and pass messages over ``H``."""
        # Vertices and hyperedges both start from the same features, each with
        # its own dropout draw (vertex first, then hyperedge) and its own encoder.
        vert_in = self.drop(X)
        edge_in = self.drop(X)
        vert = self.venc(vert_in)
        edge = self.eenc(edge_in)

        vert, edge, agg = self.msgpass(H, vert, edge)
        vert, edge, agg = self.msgpass2(H, vert, edge, semsg=agg)
        return vert


num_v, in_dim = X.shape  # 2708, 1433
num_classes = 7
# in_dim is X.shape[1]; 16 is passed as d_model.
model = HMPNN(in_dim, 16, num_classes)



In [None]:
# @title HMPNN me H
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
# Vert msg = fv(vert ebd) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class MsgPass(nn.Module):
    """One HMPNN message-passing layer using plain sum aggregation over H.

    Vert msg  = sigmoid(fv(vert emb))
    Edge msg  = sigmoid(fw(edge emb, Sum Vert msgs))
    Vert emb1 = gv(vert emb, Sum edge msgs)
    Edge emb1 = gw(edge emb, Sum Vert msgs)

    Args:
        vembdim: width of vertex embeddings (input and output of ``gv``).
        eembdim: width of hyperedge embeddings (input and output of ``gw``).
        vmsgdim: width of vertex messages.
        emsgdim: width of hyperedge messages.
    """
    # The stray second signature ``__init__(self, in_dim, hid_dim, out_dim)``
    # was removed: two consecutive ``def`` lines are a syntax error, and both
    # the body and the callers use the four-width signature below.
    def __init__(self, vembdim, eembdim, vmsgdim, emsgdim):
        super(MsgPass, self).__init__()
        self.h_dim = 16
        # Single linear maps; the sigmoid is applied in forward().
        self.fv = nn.Sequential(
            nn.Linear(vembdim, vmsgdim),
            )
        self.fw = nn.Sequential(
            nn.Linear(eembdim+vmsgdim, emsgdim),
            )
        self.gv = nn.Sequential(
            nn.Linear(vembdim+emsgdim, vembdim),
            )
        self.gw = nn.Sequential(
            nn.Linear(eembdim+vmsgdim, eembdim),
            )
        self.drop = nn.Dropout(0.5)
        self.adjdrop = AdjDropout(0.7)
        self.sig = nn.Sigmoid()

    def forward(self, H, vemb, eemb, emsg=None):
        """Run one round of message passing.

        Args:
            H: incidence matrix, used as ``H.T @ vmsg`` and ``H @ emsg``, so it
                is expected to be [num_vert, num_edge].
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per hyperedge.
            emsg: accepted for call compatibility; it is recomputed here and
                the incoming value is not read.

        Returns:
            ``(vemb1, eemb1, emsg)``: updated vertex embeddings, updated
            hyperedge embeddings and the hyperedge messages of this round.
        """
        vemb, eemb = self.drop(vemb), self.drop(eemb)
        H = self.adjdrop(H) # one dropped adjacency shared by both directions
        HT = H.T

        vmsg = self.sig(self.fv(vemb))
        svmsg = HT @ vmsg # sum aggregate vertex messages per hyperedge
        emsg = self.sig(self.fw(torch.cat((eemb, svmsg), 1)))
        semsg = H @ emsg # sum aggregate hyperedge messages per vertex
        vemb1 = self.gv(torch.cat((vemb, semsg), 1))
        # Reuse svmsg: the previous code re-aggregated after applying the
        # sigmoid to vmsg a second time, so gw saw different vertex messages
        # than fw did.
        eemb1 = self.gw(torch.cat((eemb, svmsg), 1))

        return vemb1, eemb1, emsg

class AdjDropout(nn.Module):
    """Adjacency dropout: zero whole columns (hyperedges) of the incidence matrix.

    Args:
        p: probability of dropping each hyperedge.
    """
    def __init__(self, p=0.7):
        super(AdjDropout, self).__init__()
        # The rate has to live on the module; forward() previously referenced
        # a bare ``p`` that was never defined there.
        self.p = p

    def forward(self, H):
        """Return ``H`` with a random subset of its columns zeroed.

        In eval mode ``H`` is returned unchanged, like ``nn.Dropout``. No
        rescaling of the kept columns is performed.
        """
        if not self.training:
            return H
        # Take the shape from H itself instead of the notebook globals n_v/n_e.
        n_v, n_e = H.shape
        keep = (torch.rand(n_e, device=H.device) >= self.p).float() # 1->keep, throw p
        return H * keep.expand(n_v, n_e)

class HMPNN(nn.Module):
    """Residual stack of two MsgPass layers followed by a linear classifier.

    Args:
        outdim: number of output classes.
        vembdim, eembdim: vertex / hyperedge embedding widths.
        vmsgdim, emsgdim: vertex / hyperedge message widths.
        in_dim: width of the raw vertex features. Defaults to the width of the
            notebook-global ``X``, which the constructor already relied on.
    """
    def __init__(self, outdim, vembdim, eembdim, vmsgdim, emsgdim, in_dim=None):
        super(HMPNN, self).__init__()
        if in_dim is None:
            in_dim = X.size(-1)
        # Project raw features to vembdim first. Previously the first layer was
        # built at raw feature width while msgpass2 and self.lin expect
        # vembdim, and forward() read ``vemb`` without ever assigning it.
        self.ve = nn.Linear(in_dim, vembdim, bias=False)
        self.msgpass = MsgPass(vembdim, eembdim, vmsgdim, emsgdim)
        self.msgpass2 = MsgPass(vembdim, eembdim, vmsgdim, emsgdim)
        self.msgpass3 = MsgPass(vembdim, eembdim, vmsgdim, emsgdim) # built but not used in forward()
        self.lin = nn.Linear(vembdim, outdim)
        self.eemb = None
        self.eembdim = eembdim

    def forward(self, H, X):
        """Return class logits, one row per vertex.

        Args:
            H: incidence matrix; its second dimension is taken as the number
                of hyperedges.
            X: raw vertex features, one row per vertex.
        """
        vemb = self.ve(X)
        # Hyperedge embeddings start at zero; one row per column of H
        # (replaces the lookup of the global ``elst``).
        eemb = torch.zeros(H.shape[1], self.eembdim, device=vemb.device, dtype=vemb.dtype)
        vemb1, eemb1, emsg = self.msgpass(H, vemb, eemb)
        vemb, eemb = vemb+vemb1, eemb+eemb1 # residual update
        vemb1, eemb1, emsg = self.msgpass2(H, vemb, eemb, emsg=emsg)
        vemb, eemb = vemb+vemb1, eemb+eemb1
        x = self.lin(vemb)
        return x

num_v, vdim = X.shape  # 2708, 1433
# One shared width for embeddings and messages (2 was tried earlier).
emsgdim = 16
vmsgdim = emsgdim
eembdim = emsgdim
vembdim = emsgdim

num_classes = 7
model = HMPNN(num_classes, vembdim, eembdim, vmsgdim, emsgdim)
# Smoke test: one forward pass over the whole hypergraph.
yhat = model(H, X)
print(yhat.shape) # [2708, 7]

# Implementation Details Our model uses two layers of HMPNN with sigmoid
# activation and a hidden representation of size 2. We use sum as the message
# aggregation functions, with adjacency matrix dropout with rate 0.7, as well as
# dropout with rate 0.5 for vertex and hyperedge representation.

# print(len(X[0]))

torch.Size([2708, 7])


In [None]:
# @title HMPNN elst, ilst
import torch
import torch.nn as nn
import torch.nn.functional as F
# Vert msg = fv(vert ebd)
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class MsgPass(nn.Module):
    """One HMPNN message-passing layer driven by edge and incidence lists.

    Vert msg  = fv(vert emb)
    Edge msg  = fw(edge emb, Sum Vert msgs)
    Vert emb1 = gv(vert emb, Sum edge msgs)
    Edge emb1 = gw(edge emb, Sum Vert msgs)

    Every function is a single linear layer followed by a sigmoid.

    Args:
        vembdim, eembdim: vertex / hyperedge embedding widths.
        vmsgdim, emsgdim: vertex / hyperedge message widths.
    """
    def __init__(self, vembdim, eembdim, vmsgdim, emsgdim):
        super(MsgPass, self).__init__()
        self.h_dim = 4
        self.fv = nn.Sequential(
            nn.Linear(vembdim, vmsgdim), nn.Sigmoid(),
            )
        self.fw = nn.Sequential(
            nn.Linear(eembdim+vmsgdim, emsgdim), nn.Sigmoid(),
            )
        self.gv = nn.Sequential(
            nn.Linear(vembdim+emsgdim, vembdim), nn.Sigmoid(),
            )
        self.gw = nn.Sequential(
            nn.Linear(eembdim+vmsgdim, eembdim), nn.Sigmoid(),
            )
        self.vmsgdim = vmsgdim
        self.emsgdim = emsgdim

    @staticmethod
    def _sum_by_group(msgs, groups):
        """Stack, for each index list in ``groups``, the sum of the selected rows of ``msgs``."""
        return torch.stack([torch.sum(msgs[members], 0) for members in groups])

    def forward(self, vemb, eemb, elst, ilst):
        """Run one round of message passing.

        Args:
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per entry of ``elst``.
            elst: for each hyperedge, the list of its vertex indices.
            ilst: for each vertex, the list of its hyperedge indices.

        Returns:
            ``(vemb1, eemb1)``: updated vertex and hyperedge embeddings.
        """
        vmsg = self.fv(vemb)

        # given e, get all vmsgs then aggregate; computed once and shared by fw
        # and gw (it used to be rebuilt with an identical second Python loop)
        svmsg = self._sum_by_group(vmsg, elst)
        emsg = self.fw(torch.cat((eemb, svmsg), 1))

        # given v, get all emsgs then aggregate; sum rather than mean because a
        # vertex may be isolated, and a mean would divide by zero hyperedges
        semsg = self._sum_by_group(emsg, ilst)
        vemb1 = self.gv(torch.cat((vemb, semsg), 1))

        eemb1 = self.gw(torch.cat((eemb, svmsg), 1))
        return vemb1, eemb1

# Vert msg = fv(vert ebd)
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

    # def forward(self, vemb, eemb, elst, ilst):
    #     vmsg = self.fv(vemb)
    #     return vmsg

    # def forward(self, vemb, eemb, elst, ilst):
    #     svmsg=torch.stack([torch.sum(vmsg[v],0) for v in elst]) # given e, get all vmsgs then aggregate
    #     eemb1 = self.gw(torch.cat((eemb, svmsg), 1))

    #     svmsg=torch.stack([torch.sum(vmsg[v],0) for v in elst]) # given e, get all vmsgs then aggregate
    #     emsg = self.fw(torch.cat((meemb, svmsg), 1))

    #     semsg=torch.stack([torch.sum(emsg[e],0) for e in ilst]) # given v, get all emsgs then aggregate # cannot be mean bec vert in ilst may be isolated, divide by 0 hyperedges
    #     vemb1 = self.gv(torch.cat((vemb, semsg), 1))

    #     return vemb1, eemb1

def ilst_from_elst(elst, n_v=len(ilst)): # generate incidence list from edge list
    """Invert an edge list into an incidence list.

    Args:
        elst: for each hyperedge, the list of vertex indices it contains.
        n_v: number of vertices; the default is the length of the global
            ``ilst`` at the time this function is defined.

    Returns:
        A list of ``n_v`` lists; entry ``v`` holds the positions in ``elst``
        of the hyperedges that contain vertex ``v``.
    """
    incidence = [[] for _ in range(n_v)]
    for edge_pos, members in enumerate(elst):
        for vert in members:
            incidence[vert].append(edge_pos)
    return incidence

def adjdrop(vemb, eemb, elst, ilst, p=0.7): # adjacency dropout, maybe can replace with slicing of sparse tensors if pytorch implements it
    """Drop each hyperedge with probability ``p``.

    Returns ``(vemb, meemb, melst, milst)``: the untouched vertex embeddings,
    the embedding rows and edge-list entries of the surviving hyperedges, and
    an incidence list rebuilt from the survivors. The incoming ``ilst`` is
    not read.
    """
    keep = torch.rand(len(elst)) >= p # True->keep, throw p
    kept_edges = [members for members, kept in zip(elst, keep.tolist()) if kept]
    kept_eemb = eemb[keep]
    return vemb, kept_eemb, kept_edges, ilst_from_elst(kept_edges)


class HMPNN(nn.Module):
    """Three residual rounds of one shared MsgPass layer, then a linear classifier.

    Args:
        outdim: number of output classes.
        vembdim: vertex embedding width (also the width of the input features,
            since ``forward`` uses them directly as embeddings).
        eembdim: hyperedge embedding width.
    """
    def __init__(self, outdim, vembdim, eembdim):
        super(HMPNN, self).__init__()
        # Keep the width on the module; forward() used to read a global
        # ``eembdim`` that only happened to match the constructor argument.
        self.eembdim = eembdim
        self.msgpass = MsgPass(vembdim, eembdim, vmsgdim=2, emsgdim=2)
        self.lin = nn.Linear(vembdim, outdim)

    def forward(self, x, elst=elst, ilst=ilst):
        """Return class logits, one row per vertex.

        Args:
            x: vertex features, used directly as the initial vertex embeddings.
            elst: for each hyperedge, its vertex indices (defaults to the
                global list bound when the class is defined).
            ilst: for each vertex, its hyperedge indices (same default rule).
        """
        vemb = x
        eemb = torch.zeros(len(elst), self.eembdim, device=x.device, dtype=x.dtype)
        # The same layer (shared weights) is applied three times with residuals.
        for _ in range(3):
            vemb1, eemb1 = self.msgpass(vemb, eemb, elst=elst, ilst=ilst)
            vemb, eemb = vemb+vemb1, eemb+eemb1
        return self.lin(vemb)


def trainl(model, optimizer, elst, ilst, X, Y, train_mask):
    """Take one optimisation step on the training vertices.

    Puts ``model`` in training mode, runs it as ``model(X, elst, ilst)``,
    computes cross-entropy on the rows selected by ``train_mask`` and applies
    a single optimizer step.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    logits = model(X, elst, ilst)
    train_logits = logits[train_mask]
    train_labels = Y[train_mask]
    loss = F.cross_entropy(train_logits, train_labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

def evaluatel(model, elst, ilst, X, Y, val_mask, test_mask):
    """Score the model on the validation and test vertices.

    Puts ``model`` in eval mode, runs ``model(X, elst, ilst)`` once and
    passes the arg-max predictions under each mask to ``accuracy``.

    Returns:
        ``(val_acc, test_acc)`` as returned by ``accuracy``.
    """
    model.eval()
    # Inference only: do not build an autograd graph for the forward pass.
    with torch.no_grad():
        Y_hat = model(X, elst, ilst)
    val_acc = accuracy(Y_hat[val_mask].argmax(1), Y[val_mask])
    test_acc = accuracy(Y_hat[test_mask].argmax(1), Y[test_mask])
    return val_acc, test_acc

# val_acc, test_acc = evaluatel(model, elst, ilst, X, Y, val_mask, test_mask)
# val_acc, test_acc = evaluatel(model, elst, ilst, X, Y, val_mask, train_mask)
# print(val_acc, test_acc)



# Raw features are used directly as vertex embeddings, so vembdim is the feature width.
num_v, vembdim = X.shape
eembdim = 2
num_classes = 7
model = HMPNN(num_classes, vembdim, eembdim)
# Smoke test: one forward pass with the global edge / incidence lists.
yhat = model(X, elst, ilst)
# print(yhat.shape) # [2708, 7]

# print(len(X[0]))

In [None]:
# @title HMPNN H
import torch
import torch.nn as nn
# Vert msg = fv(vert ebd)
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class MsgPass(nn.Module):
    """One HMPNN message-passing layer driven by an incidence matrix H.

    Vert msg  = fv(vert emb)
    Edge msg  = fw(edge emb, Sum Vert msgs)
    Vert emb1 = gv(vert emb, Sum edge msgs)
    Edge emb1 = gw(edge emb, Sum Vert msgs)

    Each function is Linear -> Sigmoid -> Linear with hidden width ``h_dim``.

    Args:
        vembdim, eembdim: vertex / hyperedge embedding widths.
        vmsgdim, emsgdim: vertex / hyperedge message widths.
    """
    def __init__(self, vembdim, eembdim, vmsgdim, emsgdim):
        super(MsgPass, self).__init__()
        self.h_dim = 4
        self.fv = nn.Sequential(
            nn.Linear(vembdim, self.h_dim), nn.Sigmoid(),
            nn.Linear(self.h_dim, vmsgdim),
            )
        self.fw = nn.Sequential(
            nn.Linear(eembdim+vmsgdim, self.h_dim), nn.Sigmoid(),
            nn.Linear(self.h_dim, emsgdim),
            )
        self.gv = nn.Sequential(
            nn.Linear(vembdim+emsgdim, self.h_dim), nn.Sigmoid(),
            nn.Linear(self.h_dim, vembdim),
            )
        self.gw = nn.Sequential(
            nn.Linear(eembdim+vmsgdim, self.h_dim), nn.Sigmoid(),
            nn.Linear(self.h_dim, eembdim),
            )
        self.vmsgdim = vmsgdim
        self.emsgdim = emsgdim

    def forward(self, H, vemb, eemb):
        """Run one round of message passing.

        Args:
            H: incidence matrix [num_vert, num_edge], sparse or dense. Only
                which entries are non-zero matters, not their values.
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per hyperedge.

        Returns:
            ``(vemb1, eemb1)``: updated vertex and hyperedge embeddings.
        """
        # Binarised dense incidence: B.T @ m sums, for every hyperedge, the rows
        # of m at its member vertices. This is what the per-row boolean-mask
        # loops computed, without iterating over H in Python.
        B = (H.to_dense() != 0).to(vemb.dtype)

        vmsg = self.fv(vemb)
        svmsg = B.T @ vmsg # given e, sum the messages of its vertices

        # ``eemb`` replaces ``meemb``, which was only defined in commented-out code.
        emsg = self.fw(torch.cat((eemb, svmsg), 1))
        zero_mean = torch.zeros(self.emsgdim, device=emsg.device, dtype=emsg.dtype)
        unit_var = torch.ones(self.emsgdim, device=emsg.device, dtype=emsg.dtype)
        emsg = nn.functional.batch_norm(emsg, zero_mean, unit_var)
        # Dropout only while training; it used to stay active in eval mode.
        emsg = nn.functional.dropout(emsg, p=0.5, training=self.training)

        # sum rather than mean: an isolated vertex would divide by zero hyperedges
        semsg = B @ emsg # given v, sum the messages of its hyperedges
        vemb1 = self.gv(torch.cat((vemb, semsg), 1))

        eemb1 = self.gw(torch.cat((eemb, svmsg), 1))
        return vemb1, eemb1

# def ilst_from_elst(elst, n_v=len(ilst)): # generate incidence list from edge list
#     ilst = [[] for id in range(n_v)]
#     for e,vs in enumerate(elst):
#         [ilst[v].append(e) for v in vs]
#     return ilst

def adjdrop(vemb, eemb, elst, ilst, p=0.7): # adjacency dropout, maybe can replace with slicing of sparse tensors if pytorch implements it
    """Randomly remove hyperedges, each with probability ``p``.

    The vertex embeddings pass through unchanged and the incoming ``ilst`` is
    not read; the incidence list is rebuilt from the surviving hyperedges.

    Returns:
        ``(vemb, meemb, melst, milst)`` restricted to the kept hyperedges.
    """
    survives = torch.rand(len(elst)) >= p # True->keep, throw p
    melst = []
    for members, kept in zip(elst, survives.tolist()):
        if kept:
            melst.append(members)
    meemb = eemb[survives]
    milst = ilst_from_elst(melst)
    return vemb, meemb, melst, milst


class HMPNN(nn.Module):
    """Two rounds of one shared MsgPass layer over H, then a linear classifier.

    Args:
        outdim: number of output classes.
        vembdim: vertex embedding width (also the width of the input features,
            since ``forward`` uses them directly as embeddings).
        eembdim: hyperedge embedding width.
    """
    def __init__(self, outdim, vembdim, eembdim):
        super(HMPNN, self).__init__()
        self.eembdim = eembdim # kept on the module instead of reading a global in forward()
        self.msgpass = MsgPass(vembdim, eembdim, vmsgdim=2, emsgdim=2)
        self.lin = nn.Linear(vembdim, outdim)

    def forward(self, H, X):
        """Return class logits, one row per vertex.

        Args:
            H: incidence matrix; its second dimension is taken as the number
                of hyperedges.
            X: vertex features, used directly as the initial vertex embeddings.
        """
        # ``x`` was read here before being assigned; the features arrive as ``X``.
        vemb = X
        # Hyperedge embeddings start at zero, one row per column of H.
        eemb = torch.zeros(H.shape[1], self.eembdim, device=X.device, dtype=X.dtype)
        # MsgPass.forward in this cell takes (H, vemb, eemb); the former
        # ``elst=``/``ilst=`` keywords belonged to the list-based variant.
        vemb, eemb = self.msgpass(H, vemb, eemb)
        vemb, eemb = self.msgpass(H, vemb, eemb)
        return self.lin(vemb)

# Raw features are used directly as vertex embeddings, so vembdim is the feature width.
num_v, vembdim = X.shape
eembdim = 2
num_classes = 7
model = HMPNN(num_classes, vembdim, eembdim)
# Smoke test: one forward pass over the incidence matrix.
Y_hat = model(H, X)
# print(yhat.shape) # [2708, 7]

# print(len(X[0]))

In [None]:
# @title HMPNN me H attn vic
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert ebd) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

def off_diagonal(x):
    """Return the off-diagonal entries of a square matrix as a 1-D tensor, in row-major order."""
    rows, cols = x.shape
    assert rows == cols
    # Dropping the last element and viewing as (n-1, n+1) lines every diagonal
    # entry up in column 0, which is then sliced away.
    trimmed = x.flatten()[:-1]
    shifted = trimmed.view(rows - 1, rows + 1)
    return shifted[:, 1:].flatten()

class MultiHeadAttention(nn.Module):
    """Scaled dot-product multi-head attention.

    Args:
        d_model: width of queries, keys, values and of the output.
        n_heads: number of heads; must divide ``d_model``.
        dropout: dropout rate applied to the attention weights.

    Raises:
        ValueError: if ``d_model`` is not divisible by ``n_heads``.
    """
    def __init__(self, d_model, n_heads, dropout=0):
        super(MultiHeadAttention, self).__init__()
        if d_model % n_heads != 0:
            raise ValueError(f"d_model ({d_model}) must be divisible by n_heads ({n_heads})")
        self.d_model = d_model
        self.n_heads = n_heads
        self.head_dim = d_model // n_heads
        self.q = nn.Linear(d_model, d_model, bias=False)
        self.k = nn.Linear(d_model, d_model, bias=False)
        self.v = nn.Linear(d_model, d_model, bias=False)
        self.lin = nn.Linear(d_model, d_model)
        self.drop = nn.Dropout(dropout)
        # A plain float instead of a tensor created on the global ``device``:
        # it works on whatever device the inputs live on and removes the
        # dependency on a module-level variable.
        self.scale = self.head_dim ** 0.5

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key``/``value``.

        Args:
            query, key, value: tensors whose first dimension is the batch and
                whose remaining elements are reshaped to
                ``[batch, -1, n_heads, head_dim]`` after projection.
            mask: optional tensor broadcastable to
                ``[batch, n_heads, q_len, k_len]``; positions where
                ``mask == 0`` are excluded from attention.

        Returns:
            Tensor of shape ``[batch, q_len, d_model]``.
        """
        batch_size = query.shape[0]
        Q = self.q(query).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        K = self.k(key).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        V = self.v(value).view(batch_size, -1, self.n_heads, self.head_dim).transpose(1, 2)
        attn = Q @ K.transpose(2, 3) / self.scale
        if mask is not None:
            attn = attn.masked_fill(mask == 0, -1e10) # large negative -> zero weight after softmax
        attention = torch.softmax(attn, dim=-1)
        x = self.drop(attention) @ V
        x = x.transpose(1, 2).reshape(batch_size, -1, self.d_model) # merge heads
        x = self.lin(x)
        return x#, attention

from torch.nn.utils.rnn import pad_sequence
def get_idx(H): # get index of non zero entries for each row
    """Collect, for every row of ``H``, the column indices of its stored entries.

    Rows are right-padded with ``-1`` to the length of the longest row.

    Returns:
        ``(sidx, mask)``: ``sidx`` is ``[n_rows, max_entries_per_row]`` and
        ``mask`` is ``True`` exactly at the padded positions.
    """
    csr = H.to_sparse_csr()
    # Consecutive differences of the row pointers give the entry count per row.
    per_row_counts = torch.diff(csr.crow_indices())
    row_chunks = torch.split(csr.col_indices(), tuple(per_row_counts)) # https://stackoverflow.com/a/44536294/13359815
    sidx = pad_sequence(row_chunks, batch_first=True, padding_value=-1)
    return sidx, sidx < 0 # [n_rows, num_idx]

class ff(nn.Module):
    """Feed-forward block: Linear -> Sigmoid -> Dropout(p=0.5)."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        h_dim = 16  # hidden width of earlier multi-layer variants; not used by the layers below
        stages = [
            nn.Linear(in_dim, out_dim),
            nn.Sigmoid(),
            nn.Dropout(p=0.5),
        ]
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block to ``x``."""
        result = self.lin(x)
        return result

class MsgPass(nn.Module):
    """HMPNN message-passing layer whose aggregations are attention over neighbours.

    Vert msg  = ffv(vert emb) on the first layer, otherwise
                attention from each vertex over its hyperedges' previous messages
    Edge msg  = attention from each hyperedge over its vertices' messages
    Vert emb1 = attention from each vertex over its hyperedges' messages
    Edge emb1 = attention from each hyperedge over its vertices' messages

    Args:
        d_model: width of all embeddings and messages.
    """
    def __init__(self, d_model):
        super(MsgPass, self).__init__()
        self.ffv = ff(d_model, d_model)
        # ffw/fgv/fgw, adjdrop and drop are constructed but not used in forward().
        self.ffw = ff(2*d_model, d_model)
        self.fgv = ff(2*d_model, d_model)
        self.fgw = ff(2*d_model, d_model)
        drop=0.
        self.fv = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        self.fw = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        self.gv = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        self.gw = MultiHeadAttention(d_model, n_heads=1, dropout=drop)
        # self.adjdrop = AdjDropout(0.7) # 0.7 "Adjacency dropout must be applied in neighborhood creation steps of Equations 3 through 5"
        self.adjdrop = AdjDropout(0)
        self.drop = nn.Dropout(0.5)

    @staticmethod
    def _neighbours(H):
        """Return padded neighbour indices per row of ``H`` and a keep-mask for attention.

        ``get_idx`` marks *padded* slots with True, whereas MultiHeadAttention
        hides positions where ``mask == 0``. The mask is therefore inverted
        here so that real neighbours are attended and padding is hidden.
        """
        idx, is_pad = get_idx(H) # [n_rows, num_idx]
        keep = (~is_pad).unsqueeze(1).unsqueeze(2) # [n_rows, 1, 1, num_idx]
        return idx, keep

    def forward(self, H, vemb, eemb, emsg=None):
        """Run one round of attention-based message passing.

        Args:
            H: incidence matrix accepted by ``get_idx``; rows are treated as
                vertices and columns as hyperedges.
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per hyperedge.
            emsg: hyperedge messages from the previous layer, or ``None`` on
                the first layer.

        Returns:
            ``(vemb1, eemb1, emsg)``; ``emsg`` keeps the ``[n_cols, 1, d_model]``
            shape produced by the attention module.
        """
        # The neighbour tables depend only on H, so build each one once.
        ridx, rkeep = self._neighbours(H)   # per vertex: incident hyperedges
        cidx, ckeep = self._neighbours(H.T) # per hyperedge: member vertices

        if emsg is None:
            vmsg = self.ffv(vemb)
        else:
            # flatten(1) accepts both [n, d] and [n, 1, d] incoming messages
            prev = emsg.flatten(1)[ridx] # [n_rows, num_idx, d_model]
            vmsg = self.fv(vemb, prev, prev, rkeep).flatten(1) # [n_rows, d_model]

        svmsg = vmsg[cidx] # [n_cols, num_idx, d_model]; padded slots index row -1 and are masked out
        emsg = self.fw(eemb, svmsg, svmsg, ckeep) # [n_cols, 1, d_model]

        semsg = emsg.flatten(1)[ridx] # [n_rows, num_idx, d_model]
        vemb1 = self.gv(vemb, semsg, semsg, rkeep) # [n_rows, 1, d_model]

        eemb1 = self.gw(eemb, svmsg, svmsg, ckeep) # [n_cols, 1, d_model]

        # Drop only the singleton query axis; a bare squeeze() would also
        # collapse the row axis when there is a single vertex or hyperedge.
        eemb1 = eemb1.squeeze(1)
        vemb1 = vemb1.squeeze(1)

        return vemb1, eemb1, emsg

class HMPNN(nn.Module):
    """Two attention MsgPass layers with a linear classifier and a VICReg head.

    Args:
        in_dim: width of the raw vertex features.
        d_model: embedding width used by the encoders and both MsgPass layers.
        out_dim: number of output classes.
    """
    def __init__(self, in_dim, d_model, out_dim):
        super(HMPNN, self).__init__()
        self.venc = nn.Linear(in_dim, d_model, bias=False)
        self.eenc = nn.Linear(in_dim, d_model, bias=False)
        self.msgpass = MsgPass(d_model)
        self.msgpass2 = MsgPass(d_model)

        # f=[d_model,256,256,256]
        f=[d_model,32,32,32]
        # Expander applied to the embeddings before the VICReg loss.
        self.exp = nn.Sequential(
            nn.Linear(f[0], f[1]), nn.BatchNorm1d(f[1]), nn.ReLU(),
            nn.Linear(f[1], f[2]), nn.BatchNorm1d(f[2]), nn.ReLU(),
            nn.Linear(f[-2], f[-1], bias=False)
            )
        self.classifier = nn.Linear(d_model, out_dim)

    def encode(self, H, vemb):
        """Return the ``d_model``-wide vertex embeddings after both MsgPass layers."""
        vemb, eemb = self.venc(vemb), self.eenc(vemb)
        vemb, eemb, emsg = self.msgpass(H, vemb, eemb)
        vemb, eemb, emsg = self.msgpass2(H, vemb, eemb, emsg=emsg)
        return vemb

    def forward(self, H, vemb):
        """Return class logits, one row per vertex."""
        return self.classifier(self.encode(H, vemb))

    # https://arxiv.org/pdf/2105.04906.pdf
    def vicreg(self, x, y): # https://github.com/facebookresearch/vicreg/blob/main/main_vicreg.py
        """VICReg loss between two batches of projections ``x`` and ``y``.

        Both inputs must be 2-D ``[batch, features]``. The earlier
        ``x.dim() == 1`` guards were unreachable, because the shape is
        unpacked into two values before they ran.

        Returns:
            Weighted sum of the invariance, variance and covariance terms.
        """
        sim_coeff=10.0 # 25.0 # λ
        std_coeff=10.0 # 25.0 # µ
        cov_coeff=1.0 # 1.0 # ν

        # invariance loss
        repr_loss = F.mse_loss(x, y) # s(Z, Z')

        x = x - x.mean(dim=0)
        y = y - y.mean(dim=0)

        # variance loss
        std_x = torch.sqrt(x.var(dim=0) + 0.0001) #ϵ=0.0001
        std_y = torch.sqrt(y.var(dim=0) + 0.0001)
        std_loss = torch.mean(F.relu(1 - std_x)) / 2 + torch.mean(F.relu(1 - std_y)) / 2

        batch_size, num_features = x.shape

        # covariance loss
        cov_x = (x.T @ x) / (batch_size - 1) #C(Z)
        cov_y = (y.T @ y) / (batch_size - 1)
        cov_loss = off_diagonal(cov_x).pow_(2).sum().div(num_features)\
         + off_diagonal(cov_y).pow_(2).sum().div(num_features) #c(Z)
        loss = (sim_coeff * repr_loss + std_coeff * std_loss + cov_coeff * cov_loss)
        print("in vicreg ",(sim_coeff * repr_loss).item() , (std_coeff * std_loss).item() , (cov_coeff * cov_loss).item())
        return loss

    def loss(self, H1, H2, vemb):
        """VICReg loss between the embeddings of two views ``H1`` and ``H2``.

        Uses ``encode`` rather than ``forward``: ``exp`` expects ``d_model``-wide
        input, whereas ``forward`` returns ``out_dim``-wide classifier logits.
        """
        sx, sy = self.encode(H1, vemb), self.encode(H2, vemb)
        vx, vy = self.exp(sx), self.exp(sy)
        loss = self.vicreg(vx,vy)
        return loss

    def classify(self, x):
        """Apply the linear classifier to precomputed embeddings ``x``."""
        return self.classifier(x)


num_v, vdim = X.shape  # 2708, 1433
num_classes = 7

# vdim is X.shape[1]; 16 is passed as d_model.
model = HMPNN(vdim, 16, num_classes)


In [None]:
# @title HMPNN no cls
# https://arxiv.org/pdf/2203.16995.pdf
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
# Vert msg = fv(vert ebd) , Sum edge msgs
# Edge msg = fw(edge emb, Sum Vert msgs)
# Vert emb1 = gv(vert emb, Sum edge msgs)
# Edge emb1 = gw(edge emb, Sum Vert msgs)

class ff(nn.Module):
    """Message function: a single linear projection with no activation or dropout."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        h_dim = out_dim  # kept from earlier multi-layer variants; unused below
        projection = nn.Linear(in_dim, out_dim)
        self.lin = nn.Sequential(projection)

    def forward(self, x):
        """Return the linear projection of ``x``."""
        projected = self.lin(x)
        return projected

class gg(nn.Module):
    """Update function: linear projection followed by ReLU.

    Note from the paper kept from the original comments: "Regular dropout can
    follow a batch normalization right before updating functions in Equations
    3 and 5, as part of the corresponding g functions." Neither is used here.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        stages = [
            nn.Linear(in_dim, out_dim),
            nn.ReLU(),
        ]
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        """Return ``relu(linear(x))``."""
        updated = self.lin(x)
        return updated

class MsgPass(nn.Module):
    """Message-passing layer with residual message functions and degree normalisation.

    Vert msg  = vert emb + ffv(vert emb)
    Edge msg  = agg(vert msgs) + fw(edge emb, agg(vert msgs))
    Vert emb1 = gv(vert emb, agg(edge msgs))
    Edge emb1 = gw(edge emb, agg(vert msgs))

    Args:
        in_dim: width of incoming vertex/hyperedge embeddings and of messages.
        out_dim: width of the embeddings returned by ``gv`` / ``gw``.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.ffv = ff(in_dim, in_dim)
        self.fv = ff(2 * in_dim, in_dim)  # constructed but not used in forward()
        self.fw = ff(2 * in_dim, in_dim)
        self.gv = gg(2 * in_dim, out_dim)
        self.gw = gg(2 * in_dim, out_dim)
        # 0 -> adjacency dropout disabled; the paper's setting is 0.7:
        # "Adjacency dropout must be applied in neighborhood creation steps of Equations 3 through 5"
        self.adjdrop = AdjDropout(0)

    def forward(self, H, vemb, eemb, semsg=None):
        """Run one round of message passing.

        Args:
            H: incidence matrix; ``.sum(dim).to_dense()`` is called on it, so
                presumably a sparse [num_vert, num_edge] tensor (confirm with caller).
            vemb: vertex embeddings, one row per vertex.
            eemb: hyperedge embeddings, one row per hyperedge.
            semsg: accepted for call compatibility; the incoming value is not read.

        Returns:
            ``(vemb1, eemb1, semsg)``: updated embeddings and the aggregated
            hyperedge messages per vertex.
        """
        n_vert, n_edge = H.shape
        vert_deg = H.sum(1).to_dense()
        edge_deg = H.sum(0).to_dense()
        # Diagonal matrices holding degree ** -0.5 for vertices and hyperedges.
        Dv = torch.sparse.spdiags(vert_deg**-0.5, torch.tensor(0), (n_vert, n_vert))
        De = torch.sparse.spdiags(edge_deg**-0.5, torch.tensor(0), (n_edge, n_edge))

        vert_msg = vemb + self.ffv(vemb)

        agg_vert = De @ self.adjdrop(H).T @ Dv @ vert_msg # [num_edge, d_model]
        edge_in = torch.cat((eemb, agg_vert), 1)
        edge_msg = agg_vert + self.fw(edge_in)
        semsg = Dv @ self.adjdrop(H) @ De @ edge_msg

        vert_in = torch.cat((vemb, semsg), 1)
        vemb1 = self.gv(vert_in)
        eemb1 = self.gw(edge_in)
        return vemb1, eemb1, semsg


class HMPNN(nn.Module):
    """Two stacked ``MsgPass`` rounds applied to a hypergraph.

    Args:
        in_dim: width of the input features.
        d_model: width of the embeddings between the two rounds.
        out_dim: width of the returned vertex embeddings.
    """

    def __init__(self, in_dim, d_model, out_dim):
        super().__init__()
        # venc, eenc, classifier and drop are not called by forward; they are
        # still constructed, in the original order, so the parameter set and
        # initialisation stay the same.
        self.venc = nn.Linear(in_dim, d_model, bias=False)
        self.eenc = nn.Linear(in_dim, d_model, bias=False)
        self.msgpass = MsgPass(in_dim, d_model)
        self.msgpass2 = MsgPass(d_model, out_dim)
        self.classifier = nn.Linear(d_model, out_dim)
        self.drop = nn.Dropout(0.5)

    def forward(self, H, X):
        """Return the vertex embeddings after both message-passing rounds.

        ``X`` seeds both the vertex and the hyperedge embeddings of the first
        round; the second round receives the first round's outputs.
        """
        hidden_v, hidden_e, node_agg = self.msgpass(H, X, X)
        out_v, _out_e, _node_agg = self.msgpass2(H, hidden_v, hidden_e, semsg=node_agg)
        return out_v

# Build the model from the feature matrix dimensions (rows, feature width).
num_v, in_dim = X.shape  # 2708, 1433
num_classes = 7
model = HMPNN(in_dim, 16, num_classes)  # 16 is the d_model argument
# 200 test loss: 0.27799, Val acc: 60.00, Test acc: 61.30
# 200 test loss: 1.38994, Val acc: 15.20, Test acc: 16.80


In [None]:
# @title optuna result
'''
[I 2024-02-21 03:33:59,082] A new study created in memory with name: no-name-97ac4315-ca85-44d7-ae03-78a174b04d79
adjdrop: 0.3, drop: [0.6000000000000001, 0.5, 0.5, 0.2, 0.0, 0.7000000000000001, 0.6000000000000001, 0.0, 0.8, 0.30000000000000004]
[I 2024-02-21 03:34:44,669] Trial 0 finished with value: 76.31500000000003 and parameters: {'drop0': 0.6000000000000001, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.2, 'drop4': 0.0, 'drop5': 0.7000000000000001, 'drop6': 0.6000000000000001, 'drop7': 0.0, 'drop8': 0.8, 'drop9': 0.30000000000000004, 'adjdrop': 0.30000000000000004}. Best is trial 0 with value: 76.31500000000003.
400 test loss: 0.04511, Val acc: 56.60, Test acc: 74.40
adjdrop: 0.0, drop: [0.0, 0.8, 0.6000000000000001, 0.7000000000000001, 0.0, 0.0, 0.8, 0.0, 0.4, 0.7000000000000001]
[I 2024-02-21 03:35:15,153] Trial 1 finished with value: 75.92999999999999 and parameters: {'drop0': 0.0, 'drop1': 0.8, 'drop2': 0.6000000000000001, 'drop3': 0.7000000000000001, 'drop4': 0.0, 'drop5': 0.0, 'drop6': 0.8, 'drop7': 0.0, 'drop8': 0.4, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 1 with value: 75.92999999999999.
400 test loss: 0.26852, Val acc: 56.60, Test acc: 74.60
adjdrop: 0.7, drop: [0.0, 0.1, 0.0, 0.30000000000000004, 0.30000000000000004, 0.0, 0.6000000000000001, 0.8, 0.6000000000000001, 0.1]
[I 2024-02-21 03:35:47,409] Trial 2 finished with value: 75.86500000000002 and parameters: {'drop0': 0.0, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.30000000000000004, 'drop4': 0.30000000000000004, 'drop5': 0.0, 'drop6': 0.6000000000000001, 'drop7': 0.8, 'drop8': 0.6000000000000001, 'drop9': 0.1, 'adjdrop': 0.7000000000000001}. Best is trial 2 with value: 75.86500000000002.
400 test loss: 0.01942, Val acc: 56.60, Test acc: 76.40
adjdrop: 0.7, drop: [0.5, 0.0, 0.8, 0.8, 0.4, 0.7000000000000001, 0.5, 0.4, 0.30000000000000004, 0.6000000000000001]
[I 2024-02-21 03:36:30,279] Trial 3 finished with value: 74.76 and parameters: {'drop0': 0.5, 'drop1': 0.0, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.4, 'drop5': 0.7000000000000001, 'drop6': 0.5, 'drop7': 0.4, 'drop8': 0.30000000000000004, 'drop9': 0.6000000000000001, 'adjdrop': 0.7000000000000001}. Best is trial 3 with value: 74.76.
400 test loss: 0.13639, Val acc: 56.60, Test acc: 75.40
adjdrop: 0.2, drop: [0.5, 0.7000000000000001, 0.6000000000000001, 0.30000000000000004, 0.30000000000000004, 0.4, 0.5, 0.30000000000000004, 0.4, 0.8]
[I 2024-02-21 03:37:14,376] Trial 4 finished with value: 70.495 and parameters: {'drop0': 0.5, 'drop1': 0.7000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.30000000000000004, 'drop4': 0.30000000000000004, 'drop5': 0.4, 'drop6': 0.5, 'drop7': 0.30000000000000004, 'drop8': 0.4, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 4 with value: 70.495.
400 test loss: 0.57701, Val acc: 56.60, Test acc: 71.40
adjdrop: 0.0, drop: [0.6000000000000001, 0.7000000000000001, 0.1, 0.8, 0.2, 0.4, 0.1, 0.0, 0.2, 0.2]
[I 2024-02-21 03:37:58,780] Trial 5 finished with value: 77.115 and parameters: {'drop0': 0.6000000000000001, 'drop1': 0.7000000000000001, 'drop2': 0.1, 'drop3': 0.8, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.0, 'drop8': 0.2, 'drop9': 0.2, 'adjdrop': 0.0}. Best is trial 4 with value: 70.495.
400 test loss: 0.04339, Val acc: 56.60, Test acc: 79.10
adjdrop: 0.1, drop: [0.2, 0.4, 0.0, 0.8, 0.1, 0.6000000000000001, 0.2, 0.1, 0.2, 0.1]
[I 2024-02-21 03:38:43,222] Trial 6 finished with value: 76.66 and parameters: {'drop0': 0.2, 'drop1': 0.4, 'drop2': 0.0, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.6000000000000001, 'drop6': 0.2, 'drop7': 0.1, 'drop8': 0.2, 'drop9': 0.1, 'adjdrop': 0.1}. Best is trial 4 with value: 70.495.
400 test loss: 0.00668, Val acc: 56.60, Test acc: 76.60
adjdrop: 0.2, drop: [0.1, 0.4, 0.7000000000000001, 0.8, 0.7000000000000001, 0.5, 0.7000000000000001, 0.1, 0.0, 0.2]
[I 2024-02-21 03:39:26,252] Trial 7 finished with value: 77.725 and parameters: {'drop0': 0.1, 'drop1': 0.4, 'drop2': 0.7000000000000001, 'drop3': 0.8, 'drop4': 0.7000000000000001, 'drop5': 0.5, 'drop6': 0.7000000000000001, 'drop7': 0.1, 'drop8': 0.0, 'drop9': 0.2, 'adjdrop': 0.2}. Best is trial 4 with value: 70.495.
400 test loss: 0.02099, Val acc: 56.60, Test acc: 77.60
adjdrop: 0.2, drop: [0.4, 0.30000000000000004, 0.5, 0.30000000000000004, 0.2, 0.4, 0.1, 0.6000000000000001, 0.0, 0.0]
[I 2024-02-21 03:40:10,434] Trial 8 finished with value: 75.625 and parameters: {'drop0': 0.4, 'drop1': 0.30000000000000004, 'drop2': 0.5, 'drop3': 0.30000000000000004, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.6000000000000001, 'drop8': 0.0, 'drop9': 0.0, 'adjdrop': 0.2}. Best is trial 4 with value: 70.495.
400 test loss: 0.00192, Val acc: 56.60, Test acc: 75.50
adjdrop: 0.6, drop: [0.6000000000000001, 0.6000000000000001, 0.7000000000000001, 0.5, 0.1, 0.0, 0.1, 0.2, 0.7000000000000001, 0.1]
[I 2024-02-21 03:40:55,334] Trial 9 finished with value: 75.785 and parameters: {'drop0': 0.6000000000000001, 'drop1': 0.6000000000000001, 'drop2': 0.7000000000000001, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.0, 'drop6': 0.1, 'drop7': 0.2, 'drop8': 0.7000000000000001, 'drop9': 0.1, 'adjdrop': 0.6000000000000001}. Best is trial 4 with value: 70.495.
400 test loss: 0.00908, Val acc: 56.60, Test acc: 77.70
adjdrop: 0.5, drop: [0.8, 0.8, 0.30000000000000004, 0.0, 0.6000000000000001, 0.2, 0.30000000000000004, 0.4, 0.5, 0.8]
[I 2024-02-21 03:41:37,513] Trial 10 finished with value: 73.36500000000001 and parameters: {'drop0': 0.8, 'drop1': 0.8, 'drop2': 0.30000000000000004, 'drop3': 0.0, 'drop4': 0.6000000000000001, 'drop5': 0.2, 'drop6': 0.30000000000000004, 'drop7': 0.4, 'drop8': 0.5, 'drop9': 0.8, 'adjdrop': 0.5}. Best is trial 4 with value: 70.495.
400 test loss: 0.85707, Val acc: 56.60, Test acc: 73.00
adjdrop: 0.5, drop: [0.8, 0.8, 0.30000000000000004, 0.0, 0.6000000000000001, 0.2, 0.30000000000000004, 0.4, 0.5, 0.8]
[I 2024-02-21 03:42:20,993] Trial 11 finished with value: 70.485 and parameters: {'drop0': 0.8, 'drop1': 0.8, 'drop2': 0.30000000000000004, 'drop3': 0.0, 'drop4': 0.6000000000000001, 'drop5': 0.2, 'drop6': 0.30000000000000004, 'drop7': 0.4, 'drop8': 0.5, 'drop9': 0.8, 'adjdrop': 0.5}. Best is trial 11 with value: 70.485.
400 test loss: 0.85154, Val acc: 56.60, Test acc: 71.30
adjdrop: 0.4, drop: [0.8, 0.6000000000000001, 0.30000000000000004, 0.0, 0.5, 0.2, 0.4, 0.30000000000000004, 0.5, 0.5]
[I 2024-02-21 03:43:04,615] Trial 12 finished with value: 77.81 and parameters: {'drop0': 0.8, 'drop1': 0.6000000000000001, 'drop2': 0.30000000000000004, 'drop3': 0.0, 'drop4': 0.5, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.30000000000000004, 'drop8': 0.5, 'drop9': 0.5, 'adjdrop': 0.4}. Best is trial 11 with value: 70.485.
400 test loss: 0.14227, Val acc: 56.60, Test acc: 77.30
adjdrop: 0.4, drop: [0.30000000000000004, 0.8, 0.30000000000000004, 0.1, 0.8, 0.2, 0.4, 0.6000000000000001, 0.4, 0.8]
[I 2024-02-21 03:43:47,540] Trial 13 finished with value: 73.565 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.8, 'drop2': 0.30000000000000004, 'drop3': 0.1, 'drop4': 0.8, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.6000000000000001, 'drop8': 0.4, 'drop9': 0.8, 'adjdrop': 0.4}. Best is trial 11 with value: 70.485.
400 test loss: 0.66470, Val acc: 56.60, Test acc: 73.50
adjdrop: 0.5, drop: [0.7000000000000001, 0.6000000000000001, 0.4, 0.5, 0.5, 0.30000000000000004, 0.30000000000000004, 0.6000000000000001, 0.6000000000000001, 0.6000000000000001]
[I 2024-02-21 03:44:32,227] Trial 14 finished with value: 77.62499999999999 and parameters: {'drop0': 0.7000000000000001, 'drop1': 0.6000000000000001, 'drop2': 0.4, 'drop3': 0.5, 'drop4': 0.5, 'drop5': 0.30000000000000004, 'drop6': 0.30000000000000004, 'drop7': 0.6000000000000001, 'drop8': 0.6000000000000001, 'drop9': 0.6000000000000001, 'adjdrop': 0.5}. Best is trial 11 with value: 70.485.
400 test loss: 0.19553, Val acc: 56.60, Test acc: 77.20
adjdrop: 0.8, drop: [0.4, 0.7000000000000001, 0.2, 0.2, 0.4, 0.30000000000000004, 0.5, 0.30000000000000004, 0.30000000000000004, 0.4]
[I 2024-02-21 03:45:14,955] Trial 15 finished with value: 76.49000000000001 and parameters: {'drop0': 0.4, 'drop1': 0.7000000000000001, 'drop2': 0.2, 'drop3': 0.2, 'drop4': 0.4, 'drop5': 0.30000000000000004, 'drop6': 0.5, 'drop7': 0.30000000000000004, 'drop8': 0.30000000000000004, 'drop9': 0.4, 'adjdrop': 0.8}. Best is trial 11 with value: 70.485.
400 test loss: 0.03849, Val acc: 56.60, Test acc: 76.10
adjdrop: 0.3, drop: [0.7000000000000001, 0.2, 0.5, 0.5, 0.6000000000000001, 0.5, 0.0, 0.5, 0.5, 0.7000000000000001]
[I 2024-02-21 03:45:58,882] Trial 16 finished with value: 77.47 and parameters: {'drop0': 0.7000000000000001, 'drop1': 0.2, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.6000000000000001, 'drop5': 0.5, 'drop6': 0.0, 'drop7': 0.5, 'drop8': 0.5, 'drop9': 0.7000000000000001, 'adjdrop': 0.30000000000000004}. Best is trial 11 with value: 70.485.
400 test loss: 0.34408, Val acc: 56.60, Test acc: 77.60
adjdrop: 0.2, drop: [0.5, 0.7000000000000001, 0.4, 0.1, 0.8, 0.1, 0.30000000000000004, 0.30000000000000004, 0.2, 0.8]
[I 2024-02-21 03:46:40,226] Trial 17 finished with value: 72.05 and parameters: {'drop0': 0.5, 'drop1': 0.7000000000000001, 'drop2': 0.4, 'drop3': 0.1, 'drop4': 0.8, 'drop5': 0.1, 'drop6': 0.30000000000000004, 'drop7': 0.30000000000000004, 'drop8': 0.2, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 11 with value: 70.485.
400 test loss: 0.68502, Val acc: 56.60, Test acc: 72.30
adjdrop: 0.5, drop: [0.30000000000000004, 0.5, 0.6000000000000001, 0.4, 0.30000000000000004, 0.8, 0.5, 0.8, 0.8, 0.6000000000000001]
[I 2024-02-21 03:47:23,578] Trial 18 finished with value: 69.10499999999999 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.4, 'drop4': 0.30000000000000004, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.6000000000000001, 'adjdrop': 0.5}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.16351, Val acc: 56.60, Test acc: 69.10
adjdrop: 0.5, drop: [0.2, 0.5, 0.2, 0.6000000000000001, 0.6000000000000001, 0.8, 0.8, 0.8, 0.8, 0.5]
[I 2024-02-21 03:48:05,607] Trial 19 finished with value: 71.66 and parameters: {'drop0': 0.2, 'drop1': 0.5, 'drop2': 0.2, 'drop3': 0.6000000000000001, 'drop4': 0.6000000000000001, 'drop5': 0.8, 'drop6': 0.8, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.5, 'adjdrop': 0.5}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.08178, Val acc: 56.60, Test acc: 71.20
adjdrop: 0.6, drop: [0.30000000000000004, 0.30000000000000004, 0.6000000000000001, 0.6000000000000001, 0.5, 0.8, 0.6000000000000001, 0.7000000000000001, 0.7000000000000001, 0.6000000000000001]
[I 2024-02-21 03:48:48,855] Trial 20 finished with value: 72.53999999999999 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.30000000000000004, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.5, 'drop5': 0.8, 'drop6': 0.6000000000000001, 'drop7': 0.7000000000000001, 'drop8': 0.7000000000000001, 'drop9': 0.6000000000000001, 'adjdrop': 0.6000000000000001}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.19398, Val acc: 56.60, Test acc: 74.10
adjdrop: 0.3, drop: [0.30000000000000004, 0.5, 0.6000000000000001, 0.4, 0.30000000000000004, 0.5, 0.5, 0.5, 0.6000000000000001, 0.7000000000000001]
[I 2024-02-21 03:49:32,241] Trial 21 finished with value: 71.29000000000002 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.4, 'drop4': 0.30000000000000004, 'drop5': 0.5, 'drop6': 0.5, 'drop7': 0.5, 'drop8': 0.6000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.30000000000000004}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.22999, Val acc: 56.60, Test acc: 71.20
adjdrop: 0.4, drop: [0.5, 0.7000000000000001, 0.8, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.4, 0.5, 0.30000000000000004, 0.7000000000000001]
[I 2024-02-21 03:50:16,414] Trial 22 finished with value: 75.23500000000001 and parameters: {'drop0': 0.5, 'drop1': 0.7000000000000001, 'drop2': 0.8, 'drop3': 0.30000000000000004, 'drop4': 0.30000000000000004, 'drop5': 0.30000000000000004, 'drop6': 0.4, 'drop7': 0.5, 'drop8': 0.30000000000000004, 'drop9': 0.7000000000000001, 'adjdrop': 0.4}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.25706, Val acc: 56.60, Test acc: 75.60
adjdrop: 0.6, drop: [0.2, 0.8, 0.7000000000000001, 0.4, 0.4, 0.6000000000000001, 0.4, 0.2, 0.7000000000000001, 0.8]
[I 2024-02-21 03:51:00,110] Trial 23 finished with value: 72.53999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.8, 'drop2': 0.7000000000000001, 'drop3': 0.4, 'drop4': 0.4, 'drop5': 0.6000000000000001, 'drop6': 0.4, 'drop7': 0.2, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.6000000000000001}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.67190, Val acc: 56.60, Test acc: 72.50
adjdrop: 0.5, drop: [0.7000000000000001, 0.6000000000000001, 0.5, 0.2, 0.2, 0.4, 0.7000000000000001, 0.2, 0.1, 0.5]
[I 2024-02-21 03:51:44,885] Trial 24 finished with value: 75.43500000000003 and parameters: {'drop0': 0.7000000000000001, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.2, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.7000000000000001, 'drop7': 0.2, 'drop8': 0.1, 'drop9': 0.5, 'adjdrop': 0.5}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.08696, Val acc: 56.60, Test acc: 76.70
adjdrop: 0.1, drop: [0.4, 0.7000000000000001, 0.4, 0.1, 0.7000000000000001, 0.1, 0.2, 0.4, 0.4, 0.6000000000000001]
[I 2024-02-21 03:52:29,141] Trial 25 finished with value: 74.32499999999999 and parameters: {'drop0': 0.4, 'drop1': 0.7000000000000001, 'drop2': 0.4, 'drop3': 0.1, 'drop4': 0.7000000000000001, 'drop5': 0.1, 'drop6': 0.2, 'drop7': 0.4, 'drop8': 0.4, 'drop9': 0.6000000000000001, 'adjdrop': 0.1}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.12698, Val acc: 56.60, Test acc: 73.80
adjdrop: 0.3, drop: [0.8, 0.5, 0.6000000000000001, 0.4, 0.4, 0.6000000000000001, 0.5, 0.7000000000000001, 0.8, 0.7000000000000001]
[I 2024-02-21 03:53:12,062] Trial 26 finished with value: 74.39500000000001 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.4, 'drop4': 0.4, 'drop5': 0.6000000000000001, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.7000000000000001, 'adjdrop': 0.30000000000000004}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.48133, Val acc: 56.60, Test acc: 73.60
adjdrop: 0.4, drop: [0.1, 0.8, 0.7000000000000001, 0.0, 0.30000000000000004, 0.1, 0.30000000000000004, 0.4, 0.5, 0.8]
[I 2024-02-21 03:53:54,600] Trial 27 finished with value: 74.66499999999999 and parameters: {'drop0': 0.1, 'drop1': 0.8, 'drop2': 0.7000000000000001, 'drop3': 0.0, 'drop4': 0.30000000000000004, 'drop5': 0.1, 'drop6': 0.30000000000000004, 'drop7': 0.4, 'drop8': 0.5, 'drop9': 0.8, 'adjdrop': 0.4}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.51588, Val acc: 56.60, Test acc: 75.40
adjdrop: 0.7, drop: [0.5, 0.6000000000000001, 0.2, 0.30000000000000004, 0.1, 0.30000000000000004, 0.2, 0.30000000000000004, 0.4, 0.4]
[I 2024-02-21 03:54:37,852] Trial 28 finished with value: 71.875 and parameters: {'drop0': 0.5, 'drop1': 0.6000000000000001, 'drop2': 0.2, 'drop3': 0.30000000000000004, 'drop4': 0.1, 'drop5': 0.30000000000000004, 'drop6': 0.2, 'drop7': 0.30000000000000004, 'drop8': 0.4, 'drop9': 0.4, 'adjdrop': 0.7000000000000001}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.05037, Val acc: 56.60, Test acc: 70.70
adjdrop: 0.1, drop: [0.6000000000000001, 0.30000000000000004, 0.5, 0.2, 0.5, 0.7000000000000001, 0.7000000000000001, 0.5, 0.8, 0.7000000000000001]
[I 2024-02-21 03:55:20,942] Trial 29 finished with value: 73.36999999999999 and parameters: {'drop0': 0.6000000000000001, 'drop1': 0.30000000000000004, 'drop2': 0.5, 'drop3': 0.2, 'drop4': 0.5, 'drop5': 0.7000000000000001, 'drop6': 0.7000000000000001, 'drop7': 0.5, 'drop8': 0.8, 'drop9': 0.7000000000000001, 'adjdrop': 0.1}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.32151, Val acc: 56.60, Test acc: 75.10
adjdrop: 0.6, drop: [0.4, 0.4, 0.30000000000000004, 0.6000000000000001, 0.7000000000000001, 0.4, 0.6000000000000001, 0.7000000000000001, 0.6000000000000001, 0.5]
[I 2024-02-21 03:56:03,584] Trial 30 finished with value: 74.15999999999998 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.30000000000000004, 'drop3': 0.6000000000000001, 'drop4': 0.7000000000000001, 'drop5': 0.4, 'drop6': 0.6000000000000001, 'drop7': 0.7000000000000001, 'drop8': 0.6000000000000001, 'drop9': 0.5, 'adjdrop': 0.6000000000000001}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.07741, Val acc: 56.60, Test acc: 74.00
adjdrop: 0.3, drop: [0.30000000000000004, 0.5, 0.6000000000000001, 0.4, 0.30000000000000004, 0.5, 0.5, 0.5, 0.6000000000000001, 0.7000000000000001]
[I 2024-02-21 03:56:47,967] Trial 31 finished with value: 73.00500000000001 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.4, 'drop4': 0.30000000000000004, 'drop5': 0.5, 'drop6': 0.5, 'drop7': 0.5, 'drop8': 0.6000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.30000000000000004}. Best is trial 18 with value: 69.10499999999999.
400 test loss: 0.24513, Val acc: 56.60, Test acc: 73.50
adjdrop: 0.2, drop: [0.30000000000000004, 0.5, 0.6000000000000001, 0.4, 0.2, 0.5, 0.5, 0.4, 0.7000000000000001, 0.8]
[I 2024-02-21 03:57:31,505] Trial 32 finished with value: 64.98 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.4, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.5, 'drop7': 0.4, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 32 with value: 64.98.
400 test loss: 0.51334, Val acc: 56.60, Test acc: 65.50
adjdrop: 0.2, drop: [0.1, 0.4, 0.5, 0.5, 0.0, 0.8, 0.4, 0.4, 0.7000000000000001, 0.8]
[I 2024-02-21 03:58:14,457] Trial 33 finished with value: 69.545 and parameters: {'drop0': 0.1, 'drop1': 0.4, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.4, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 32 with value: 64.98.
400 test loss: 0.59165, Val acc: 56.60, Test acc: 68.80
adjdrop: 0.0, drop: [0.1, 0.2, 0.5, 0.7000000000000001, 0.0, 0.8, 0.6000000000000001, 0.4, 0.7000000000000001, 0.6000000000000001]
[I 2024-02-21 03:58:57,738] Trial 34 finished with value: 73.14500000000001 and parameters: {'drop0': 0.1, 'drop1': 0.2, 'drop2': 0.5, 'drop3': 0.7000000000000001, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.6000000000000001, 'drop7': 0.4, 'drop8': 0.7000000000000001, 'drop9': 0.6000000000000001, 'adjdrop': 0.0}. Best is trial 32 with value: 64.98.
400 test loss: 0.12049, Val acc: 56.60, Test acc: 73.60
adjdrop: 0.5, drop: [0.0, 0.4, 0.4, 0.5, 0.0, 0.7000000000000001, 0.4, 0.4, 0.8, 0.8]
[I 2024-02-21 03:59:29,136] Trial 35 finished with value: 73.925 and parameters: {'drop0': 0.0, 'drop1': 0.4, 'drop2': 0.4, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.7000000000000001, 'drop6': 0.4, 'drop7': 0.4, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.5}. Best is trial 32 with value: 64.98.
400 test loss: 0.54662, Val acc: 56.60, Test acc: 73.50
adjdrop: 0.2, drop: [0.2, 0.30000000000000004, 0.4, 0.7000000000000001, 0.2, 0.7000000000000001, 0.4, 0.6000000000000001, 0.7000000000000001, 0.7000000000000001]
[I 2024-02-21 04:00:11,982] Trial 36 finished with value: 71.67500000000001 and parameters: {'drop0': 0.2, 'drop1': 0.30000000000000004, 'drop2': 0.4, 'drop3': 0.7000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.4, 'drop7': 0.6000000000000001, 'drop8': 0.7000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.2}. Best is trial 32 with value: 64.98.
400 test loss: 0.27778, Val acc: 56.60, Test acc: 71.90
adjdrop: 0.1, drop: [0.0, 0.2, 0.6000000000000001, 0.5, 0.1, 0.8, 0.30000000000000004, 0.5, 0.8, 0.8]
[I 2024-02-21 04:00:44,765] Trial 37 finished with value: 69.70500000000001 and parameters: {'drop0': 0.0, 'drop1': 0.2, 'drop2': 0.6000000000000001, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.30000000000000004, 'drop7': 0.5, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 32 with value: 64.98.
400 test loss: 0.59115, Val acc: 56.60, Test acc: 69.90
adjdrop: 0.1, drop: [0.0, 0.0, 0.7000000000000001, 0.5, 0.1, 0.8, 0.5, 0.7000000000000001, 0.8, 0.6000000000000001]
[I 2024-02-21 04:01:17,153] Trial 38 finished with value: 70.755 and parameters: {'drop0': 0.0, 'drop1': 0.0, 'drop2': 0.7000000000000001, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.6000000000000001, 'adjdrop': 0.1}. Best is trial 32 with value: 64.98.
400 test loss: 0.09482, Val acc: 56.60, Test acc: 71.30
adjdrop: 0.0, drop: [0.1, 0.1, 0.6000000000000001, 0.6000000000000001, 0.0, 0.7000000000000001, 0.2, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:02:03,457] Trial 39 finished with value: 67.69999999999999 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.0, 'drop5': 0.7000000000000001, 'drop6': 0.2, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 32 with value: 64.98.
400 test loss: 0.59128, Val acc: 56.60, Test acc: 67.30
adjdrop: 0.0, drop: [0.1, 0.0, 0.8, 0.6000000000000001, 0.0, 0.7000000000000001, 0.0, 0.8, 0.7000000000000001, 0.30000000000000004]
[I 2024-02-21 04:02:47,933] Trial 40 finished with value: 72.11 and parameters: {'drop0': 0.1, 'drop1': 0.0, 'drop2': 0.8, 'drop3': 0.6000000000000001, 'drop4': 0.0, 'drop5': 0.7000000000000001, 'drop6': 0.0, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.30000000000000004, 'adjdrop': 0.0}. Best is trial 32 with value: 64.98.
400 test loss: 0.01658, Val acc: 56.60, Test acc: 74.70
adjdrop: 0.0, drop: [0.0, 0.1, 0.6000000000000001, 0.5, 0.1, 0.8, 0.30000000000000004, 0.8, 0.8, 0.8]
[I 2024-02-21 04:03:21,864] Trial 41 finished with value: 64.51000000000002 and parameters: {'drop0': 0.0, 'drop1': 0.1, 'drop2': 0.6000000000000001, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.30000000000000004, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 41 with value: 64.51000000000002.
400 test loss: 0.58428, Val acc: 56.60, Test acc: 65.40
adjdrop: 0.0, drop: [0.1, 0.1, 0.6000000000000001, 0.6000000000000001, 0.0, 0.7000000000000001, 0.2, 0.8, 0.7000000000000001, 0.7000000000000001]
[I 2024-02-21 04:04:07,164] Trial 42 finished with value: 72.015 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.0, 'drop5': 0.7000000000000001, 'drop6': 0.2, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 41 with value: 64.51000000000002.
400 test loss: 0.25138, Val acc: 56.60, Test acc: 71.60
adjdrop: 0.0, drop: [0.2, 0.1, 0.7000000000000001, 0.4, 0.2, 0.6000000000000001, 0.1, 0.8, 0.8, 0.8]
[I 2024-02-21 04:04:51,323] Trial 43 finished with value: 71.01000000000002 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.7000000000000001, 'drop3': 0.4, 'drop4': 0.2, 'drop5': 0.6000000000000001, 'drop6': 0.1, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 41 with value: 64.51000000000002.
400 test loss: 0.58201, Val acc: 56.60, Test acc: 71.00
adjdrop: 0.2, drop: [0.0, 0.1, 0.5, 0.4, 0.1, 0.8, 0.6000000000000001, 0.8, 0.6000000000000001, 0.7000000000000001]
[I 2024-02-21 04:05:25,088] Trial 44 finished with value: 72.405 and parameters: {'drop0': 0.0, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.6000000000000001, 'drop7': 0.8, 'drop8': 0.6000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.2}. Best is trial 41 with value: 64.51000000000002.
400 test loss: 0.22291, Val acc: 56.60, Test acc: 72.60
adjdrop: 0.0, drop: [0.1, 0.5, 0.6000000000000001, 0.7000000000000001, 0.0, 0.7000000000000001, 0.2, 0.7000000000000001, 0.7000000000000001, 0.8]
[I 2024-02-21 04:06:09,855] Trial 45 finished with value: 70.71499999999999 and parameters: {'drop0': 0.1, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.7000000000000001, 'drop4': 0.0, 'drop5': 0.7000000000000001, 'drop6': 0.2, 'drop7': 0.7000000000000001, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 41 with value: 64.51000000000002.
400 test loss: 0.62409, Val acc: 56.60, Test acc: 68.60
adjdrop: 0.1, drop: [0.2, 0.4, 0.5, 0.30000000000000004, 0.2, 0.8, 0.4, 0.8, 0.8, 0.8]
[I 2024-02-21 04:06:56,273] Trial 46 finished with value: 64.41499999999999 and parameters: {'drop0': 0.2, 'drop1': 0.4, 'drop2': 0.5, 'drop3': 0.30000000000000004, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.80377, Val acc: 56.60, Test acc: 64.30
adjdrop: 0.1, drop: [0.30000000000000004, 0.1, 0.7000000000000001, 0.30000000000000004, 0.2, 0.6000000000000001, 0.30000000000000004, 0.7000000000000001, 0.8, 0.6000000000000001]
[I 2024-02-21 04:07:42,266] Trial 47 finished with value: 76.13000000000001 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.1, 'drop2': 0.7000000000000001, 'drop3': 0.30000000000000004, 'drop4': 0.2, 'drop5': 0.6000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.6000000000000001, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.10089, Val acc: 56.60, Test acc: 76.20
adjdrop: 0.1, drop: [0.2, 0.2, 0.6000000000000001, 0.30000000000000004, 0.2, 0.8, 0.5, 0.8, 0.8, 0.7000000000000001]
[I 2024-02-21 04:08:25,982] Trial 48 finished with value: 69.86500000000001 and parameters: {'drop0': 0.2, 'drop1': 0.2, 'drop2': 0.6000000000000001, 'drop3': 0.30000000000000004, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.7000000000000001, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.31363, Val acc: 56.60, Test acc: 69.90
adjdrop: 0.0, drop: [0.2, 0.0, 0.7000000000000001, 0.4, 0.1, 0.6000000000000001, 0.1, 0.8, 0.8, 0.8]
[I 2024-02-21 04:09:07,705] Trial 49 finished with value: 73.59 and parameters: {'drop0': 0.2, 'drop1': 0.0, 'drop2': 0.7000000000000001, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.6000000000000001, 'drop6': 0.1, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.60037, Val acc: 56.60, Test acc: 74.50
adjdrop: 0.1, drop: [0.30000000000000004, 0.5, 0.8, 0.2, 0.1, 0.7000000000000001, 0.2, 0.6000000000000001, 0.7000000000000001, 0.8]
[I 2024-02-21 04:09:53,072] Trial 50 finished with value: 74.67499999999998 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.2, 'drop4': 0.1, 'drop5': 0.7000000000000001, 'drop6': 0.2, 'drop7': 0.6000000000000001, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.48808, Val acc: 56.60, Test acc: 74.60
adjdrop: 0.2, drop: [0.1, 0.4, 0.5, 0.5, 0.0, 0.8, 0.4, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:10:38,405] Trial 51 finished with value: 71.22 and parameters: {'drop0': 0.1, 'drop1': 0.4, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.69472, Val acc: 56.60, Test acc: 71.40
adjdrop: 0.2, drop: [0.0, 0.4, 0.5, 0.5, 0.30000000000000004, 0.8, 0.30000000000000004, 0.7000000000000001, 0.6000000000000001, 0.8]
[I 2024-02-21 04:11:10,311] Trial 52 finished with value: 70.475 and parameters: {'drop0': 0.0, 'drop1': 0.4, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.30000000000000004, 'drop5': 0.8, 'drop6': 0.30000000000000004, 'drop7': 0.7000000000000001, 'drop8': 0.6000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.49485, Val acc: 56.60, Test acc: 70.20
adjdrop: 0.0, drop: [0.1, 0.30000000000000004, 0.6000000000000001, 0.4, 0.1, 0.8, 0.4, 0.8, 0.7000000000000001, 0.7000000000000001]
[I 2024-02-21 04:11:52,432] Trial 53 finished with value: 70.94000000000001 and parameters: {'drop0': 0.1, 'drop1': 0.30000000000000004, 'drop2': 0.6000000000000001, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.35681, Val acc: 56.60, Test acc: 71.70
adjdrop: 0.1, drop: [0.2, 0.6000000000000001, 0.5, 0.5, 0.2, 0.8, 0.5, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:12:37,416] Trial 54 finished with value: 68.89999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.50877, Val acc: 56.60, Test acc: 68.00
adjdrop: 0.1, drop: [0.2, 0.6000000000000001, 0.6000000000000001, 0.6000000000000001, 0.2, 0.7000000000000001, 0.5, 0.7000000000000001, 0.8, 0.0]
[I 2024-02-21 04:13:19,497] Trial 55 finished with value: 75.43499999999997 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.0, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.00315, Val acc: 56.60, Test acc: 75.60
adjdrop: 0.0, drop: [0.30000000000000004, 0.5, 0.5, 0.30000000000000004, 0.2, 0.7000000000000001, 0.5, 0.8, 0.8, 0.30000000000000004]
[I 2024-02-21 04:14:02,855] Trial 56 finished with value: 73.52500000000002 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.30000000000000004, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.5, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.30000000000000004, 'adjdrop': 0.0}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.04561, Val acc: 56.60, Test acc: 74.20
adjdrop: 0.1, drop: [0.2, 0.6000000000000001, 0.4, 0.30000000000000004, 0.4, 0.8, 0.6000000000000001, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:14:45,621] Trial 57 finished with value: 69.39999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.4, 'drop3': 0.30000000000000004, 'drop4': 0.4, 'drop5': 0.8, 'drop6': 0.6000000000000001, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.59066, Val acc: 56.60, Test acc: 70.20
adjdrop: 0.0, drop: [0.30000000000000004, 0.6000000000000001, 0.7000000000000001, 0.5, 0.30000000000000004, 0.8, 0.5, 0.6000000000000001, 0.8, 0.7000000000000001]
[I 2024-02-21 04:15:28,509] Trial 58 finished with value: 69.6 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.6000000000000001, 'drop2': 0.7000000000000001, 'drop3': 0.5, 'drop4': 0.30000000000000004, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.6000000000000001, 'drop8': 0.8, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.28443, Val acc: 56.60, Test acc: 68.90
adjdrop: 0.1, drop: [0.2, 0.5, 0.5, 0.6000000000000001, 0.2, 0.7000000000000001, 0.30000000000000004, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:16:12,002] Trial 59 finished with value: 68.49499999999999 and parameters: {'drop0': 0.2, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 46 with value: 64.41499999999999.
400 test loss: 0.53936, Val acc: 56.60, Test acc: 67.30
adjdrop: 0.1, drop: [0.2, 0.1, 0.5, 0.6000000000000001, 0.2, 0.5, 0.30000000000000004, 0.0, 0.6000000000000001, 0.8]
[I 2024-02-21 04:16:56,196] Trial 60 finished with value: 63.86 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.30000000000000004, 'drop7': 0.0, 'drop8': 0.6000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.71517, Val acc: 56.60, Test acc: 63.30
adjdrop: 0.1, drop: [0.2, 0.1, 0.5, 0.6000000000000001, 0.2, 0.5, 0.30000000000000004, 0.0, 0.6000000000000001, 0.8]
[I 2024-02-21 04:17:40,747] Trial 61 finished with value: 72.24 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.30000000000000004, 'drop7': 0.0, 'drop8': 0.6000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.50345, Val acc: 56.60, Test acc: 72.90
adjdrop: 0.1, drop: [0.2, 0.2, 0.4, 0.6000000000000001, 0.2, 0.6000000000000001, 0.30000000000000004, 0.1, 0.7000000000000001, 0.8]
[I 2024-02-21 04:18:23,341] Trial 62 finished with value: 70.35999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.2, 'drop2': 0.4, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.6000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.1, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.55727, Val acc: 56.60, Test acc: 70.40
adjdrop: 0.0, drop: [0.1, 0.0, 0.6000000000000001, 0.7000000000000001, 0.1, 0.5, 0.2, 0.1, 0.6000000000000001, 0.2]
[I 2024-02-21 04:19:05,649] Trial 63 finished with value: 76.17 and parameters: {'drop0': 0.1, 'drop1': 0.0, 'drop2': 0.6000000000000001, 'drop3': 0.7000000000000001, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.2, 'drop7': 0.1, 'drop8': 0.6000000000000001, 'drop9': 0.2, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.00701, Val acc: 56.60, Test acc: 76.60
adjdrop: 0.1, drop: [0.1, 0.1, 0.0, 0.8, 0.2, 0.4, 0.4, 0.2, 0.7000000000000001, 0.8]
[I 2024-02-21 04:19:46,778] Trial 64 finished with value: 69.965 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.8, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.4, 'drop7': 0.2, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.54519, Val acc: 56.60, Test acc: 70.00
adjdrop: 0.2, drop: [0.2, 0.2, 0.5, 0.7000000000000001, 0.1, 0.7000000000000001, 0.30000000000000004, 0.0, 0.5, 0.7000000000000001]
[I 2024-02-21 04:20:31,527] Trial 65 finished with value: 73.22000000000001 and parameters: {'drop0': 0.2, 'drop1': 0.2, 'drop2': 0.5, 'drop3': 0.7000000000000001, 'drop4': 0.1, 'drop5': 0.7000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.0, 'drop8': 0.5, 'drop9': 0.7000000000000001, 'adjdrop': 0.2}. Best is trial 60 with value: 63.86.
400 test loss: 0.27613, Val acc: 56.60, Test acc: 72.70
adjdrop: 0.0, drop: [0.30000000000000004, 0.6000000000000001, 0.5, 0.6000000000000001, 0.2, 0.5, 0.30000000000000004, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:21:15,723] Trial 66 finished with value: 67.53499999999998 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.30000000000000004, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.54980, Val acc: 56.60, Test acc: 67.80
adjdrop: 0.0, drop: [0.4, 0.1, 0.4, 0.6000000000000001, 0.30000000000000004, 0.5, 0.2, 0.8, 0.6000000000000001, 0.8]
[I 2024-02-21 04:22:00,529] Trial 67 finished with value: 75.09 and parameters: {'drop0': 0.4, 'drop1': 0.1, 'drop2': 0.4, 'drop3': 0.6000000000000001, 'drop4': 0.30000000000000004, 'drop5': 0.5, 'drop6': 0.2, 'drop7': 0.8, 'drop8': 0.6000000000000001, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.51652, Val acc: 56.60, Test acc: 75.30
adjdrop: 0.0, drop: [0.30000000000000004, 0.0, 0.5, 0.7000000000000001, 0.1, 0.5, 0.30000000000000004, 0.8, 0.7000000000000001, 0.7000000000000001]
[I 2024-02-21 04:22:45,882] Trial 68 finished with value: 74.105 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.0, 'drop2': 0.5, 'drop3': 0.7000000000000001, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.30000000000000004, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.22343, Val acc: 56.60, Test acc: 74.90
adjdrop: 0.1, drop: [0.4, 0.5, 0.1, 0.6000000000000001, 0.2, 0.4, 0.2, 0.8, 0.7000000000000001, 0.7000000000000001]
[I 2024-02-21 04:23:30,456] Trial 69 finished with value: 72.705 and parameters: {'drop0': 0.4, 'drop1': 0.5, 'drop2': 0.1, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.36847, Val acc: 56.60, Test acc: 72.60
adjdrop: 0.0, drop: [0.0, 0.7000000000000001, 0.6000000000000001, 0.6000000000000001, 0.30000000000000004, 0.6000000000000001, 0.30000000000000004, 0.7000000000000001, 0.6000000000000001, 0.8]
[I 2024-02-21 04:24:01,526] Trial 70 finished with value: 74.55499999999999 and parameters: {'drop0': 0.0, 'drop1': 0.7000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.30000000000000004, 'drop5': 0.6000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.7000000000000001, 'drop8': 0.6000000000000001, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.50479, Val acc: 56.60, Test acc: 74.60
adjdrop: 0.2, drop: [0.2, 0.6000000000000001, 0.5, 0.5, 0.2, 0.5, 0.4, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:24:45,914] Trial 71 finished with value: 75.55999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 60 with value: 63.86.
400 test loss: 0.55446, Val acc: 56.60, Test acc: 75.30
adjdrop: 0.1, drop: [0.30000000000000004, 0.7000000000000001, 0.4, 0.5, 0.2, 0.6000000000000001, 0.4, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:25:29,747] Trial 72 finished with value: 64.3 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.7000000000000001, 'drop2': 0.4, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.6000000000000001, 'drop6': 0.4, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.60815, Val acc: 56.60, Test acc: 65.10
adjdrop: 0.1, drop: [0.30000000000000004, 0.7000000000000001, 0.4, 0.6000000000000001, 0.1, 0.6000000000000001, 0.4, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:26:13,286] Trial 73 finished with value: 73.74999999999999 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.7000000000000001, 'drop2': 0.4, 'drop3': 0.6000000000000001, 'drop4': 0.1, 'drop5': 0.6000000000000001, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.53374, Val acc: 56.60, Test acc: 73.30
adjdrop: 0.0, drop: [0.30000000000000004, 0.7000000000000001, 0.4, 0.5, 0.30000000000000004, 0.6000000000000001, 0.30000000000000004, 0.7000000000000001, 0.8, 0.7000000000000001]
[I 2024-02-21 04:26:56,564] Trial 74 finished with value: 71.54 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.7000000000000001, 'drop2': 0.4, 'drop3': 0.5, 'drop4': 0.30000000000000004, 'drop5': 0.6000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.31795, Val acc: 56.60, Test acc: 72.40
adjdrop: 0.2, drop: [0.4, 0.5, 0.30000000000000004, 0.7000000000000001, 0.1, 0.5, 0.1, 0.6000000000000001, 0.7000000000000001, 0.8]
[I 2024-02-21 04:27:40,100] Trial 75 finished with value: 71.33500000000001 and parameters: {'drop0': 0.4, 'drop1': 0.5, 'drop2': 0.30000000000000004, 'drop3': 0.7000000000000001, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.6000000000000001, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 60 with value: 63.86.
400 test loss: 0.61884, Val acc: 56.60, Test acc: 71.30
adjdrop: 0.1, drop: [0.4, 0.7000000000000001, 0.6000000000000001, 0.5, 0.2, 0.6000000000000001, 0.4, 0.8, 0.8, 0.8]
[I 2024-02-21 04:28:23,594] Trial 76 finished with value: 69.035 and parameters: {'drop0': 0.4, 'drop1': 0.7000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.6000000000000001, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.66484, Val acc: 56.60, Test acc: 68.90
adjdrop: 0.3, drop: [0.30000000000000004, 0.8, 0.6000000000000001, 0.6000000000000001, 0.2, 0.7000000000000001, 0.30000000000000004, 0.30000000000000004, 0.30000000000000004, 0.8]
[I 2024-02-21 04:29:05,950] Trial 77 finished with value: 72.595 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.8, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.30000000000000004, 'drop8': 0.30000000000000004, 'drop9': 0.8, 'adjdrop': 0.30000000000000004}. Best is trial 60 with value: 63.86.
400 test loss: 0.61886, Val acc: 56.60, Test acc: 72.50
adjdrop: 0.0, drop: [0.2, 0.6000000000000001, 0.5, 0.4, 0.30000000000000004, 0.4, 0.4, 0.8, 0.8, 0.8]
[I 2024-02-21 04:29:48,584] Trial 78 finished with value: 73.72999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.4, 'drop4': 0.30000000000000004, 'drop5': 0.4, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.58137, Val acc: 56.60, Test acc: 73.20
adjdrop: 0.1, drop: [0.1, 0.30000000000000004, 0.4, 0.6000000000000001, 0.0, 0.5, 0.30000000000000004, 0.7000000000000001, 0.5, 0.7000000000000001]
[I 2024-02-21 04:30:30,948] Trial 79 finished with value: 76.015 and parameters: {'drop0': 0.1, 'drop1': 0.30000000000000004, 'drop2': 0.4, 'drop3': 0.6000000000000001, 'drop4': 0.0, 'drop5': 0.5, 'drop6': 0.30000000000000004, 'drop7': 0.7000000000000001, 'drop8': 0.5, 'drop9': 0.7000000000000001, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.27292, Val acc: 56.60, Test acc: 76.20
adjdrop: 0.0, drop: [0.0, 0.4, 0.30000000000000004, 0.5, 0.1, 0.6000000000000001, 0.2, 0.2, 0.2, 0.7000000000000001]
[I 2024-02-21 04:31:02,610] Trial 80 finished with value: 70.215 and parameters: {'drop0': 0.0, 'drop1': 0.4, 'drop2': 0.30000000000000004, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.6000000000000001, 'drop6': 0.2, 'drop7': 0.2, 'drop8': 0.2, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.23711, Val acc: 56.60, Test acc: 70.40
adjdrop: 0.1, drop: [0.2, 0.6000000000000001, 0.5, 0.5, 0.2, 0.7000000000000001, 0.4, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:31:45,804] Trial 81 finished with value: 66.41 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.4, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.57848, Val acc: 56.60, Test acc: 66.00
adjdrop: 0.1, drop: [0.30000000000000004, 0.6000000000000001, 0.5, 0.6000000000000001, 0.2, 0.7000000000000001, 0.4, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:32:28,975] Trial 82 finished with value: 73.54999999999998 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.4, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.58340, Val acc: 56.60, Test acc: 73.40
adjdrop: 0.2, drop: [0.2, 0.5, 0.5, 0.5, 0.2, 0.7000000000000001, 0.4, 0.6000000000000001, 0.7000000000000001, 0.8]
[I 2024-02-21 04:33:13,429] Trial 83 finished with value: 69.74000000000001 and parameters: {'drop0': 0.2, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.4, 'drop7': 0.6000000000000001, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 60 with value: 63.86.
400 test loss: 0.52769, Val acc: 56.60, Test acc: 70.30
adjdrop: 0.0, drop: [0.30000000000000004, 0.5, 0.4, 0.4, 0.30000000000000004, 0.6000000000000001, 0.30000000000000004, 0.8, 0.8, 0.8]
[I 2024-02-21 04:33:58,309] Trial 84 finished with value: 69.11000000000001 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.5, 'drop2': 0.4, 'drop3': 0.4, 'drop4': 0.30000000000000004, 'drop5': 0.6000000000000001, 'drop6': 0.30000000000000004, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.64582, Val acc: 56.60, Test acc: 68.40
adjdrop: 0.1, drop: [0.1, 0.7000000000000001, 0.6000000000000001, 0.7000000000000001, 0.2, 0.7000000000000001, 0.4, 0.7000000000000001, 0.0, 0.7000000000000001]
[I 2024-02-21 04:34:41,324] Trial 85 finished with value: 69.015 and parameters: {'drop0': 0.1, 'drop1': 0.7000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.7000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.4, 'drop7': 0.7000000000000001, 'drop8': 0.0, 'drop9': 0.7000000000000001, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.29188, Val acc: 56.60, Test acc: 68.20
adjdrop: 0.1, drop: [0.2, 0.1, 0.5, 0.5, 0.1, 0.5, 0.30000000000000004, 0.8, 0.7000000000000001, 0.8]
[I 2024-02-21 04:35:24,872] Trial 86 finished with value: 72.06499999999998 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.30000000000000004, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.64187, Val acc: 56.60, Test acc: 72.60
adjdrop: 0.0, drop: [0.30000000000000004, 0.6000000000000001, 0.6000000000000001, 0.6000000000000001, 0.1, 0.4, 0.2, 0.8, 0.8, 0.1]
[I 2024-02-21 04:36:10,645] Trial 87 finished with value: 75.44 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.6000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.1, 'adjdrop': 0.0}. Best is trial 60 with value: 63.86.
400 test loss: 0.01367, Val acc: 56.60, Test acc: 74.60
adjdrop: 0.3, drop: [0.1, 0.8, 0.5, 0.4, 0.30000000000000004, 0.7000000000000001, 0.1, 0.1, 0.8, 0.8]
[I 2024-02-21 04:36:53,134] Trial 88 finished with value: 69.06500000000001 and parameters: {'drop0': 0.1, 'drop1': 0.8, 'drop2': 0.5, 'drop3': 0.4, 'drop4': 0.30000000000000004, 'drop5': 0.7000000000000001, 'drop6': 0.1, 'drop7': 0.1, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.30000000000000004}. Best is trial 60 with value: 63.86.
400 test loss: 0.60348, Val acc: 56.60, Test acc: 69.70
adjdrop: 0.1, drop: [0.2, 0.2, 0.6000000000000001, 0.5, 0.2, 0.6000000000000001, 0.4, 0.8, 0.7000000000000001, 0.7000000000000001]
[I 2024-02-21 04:37:36,136] Trial 89 finished with value: 72.445 and parameters: {'drop0': 0.2, 'drop1': 0.2, 'drop2': 0.6000000000000001, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.6000000000000001, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.7000000000000001, 'drop9': 0.7000000000000001, 'adjdrop': 0.1}. Best is trial 60 with value: 63.86.
400 test loss: 0.26909, Val acc: 56.60, Test acc: 73.80
adjdrop: 0.8, drop: [0.4, 0.4, 0.4, 0.4, 0.0, 0.8, 0.30000000000000004, 0.7000000000000001, 0.6000000000000001, 0.8]
[I 2024-02-21 04:38:19,933] Trial 90 finished with value: 72.95500000000001 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.4, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.30000000000000004, 'drop7': 0.7000000000000001, 'drop8': 0.6000000000000001, 'drop9': 0.8, 'adjdrop': 0.8}. Best is trial 60 with value: 63.86.
400 test loss: 0.59014, Val acc: 56.60, Test acc: 72.50
adjdrop: 0.2, drop: [0.2, 0.6000000000000001, 0.5, 0.5, 0.2, 0.8, 0.5, 0.6000000000000001, 0.8, 0.8]
[I 2024-02-21 04:39:05,631] Trial 91 finished with value: 70.20500000000001 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.6000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.2}. Best is trial 60 with value: 63.86.
400 test loss: 0.56795, Val acc: 56.60, Test acc: 70.10
adjdrop: 0.1, drop: [0.2, 0.6000000000000001, 0.5, 0.6000000000000001, 0.2, 0.8, 0.5, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:39:51,159] Trial 92 finished with value: 63.59999999999999 and parameters: {'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.79534, Val acc: 56.60, Test acc: 64.20
adjdrop: 0.1, drop: [0.1, 0.6000000000000001, 0.5, 0.6000000000000001, 0.2, 0.7000000000000001, 0.5, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:40:34,814] Trial 93 finished with value: 68.91499999999999 and parameters: {'drop0': 0.1, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.7000000000000001, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.66920, Val acc: 56.60, Test acc: 69.30
adjdrop: 0.0, drop: [0.2, 0.5, 0.6000000000000001, 0.6000000000000001, 0.2, 0.8, 0.4, 0.8, 0.8, 0.8]
[I 2024-02-21 04:41:18,049] Trial 94 finished with value: 65.685 and parameters: {'drop0': 0.2, 'drop1': 0.5, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.54390, Val acc: 56.60, Test acc: 64.60
adjdrop: 0.0, drop: [0.30000000000000004, 0.7000000000000001, 0.7000000000000001, 0.6000000000000001, 0.1, 0.8, 0.5, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:42:03,640] Trial 95 finished with value: 71.215 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.7000000000000001, 'drop2': 0.7000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.57317, Val acc: 56.60, Test acc: 71.90
adjdrop: 0.0, drop: [0.2, 0.1, 0.6000000000000001, 0.7000000000000001, 0.2, 0.8, 0.4, 0.8, 0.8, 0.7000000000000001]
[I 2024-02-21 04:42:47,560] Trial 96 finished with value: 74.685 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.6000000000000001, 'drop3': 0.7000000000000001, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.8, 'drop8': 0.8, 'drop9': 0.7000000000000001, 'adjdrop': 0.0}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.26603, Val acc: 56.60, Test acc: 74.20
adjdrop: 0.0, drop: [0.30000000000000004, 0.6000000000000001, 0.6000000000000001, 0.6000000000000001, 0.4, 0.8, 0.4, 0.5, 0.8, 0.4]
[I 2024-02-21 04:43:32,205] Trial 97 finished with value: 77.755 and parameters: {'drop0': 0.30000000000000004, 'drop1': 0.6000000000000001, 'drop2': 0.6000000000000001, 'drop3': 0.6000000000000001, 'drop4': 0.4, 'drop5': 0.8, 'drop6': 0.4, 'drop7': 0.5, 'drop8': 0.8, 'drop9': 0.4, 'adjdrop': 0.0}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.03519, Val acc: 56.60, Test acc: 77.10
adjdrop: 0.0, drop: [0.0, 0.6000000000000001, 0.7000000000000001, 0.30000000000000004, 0.30000000000000004, 0.8, 0.6000000000000001, 0.7000000000000001, 0.8, 0.8]
[I 2024-02-21 04:44:05,163] Trial 98 finished with value: 68.65 and parameters: {'drop0': 0.0, 'drop1': 0.6000000000000001, 'drop2': 0.7000000000000001, 'drop3': 0.30000000000000004, 'drop4': 0.30000000000000004, 'drop5': 0.8, 'drop6': 0.6000000000000001, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.53219, Val acc: 56.60, Test acc: 67.80
adjdrop: 0.0, drop: [0.1, 0.0, 0.6000000000000001, 0.5, 0.1, 0.8, 0.5, 0.30000000000000004, 0.8, 0.8]
[I 2024-02-21 04:44:49,979] Trial 99 finished with value: 64.08500000000001 and parameters: {'drop0': 0.1, 'drop1': 0.0, 'drop2': 0.6000000000000001, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.30000000000000004, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.0}. Best is trial 92 with value: 63.59999999999999.
400 test loss: 0.49386, Val acc: 56.60, Test acc: 64.00
{'drop0': 0.2, 'drop1': 0.6000000000000001, 'drop2': 0.5, 'drop3': 0.6000000000000001, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.5, 'drop7': 0.7000000000000001, 'drop8': 0.8, 'drop9': 0.8, 'adjdrop': 0.1}
'''

In [None]:
# @title optuna result act 0,1,2,1,0,1,2,0
        # act=[0,1,2,1,0,1,2,0]
# agg 2

[I 2024-02-11 16:10:13,282] A new study created in memory with name: no-name-671d55c5-0c93-4f92-9c31-feb69596b11f
adjdrop: 0.6, drop: [0.1, 0.6, 0.8, 0.1, 0.5, 0.4, 0.8, 0.6]
[I 2024-02-11 16:10:44,544] Trial 0 finished with value: 80.8 and parameters: {'drop0': 0.1, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.1, 'drop4': 0.5, 'drop5': 0.4, 'drop6': 0.8, 'drop7': 0.6, 'adjdrop': 0.6}
200 test loss: 0.25408, Val acc: 78.00, Test acc: 80.80
adjdrop: 0.0, drop: [0.2, 0.4, 0.1, 0.2, 0.4, 0.2, 0.4, 0.3]
[I 2024-02-11 16:11:17,546] Trial 1 finished with value: 75.0 and parameters: {'drop0': 0.2, 'drop1': 0.4, 'drop2': 0.1, 'drop3': 0.2, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.3, 'adjdrop': 0.0}
200 test loss: 0.00377, Val acc: 74.00, Test acc: 75.00
adjdrop: 0.2, drop: [0.5, 0.6, 0.7, 0.6, 0.3, 0.3, 0.5, 0.2]
[I 2024-02-11 16:11:50,141] Trial 2 finished with value: 73.1 and parameters: {'drop0': 0.5, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.2, 'adjdrop': 0.2}
200 test loss: 0.08939, Val acc: 72.60, Test acc: 73.10
adjdrop: 0.0, drop: [0.8, 0.2, 0.7, 0.1, 0.6, 0.2, 0.4, 0.8]
[I 2024-02-11 16:12:22,656] Trial 3 finished with value: 78.5 and parameters: {'drop0': 0.8, 'drop1': 0.2, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.6, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.0}
200 test loss: 0.03620, Val acc: 77.40, Test acc: 78.50
adjdrop: 0.6, drop: [0.8, 0.7, 0.8, 0.2, 0.8, 0.4, 0.8, 0.0]
[I 2024-02-11 16:12:55,588] Trial 4 finished with value: 76.6 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.2, 'drop4': 0.8, 'drop5': 0.4, 'drop6': 0.8, 'drop7': 0.0, 'adjdrop': 0.6}
200 test loss: 0.01527, Val acc: 75.80, Test acc: 76.60
adjdrop: 0.1, drop: [0.7, 0.6, 0.3, 0.5, 0.8, 0.7, 0.8, 0.4]
[I 2024-02-11 16:13:28,884] Trial 5 finished with value: 77.2 and parameters: {'drop0': 0.7, 'drop1': 0.6, 'drop2': 0.3, 'drop3': 0.5, 'drop4': 0.8, 'drop5': 0.7, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.1}
200 test loss: 0.04575, Val acc: 76.40, Test acc: 77.20
adjdrop: 0.5, drop: [0.1, 0.4, 0.2, 0.8, 0.0, 0.1, 0.7, 0.2]
[I 2024-02-11 16:14:02,937] Trial 6 finished with value: 76.9 and parameters: {'drop0': 0.1, 'drop1': 0.4, 'drop2': 0.2, 'drop3': 0.8, 'drop4': 0.0, 'drop5': 0.1, 'drop6': 0.7, 'drop7': 0.2, 'adjdrop': 0.5}
200 test loss: 0.13221, Val acc: 77.40, Test acc: 76.90
adjdrop: 0.7, drop: [0.6, 0.5, 0.0, 0.0, 0.5, 0.6, 0.7, 0.8]
[I 2024-02-11 16:14:29,033] Trial 7 finished with value: 73.3 and parameters: {'drop0': 0.6, 'drop1': 0.5, 'drop2': 0.0, 'drop3': 0.0, 'drop4': 0.5, 'drop5': 0.6, 'drop6': 0.7, 'drop7': 0.8, 'adjdrop': 0.7}
200 test loss: 0.87074, Val acc: 71.20, Test acc: 73.30
adjdrop: 0.1, drop: [0.5, 0.0, 0.5, 0.6, 0.0, 0.3, 0.0, 0.4]
[I 2024-02-11 16:14:55,088] Trial 8 finished with value: 74.9 and parameters: {'drop0': 0.5, 'drop1': 0.0, 'drop2': 0.5, 'drop3': 0.6, 'drop4': 0.0, 'drop5': 0.3, 'drop6': 0.0, 'drop7': 0.4, 'adjdrop': 0.1}
200 test loss: 0.00662, Val acc: 74.00, Test acc: 74.90
adjdrop: 0.1, drop: [0.1, 0.8, 0.5, 0.0, 0.4, 0.7, 0.7, 0.4]
[I 2024-02-11 16:15:22,965] Trial 9 finished with value: 76.4 and parameters: {'drop0': 0.1, 'drop1': 0.8, 'drop2': 0.5, 'drop3': 0.0, 'drop4': 0.4, 'drop5': 0.7, 'drop6': 0.7, 'drop7': 0.4, 'adjdrop': 0.1}
200 test loss: 0.03664, Val acc: 74.00, Test acc: 76.40
adjdrop: 0.8, drop: [0.3, 0.2, 0.6, 0.3, 0.2, 0.5, 0.1, 0.6]
[I 2024-02-11 16:15:52,372] Trial 10 finished with value: 72.3 and parameters: {'drop0': 0.3, 'drop1': 0.2, 'drop2': 0.6, 'drop3': 0.3, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.6, 'adjdrop': 0.8}
200 test loss: 0.32802, Val acc: 73.00, Test acc: 72.30
adjdrop: 0.4, drop: [0.0, 0.2, 0.8, 0.1, 0.6, 0.1, 0.3, 0.8]
[I 2024-02-11 16:16:19,145] Trial 11 finished with value: 77.4 and parameters: {'drop0': 0.0, 'drop1': 0.2, 'drop2': 0.8, 'drop3': 0.1, 'drop4': 0.6, 'drop5': 0.1, 'drop6': 0.3, 'drop7': 0.8, 'adjdrop': 0.4}
200 test loss: 0.33177, Val acc: 76.40, Test acc: 77.40
adjdrop: 0.3, drop: [0.3, 0.2, 0.7, 0.3, 0.6, 0.0, 0.5, 0.7]
[I 2024-02-11 16:16:49,565] Trial 12 finished with value: 74.5 and parameters: {'drop0': 0.3, 'drop1': 0.2, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.6, 'drop5': 0.0, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.3}
200 test loss: 0.16716, Val acc: 71.20, Test acc: 74.50
adjdrop: 0.5, drop: [0.8, 0.0, 0.6, 0.1, 0.6, 0.4, 0.2, 0.6]
[I 2024-02-11 16:17:15,539] Trial 13 finished with value: 77.7 and parameters: {'drop0': 0.8, 'drop1': 0.0, 'drop2': 0.6, 'drop3': 0.1, 'drop4': 0.6, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.6, 'adjdrop': 0.5}
200 test loss: 0.22140, Val acc: 76.80, Test acc: 77.70
adjdrop: 0.3, drop: [0.3, 0.3, 0.8, 0.4, 0.7, 0.2, 0.5, 0.6]
[I 2024-02-11 16:17:45,528] Trial 14 finished with value: 77.9 and parameters: {'drop0': 0.3, 'drop1': 0.3, 'drop2': 0.8, 'drop3': 0.4, 'drop4': 0.7, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.3}
200 test loss: 0.15415, Val acc: 78.00, Test acc: 77.90
adjdrop: 0.8, drop: [0.5, 0.8, 0.4, 0.1, 0.2, 0.5, 0.3, 0.7]
[I 2024-02-11 16:18:14,812] Trial 15 finished with value: 69.4 and parameters: {'drop0': 0.5, 'drop1': 0.8, 'drop2': 0.4, 'drop3': 0.1, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.8}
200 test loss: 0.74301, Val acc: 67.40, Test acc: 69.40
adjdrop: 0.6, drop: [0.0, 0.1, 0.7, 0.3, 0.5, 0.8, 0.6, 0.7]
[I 2024-02-11 16:18:41,512] Trial 16 finished with value: 76.7 and parameters: {'drop0': 0.0, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.5, 'drop5': 0.8, 'drop6': 0.6, 'drop7': 0.7, 'adjdrop': 0.6}
200 test loss: 0.47513, Val acc: 74.60, Test acc: 76.70
adjdrop: 0.4, drop: [0.4, 0.5, 0.6, 0.0, 0.7, 0.3, 0.3, 0.5]
[I 2024-02-11 16:19:07,792] Trial 17 finished with value: 78.2 and parameters: {'drop0': 0.4, 'drop1': 0.5, 'drop2': 0.6, 'drop3': 0.0, 'drop4': 0.7, 'drop5': 0.3, 'drop6': 0.3, 'drop7': 0.5, 'adjdrop': 0.4}
200 test loss: 0.13570, Val acc: 77.20, Test acc: 78.20
adjdrop: 0.0, drop: [0.7, 0.3, 0.5, 0.2, 0.5, 0.0, 0.4, 0.8]
[I 2024-02-11 16:19:38,124] Trial 18 finished with value: 79.1 and parameters: {'drop0': 0.7, 'drop1': 0.3, 'drop2': 0.5, 'drop3': 0.2, 'drop4': 0.5, 'drop5': 0.0, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.0}
200 test loss: 0.03530, Val acc: 78.80, Test acc: 79.10
adjdrop: 0.6, drop: [0.6, 0.4, 0.4, 0.2, 0.3, 0.0, 0.6, 0.5]
[I 2024-02-11 16:20:09,582] Trial 19 finished with value: 78.2 and parameters: {'drop0': 0.6, 'drop1': 0.4, 'drop2': 0.4, 'drop3': 0.2, 'drop4': 0.3, 'drop5': 0.0, 'drop6': 0.6, 'drop7': 0.5, 'adjdrop': 0.6}
200 test loss: 0.17559, Val acc: 77.40, Test acc: 78.20
adjdrop: 0.3, drop: [0.2, 0.6, 0.3, 0.4, 0.5, 0.1, 0.2, 0.7]
[I 2024-02-11 16:20:38,717] Trial 20 finished with value: 78.2 and parameters: {'drop0': 0.2, 'drop1': 0.6, 'drop2': 0.3, 'drop3': 0.4, 'drop4': 0.5, 'drop5': 0.1, 'drop6': 0.2, 'drop7': 0.7, 'adjdrop': 0.3}
200 test loss: 0.12911, Val acc: 76.40, Test acc: 78.20
adjdrop: 0.0, drop: [0.7, 0.3, 0.5, 0.1, 0.4, 0.2, 0.4, 0.8]
[I 2024-02-11 16:21:09,794] Trial 21 finished with value: 78.2 and parameters: {'drop0': 0.7, 'drop1': 0.3, 'drop2': 0.5, 'drop3': 0.1, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.0}
200 test loss: 0.03716, Val acc: 77.60, Test acc: 78.20
adjdrop: 0.0, drop: [0.7, 0.3, 0.7, 0.2, 0.7, 0.0, 0.4, 0.8]
[I 2024-02-11 16:21:39,890] Trial 22 finished with value: 79.0 and parameters: {'drop0': 0.7, 'drop1': 0.3, 'drop2': 0.7, 'drop3': 0.2, 'drop4': 0.7, 'drop5': 0.0, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.0}
200 test loss: 0.03268, Val acc: 78.20, Test acc: 79.00
adjdrop: 0.2, drop: [0.7, 0.3, 0.8, 0.2, 0.7, 0.0, 0.6, 0.6]
[I 2024-02-11 16:22:11,351] Trial 23 finished with value: 77.8 and parameters: {'drop0': 0.7, 'drop1': 0.3, 'drop2': 0.8, 'drop3': 0.2, 'drop4': 0.7, 'drop5': 0.0, 'drop6': 0.6, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.12109, Val acc: 76.40, Test acc: 77.80
adjdrop: 0.2, drop: [0.6, 0.5, 0.6, 0.3, 0.5, 0.0, 0.2, 0.7]
[I 2024-02-11 16:22:43,633] Trial 24 finished with value: 78.1 and parameters: {'drop0': 0.6, 'drop1': 0.5, 'drop2': 0.6, 'drop3': 0.3, 'drop4': 0.5, 'drop5': 0.0, 'drop6': 0.2, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.08548, Val acc: 76.20, Test acc: 78.10
adjdrop: 0.7, drop: [0.4, 0.3, 0.7, 0.2, 0.8, 0.1, 0.4, 0.8]
[I 2024-02-11 16:23:13,429] Trial 25 finished with value: 75.3 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.7, 'drop3': 0.2, 'drop4': 0.8, 'drop5': 0.1, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.7}
200 test loss: 0.78887, Val acc: 73.60, Test acc: 75.30
adjdrop: 0.1, drop: [0.7, 0.1, 0.5, 0.0, 0.7, 0.5, 0.5, 0.5]
[I 2024-02-11 16:23:39,398] Trial 26 finished with value: 77.1 and parameters: {'drop0': 0.7, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.0, 'drop4': 0.7, 'drop5': 0.5, 'drop6': 0.5, 'drop7': 0.5, 'adjdrop': 0.1}
200 test loss: 0.03586, Val acc: 75.60, Test acc: 77.10
adjdrop: 0.0, drop: [0.6, 0.7, 0.8, 0.5, 0.3, 0.1, 0.3, 0.7]
[I 2024-02-11 16:24:09,245] Trial 27 finished with value: 79.4 and parameters: {'drop0': 0.6, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.5, 'drop4': 0.3, 'drop5': 0.1, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.0}
200 test loss: 0.02271, Val acc: 77.80, Test acc: 79.40
adjdrop: 0.5, drop: [0.6, 0.7, 0.3, 0.5, 0.3, 0.1, 0.1, 0.6]
[I 2024-02-11 16:24:38,578] Trial 28 finished with value: 78.6 and parameters: {'drop0': 0.6, 'drop1': 0.7, 'drop2': 0.3, 'drop3': 0.5, 'drop4': 0.3, 'drop5': 0.1, 'drop6': 0.1, 'drop7': 0.6, 'adjdrop': 0.5}
200 test loss: 0.22962, Val acc: 76.20, Test acc: 78.60
adjdrop: 0.0, drop: [0.5, 0.7, 0.0, 0.8, 0.1, 0.3, 0.3, 0.7]
[I 2024-02-11 16:25:08,356] Trial 29 finished with value: 77.1 and parameters: {'drop0': 0.5, 'drop1': 0.7, 'drop2': 0.0, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.0}
200 test loss: 0.01096, Val acc: 75.60, Test acc: 77.10
adjdrop: 0.7, drop: [0.2, 0.8, 0.8, 0.5, 0.4, 0.6, 0.1, 0.5]
[I 2024-02-11 16:25:38,681] Trial 30 finished with value: 71.2 and parameters: {'drop0': 0.2, 'drop1': 0.8, 'drop2': 0.8, 'drop3': 0.5, 'drop4': 0.4, 'drop5': 0.6, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.7}
200 test loss: 0.35870, Val acc: 69.40, Test acc: 71.20
adjdrop: 0.0, drop: [0.7, 0.5, 0.7, 0.6, 0.2, 0.0, 0.4, 0.8]
[I 2024-02-11 16:26:08,887] Trial 31 finished with value: 79.5 and parameters: {'drop0': 0.7, 'drop1': 0.5, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.0, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.0}
200 test loss: 0.02913, Val acc: 79.80, Test acc: 79.50
adjdrop: 0.0, drop: [0.8, 0.6, 0.8, 0.7, 0.2, 0.1, 0.5, 0.7]
[I 2024-02-11 16:26:39,184] Trial 32 finished with value: 79.7 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.1, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.0}
200 test loss: 0.06829, Val acc: 78.80, Test acc: 79.70
adjdrop: 0.1, drop: [0.8, 0.6, 0.8, 0.7, 0.2, 0.2, 0.6, 0.7]
[I 2024-02-11 16:27:09,493] Trial 33 finished with value: 77.2 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.2, 'drop6': 0.6, 'drop7': 0.7, 'adjdrop': 0.1}
200 test loss: 0.10092, Val acc: 78.00, Test acc: 77.20
adjdrop: 0.0, drop: [0.8, 0.6, 0.7, 0.6, 0.1, 0.1, 0.8, 0.0]
[I 2024-02-11 16:27:39,626] Trial 34 finished with value: 77.0 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.8, 'drop7': 0.0, 'adjdrop': 0.0}
200 test loss: 0.01449, Val acc: 75.80, Test acc: 77.00
adjdrop: 0.2, drop: [0.8, 0.7, 0.8, 0.7, 0.3, 0.2, 0.5, 0.6]
[I 2024-02-11 16:28:10,495] Trial 35 finished with value: 80.3 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.3, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.13920, Val acc: 79.00, Test acc: 80.30
adjdrop: 0.2, drop: [0.8, 0.5, 0.7, 0.7, 0.1, 0.3, 0.5, 0.3]
[I 2024-02-11 16:28:39,698] Trial 36 finished with value: 75.9 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.7, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.3, 'adjdrop': 0.2}
200 test loss: 0.04383, Val acc: 76.40, Test acc: 75.90
adjdrop: 0.1, drop: [0.8, 0.6, 0.8, 0.7, 0.2, 0.4, 0.7, 0.1]
[I 2024-02-11 16:29:10,242] Trial 37 finished with value: 78.9 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.7, 'drop7': 0.1, 'adjdrop': 0.1}
200 test loss: 0.04633, Val acc: 77.60, Test acc: 78.90
adjdrop: 0.2, drop: [0.1, 0.4, 0.1, 0.6, 0.3, 0.2, 0.5, 0.6]
[I 2024-02-11 16:29:39,476] Trial 38 finished with value: 73.9 and parameters: {'drop0': 0.1, 'drop1': 0.4, 'drop2': 0.1, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.02664, Val acc: 74.80, Test acc: 73.90
adjdrop: 0.4, drop: [0.8, 0.7, 0.7, 0.8, 0.1, 0.3, 0.8, 0.4]
[I 2024-02-11 16:30:10,069] Trial 39 finished with value: 79.0 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.4}
200 test loss: 0.19923, Val acc: 78.00, Test acc: 79.00
adjdrop: 0.1, drop: [0.7, 0.5, 0.6, 0.7, 0.4, 0.2, 0.7, 0.3]
[I 2024-02-11 16:30:39,440] Trial 40 finished with value: 77.9 and parameters: {'drop0': 0.7, 'drop1': 0.5, 'drop2': 0.6, 'drop3': 0.7, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.7, 'drop7': 0.3, 'adjdrop': 0.1}
200 test loss: 0.03283, Val acc: 77.80, Test acc: 77.90
adjdrop: 0.0, drop: [0.6, 0.7, 0.8, 0.5, 0.3, 0.1, 0.3, 0.7]
[I 2024-02-11 16:31:12,540] Trial 41 finished with value: 79.4 and parameters: {'drop0': 0.6, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.5, 'drop4': 0.3, 'drop5': 0.1, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.0}
200 test loss: 0.02363, Val acc: 78.20, Test acc: 79.40
adjdrop: 0.1, drop: [0.7, 0.7, 0.8, 0.6, 0.3, 0.1, 0.5, 0.8]
[I 2024-02-11 16:31:42,621] Trial 42 finished with value: 79.8 and parameters: {'drop0': 0.7, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.1, 'drop6': 0.5, 'drop7': 0.8, 'adjdrop': 0.1}
200 test loss: 0.11428, Val acc: 76.80, Test acc: 79.80
adjdrop: 0.1, drop: [0.7, 0.6, 0.8, 0.7, 0.2, 0.4, 0.6, 0.8]
[I 2024-02-11 16:32:12,999] Trial 43 finished with value: 79.2 and parameters: {'drop0': 0.7, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.6, 'drop7': 0.8, 'adjdrop': 0.1}
200 test loss: 0.07560, Val acc: 78.40, Test acc: 79.20
adjdrop: 0.2, drop: [0.8, 0.6, 0.7, 0.6, 0.4, 0.2, 0.4, 0.8]
[I 2024-02-11 16:32:43,135] Trial 44 finished with value: 79.6 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.16697, Val acc: 77.00, Test acc: 79.60
adjdrop: 0.3, drop: [0.8, 0.8, 0.8, 0.8, 0.4, 0.2, 0.5, 0.6]
[I 2024-02-11 16:33:12,860] Trial 45 finished with value: 76.9 and parameters: {'drop0': 0.8, 'drop1': 0.8, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.3}
200 test loss: 0.30127, Val acc: 75.40, Test acc: 76.90
adjdrop: 0.2, drop: [0.8, 0.7, 0.7, 0.6, 0.3, 0.3, 0.5, 0.8]
[I 2024-02-11 16:33:42,626] Trial 46 finished with value: 79.3 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.21200, Val acc: 80.20, Test acc: 79.30
adjdrop: 0.3, drop: [0.8, 0.8, 0.8, 0.4, 0.4, 0.2, 0.7, 0.7]
[I 2024-02-11 16:34:12,234] Trial 47 finished with value: 77.8 and parameters: {'drop0': 0.8, 'drop1': 0.8, 'drop2': 0.8, 'drop3': 0.4, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.7, 'drop7': 0.7, 'adjdrop': 0.3}
200 test loss: 0.16903, Val acc: 77.00, Test acc: 77.80
adjdrop: 0.1, drop: [0.1, 0.6, 0.6, 0.7, 0.3, 0.4, 0.6, 0.5]
[I 2024-02-11 16:34:41,547] Trial 48 finished with value: 79.0 and parameters: {'drop0': 0.1, 'drop1': 0.6, 'drop2': 0.6, 'drop3': 0.7, 'drop4': 0.3, 'drop5': 0.4, 'drop6': 0.6, 'drop7': 0.5, 'adjdrop': 0.1}
200 test loss: 0.01973, Val acc: 76.80, Test acc: 79.00
adjdrop: 0.4, drop: [0.3, 0.7, 0.8, 0.8, 0.5, 0.6, 0.5, 0.6]
[I 2024-02-11 16:35:12,184] Trial 49 finished with value: 77.6 and parameters: {'drop0': 0.3, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.5, 'drop5': 0.6, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.4}
200 test loss: 0.26607, Val acc: 76.20, Test acc: 77.60
adjdrop: 0.6, drop: [0.0, 0.6, 0.7, 0.6, 0.4, 0.1, 0.4, 0.8]
[I 2024-02-11 16:35:39,082] Trial 50 finished with value: 75.1 and parameters: {'drop0': 0.0, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.1, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.6}
200 test loss: 0.72162, Val acc: 74.00, Test acc: 75.10
adjdrop: 0.1, drop: [0.7, 0.5, 0.7, 0.6, 0.2, 0.0, 0.4, 0.8]
[I 2024-02-11 16:36:09,096] Trial 51 finished with value: 76.1 and parameters: {'drop0': 0.7, 'drop1': 0.5, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.0, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.1}
200 test loss: 0.08010, Val acc: 75.00, Test acc: 76.10
adjdrop: 0.2, drop: [0.8, 0.5, 0.7, 0.6, 0.2, 0.1, 0.4, 0.8]
[I 2024-02-11 16:36:40,555] Trial 52 finished with value: 79.9 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.1, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.17477, Val acc: 76.20, Test acc: 79.90
adjdrop: 0.2, drop: [0.8, 0.6, 0.8, 0.7, 0.0, 0.2, 0.5, 0.7]
[I 2024-02-11 16:37:12,770] Trial 53 finished with value: 80.1 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.16912, Val acc: 76.20, Test acc: 80.10
adjdrop: 0.3, drop: [0.8, 0.4, 0.8, 0.7, 0.0, 0.1, 0.6, 0.7]
[I 2024-02-11 16:37:45,221] Trial 54 finished with value: 78.9 and parameters: {'drop0': 0.8, 'drop1': 0.4, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.0, 'drop5': 0.1, 'drop6': 0.6, 'drop7': 0.7, 'adjdrop': 0.3}
200 test loss: 0.20938, Val acc: 78.20, Test acc: 78.90
adjdrop: 0.2, drop: [0.7, 0.6, 0.8, 0.7, 0.2, 0.1, 0.5, 0.6]
[I 2024-02-11 16:38:16,454] Trial 55 finished with value: 78.3 and parameters: {'drop0': 0.7, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.1, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.13094, Val acc: 79.20, Test acc: 78.30
adjdrop: 0.3, drop: [0.8, 0.5, 0.8, 0.7, 0.0, 0.2, 0.6, 0.7]
[I 2024-02-11 16:38:49,305] Trial 56 finished with value: 77.2 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.6, 'drop7': 0.7, 'adjdrop': 0.3}
200 test loss: 0.23956, Val acc: 76.20, Test acc: 77.20
adjdrop: 0.2, drop: [0.8, 0.7, 0.8, 0.8, 0.1, 0.3, 0.5, 0.7]
[I 2024-02-11 16:39:21,710] Trial 57 finished with value: 79.6 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.21545, Val acc: 78.60, Test acc: 79.60
adjdrop: 0.1, drop: [0.4, 0.8, 0.7, 0.6, 0.1, 0.8, 0.6, 0.6]
[I 2024-02-11 16:39:54,874] Trial 58 finished with value: 78.0 and parameters: {'drop0': 0.4, 'drop1': 0.8, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.6, 'drop7': 0.6, 'adjdrop': 0.1}
200 test loss: 0.03400, Val acc: 75.20, Test acc: 78.00
adjdrop: 0.4, drop: [0.5, 0.6, 0.6, 0.4, 0.3, 0.1, 0.7, 0.4]
[I 2024-02-11 16:40:27,528] Trial 59 finished with value: 78.3 and parameters: {'drop0': 0.5, 'drop1': 0.6, 'drop2': 0.6, 'drop3': 0.4, 'drop4': 0.3, 'drop5': 0.1, 'drop6': 0.7, 'drop7': 0.4, 'adjdrop': 0.4}
200 test loss: 0.13454, Val acc: 76.40, Test acc: 78.30
adjdrop: 0.5, drop: [0.7, 0.7, 0.2, 0.5, 0.6, 0.5, 0.0, 0.5]
[I 2024-02-11 16:40:59,688] Trial 60 finished with value: 74.4 and parameters: {'drop0': 0.7, 'drop1': 0.7, 'drop2': 0.2, 'drop3': 0.5, 'drop4': 0.6, 'drop5': 0.5, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.5}
200 test loss: 0.25392, Val acc: 74.80, Test acc: 74.40
adjdrop: 0.2, drop: [0.8, 0.6, 0.7, 0.6, 0.4, 0.2, 0.4, 0.8]
[I 2024-02-11 16:41:32,240] Trial 61 finished with value: 78.0 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.26177, Val acc: 78.00, Test acc: 78.00
adjdrop: 0.2, drop: [0.8, 0.5, 0.8, 0.6, 0.5, 0.2, 0.4, 0.8]
[I 2024-02-11 16:42:04,810] Trial 62 finished with value: 80.8 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.6, 'drop4': 0.5, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.25427, Val acc: 79.40, Test acc: 80.80
adjdrop: 0.2, drop: [0.8, 0.5, 0.8, 0.7, 0.5, 0.2, 0.3, 0.7]
[I 2024-02-11 16:42:35,240] Trial 63 finished with value: 76.8 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.5, 'drop5': 0.2, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.19693, Val acc: 76.00, Test acc: 76.80
adjdrop: 0.8, drop: [0.7, 0.4, 0.8, 0.5, 0.5, 0.1, 0.5, 0.8]
[I 2024-02-11 16:43:07,515] Trial 64 finished with value: 57.7 and parameters: {'drop0': 0.7, 'drop1': 0.4, 'drop2': 0.8, 'drop3': 0.5, 'drop4': 0.5, 'drop5': 0.1, 'drop6': 0.5, 'drop7': 0.8, 'adjdrop': 0.8}
200 test loss: 1.01659, Val acc: 60.60, Test acc: 57.70
adjdrop: 0.1, drop: [0.2, 0.5, 0.8, 0.8, 0.0, 0.0, 0.4, 0.7]
[I 2024-02-11 16:43:39,254] Trial 65 finished with value: 75.9 and parameters: {'drop0': 0.2, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.0, 'drop5': 0.0, 'drop6': 0.4, 'drop7': 0.7, 'adjdrop': 0.1}
200 test loss: 0.06653, Val acc: 76.60, Test acc: 75.90
adjdrop: 0.3, drop: [0.8, 0.6, 0.8, 0.7, 0.6, 0.3, 0.4, 0.8]
[I 2024-02-11 16:44:11,453] Trial 66 finished with value: 78.9 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.6, 'drop5': 0.3, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.3}
200 test loss: 0.39874, Val acc: 76.60, Test acc: 78.90
adjdrop: 0.1, drop: [0.7, 0.7, 0.7, 0.6, 0.3, 0.1, 0.8, 0.8]
[I 2024-02-11 16:44:43,399] Trial 67 finished with value: 77.9 and parameters: {'drop0': 0.7, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.1, 'drop6': 0.8, 'drop7': 0.8, 'adjdrop': 0.1}
200 test loss: 0.06573, Val acc: 77.20, Test acc: 77.90
adjdrop: 0.2, drop: [0.8, 0.5, 0.8, 0.5, 0.6, 0.2, 0.5, 0.7]
[I 2024-02-11 16:45:15,552] Trial 68 finished with value: 78.0 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.5, 'drop4': 0.6, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.17730, Val acc: 77.80, Test acc: 78.00
adjdrop: 0.0, drop: [0.6, 0.4, 0.4, 0.7, 0.2, 0.6, 0.3, 0.6]
[I 2024-02-11 16:45:48,611] Trial 69 finished with value: 79.8 and parameters: {'drop0': 0.6, 'drop1': 0.4, 'drop2': 0.4, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.6, 'drop6': 0.3, 'drop7': 0.6, 'adjdrop': 0.0}
200 test loss: 0.00823, Val acc: 78.00, Test acc: 79.80
adjdrop: 0.3, drop: [0.6, 0.4, 0.4, 0.6, 0.3, 0.6, 0.3, 0.6]
[I 2024-02-11 16:46:21,119] Trial 70 finished with value: 78.1 and parameters: {'drop0': 0.6, 'drop1': 0.4, 'drop2': 0.4, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.6, 'drop6': 0.3, 'drop7': 0.6, 'adjdrop': 0.3}
200 test loss: 0.12537, Val acc: 76.20, Test acc: 78.10
adjdrop: 0.0, drop: [0.7, 0.4, 0.4, 0.7, 0.2, 0.7, 0.4, 0.6]
[I 2024-02-11 16:46:55,152] Trial 71 finished with value: 75.9 and parameters: {'drop0': 0.7, 'drop1': 0.4, 'drop2': 0.4, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.7, 'drop6': 0.4, 'drop7': 0.6, 'adjdrop': 0.0}
200 test loss: 0.02195, Val acc: 75.20, Test acc: 75.90
adjdrop: 0.0, drop: [0.8, 0.5, 0.3, 0.8, 0.2, 0.7, 0.2, 0.5]
[I 2024-02-11 16:47:28,518] Trial 72 finished with value: 76.7 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.3, 'drop3': 0.8, 'drop4': 0.2, 'drop5': 0.7, 'drop6': 0.2, 'drop7': 0.5, 'adjdrop': 0.0}
200 test loss: 0.03344, Val acc: 76.40, Test acc: 76.70
adjdrop: 0.1, drop: [0.7, 0.4, 0.5, 0.7, 0.1, 0.6, 0.3, 0.7]
[I 2024-02-11 16:48:01,147] Trial 73 finished with value: 75.3 and parameters: {'drop0': 0.7, 'drop1': 0.4, 'drop2': 0.5, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.6, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.1}
200 test loss: 0.06139, Val acc: 74.00, Test acc: 75.30
adjdrop: 0.0, drop: [0.8, 0.6, 0.4, 0.6, 0.5, 0.5, 0.4, 0.8]
[I 2024-02-11 16:48:32,582] Trial 74 finished with value: 78.9 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.4, 'drop3': 0.6, 'drop4': 0.5, 'drop5': 0.5, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.0}
200 test loss: 0.04562, Val acc: 76.00, Test acc: 78.90
adjdrop: 0.0, drop: [0.5, 0.5, 0.8, 0.7, 0.2, 0.8, 0.2, 0.2]
[I 2024-02-11 16:49:06,932] Trial 75 finished with value: 75.4 and parameters: {'drop0': 0.5, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.8, 'drop6': 0.2, 'drop7': 0.2, 'adjdrop': 0.0}
200 test loss: 0.00907, Val acc: 74.80, Test acc: 75.40
adjdrop: 0.2, drop: [0.6, 0.7, 0.7, 0.7, 0.2, 0.4, 0.5, 0.7]
[I 2024-02-11 16:49:39,739] Trial 76 finished with value: 75.6 and parameters: {'drop0': 0.6, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.08215, Val acc: 76.00, Test acc: 75.60
adjdrop: 0.7, drop: [0.8, 0.6, 0.3, 0.8, 0.5, 0.1, 0.3, 0.6]
[I 2024-02-11 16:50:12,345] Trial 77 finished with value: 75.3 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.3, 'drop3': 0.8, 'drop4': 0.5, 'drop5': 0.1, 'drop6': 0.3, 'drop7': 0.6, 'adjdrop': 0.7}
200 test loss: 0.62237, Val acc: 72.80, Test acc: 75.30
adjdrop: 0.1, drop: [0.4, 0.8, 0.2, 0.3, 0.1, 0.3, 0.5, 0.8]
[I 2024-02-11 16:50:44,450] Trial 78 finished with value: 78.3 and parameters: {'drop0': 0.4, 'drop1': 0.8, 'drop2': 0.2, 'drop3': 0.3, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.8, 'adjdrop': 0.1}
200 test loss: 0.02550, Val acc: 76.80, Test acc: 78.30
adjdrop: 0.6, drop: [0.7, 0.5, 0.8, 0.1, 0.3, 0.0, 0.4, 0.7]
[I 2024-02-11 16:51:17,237] Trial 79 finished with value: 75.3 and parameters: {'drop0': 0.7, 'drop1': 0.5, 'drop2': 0.8, 'drop3': 0.1, 'drop4': 0.3, 'drop5': 0.0, 'drop6': 0.4, 'drop7': 0.7, 'adjdrop': 0.6}
200 test loss: 0.42006, Val acc: 78.20, Test acc: 75.30
adjdrop: 0.0, drop: [0.7, 0.7, 0.7, 0.6, 0.0, 0.2, 0.5, 0.5]
[I 2024-02-11 16:51:49,262] Trial 80 finished with value: 77.1 and parameters: {'drop0': 0.7, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.5, 'adjdrop': 0.0}
200 test loss: 0.03258, Val acc: 77.00, Test acc: 77.10
adjdrop: 0.2, drop: [0.8, 0.6, 0.6, 0.6, 0.4, 0.2, 0.4, 0.8]
[I 2024-02-11 16:52:21,989] Trial 81 finished with value: 77.2 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.6, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.17856, Val acc: 75.40, Test acc: 77.20
adjdrop: 0.2, drop: [0.8, 0.6, 0.7, 0.6, 0.4, 0.2, 0.4, 0.8]
[I 2024-02-11 16:52:54,177] Trial 82 finished with value: 76.4 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.20105, Val acc: 74.80, Test acc: 76.40
adjdrop: 0.2, drop: [0.8, 0.6, 0.8, 0.5, 0.4, 0.3, 0.3, 0.8]
[I 2024-02-11 16:53:26,347] Trial 83 finished with value: 78.8 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.5, 'drop4': 0.4, 'drop5': 0.3, 'drop6': 0.3, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.16526, Val acc: 77.60, Test acc: 78.80
adjdrop: 0.3, drop: [0.8, 0.7, 0.7, 0.7, 0.3, 0.2, 0.4, 0.8]
[I 2024-02-11 16:53:57,933] Trial 84 finished with value: 75.4 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.7, 'drop4': 0.3, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.8, 'adjdrop': 0.3}
200 test loss: 0.35577, Val acc: 74.60, Test acc: 75.40
adjdrop: 0.1, drop: [0.7, 0.5, 0.5, 0.7, 0.2, 0.1, 0.6, 0.7]
[I 2024-02-11 16:54:30,287] Trial 85 finished with value: 76.3 and parameters: {'drop0': 0.7, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.1, 'drop6': 0.6, 'drop7': 0.7, 'adjdrop': 0.1}
200 test loss: 0.04874, Val acc: 75.40, Test acc: 76.30
adjdrop: 0.1, drop: [0.8, 0.6, 0.8, 0.0, 0.5, 0.1, 0.5, 0.6]
[I 2024-02-11 16:54:56,439] Trial 86 finished with value: 77.7 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.0, 'drop4': 0.5, 'drop5': 0.1, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.1}
200 test loss: 0.08963, Val acc: 77.20, Test acc: 77.70
adjdrop: 0.2, drop: [0.8, 0.3, 0.8, 0.6, 0.4, 0.1, 0.2, 0.8]
[I 2024-02-11 16:55:29,617] Trial 87 finished with value: 79.1 and parameters: {'drop0': 0.8, 'drop1': 0.3, 'drop2': 0.8, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.1, 'drop6': 0.2, 'drop7': 0.8, 'adjdrop': 0.2}
200 test loss: 0.13323, Val acc: 76.80, Test acc: 79.10
adjdrop: 0.3, drop: [0.7, 0.7, 0.7, 0.6, 0.3, 0.2, 0.4, 0.7]
[I 2024-02-11 16:56:03,348] Trial 88 finished with value: 76.9 and parameters: {'drop0': 0.7, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.6, 'drop4': 0.3, 'drop5': 0.2, 'drop6': 0.4, 'drop7': 0.7, 'adjdrop': 0.3}
200 test loss: 0.20874, Val acc: 76.60, Test acc: 76.90
adjdrop: 0.4, drop: [0.6, 0.4, 0.8, 0.4, 0.5, 0.0, 0.5, 0.8]
[I 2024-02-11 16:56:33,831] Trial 89 finished with value: 76.8 and parameters: {'drop0': 0.6, 'drop1': 0.4, 'drop2': 0.8, 'drop3': 0.4, 'drop4': 0.5, 'drop5': 0.0, 'drop6': 0.5, 'drop7': 0.8, 'adjdrop': 0.4}
200 test loss: 0.33949, Val acc: 75.60, Test acc: 76.80
adjdrop: 0.2, drop: [0.8, 0.6, 0.8, 0.7, 0.1, 0.4, 0.3, 0.7]
[I 2024-02-11 16:57:05,046] Trial 90 finished with value: 77.6 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.2}
200 test loss: 0.13983, Val acc: 77.80, Test acc: 77.60
adjdrop: 0.2, drop: [0.8, 0.7, 0.8, 0.8, 0.1, 0.3, 0.5, 0.6]
[I 2024-02-11 16:57:37,381] Trial 91 finished with value: 80.7 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.13691, Val acc: 78.60, Test acc: 80.70
adjdrop: 0.2, drop: [0.8, 0.7, 0.8, 0.8, 0.1, 0.3, 0.5, 0.6]
[I 2024-02-11 16:58:09,271] Trial 92 finished with value: 77.4 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.12958, Val acc: 78.00, Test acc: 77.40
adjdrop: 0.1, drop: [0.8, 0.7, 0.8, 0.8, 0.0, 0.3, 0.6, 0.6]
[I 2024-02-11 16:58:39,804] Trial 93 finished with value: 78.1 and parameters: {'drop0': 0.8, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.0, 'drop5': 0.3, 'drop6': 0.6, 'drop7': 0.6, 'adjdrop': 0.1}
200 test loss: 0.12346, Val acc: 77.40, Test acc: 78.10
adjdrop: 0.2, drop: [0.8, 0.8, 0.8, 0.8, 0.2, 0.2, 0.5, 0.6]
[I 2024-02-11 16:59:10,765] Trial 94 finished with value: 78.9 and parameters: {'drop0': 0.8, 'drop1': 0.8, 'drop2': 0.8, 'drop3': 0.8, 'drop4': 0.2, 'drop5': 0.2, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}
200 test loss: 0.10568, Val acc: 77.00, Test acc: 78.90
adjdrop: 0.5, drop: [0.7, 0.6, 0.7, 0.7, 0.1, 0.4, 0.4, 0.5]
[I 2024-02-11 16:59:41,694] Trial 95 finished with value: 78.9 and parameters: {'drop0': 0.7, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.4, 'drop7': 0.5, 'adjdrop': 0.5}
200 test loss: 0.25447, Val acc: 79.80, Test acc: 78.90
adjdrop: 0.0, drop: [0.3, 0.7, 0.8, 0.6, 0.2, 0.5, 0.5, 0.7]
[I 2024-02-11 17:00:10,868] Trial 96 finished with value: 76.2 and parameters: {'drop0': 0.3, 'drop1': 0.7, 'drop2': 0.8, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.0}
200 test loss: 0.01756, Val acc: 74.80, Test acc: 76.20
adjdrop: 0.3, drop: [0.8, 0.6, 0.7, 0.5, 0.0, 0.2, 0.6, 0.8]
[I 2024-02-11 17:00:44,155] Trial 97 finished with value: 80.0 and parameters: {'drop0': 0.8, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.6, 'drop7': 0.8, 'adjdrop': 0.3}
200 test loss: 0.23734, Val acc: 78.00, Test acc: 80.00
adjdrop: 0.3, drop: [0.8, 0.5, 0.4, 0.5, 0.0, 0.2, 0.8, 0.4]
[I 2024-02-11 17:01:14,663] Trial 98 finished with value: 80.1 and parameters: {'drop0': 0.8, 'drop1': 0.5, 'drop2': 0.4, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.3}
200 test loss: 0.08382, Val acc: 77.80, Test acc: 80.10
adjdrop: 0.3, drop: [0.0, 0.5, 0.4, 0.4, 0.0, 0.3, 0.8, 0.4]
[I 2024-02-11 17:01:45,186] Trial 99 finished with value: 71.7 and parameters: {'drop0': 0.0, 'drop1': 0.5, 'drop2': 0.4, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.3, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.3}
200 test loss: 0.03296, Val acc: 71.20, Test acc: 71.70
{'drop0': 0.1, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.1, 'drop4': 0.5, 'drop5': 0.4, 'drop6': 0.8, 'drop7': 0.6, 'adjdrop': 0.6}

In [None]:
# @title optuna result follow hmpnn

[I 2024-02-10 12:13:40,705] A new study created in memory with name: no-name-f4306189-0b95-467e-9bbc-9bdb32941a2c
adjdrop: 0.1, drop: [0.5, 0.0, 0.8, 0.0, 0.8, 0.4, 0.8, 0.4]
[I 2024-02-10 12:14:12,962] Trial 0 finished with value: 56.7 and parameters: {'drop0': 0.5, 'drop1': 0.0, 'drop2': 0.8, 'drop3': 0.0, 'drop4': 0.8, 'drop5': 0.4, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 0 with value: 56.7.
200 test loss: 0.13570, Val acc: 56.80, Test acc: 56.70
adjdrop: 0.3, drop: [0.1, 0.8, 0.4, 0.8, 0.2, 0.5, 0.0, 0.3]
[I 2024-02-10 12:15:02,359] Trial 1 finished with value: 60.7 and parameters: {'drop0': 0.1, 'drop1': 0.8, 'drop2': 0.4, 'drop3': 0.8, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.0, 'drop7': 0.3, 'adjdrop': 0.3}. Best is trial 1 with value: 60.7.
200 test loss: 0.63495, Val acc: 62.60, Test acc: 60.70
adjdrop: 0.1, drop: [0.7, 0.3, 0.6, 0.2, 0.8, 0.4, 0.7, 0.3]
[I 2024-02-10 12:15:48,836] Trial 2 finished with value: 56.1 and parameters: {'drop0': 0.7, 'drop1': 0.3, 'drop2': 0.6, 'drop3': 0.2, 'drop4': 0.8, 'drop5': 0.4, 'drop6': 0.7, 'drop7': 0.3, 'adjdrop': 0.1}. Best is trial 1 with value: 60.7.
200 test loss: 0.08710, Val acc: 58.80, Test acc: 56.10
adjdrop: 0.5, drop: [0.1, 0.2, 0.5, 0.6, 0.4, 0.2, 0.7, 0.2]
[I 2024-02-10 12:16:36,232] Trial 3 finished with value: 54.0 and parameters: {'drop0': 0.1, 'drop1': 0.2, 'drop2': 0.5, 'drop3': 0.6, 'drop4': 0.4, 'drop5': 0.2, 'drop6': 0.7, 'drop7': 0.2, 'adjdrop': 0.5}. Best is trial 1 with value: 60.7.
200 test loss: 0.22353, Val acc: 57.80, Test acc: 54.00
adjdrop: 0.4, drop: [0.8, 0.3, 0.1, 0.0, 0.8, 0.5, 0.6, 0.1]
[I 2024-02-10 12:17:14,849] Trial 4 finished with value: 58.6 and parameters: {'drop0': 0.8, 'drop1': 0.3, 'drop2': 0.1, 'drop3': 0.0, 'drop4': 0.8, 'drop5': 0.5, 'drop6': 0.6, 'drop7': 0.1, 'adjdrop': 0.4}. Best is trial 1 with value: 60.7.
200 test loss: 0.02035, Val acc: 62.60, Test acc: 58.60
adjdrop: 0.6, drop: [0.2, 0.0, 0.3, 0.5, 0.6, 0.0, 0.1, 0.8]
[I 2024-02-10 12:17:56,224] Trial 5 finished with value: 44.9 and parameters: {'drop0': 0.2, 'drop1': 0.0, 'drop2': 0.3, 'drop3': 0.5, 'drop4': 0.6, 'drop5': 0.0, 'drop6': 0.1, 'drop7': 0.8, 'adjdrop': 0.6}. Best is trial 1 with value: 60.7.
200 test loss: 1.18217, Val acc: 43.00, Test acc: 44.90
adjdrop: 0.1, drop: [0.5, 0.5, 0.4, 0.7, 0.7, 0.3, 0.3, 0.8]
[I 2024-02-10 12:18:45,325] Trial 6 finished with value: 24.4 and parameters: {'drop0': 0.5, 'drop1': 0.5, 'drop2': 0.4, 'drop3': 0.7, 'drop4': 0.7, 'drop5': 0.3, 'drop6': 0.3, 'drop7': 0.8, 'adjdrop': 0.1}. Best is trial 1 with value: 60.7.
200 test loss: 1.79269, Val acc: 24.80, Test acc: 24.40
adjdrop: 0.3, drop: [0.5, 0.6, 0.8, 0.4, 0.7, 0.6, 0.6, 0.2]
[I 2024-02-10 12:19:31,406] Trial 7 finished with value: 57.4 and parameters: {'drop0': 0.5, 'drop1': 0.6, 'drop2': 0.8, 'drop3': 0.4, 'drop4': 0.7, 'drop5': 0.6, 'drop6': 0.6, 'drop7': 0.2, 'adjdrop': 0.3}. Best is trial 1 with value: 60.7.
200 test loss: 0.10767, Val acc: 60.00, Test acc: 57.40
adjdrop: 0.5, drop: [0.6, 0.6, 0.7, 0.1, 0.0, 0.7, 0.5, 0.7]
[I 2024-02-10 12:20:19,178] Trial 8 finished with value: 53.8 and parameters: {'drop0': 0.6, 'drop1': 0.6, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.0, 'drop5': 0.7, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.5}. Best is trial 1 with value: 60.7.
200 test loss: 0.75113, Val acc: 56.20, Test acc: 53.80
adjdrop: 0.7, drop: [0.1, 0.7, 0.7, 0.7, 0.2, 0.4, 0.2, 0.2]
[I 2024-02-10 12:21:06,567] Trial 9 finished with value: 57.5 and parameters: {'drop0': 0.1, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.2, 'adjdrop': 0.7}. Best is trial 1 with value: 60.7.
200 test loss: 0.40385, Val acc: 59.60, Test acc: 57.50
adjdrop: 0.3, drop: [0.3, 0.8, 0.0, 0.8, 0.3, 0.7, 0.0, 0.5]
[I 2024-02-10 12:21:53,949] Trial 10 finished with value: 65.0 and parameters: {'drop0': 0.3, 'drop1': 0.8, 'drop2': 0.0, 'drop3': 0.8, 'drop4': 0.3, 'drop5': 0.7, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.3}. Best is trial 10 with value: 65.0.
200 test loss: 0.68127, Val acc: 63.20, Test acc: 65.00
adjdrop: 0.3, drop: [0.3, 0.8, 0.0, 0.8, 0.3, 0.8, 0.0, 0.6]
[I 2024-02-10 12:22:43,236] Trial 11 finished with value: 57.0 and parameters: {'drop0': 0.3, 'drop1': 0.8, 'drop2': 0.0, 'drop3': 0.8, 'drop4': 0.3, 'drop5': 0.8, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.3}. Best is trial 10 with value: 65.0.
200 test loss: 0.88343, Val acc: 56.60, Test acc: 57.00
adjdrop: 0.3, drop: [0.0, 0.8, 0.2, 0.8, 0.1, 0.6, 0.0, 0.5]
[I 2024-02-10 12:23:29,400] Trial 12 finished with value: 57.8 and parameters: {'drop0': 0.0, 'drop1': 0.8, 'drop2': 0.2, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.6, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.3}. Best is trial 10 with value: 65.0.
200 test loss: 0.94430, Val acc: 56.20, Test acc: 57.80
adjdrop: 0.8, drop: [0.3, 0.8, 0.0, 0.4, 0.4, 0.8, 0.2, 0.0]
[I 2024-02-10 12:24:20,735] Trial 13 finished with value: 34.4 and parameters: {'drop0': 0.3, 'drop1': 0.8, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.4, 'drop5': 0.8, 'drop6': 0.2, 'drop7': 0.0, 'adjdrop': 0.8}. Best is trial 10 with value: 65.0.
200 test loss: 0.02965, Val acc: 33.80, Test acc: 34.40
adjdrop: 0.0, drop: [0.3, 0.6, 0.3, 0.6, 0.2, 0.6, 0.4, 0.4]
[I 2024-02-10 12:25:09,625] Trial 14 finished with value: 61.5 and parameters: {'drop0': 0.3, 'drop1': 0.6, 'drop2': 0.3, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.6, 'drop6': 0.4, 'drop7': 0.4, 'adjdrop': 0.0}. Best is trial 10 with value: 65.0.
200 test loss: 0.26754, Val acc: 62.60, Test acc: 61.50
adjdrop: 0.0, drop: [0.3, 0.6, 0.2, 0.6, 0.5, 0.7, 0.4, 0.5]
[I 2024-02-10 12:25:58,462] Trial 15 finished with value: 57.6 and parameters: {'drop0': 0.3, 'drop1': 0.6, 'drop2': 0.2, 'drop3': 0.6, 'drop4': 0.5, 'drop5': 0.7, 'drop6': 0.4, 'drop7': 0.5, 'adjdrop': 0.0}. Best is trial 10 with value: 65.0.
200 test loss: 0.50858, Val acc: 57.00, Test acc: 57.60
adjdrop: 0.0, drop: [0.4, 0.5, 0.2, 0.3, 0.2, 0.6, 0.4, 0.5]
[I 2024-02-10 12:26:46,329] Trial 16 finished with value: 68.3 and parameters: {'drop0': 0.4, 'drop1': 0.5, 'drop2': 0.2, 'drop3': 0.3, 'drop4': 0.2, 'drop5': 0.6, 'drop6': 0.4, 'drop7': 0.5, 'adjdrop': 0.0}. Best is trial 16 with value: 68.3.
200 test loss: 0.25448, Val acc: 67.80, Test acc: 68.30
adjdrop: 0.2, drop: [0.4, 0.4, 0.1, 0.2, 0.3, 0.7, 0.2, 0.6]
[I 2024-02-10 12:27:33,258] Trial 17 finished with value: 58.7 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.1, 'drop3': 0.2, 'drop4': 0.3, 'drop5': 0.7, 'drop6': 0.2, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 16 with value: 68.3.
200 test loss: 0.36083, Val acc: 58.20, Test acc: 58.70
adjdrop: 0.2, drop: [0.4, 0.4, 0.1, 0.3, 0.0, 0.8, 0.3, 0.6]
[I 2024-02-10 12:28:20,170] Trial 18 finished with value: 67.6 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.1, 'drop3': 0.3, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.3, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 16 with value: 68.3.
200 test loss: 0.38354, Val acc: 67.20, Test acc: 67.60
adjdrop: 0.0, drop: [0.6, 0.4, 0.2, 0.3, 0.0, 0.8, 0.3, 0.7]
[I 2024-02-10 12:29:08,000] Trial 19 finished with value: 54.8 and parameters: {'drop0': 0.6, 'drop1': 0.4, 'drop2': 0.2, 'drop3': 0.3, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.3, 'drop7': 0.7, 'adjdrop': 0.0}. Best is trial 16 with value: 68.3.
200 test loss: 0.69308, Val acc: 56.20, Test acc: 54.80
adjdrop: 0.2, drop: [0.4, 0.2, 0.1, 0.3, 0.1, 0.1, 0.5, 0.6]
[I 2024-02-10 12:29:55,115] Trial 20 finished with value: 62.9 and parameters: {'drop0': 0.4, 'drop1': 0.2, 'drop2': 0.1, 'drop3': 0.3, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.5, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 16 with value: 68.3.
200 test loss: 0.38862, Val acc: 62.60, Test acc: 62.90
adjdrop: 0.2, drop: [0.4, 0.5, 0.0, 0.4, 0.1, 0.7, 0.3, 0.5]
[I 2024-02-10 12:30:42,893] Trial 21 finished with value: 69.5 and parameters: {'drop0': 0.4, 'drop1': 0.5, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.7, 'drop6': 0.3, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 21 with value: 69.5.
200 test loss: 0.22922, Val acc: 67.60, Test acc: 69.50
adjdrop: 0.2, drop: [0.4, 0.5, 0.1, 0.4, 0.1, 0.8, 0.3, 0.5]
[I 2024-02-10 12:31:33,582] Trial 22 finished with value: 65.5 and parameters: {'drop0': 0.4, 'drop1': 0.5, 'drop2': 0.1, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.3, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 21 with value: 69.5.
200 test loss: 0.26512, Val acc: 65.40, Test acc: 65.50
adjdrop: 0.1, drop: [0.6, 0.5, 0.3, 0.3, 0.0, 0.6, 0.5, 0.7]
[I 2024-02-10 12:32:21,870] Trial 23 finished with value: 57.4 and parameters: {'drop0': 0.6, 'drop1': 0.5, 'drop2': 0.3, 'drop3': 0.3, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.5, 'drop7': 0.7, 'adjdrop': 0.1}. Best is trial 21 with value: 69.5.
200 test loss: 0.59703, Val acc: 55.80, Test acc: 57.40
adjdrop: 0.2, drop: [0.5, 0.3, 0.0, 0.2, 0.1, 0.5, 0.3, 0.6]
[I 2024-02-10 12:33:03,797] Trial 24 finished with value: 76.4 and parameters: {'drop0': 0.5, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.2, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.3, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.31320, Val acc: 75.00, Test acc: 76.40
adjdrop: 0.4, drop: [0.5, 0.3, 0.0, 0.2, 0.1, 0.5, 0.4, 0.4]
[I 2024-02-10 12:33:51,096] Trial 25 finished with value: 66.2 and parameters: {'drop0': 0.5, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.2, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.4, 'drop7': 0.4, 'adjdrop': 0.4}. Best is trial 24 with value: 76.4.
200 test loss: 0.13208, Val acc: 68.40, Test acc: 66.20
adjdrop: 0.0, drop: [0.7, 0.2, 0.2, 0.1, 0.2, 0.5, 0.4, 0.3]
[I 2024-02-10 12:34:39,058] Trial 26 finished with value: 61.8 and parameters: {'drop0': 0.7, 'drop1': 0.2, 'drop2': 0.2, 'drop3': 0.1, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.4, 'drop7': 0.3, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.11273, Val acc: 63.00, Test acc: 61.80
adjdrop: 0.1, drop: [0.2, 0.1, 0.0, 0.5, 0.1, 0.6, 0.1, 0.5]
[I 2024-02-10 12:35:23,154] Trial 27 finished with value: 75.9 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.6, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.27158, Val acc: 75.20, Test acc: 75.90
adjdrop: 0.2, drop: [0.2, 0.1, 0.0, 0.5, 0.1, 0.4, 0.1, 0.7]
[I 2024-02-10 12:36:09,440] Trial 28 finished with value: 70.4 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.7, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.71531, Val acc: 68.80, Test acc: 70.40
adjdrop: 0.1, drop: [0.2, 0.0, 0.0, 0.5, 0.3, 0.3, 0.1, 0.7]
[I 2024-02-10 12:36:45,358] Trial 29 finished with value: 66.2 and parameters: {'drop0': 0.2, 'drop1': 0.0, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.3, 'drop5': 0.3, 'drop6': 0.1, 'drop7': 0.7, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.59499, Val acc: 67.00, Test acc: 66.20
adjdrop: 0.4, drop: [0.0, 0.1, 0.1, 0.5, 0.4, 0.3, 0.1, 0.8]
[I 2024-02-10 12:37:23,062] Trial 30 finished with value: 54.8 and parameters: {'drop0': 0.0, 'drop1': 0.1, 'drop2': 0.1, 'drop3': 0.5, 'drop4': 0.4, 'drop5': 0.3, 'drop6': 0.1, 'drop7': 0.8, 'adjdrop': 0.4}. Best is trial 24 with value: 76.4.
200 test loss: 1.09412, Val acc: 52.20, Test acc: 54.80
adjdrop: 0.2, drop: [0.2, 0.1, 0.0, 0.5, 0.1, 0.4, 0.2, 0.6]
[I 2024-02-10 12:38:07,465] Trial 31 finished with value: 68.1 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.38957, Val acc: 66.80, Test acc: 68.10
adjdrop: 0.1, drop: [0.2, 0.1, 0.0, 0.4, 0.1, 0.5, 0.1, 0.7]
[I 2024-02-10 12:38:50,670] Trial 32 finished with value: 69.7 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.7, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.60596, Val acc: 67.00, Test acc: 69.70
adjdrop: 0.1, drop: [0.2, 0.1, 0.1, 0.6, 0.0, 0.5, 0.1, 0.7]
[I 2024-02-10 12:39:34,648] Trial 33 finished with value: 53.0 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.1, 'drop3': 0.6, 'drop4': 0.0, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.7, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.83001, Val acc: 48.00, Test acc: 53.00
adjdrop: 0.1, drop: [0.1, 0.1, 0.5, 0.5, 0.2, 0.4, 0.1, 0.8]
[I 2024-02-10 12:40:17,888] Trial 34 finished with value: 48.4 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.8, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 1.25998, Val acc: 49.40, Test acc: 48.40
adjdrop: 0.1, drop: [0.1, 0.0, 0.0, 0.7, 0.1, 0.5, 0.0, 0.6]
[I 2024-02-10 12:40:54,344] Trial 35 finished with value: 68.0 and parameters: {'drop0': 0.1, 'drop1': 0.0, 'drop2': 0.0, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.61302, Val acc: 64.20, Test acc: 68.00
adjdrop: 0.2, drop: [0.2, 0.2, 0.1, 0.0, 0.0, 0.2, 0.2, 0.7]
[I 2024-02-10 12:41:33,980] Trial 36 finished with value: 66.9 and parameters: {'drop0': 0.2, 'drop1': 0.2, 'drop2': 0.1, 'drop3': 0.0, 'drop4': 0.0, 'drop5': 0.2, 'drop6': 0.2, 'drop7': 0.7, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.51550, Val acc: 70.60, Test acc: 66.90
adjdrop: 0.5, drop: [0.1, 0.1, 0.0, 0.6, 0.2, 0.5, 0.1, 0.3]
[I 2024-02-10 12:42:22,460] Trial 37 finished with value: 65.2 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.3, 'adjdrop': 0.5}. Best is trial 24 with value: 76.4.
200 test loss: 0.21178, Val acc: 65.80, Test acc: 65.20
adjdrop: 0.1, drop: [0.0, 0.3, 0.3, 0.1, 0.1, 0.3, 0.1, 0.8]
[I 2024-02-10 12:43:05,347] Trial 38 finished with value: 64.7 and parameters: {'drop0': 0.0, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.1, 'drop7': 0.8, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 1.04958, Val acc: 66.40, Test acc: 64.70
adjdrop: 0.3, drop: [0.2, 0.0, 0.1, 0.4, 0.5, 0.4, 0.0, 0.6]
[I 2024-02-10 12:43:47,093] Trial 39 finished with value: 64.0 and parameters: {'drop0': 0.2, 'drop1': 0.0, 'drop2': 0.1, 'drop3': 0.4, 'drop4': 0.5, 'drop5': 0.4, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.42941, Val acc: 65.60, Test acc: 64.00
adjdrop: 0.4, drop: [0.5, 0.2, 0.5, 0.2, 0.2, 0.2, 0.2, 0.7]
[I 2024-02-10 12:44:35,569] Trial 40 finished with value: 58.7 and parameters: {'drop0': 0.5, 'drop1': 0.2, 'drop2': 0.5, 'drop3': 0.2, 'drop4': 0.2, 'drop5': 0.2, 'drop6': 0.2, 'drop7': 0.7, 'adjdrop': 0.4}. Best is trial 24 with value: 76.4.
200 test loss: 0.71107, Val acc: 56.80, Test acc: 58.70
adjdrop: 0.2, drop: [0.7, 0.3, 0.0, 0.5, 0.1, 0.6, 0.3, 0.5]
[I 2024-02-10 12:45:21,616] Trial 41 finished with value: 65.6 and parameters: {'drop0': 0.7, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.6, 'drop6': 0.3, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.32881, Val acc: 69.00, Test acc: 65.60
adjdrop: 0.1, drop: [0.3, 0.1, 0.0, 0.4, 0.0, 0.7, 0.2, 0.4]
[I 2024-02-10 12:46:09,397] Trial 42 finished with value: 72.0 and parameters: {'drop0': 0.3, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.7, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.14230, Val acc: 68.60, Test acc: 72.00
adjdrop: 0.1, drop: [0.3, 0.1, 0.0, 0.4, 0.0, 0.5, 0.2, 0.4]
[I 2024-02-10 12:46:58,144] Trial 43 finished with value: 73.5 and parameters: {'drop0': 0.3, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.5, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.11702, Val acc: 72.60, Test acc: 73.50
adjdrop: 0.0, drop: [0.3, 0.0, 0.1, 0.5, 0.0, 0.6, 0.2, 0.4]
[I 2024-02-10 12:47:31,549] Trial 44 finished with value: 71.4 and parameters: {'drop0': 0.3, 'drop1': 0.0, 'drop2': 0.1, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.19875, Val acc: 72.20, Test acc: 71.40
adjdrop: 0.0, drop: [0.3, 0.0, 0.1, 0.6, 0.0, 0.7, 0.8, 0.4]
[I 2024-02-10 12:48:14,070] Trial 45 finished with value: 68.0 and parameters: {'drop0': 0.3, 'drop1': 0.0, 'drop2': 0.1, 'drop3': 0.6, 'drop4': 0.0, 'drop5': 0.7, 'drop6': 0.8, 'drop7': 0.4, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.38036, Val acc: 68.80, Test acc: 68.00
adjdrop: 0.0, drop: [0.3, 0.0, 0.0, 0.5, 0.0, 0.6, 0.2, 0.3]
[I 2024-02-10 12:48:55,510] Trial 46 finished with value: 67.9 and parameters: {'drop0': 0.3, 'drop1': 0.0, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.2, 'drop7': 0.3, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.10584, Val acc: 65.00, Test acc: 67.90
adjdrop: 0.1, drop: [0.3, 0.2, 0.6, 0.4, 0.0, 0.6, 0.2, 0.4]
[I 2024-02-10 12:49:43,649] Trial 47 finished with value: 60.2 and parameters: {'drop0': 0.3, 'drop1': 0.2, 'drop2': 0.6, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.18929, Val acc: 61.80, Test acc: 60.20
adjdrop: 0.0, drop: [0.5, 0.0, 0.2, 0.7, 0.0, 0.7, 0.3, 0.2]
[I 2024-02-10 12:50:26,564] Trial 48 finished with value: 63.8 and parameters: {'drop0': 0.5, 'drop1': 0.0, 'drop2': 0.2, 'drop3': 0.7, 'drop4': 0.0, 'drop5': 0.7, 'drop6': 0.3, 'drop7': 0.2, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.18558, Val acc: 65.40, Test acc: 63.80
adjdrop: 0.1, drop: [0.4, 0.2, 0.1, 0.3, 0.0, 0.5, 0.2, 0.4]
[I 2024-02-10 12:51:15,208] Trial 49 finished with value: 66.8 and parameters: {'drop0': 0.4, 'drop1': 0.2, 'drop2': 0.1, 'drop3': 0.3, 'drop4': 0.0, 'drop5': 0.5, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.24296, Val acc: 66.80, Test acc: 66.80
adjdrop: 0.0, drop: [0.3, 0.3, 0.8, 0.2, 0.0, 0.6, 0.3, 0.3]
[I 2024-02-10 12:52:03,220] Trial 50 finished with value: 56.9 and parameters: {'drop0': 0.3, 'drop1': 0.3, 'drop2': 0.8, 'drop3': 0.2, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.3, 'drop7': 0.3, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.12039, Val acc: 59.80, Test acc: 56.90
adjdrop: 0.3, drop: [0.2, 0.1, 0.0, 0.5, 0.1, 0.4, 0.2, 0.4]
[I 2024-02-10 12:52:51,299] Trial 51 finished with value: 69.3 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.18396, Val acc: 69.40, Test acc: 69.30
adjdrop: 0.1, drop: [0.3, 0.1, 0.0, 0.5, 0.1, 0.5, 0.1, 0.5]
[I 2024-02-10 12:53:39,210] Trial 52 finished with value: 68.6 and parameters: {'drop0': 0.3, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.28961, Val acc: 69.20, Test acc: 68.60
adjdrop: 0.2, drop: [0.2, 0.1, 0.0, 0.4, 0.0, 0.6, 0.0, 0.4]
[I 2024-02-10 12:54:27,295] Trial 53 finished with value: 67.5 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.0, 'drop7': 0.4, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.11722, Val acc: 67.80, Test acc: 67.50
adjdrop: 0.0, drop: [0.5, 0.0, 0.1, 0.6, 0.7, 0.7, 0.2, 0.5]
[I 2024-02-10 12:55:10,077] Trial 54 finished with value: 70.1 and parameters: {'drop0': 0.5, 'drop1': 0.0, 'drop2': 0.1, 'drop3': 0.6, 'drop4': 0.7, 'drop5': 0.7, 'drop6': 0.2, 'drop7': 0.5, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.29716, Val acc: 71.80, Test acc: 70.10
adjdrop: 0.1, drop: [0.1, 0.2, 0.0, 0.4, 0.2, 0.5, 0.1, 0.3]
[I 2024-02-10 12:55:59,060] Trial 55 finished with value: 73.1 and parameters: {'drop0': 0.1, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.3, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.10616, Val acc: 74.00, Test acc: 73.10
adjdrop: 0.0, drop: [0.1, 0.2, 0.1, 0.4, 0.3, 0.5, 0.3, 0.1]
[I 2024-02-10 12:56:46,988] Trial 56 finished with value: 66.8 and parameters: {'drop0': 0.1, 'drop1': 0.2, 'drop2': 0.1, 'drop3': 0.4, 'drop4': 0.3, 'drop5': 0.5, 'drop6': 0.3, 'drop7': 0.1, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.03948, Val acc: 68.00, Test acc: 66.80
adjdrop: 0.7, drop: [0.3, 0.2, 0.0, 0.1, 0.2, 0.7, 0.2, 0.3]
[I 2024-02-10 12:57:34,834] Trial 57 finished with value: 57.4 and parameters: {'drop0': 0.3, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.1, 'drop4': 0.2, 'drop5': 0.7, 'drop6': 0.2, 'drop7': 0.3, 'adjdrop': 0.7}. Best is trial 24 with value: 76.4.
200 test loss: 0.08437, Val acc: 58.20, Test acc: 57.40
adjdrop: 0.1, drop: [0.4, 0.4, 0.1, 0.3, 0.0, 0.6, 0.1, 0.2]
[I 2024-02-10 12:58:23,807] Trial 58 finished with value: 61.3 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.1, 'drop3': 0.3, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.1, 'drop7': 0.2, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.14342, Val acc: 60.40, Test acc: 61.30
adjdrop: 0.1, drop: [0.1, 0.3, 0.2, 0.4, 0.1, 0.5, 0.2, 0.3]
[I 2024-02-10 12:59:15,487] Trial 59 finished with value: 68.0 and parameters: {'drop0': 0.1, 'drop1': 0.3, 'drop2': 0.2, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.2, 'drop7': 0.3, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.13905, Val acc: 67.40, Test acc: 68.00
adjdrop: 0.3, drop: [0.6, 0.2, 0.0, 0.3, 0.3, 0.6, 0.3, 0.4]
[I 2024-02-10 13:00:03,874] Trial 60 finished with value: 65.9 and parameters: {'drop0': 0.6, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.3, 'drop4': 0.3, 'drop5': 0.6, 'drop6': 0.3, 'drop7': 0.4, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.20094, Val acc: 63.80, Test acc: 65.90
adjdrop: 0.2, drop: [0.2, 0.1, 0.0, 0.6, 0.1, 0.4, 0.1, 0.4]
[I 2024-02-10 13:00:53,375] Trial 61 finished with value: 72.9 and parameters: {'drop0': 0.2, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.4, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.20338, Val acc: 72.20, Test acc: 72.90
adjdrop: 0.2, drop: [0.3, 0.1, 0.0, 0.6, 0.2, 0.4, 0.0, 0.4]
[I 2024-02-10 13:01:43,168] Trial 62 finished with value: 58.6 and parameters: {'drop0': 0.3, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.4, 'drop6': 0.0, 'drop7': 0.4, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.30712, Val acc: 61.20, Test acc: 58.60
adjdrop: 0.1, drop: [0.8, 0.1, 0.4, 0.7, 0.8, 0.5, 0.1, 0.5]
[I 2024-02-10 13:02:31,695] Trial 63 finished with value: 58.5 and parameters: {'drop0': 0.8, 'drop1': 0.1, 'drop2': 0.4, 'drop3': 0.7, 'drop4': 0.8, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.77887, Val acc: 58.60, Test acc: 58.50
adjdrop: 0.0, drop: [0.1, 0.0, 0.0, 0.5, 0.1, 0.4, 0.2, 0.4]
[I 2024-02-10 13:03:13,405] Trial 64 finished with value: 69.7 and parameters: {'drop0': 0.1, 'drop1': 0.0, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.18318, Val acc: 67.40, Test acc: 69.70
adjdrop: 0.2, drop: [0.2, 0.2, 0.1, 0.4, 0.0, 0.0, 0.1, 0.3]
[I 2024-02-10 13:03:56,476] Trial 65 finished with value: 67.7 and parameters: {'drop0': 0.2, 'drop1': 0.2, 'drop2': 0.1, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.0, 'drop6': 0.1, 'drop7': 0.3, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.21206, Val acc: 69.80, Test acc: 67.70
adjdrop: 0.1, drop: [0.4, 0.3, 0.0, 0.6, 0.1, 0.3, 0.0, 0.5]
[I 2024-02-10 13:04:40,704] Trial 66 finished with value: 72.0 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.24495, Val acc: 72.40, Test acc: 72.00
adjdrop: 0.2, drop: [0.4, 0.3, 0.0, 0.6, 0.5, 0.3, 0.0, 0.5]
[I 2024-02-10 13:05:23,873] Trial 67 finished with value: 67.7 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.5, 'drop5': 0.3, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.38382, Val acc: 67.80, Test acc: 67.70
adjdrop: 0.1, drop: [0.5, 0.4, 0.7, 0.7, 0.2, 0.3, 0.0, 0.6]
[I 2024-02-10 13:06:09,618] Trial 68 finished with value: 59.7 and parameters: {'drop0': 0.5, 'drop1': 0.4, 'drop2': 0.7, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.3, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.96103, Val acc: 57.60, Test acc: 59.70
adjdrop: 0.2, drop: [0.4, 0.3, 0.0, 0.6, 0.1, 0.1, 0.0, 0.5]
[I 2024-02-10 13:06:54,249] Trial 69 finished with value: 73.4 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.36762, Val acc: 75.40, Test acc: 73.40
adjdrop: 0.3, drop: [0.0, 0.4, 0.0, 0.0, 0.1, 0.0, 0.1, 0.2]
[I 2024-02-10 13:07:24,578] Trial 70 finished with value: 65.5 and parameters: {'drop0': 0.0, 'drop1': 0.4, 'drop2': 0.0, 'drop3': 0.0, 'drop4': 0.1, 'drop5': 0.0, 'drop6': 0.1, 'drop7': 0.2, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.02181, Val acc: 68.00, Test acc: 65.50
adjdrop: 0.2, drop: [0.4, 0.3, 0.0, 0.6, 0.1, 0.1, 0.0, 0.5]
[I 2024-02-10 13:08:09,703] Trial 71 finished with value: 71.9 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.38440, Val acc: 71.00, Test acc: 71.90
adjdrop: 0.1, drop: [0.4, 0.2, 0.0, 0.6, 0.2, 0.2, 0.0, 0.6]
[I 2024-02-10 13:08:54,132] Trial 72 finished with value: 70.9 and parameters: {'drop0': 0.4, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.2, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.47277, Val acc: 68.00, Test acc: 70.90
adjdrop: 0.2, drop: [0.4, 0.3, 0.0, 0.7, 0.1, 0.1, 0.0, 0.5]
[I 2024-02-10 13:09:33,816] Trial 73 finished with value: 66.3 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.48424, Val acc: 66.60, Test acc: 66.30
adjdrop: 0.1, drop: [0.3, 0.3, 0.1, 0.8, 0.1, 0.4, 0.1, 0.5]
[I 2024-02-10 13:10:20,524] Trial 74 finished with value: 54.7 and parameters: {'drop0': 0.3, 'drop1': 0.3, 'drop2': 0.1, 'drop3': 0.8, 'drop4': 0.1, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.67350, Val acc: 54.00, Test acc: 54.70
adjdrop: 0.5, drop: [0.5, 0.1, 0.0, 0.6, 0.1, 0.3, 0.1, 0.4]
[I 2024-02-10 13:11:07,000] Trial 75 finished with value: 61.1 and parameters: {'drop0': 0.5, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.3, 'drop6': 0.1, 'drop7': 0.4, 'adjdrop': 0.5}. Best is trial 24 with value: 76.4.
200 test loss: 0.33474, Val acc: 60.80, Test acc: 61.10
adjdrop: 0.1, drop: [0.1, 0.2, 0.0, 0.5, 0.2, 0.1, 0.0, 0.5]
[I 2024-02-10 13:11:52,469] Trial 76 finished with value: 69.0 and parameters: {'drop0': 0.1, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.2, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.27275, Val acc: 69.80, Test acc: 69.00
adjdrop: 0.2, drop: [0.2, 0.7, 0.1, 0.6, 0.1, 0.2, 0.1, 0.6]
[I 2024-02-10 13:12:40,814] Trial 77 finished with value: 69.3 and parameters: {'drop0': 0.2, 'drop1': 0.7, 'drop2': 0.1, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.2, 'drop6': 0.1, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.52095, Val acc: 71.00, Test acc: 69.30
adjdrop: 0.3, drop: [0.4, 0.4, 0.0, 0.4, 0.0, 0.4, 0.0, 0.4]
[I 2024-02-10 13:13:27,080] Trial 78 finished with value: 69.8 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.0, 'drop3': 0.4, 'drop4': 0.0, 'drop5': 0.4, 'drop6': 0.0, 'drop7': 0.4, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.14717, Val acc: 70.40, Test acc: 69.80
adjdrop: 0.2, drop: [0.3, 0.1, 0.0, 0.3, 0.2, 0.5, 0.1, 0.5]
[I 2024-02-10 13:14:13,992] Trial 79 finished with value: 71.9 and parameters: {'drop0': 0.3, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.3, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.21929, Val acc: 72.40, Test acc: 71.90
adjdrop: 0.1, drop: [0.6, 0.2, 0.1, 0.5, 0.0, 0.8, 0.7, 0.3]
[I 2024-02-10 13:15:01,247] Trial 80 finished with value: 64.4 and parameters: {'drop0': 0.6, 'drop1': 0.2, 'drop2': 0.1, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.8, 'drop6': 0.7, 'drop7': 0.3, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.17157, Val acc: 68.00, Test acc: 64.40
adjdrop: 0.2, drop: [0.4, 0.3, 0.0, 0.6, 0.1, 0.1, 0.0, 0.5]
[I 2024-02-10 13:15:45,629] Trial 81 finished with value: 69.4 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.33904, Val acc: 71.00, Test acc: 69.40
adjdrop: 0.2, drop: [0.5, 0.3, 0.0, 0.6, 0.1, 0.1, 0.0, 0.5]
[I 2024-02-10 13:16:31,458] Trial 82 finished with value: 61.7 and parameters: {'drop0': 0.5, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.34663, Val acc: 59.20, Test acc: 61.70
adjdrop: 0.1, drop: [0.4, 0.3, 0.0, 0.7, 0.1, 0.0, 0.6, 0.4]
[I 2024-02-10 13:17:16,741] Trial 83 finished with value: 70.4 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.7, 'drop4': 0.1, 'drop5': 0.0, 'drop6': 0.6, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.28581, Val acc: 71.00, Test acc: 70.40
adjdrop: 0.2, drop: [0.4, 0.4, 0.1, 0.6, 0.2, 0.1, 0.0, 0.6]
[I 2024-02-10 13:18:02,270] Trial 84 finished with value: 70.4 and parameters: {'drop0': 0.4, 'drop1': 0.4, 'drop2': 0.1, 'drop3': 0.6, 'drop4': 0.2, 'drop5': 0.1, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.53966, Val acc: 70.80, Test acc: 70.40
adjdrop: 0.1, drop: [0.3, 0.2, 0.0, 0.5, 0.0, 0.4, 0.1, 0.5]
[I 2024-02-10 13:18:49,068] Trial 85 finished with value: 69.6 and parameters: {'drop0': 0.3, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.0, 'drop5': 0.4, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.21151, Val acc: 69.40, Test acc: 69.60
adjdrop: 0.3, drop: [0.5, 0.3, 0.0, 0.6, 0.1, 0.7, 0.0, 0.6]
[I 2024-02-10 13:19:35,213] Trial 86 finished with value: 72.6 and parameters: {'drop0': 0.5, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.7, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.47090, Val acc: 72.60, Test acc: 72.60
adjdrop: 0.3, drop: [0.5, 0.1, 0.1, 0.4, 0.1, 0.7, 0.1, 0.6]
[I 2024-02-10 13:20:22,955] Trial 87 finished with value: 66.0 and parameters: {'drop0': 0.5, 'drop1': 0.1, 'drop2': 0.1, 'drop3': 0.4, 'drop4': 0.1, 'drop5': 0.7, 'drop6': 0.1, 'drop7': 0.6, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.42546, Val acc: 67.80, Test acc: 66.00
adjdrop: 0.4, drop: [0.6, 0.2, 0.0, 0.7, 0.0, 0.7, 0.4, 0.6]
[I 2024-02-10 13:21:10,148] Trial 88 finished with value: 67.3 and parameters: {'drop0': 0.6, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.7, 'drop4': 0.0, 'drop5': 0.7, 'drop6': 0.4, 'drop7': 0.6, 'adjdrop': 0.4}. Best is trial 24 with value: 76.4.
200 test loss: 0.77951, Val acc: 66.60, Test acc: 67.30
adjdrop: 0.3, drop: [0.5, 0.1, 0.2, 0.5, 0.1, 0.8, 0.3, 0.3]
[I 2024-02-10 13:21:55,247] Trial 89 finished with value: 61.2 and parameters: {'drop0': 0.5, 'drop1': 0.1, 'drop2': 0.2, 'drop3': 0.5, 'drop4': 0.1, 'drop5': 0.8, 'drop6': 0.3, 'drop7': 0.3, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.17964, Val acc: 61.80, Test acc: 61.20
adjdrop: 0.1, drop: [0.2, 0.3, 0.1, 0.3, 0.0, 0.5, 0.0, 0.4]
[I 2024-02-10 13:22:38,513] Trial 90 finished with value: 67.5 and parameters: {'drop0': 0.2, 'drop1': 0.3, 'drop2': 0.1, 'drop3': 0.3, 'drop4': 0.0, 'drop5': 0.5, 'drop6': 0.0, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.12770, Val acc: 67.60, Test acc: 67.50
adjdrop: 0.4, drop: [0.4, 0.3, 0.0, 0.6, 0.1, 0.7, 0.0, 0.5]
[I 2024-02-10 13:23:24,075] Trial 91 finished with value: 71.5 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.7, 'drop6': 0.0, 'drop7': 0.5, 'adjdrop': 0.4}. Best is trial 24 with value: 76.4.
200 test loss: 0.26368, Val acc: 70.00, Test acc: 71.50
adjdrop: 0.2, drop: [0.5, 0.4, 0.0, 0.6, 0.1, 0.6, 0.0, 0.6]
[I 2024-02-10 13:24:09,336] Trial 92 finished with value: 66.3 and parameters: {'drop0': 0.5, 'drop1': 0.4, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.6, 'drop6': 0.0, 'drop7': 0.6, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.54983, Val acc: 68.80, Test acc: 66.30
adjdrop: 0.3, drop: [0.4, 0.3, 0.0, 0.6, 0.1, 0.2, 0.0, 0.4]
[I 2024-02-10 13:24:53,819] Trial 93 finished with value: 70.4 and parameters: {'drop0': 0.4, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.1, 'drop5': 0.2, 'drop6': 0.0, 'drop7': 0.4, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.25499, Val acc: 68.60, Test acc: 70.40
adjdrop: 0.2, drop: [0.5, 0.3, 0.0, 0.7, 0.2, 0.5, 0.1, 0.5]
[I 2024-02-10 13:25:39,120] Trial 94 finished with value: 65.2 and parameters: {'drop0': 0.5, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.7, 'drop4': 0.2, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.5, 'adjdrop': 0.2}. Best is trial 24 with value: 76.4.
200 test loss: 0.50053, Val acc: 66.40, Test acc: 65.20
adjdrop: 0.3, drop: [0.3, 0.2, 0.0, 0.6, 0.0, 0.6, 0.2, 0.6]
[I 2024-02-10 13:26:24,471] Trial 95 finished with value: 59.7 and parameters: {'drop0': 0.3, 'drop1': 0.2, 'drop2': 0.0, 'drop3': 0.6, 'drop4': 0.0, 'drop5': 0.6, 'drop6': 0.2, 'drop7': 0.6, 'adjdrop': 0.3}. Best is trial 24 with value: 76.4.
200 test loss: 0.60145, Val acc: 59.20, Test acc: 59.70
adjdrop: 0.1, drop: [0.1, 0.1, 0.0, 0.5, 0.3, 0.5, 0.1, 0.4]
[I 2024-02-10 13:27:09,494] Trial 96 finished with value: 74.7 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.3, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.18409, Val acc: 74.60, Test acc: 74.70
adjdrop: 0.1, drop: [0.0, 0.1, 0.5, 0.4, 0.3, 0.5, 0.1, 0.3]
[I 2024-02-10 13:27:51,032] Trial 97 finished with value: 60.8 and parameters: {'drop0': 0.0, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.4, 'drop4': 0.3, 'drop5': 0.5, 'drop6': 0.1, 'drop7': 0.3, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.11492, Val acc: 63.20, Test acc: 60.80
adjdrop: 0.0, drop: [0.1, 0.1, 0.1, 0.5, 0.4, 0.5, 0.2, 0.4]
[I 2024-02-10 13:28:35,897] Trial 98 finished with value: 64.2 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.1, 'drop3': 0.5, 'drop4': 0.4, 'drop5': 0.5, 'drop6': 0.2, 'drop7': 0.4, 'adjdrop': 0.0}. Best is trial 24 with value: 76.4.
200 test loss: 0.22672, Val acc: 63.40, Test acc: 64.20
adjdrop: 0.1, drop: [0.1, 0.1, 0.0, 0.5, 0.3, 0.6, 0.1, 0.4]
[I 2024-02-10 13:29:18,256] Trial 99 finished with value: 71.3 and parameters: {'drop0': 0.1, 'drop1': 0.1, 'drop2': 0.0, 'drop3': 0.5, 'drop4': 0.3, 'drop5': 0.6, 'drop6': 0.1, 'drop7': 0.4, 'adjdrop': 0.1}. Best is trial 24 with value: 76.4.
200 test loss: 0.17025, Val acc: 69.40, Test acc: 71.30
{'drop0': 0.5, 'drop1': 0.3, 'drop2': 0.0, 'drop3': 0.2, 'drop4': 0.1, 'drop5': 0.5, 'drop6': 0.3, 'drop7': 0.6, 'adjdrop': 0.2}

In [None]:
# @title optuna result me reg

d_model: 16.0, lamb: 1.0, adjdrop: 0.7, drop: [0.2, 0.3, 0.3, 0.6, 0.3, 0.5, 0.5, 0.4, 0.3, 0.6]
200 test loss: 0.67123, Val acc: 75.40, Test acc: 74.70
d_model: 16.0, lamb: 1.0, adjdrop: 0.8, drop: [0.5, 0.3, 0.0, 0.6, 0.8, 0.1, 0.0, 0.4, 0.8, 0.3]
200 test loss: 0.46951, Val acc: 73.20, Test acc: 75.10
d_model: 16.0, lamb: 100.0, adjdrop: 0.6, drop: [0.0, 0.5, 0.8, 0.7, 0.0, 0.1, 0.2, 0.3, 0.4, 0.0]
200 test loss: 44.78912, Val acc: 70.60, Test acc: 75.20
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.1, 0.4, 0.1, 0.7, 0.7, 0.1, 0.5, 0.1, 0.5, 0.1]
200 test loss: 512.87408, Val acc: 75.00, Test acc: 77.70
d_model: 16.0, lamb: 1.0, adjdrop: 0.7, drop: [0.3, 0.7, 0.4, 0.5, 0.0, 0.1, 0.4, 0.0, 0.3, 0.8]
200 test loss: 0.99535, Val acc: 76.20, Test acc: 75.60
d_model: 16.0, lamb: 100.0, adjdrop: 0.0, drop: [0.3, 0.4, 0.6, 0.2, 0.8, 0.4, 0.0, 0.7, 0.6, 0.6]
200 test loss: 43.08358, Val acc: 75.60, Test acc: 77.50
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.4, 0.7, 0.4, 0.1, 0.5, 0.0, 0.1, 0.0, 0.4, 0.0]
200 test loss: 367.17908, Val acc: 75.40, Test acc: 76.80
d_model: 16.0, lamb: 0.1, adjdrop: 0.6, drop: [0.8, 0.2, 0.7, 0.1, 0.0, 0.5, 0.3, 0.3, 0.8, 0.6]
200 test loss: 0.56363, Val acc: 76.80, Test acc: 79.00
d_model: 16.0, lamb: 1000.0, adjdrop: 0.3, drop: [0.6, 0.2, 0.5, 0.8, 0.8, 0.3, 0.6, 0.6, 0.3, 0.0]
200 test loss: 404.48767, Val acc: 79.00, Test acc: 78.60
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.7, 0.3, 0.4, 0.0, 0.5, 0.8, 0.4, 0.1, 0.6, 0.4]
200 test loss: 407.18674, Val acc: 77.80, Test acc: 79.50
d_model: 16.0, lamb: 100.0, adjdrop: 0.2, drop: [0.8, 0.0, 0.2, 0.0, 0.4, 0.8, 0.8, 0.2, 0.0, 0.3]
200 test loss: 38.91430, Val acc: 76.80, Test acc: 80.30
d_model: 16.0, lamb: 100.0, adjdrop: 0.2, drop: [0.8, 0.0, 0.2, 0.0, 0.4, 0.8, 0.8, 0.2, 0.6, 0.3]
200 test loss: 37.08807, Val acc: 75.20, Test acc: 75.00
d_model: 16.0, lamb: 10.0, adjdrop: 0.2, drop: [0.7, 0.0, 0.2, 0.3, 0.5, 0.8, 0.8, 0.2, 0.0, 0.4]
200 test loss: 4.21230, Val acc: 77.00, Test acc: 78.70
d_model: 16.0, lamb: 100.0, adjdrop: 0.1, drop: [0.6, 0.1, 0.3, 0.0, 0.2, 0.7, 0.6, 0.1, 0.0, 0.2]
200 test loss: 41.89497, Val acc: 77.00, Test acc: 78.00
d_model: 16.0, lamb: 10.0, adjdrop: 0.2, drop: [0.7, 0.6, 0.0, 0.3, 0.6, 0.6, 0.7, 0.6, 0.1, 0.5]
200 test loss: 4.43142, Val acc: 76.40, Test acc: 77.70
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.8, 0.1, 0.5, 0.2, 0.2, 0.7, 0.3, 0.2, 0.6, 0.4]
200 test loss: 359.21863, Val acc: 77.40, Test acc: 79.80
d_model: 16.0, lamb: 100.0, adjdrop: 0.4, drop: [0.8, 0.1, 0.6, 0.2, 0.2, 0.6, 0.2, 0.5, 0.2, 0.2]
200 test loss: 35.68521, Val acc: 74.40, Test acc: 77.30
d_model: 16.0, lamb: 1000.0, adjdrop: 0.5, drop: [0.5, 0.0, 0.2, 0.4, 0.2, 0.7, 0.3, 0.3, 0.7, 0.8]
200 test loss: 393.42053, Val acc: 77.80, Test acc: 73.90
d_model: 16.0, lamb: 10.0, adjdrop: 0.4, drop: [0.6, 0.1, 0.5, 0.2, 0.3, 0.3, 0.6, 0.8, 0.5, 0.4]
200 test loss: 4.29348, Val acc: 75.60, Test acc: 76.60
d_model: 16.0, lamb: 100.0, adjdrop: 0.3, drop: [0.8, 0.2, 0.1, 0.1, 0.1, 0.7, 0.3, 0.2, 0.1, 0.2]
200 test loss: 40.83488, Val acc: 77.80, Test acc: 78.80
d_model: 16.0, lamb: 1000.0, adjdrop: 0.3, drop: [0.5, 0.1, 0.3, 0.3, 0.4, 0.6, 0.7, 0.5, 0.7, 0.5]
200 test loss: 456.03943, Val acc: 75.20, Test acc: 75.00
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.7, 0.3, 0.5, 0.0, 0.5, 0.8, 0.4, 0.1, 0.6, 0.4]
200 test loss: 383.16013, Val acc: 76.60, Test acc: 78.50
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.7, 0.0, 0.6, 0.1, 0.6, 0.8, 0.5, 0.1, 0.5, 0.3]
200 test loss: 387.50656, Val acc: 77.00, Test acc: 78.10
d_model: 16.0, lamb: 100.0, adjdrop: 0.2, drop: [0.8, 0.2, 0.4, 0.0, 0.4, 0.7, 0.2, 0.2, 0.7, 0.5]
200 test loss: 38.08430, Val acc: 75.20, Test acc: 77.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.7, 0.5, 0.3, 0.2, 0.3, 0.8, 0.4, 0.0, 0.5, 0.3]
200 test loss: 410.63513, Val acc: 77.40, Test acc: 77.00
d_model: 16.0, lamb: 100.0, adjdrop: 0.5, drop: [0.6, 0.8, 0.5, 0.1, 0.6, 0.7, 0.3, 0.3, 0.6, 0.4]
200 test loss: 41.12093, Val acc: 76.20, Test acc: 76.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.1, 0.7, 0.0, 0.1, 0.5, 0.1, 0.1, 0.2, 0.5]
200 test loss: 345.60272, Val acc: 77.80, Test acc: 76.20
d_model: 16.0, lamb: 100.0, adjdrop: 0.3, drop: [0.7, 0.3, 0.1, 0.4, 0.4, 0.6, 0.5, 0.2, 0.7, 0.1]
200 test loss: 42.01152, Val acc: 77.00, Test acc: 79.30
d_model: 16.0, lamb: 10.0, adjdrop: 0.5, drop: [0.4, 0.0, 0.4, 0.1, 0.3, 0.8, 0.7, 0.4, 0.4, 0.4]
200 test loss: 4.47062, Val acc: 75.20, Test acc: 75.90
d_model: 16.0, lamb: 0.1, adjdrop: 0.2, drop: [0.6, 0.3, 0.2, 0.2, 0.5, 0.5, 0.4, 0.0, 0.3, 0.7]
200 test loss: 0.40478, Val acc: 72.80, Test acc: 75.20
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.8, 0.2, 0.4, 0.4, 0.1, 0.7, 0.4, 0.3, 0.8, 0.2]
200 test loss: 377.55005, Val acc: 76.00, Test acc: 78.00
d_model: 16.0, lamb: 100.0, adjdrop: 0.3, drop: [0.7, 0.3, 0.1, 0.5, 0.4, 0.6, 0.5, 0.2, 0.7, 0.1]
200 test loss: 41.40982, Val acc: 77.40, Test acc: 77.00
d_model: 16.0, lamb: 100.0, adjdrop: 0.3, drop: [0.7, 0.5, 0.0, 0.5, 0.3, 0.6, 0.6, 0.2, 0.7, 0.1]
200 test loss: 44.66141, Val acc: 77.80, Test acc: 79.90
d_model: 16.0, lamb: 1.0, adjdrop: 0.1, drop: [0.8, 0.6, 0.3, 0.5, 0.2, 0.7, 0.8, 0.4, 0.6, 0.3]
200 test loss: 0.48399, Val acc: 78.40, Test acc: 80.20
d_model: 16.0, lamb: 1.0, adjdrop: 0.3, drop: [0.8, 0.5, 0.0, 0.5, 0.2, 0.4, 0.8, 0.5, 0.8, 0.3]
200 test loss: 0.49394, Val acc: 76.60, Test acc: 79.80
d_model: 16.0, lamb: 1.0, adjdrop: 0.2, drop: [0.5, 0.6, 0.3, 0.6, 0.3, 0.7, 0.7, 0.4, 0.4, 0.1]
200 test loss: 0.46630, Val acc: 76.40, Test acc: 77.50
d_model: 16.0, lamb: 10.0, adjdrop: 0.8, drop: [0.0, 0.6, 0.0, 0.7, 0.2, 0.6, 0.8, 0.4, 0.5, 0.3]
200 test loss: 4.48036, Val acc: 74.20, Test acc: 73.50
d_model: 16.0, lamb: 0.1, adjdrop: 0.6, drop: [0.8, 0.5, 0.1, 0.5, 0.1, 0.4, 0.6, 0.3, 0.6, 0.2]
200 test loss: 0.26906, Val acc: 75.00, Test acc: 74.80
d_model: 16.0, lamb: 1.0, adjdrop: 0.4, drop: [0.2, 0.4, 0.3, 0.6, 0.3, 0.5, 0.7, 0.4, 0.7, 0.1]
200 test loss: 0.50591, Val acc: 75.00, Test acc: 75.40
d_model: 16.0, lamb: 10.0, adjdrop: 0.0, drop: [0.4, 0.5, 0.2, 0.4, 0.3, 0.7, 0.8, 0.3, 0.5, 0.0]
200 test loss: 4.33879, Val acc: 75.80, Test acc: 76.40
d_model: 16.0, lamb: 1.0, adjdrop: 0.5, drop: [0.8, 0.7, 0.0, 0.7, 0.0, 0.2, 0.6, 0.6, 0.8, 0.3]
200 test loss: 0.57454, Val acc: 77.20, Test acc: 78.90
d_model: 16.0, lamb: 1.0, adjdrop: 0.3, drop: [0.8, 0.5, 0.0, 0.5, 0.2, 0.3, 0.8, 0.5, 0.8, 0.3]
200 test loss: 0.51719, Val acc: 76.60, Test acc: 77.00
d_model: 16.0, lamb: 1.0, adjdrop: 0.3, drop: [0.7, 0.6, 0.1, 0.6, 0.2, 0.4, 0.8, 0.5, 0.8, 0.2]
200 test loss: 0.47245, Val acc: 77.60, Test acc: 77.80
d_model: 16.0, lamb: 0.1, adjdrop: 0.4, drop: [0.8, 0.4, 0.0, 0.5, 0.1, 0.5, 0.7, 0.6, 0.7, 0.3]
200 test loss: 0.18859, Val acc: 75.60, Test acc: 77.30
d_model: 16.0, lamb: 10.0, adjdrop: 0.2, drop: [0.7, 0.7, 0.1, 0.4, 0.2, 0.4, 0.8, 0.7, 0.6, 0.4]
200 test loss: 4.33414, Val acc: 75.60, Test acc: 77.20
d_model: 16.0, lamb: 1.0, adjdrop: 0.1, drop: [0.8, 0.5, 0.0, 0.5, 0.2, 0.0, 0.7, 0.2, 0.8, 0.6]
200 test loss: 0.61424, Val acc: 77.80, Test acc: 78.80
d_model: 16.0, lamb: 10.0, adjdrop: 0.3, drop: [0.6, 0.6, 0.6, 0.3, 0.4, 0.8, 0.8, 0.3, 0.7, 0.3]
200 test loss: 3.87435, Val acc: 77.20, Test acc: 78.80
d_model: 16.0, lamb: 0.1, adjdrop: 0.2, drop: [0.7, 0.4, 0.2, 0.6, 0.3, 0.3, 0.2, 0.5, 0.6, 0.5]
200 test loss: 0.14335, Val acc: 75.20, Test acc: 78.30
d_model: 16.0, lamb: 1.0, adjdrop: 0.4, drop: [0.3, 0.8, 0.2, 0.4, 0.3, 0.6, 0.6, 0.1, 0.2, 0.0]
200 test loss: 0.47545, Val acc: 73.60, Test acc: 76.50
d_model: 16.0, lamb: 100.0, adjdrop: 0.2, drop: [0.8, 0.7, 0.8, 0.8, 0.1, 0.4, 0.8, 0.7, 0.4, 0.2]
200 test loss: 34.83788, Val acc: 77.60, Test acc: 77.40
d_model: 16.0, lamb: 10.0, adjdrop: 0.1, drop: [0.6, 0.5, 0.5, 0.5, 0.0, 0.2, 0.1, 0.4, 0.1, 0.7]
200 test loss: 4.38582, Val acc: 75.60, Test acc: 77.20
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.7, 0.4, 0.4, 0.0, 0.5, 0.8, 0.3, 0.1, 0.6, 0.4]
200 test loss: 378.39975, Val acc: 75.80, Test acc: 77.50
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.1, 0.5, 0.1, 0.7, 0.7, 0.5, 0.0, 0.7, 0.4]
200 test loss: 357.43356, Val acc: 77.60, Test acc: 79.00
d_model: 16.0, lamb: 100.0, adjdrop: 0.3, drop: [0.7, 0.0, 0.4, 0.0, 0.5, 0.8, 0.7, 0.1, 0.6, 0.3]
200 test loss: 40.15672, Val acc: 76.80, Test acc: 76.80
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.6, 0.3, 0.3, 0.4, 0.7, 0.4, 0.2, 0.5, 0.5]
200 test loss: 374.41141, Val acc: 78.00, Test acc: 80.30
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.8, 0.6, 0.3, 0.3, 0.2, 0.7, 0.3, 0.2, 0.5, 0.6]
200 test loss: 371.70966, Val acc: 77.00, Test acc: 77.90
d_model: 16.0, lamb: 100.0, adjdrop: 0.1, drop: [0.8, 0.6, 0.3, 0.3, 0.4, 0.6, 0.5, 0.2, 0.0, 0.5]
200 test loss: 37.61924, Val acc: 74.20, Test acc: 75.80
d_model: 16.0, lamb: 1000.0, adjdrop: 0.3, drop: [0.8, 0.5, 0.3, 0.2, 0.3, 0.7, 0.4, 0.3, 0.5, 0.5]
200 test loss: 387.17142, Val acc: 76.60, Test acc: 76.40
d_model: 16.0, lamb: 100.0, adjdrop: 0.0, drop: [0.7, 0.7, 0.2, 0.4, 0.4, 0.8, 0.8, 0.2, 0.8, 0.4]
200 test loss: 42.03011, Val acc: 76.20, Test acc: 76.50
d_model: 16.0, lamb: 1.0, adjdrop: 0.2, drop: [0.8, 0.6, 0.1, 0.6, 0.2, 0.7, 0.2, 0.4, 0.4, 0.5]
200 test loss: 0.64428, Val acc: 77.00, Test acc: 76.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.7, 0.5, 0.0, 0.5, 0.3, 0.6, 0.7, 0.3, 0.6, 0.4]
200 test loss: 422.42850, Val acc: 76.40, Test acc: 78.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.0, 0.4, 0.1, 0.6, 0.8, 0.4, 0.1, 0.6, 0.3]
200 test loss: 368.11841, Val acc: 78.00, Test acc: 76.50
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.7, 0.1, 0.4, 0.3, 0.5, 0.8, 0.3, 0.2, 0.7, 0.4]
200 test loss: 403.72473, Val acc: 77.20, Test acc: 78.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.8, 0.2, 0.5, 0.0, 0.4, 0.7, 0.4, 0.2, 0.5, 0.2]
200 test loss: 358.35999, Val acc: 79.20, Test acc: 81.30
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.8, 0.2, 0.6, 0.2, 0.4, 0.7, 0.4, 0.2, 0.5, 0.2]
200 test loss: 348.56143, Val acc: 76.80, Test acc: 76.60
d_model: 16.0, lamb: 100.0, adjdrop: 0.0, drop: [0.8, 0.1, 0.7, 0.0, 0.4, 0.7, 0.3, 0.2, 0.5, 0.1]
200 test loss: 33.35593, Val acc: 78.40, Test acc: 79.00
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.8, 0.0, 0.5, 0.1, 0.3, 0.6, 0.5, 0.3, 0.5, 0.2]
200 test loss: 347.79471, Val acc: 76.80, Test acc: 74.60
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.7, 0.1, 0.5, 0.4, 0.2, 0.7, 0.6, 0.5, 0.4, 0.3]
200 test loss: 410.62897, Val acc: 76.80, Test acc: 79.80
d_model: 16.0, lamb: 100.0, adjdrop: 0.1, drop: [0.8, 0.4, 0.6, 0.5, 0.3, 0.5, 0.4, 0.3, 0.3, 0.2]
200 test loss: 35.69234, Val acc: 75.20, Test acc: 78.90
d_model: 16.0, lamb: 1000.0, adjdrop: 0.7, drop: [0.1, 0.6, 0.3, 0.3, 0.4, 0.6, 0.8, 0.1, 0.7, 0.1]
200 test loss: 392.15405, Val acc: 74.20, Test acc: 74.50
d_model: 16.0, lamb: 100.0, adjdrop: 0.5, drop: [0.7, 0.2, 0.2, 0.2, 0.4, 0.7, 0.5, 0.2, 0.6, 0.3]
200 test loss: 42.58828, Val acc: 75.40, Test acc: 76.10
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.6, 0.1, 0.5, 0.4, 0.2, 0.7, 0.6, 0.5, 0.1, 0.3]
200 test loss: 409.38577, Val acc: 79.60, Test acc: 79.60
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.7, 0.1, 0.5, 0.4, 0.2, 0.7, 0.4, 0.5, 0.4, 0.2]
200 test loss: 388.57040, Val acc: 78.60, Test acc: 78.00
d_model: 16.0, lamb: 1000.0, adjdrop: 0.3, drop: [0.8, 0.0, 0.4, 0.5, 0.1, 0.6, 0.6, 0.5, 0.2, 0.3]
200 test loss: 382.03873, Val acc: 78.60, Test acc: 78.20
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.7, 0.1, 0.5, 0.4, 0.2, 0.8, 0.8, 0.6, 0.6, 0.3]
200 test loss: 366.72025, Val acc: 75.80, Test acc: 76.80
d_model: 16.0, lamb: 10.0, adjdrop: 0.3, drop: [0.8, 0.2, 0.6, 0.5, 0.1, 0.7, 0.7, 0.6, 0.3, 0.2]
200 test loss: 3.73613, Val acc: 78.00, Test acc: 78.10
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.8, 0.0, 0.5, 0.0, 0.3, 0.8, 0.3, 0.4, 0.5, 0.1]
200 test loss: 333.56818, Val acc: 77.00, Test acc: 74.60
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.7, 0.5, 0.0, 0.3, 0.2, 0.7, 0.5, 0.2, 0.8, 0.4]
200 test loss: 438.05225, Val acc: 74.80, Test acc: 77.10
d_model: 16.0, lamb: 100.0, adjdrop: 0.1, drop: [0.8, 0.1, 0.4, 0.6, 0.3, 0.6, 0.8, 0.1, 0.0, 0.3]
200 test loss: 37.47978, Val acc: 77.60, Test acc: 77.50
d_model: 16.0, lamb: 1.0, adjdrop: 0.3, drop: [0.5, 0.2, 0.3, 0.4, 0.2, 0.8, 0.7, 0.8, 0.7, 0.2]
200 test loss: 0.50145, Val acc: 75.80, Test acc: 78.30
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.6, 0.7, 0.1, 0.2, 0.3, 0.4, 0.6, 0.5, 0.4, 0.4]
200 test loss: 443.84793, Val acc: 74.60, Test acc: 76.90
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.6, 0.1, 0.5, 0.4, 0.2, 0.7, 0.6, 0.5, 0.1, 0.3]
200 test loss: 433.85144, Val acc: 76.20, Test acc: 76.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.8, 0.1, 0.5, 0.4, 0.1, 0.7, 0.6, 0.6, 0.0, 0.3]
200 test loss: 372.10468, Val acc: 78.00, Test acc: 77.10
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.6, 0.0, 0.5, 0.5, 0.2, 0.7, 0.5, 0.4, 0.2, 0.3]
200 test loss: 431.20328, Val acc: 76.20, Test acc: 78.70
d_model: 16.0, lamb: 1000.0, adjdrop: 0.3, drop: [0.7, 0.4, 0.6, 0.4, 0.2, 0.6, 0.7, 0.5, 0.1, 0.3]
200 test loss: 391.49472, Val acc: 77.20, Test acc: 77.80
d_model: 16.0, lamb: 1.0, adjdrop: 0.1, drop: [0.5, 0.6, 0.4, 0.5, 0.1, 0.8, 0.7, 0.5, 0.0, 0.4]
200 test loss: 0.51267, Val acc: 76.60, Test acc: 78.00
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.8, 0.2, 0.6, 0.3, 0.4, 0.5, 0.4, 0.2, 0.7, 0.2]
200 test loss: 363.73798, Val acc: 72.80, Test acc: 75.30
d_model: 16.0, lamb: 0.1, adjdrop: 0.2, drop: [0.7, 0.5, 0.5, 0.4, 0.3, 0.7, 0.8, 0.3, 0.3, 0.0]
200 test loss: 0.06335, Val acc: 75.60, Test acc: 76.90
d_model: 16.0, lamb: 10.0, adjdrop: 0.3, drop: [0.8, 0.3, 0.0, 0.0, 0.5, 0.3, 0.0, 0.4, 0.1, 0.3]
200 test loss: 4.12133, Val acc: 77.00, Test acc: 78.20
d_model: 16.0, lamb: 100.0, adjdrop: 0.2, drop: [0.7, 0.1, 0.2, 0.1, 0.2, 0.7, 0.3, 0.2, 0.6, 0.6]
200 test loss: 40.79874, Val acc: 75.20, Test acc: 76.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.1, 0.4, 0.5, 0.4, 0.6, 0.6, 0.5, 0.5, 0.1]
200 test loss: 372.33459, Val acc: 76.00, Test acc: 77.80
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.7, 0.3, 0.3, 0.0, 0.5, 0.8, 0.4, 0.0, 0.6, 0.4]
200 test loss: 413.51611, Val acc: 77.00, Test acc: 78.20
d_model: 16.0, lamb: 1000.0, adjdrop: 0.0, drop: [0.6, 0.3, 0.4, 0.0, 0.5, 0.8, 0.4, 0.1, 0.6, 0.5]
200 test loss: 404.69110, Val acc: 77.80, Test acc: 78.10
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.2, 0.5, 0.1, 0.6, 0.8, 0.5, 0.0, 0.7, 0.3]
200 test loss: 377.84906, Val acc: 78.60, Test acc: 80.90
d_model: 16.0, lamb: 1000.0, adjdrop: 0.1, drop: [0.8, 0.2, 0.5, 0.1, 0.8, 0.8, 0.5, 0.0, 0.8, 0.3]
200 test loss: 360.13806, Val acc: 76.00, Test acc: 78.40
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.8, 0.2, 0.6, 0.1, 0.2, 0.7, 0.5, 0.1, 0.7, 0.2]
200 test loss: 365.95651, Val acc: 76.80, Test acc: 76.10
d_model: 16.0, lamb: 1000.0, adjdrop: 0.2, drop: [0.8, 0.0, 0.5, 0.2, 0.7, 0.8, 0.6, 0.3, 0.8, 0.3]
200 test loss: 354.74802, Val acc: 77.20, Test acc: 78.80
d_model: 16.0, lamb: 10.0, adjdrop: 0.3, drop: [0.8, 0.6, 0.1, 0.3, 0.3, 0.7, 0.6, 0.6, 0.4, 0.3]
200 test loss: 3.99735, Val acc: 78.20, Test acc: 79.50
d_model: 16.0, lamb: 1.0, adjdrop: 0.0, drop: [0.7, 0.1, 0.5, 0.6, 0.4, 0.4, 0.8, 0.0, 0.7, 0.4]
200 test loss: 0.42367, Val acc: 75.00, Test acc: 76.90
d_model: 16.0, lamb: 1000.0, adjdrop: 0.4, drop: [0.8, 0.5, 0.5, 0.1, 0.6, 0.7, 0.5, 0.4, 0.0, 0.2]
200 test loss: 365.93625, Val acc: 75.00, Test acc: 75.90
{'drop0': 0.8, 'drop1': 0.2, 'drop2': 0.5, 'drop3': 0.0, 'drop4': 0.4, 'drop5': 0.7, 'drop6': 0.4, 'drop7': 0.2, 'drop8': 0.5, 'drop9': 0.2, 'adjdrop': 0.0, 'lamb': 3}


In [None]:
# @title old og hg attn
# Hypergraph Convolution and Hypergraph Attention
# (https://arxiv.org/pdf/1901.08150.pdf).
import argparse
import dgl.sparse as dglsp
import torch
import torch.nn as nn
import torch.nn.functional as F
import tqdm
from dgl.data import CoraGraphDataset
def accuracy(yhat, y):
    """Return the fraction of rows of ``yhat`` whose argmax matches ``y``.

    Args:
        yhat: per-class scores, one row per sample (argmax is taken over dim 1).
        y: target class index for each row of ``yhat``.

    Returns:
        A Python float: number of matching rows divided by ``y.shape[0]``.

    Raises:
        ZeroDivisionError: if ``y`` has no rows.
    """
    predicted = yhat.argmax(1)
    n_correct = (predicted == y).type(torch.float).sum().item()
    return n_correct / y.shape[0]


def hypergraph_laplacian(H):
    """Build the normalised hypergraph propagation matrix from incidence ``H``.

    Computes ``D_V^{-1/2} @ H @ W @ D_E^{-1} @ H^T @ D_V^{-1/2}`` with the
    DGL sparse-matrix API, where ``D_V`` / ``D_E`` are the diagonal matrices
    of row sums / column sums of ``H`` and ``W`` is the identity (every
    hyperedge gets weight 1).

    Args:
        H: sparse incidence matrix (rows are nodes, columns are hyperedges).

    Returns:
        The product above, as returned by the ``dglsp`` matmul operators.
    """
    node_deg = H.sum(1)  # row sums: node degree
    edge_deg = H.sum(0)  # column sums: edge degree
    num_hyperedges = edge_deg.shape[0]

    inv_sqrt_dv = dglsp.diag(node_deg**-0.5)  # D_V ** (-1/2)
    inv_de = dglsp.diag(edge_deg**-1)  # D_E ** (-1)
    edge_weights = dglsp.identity((num_hyperedges, num_hyperedges))

    # Same left-to-right product order as a single chained expression.
    weighted_incidence = inv_sqrt_dv @ H @ edge_weights
    return weighted_incidence @ inv_de @ H.T @ inv_sqrt_dv


class HypergraphAttention(nn.Module):
    """Hypergraph Attention module as in the paper
    `Hypergraph Convolution and Hypergraph Attention
    <https://arxiv.org/pdf/1901.08150.pdf>`_.

    Args:
        in_size: size of each input feature vector.
        out_size: size of each projected feature vector.
    """

    def __init__(self, in_size, out_size):
        super().__init__()

        # One projection shared by node features and hyperedge features.
        self.P = nn.Linear(in_size, out_size)
        # Maps a concatenated (node, hyperedge) projected pair to one score.
        self.a = nn.Linear(2 * out_size, 1)

    def forward(self, H, X, X_edges):
        """Re-weight the incidence matrix by attention and propagate features.

        Args:
            H: ``dglsp`` sparse incidence matrix; ``H.row`` / ``H.col`` are
                used to index node and hyperedge features respectively.
            X: node features, one row per node.
            X_edges: hyperedge features, one row per hyperedge.

        Returns:
            ``hypergraph_laplacian(H_att) @ Z`` where ``Z`` is the projected
            node features and ``H_att`` is ``H`` with attention values.
        """
        Z = self.P(X)
        Z_edges = self.P(X_edges)
        # Shapes noted by the author on Cora with a 16-wide projection:
        # H [2708, 2708]; H.row, H.col [13264]; Z, Z_edges [2708, 16].
        # (A debug print of the full Z tensor used to live here; it flooded
        # stdout on every forward pass and has been removed.)

        # One attention score per stored (node, hyperedge) entry of H.
        sim = self.a(torch.cat([Z[H.row], Z_edges[H.col]], 1))
        sim = F.leaky_relu(sim, 0.2).squeeze(1)
        # Reassign the hypergraph new weights.
        H_att = dglsp.val_like(H, sim)
        # Sparse softmax with dglsp's default axis — TODO confirm the axis.
        H_att = H_att.softmax()
        return hypergraph_laplacian(H_att) @ Z


class Net(nn.Module):
    """Two stacked ``HypergraphAttention`` layers with an ELU in between.

    Args:
        in_size: input feature size fed to the first layer.
        out_size: output size of the second layer.
        hidden_size: width of the intermediate representation (default 16).
    """

    def __init__(self, in_size, out_size, hidden_size=16):
        super().__init__()
        self.layer1 = HypergraphAttention(in_size, hidden_size)
        self.layer2 = HypergraphAttention(hidden_size, out_size)

    def forward(self, H, X):
        """Run both layers; each layer receives the same tensor as node and
        hyperedge features."""
        hidden = F.elu(self.layer1(H, X, X))
        return self.layer2(H, hidden, hidden)


def train(model, optimizer, H, X, Y, train_mask):
    """Perform one full-batch optimisation step.

    Puts ``model`` in training mode, computes the cross-entropy between
    ``model(H, X)`` and ``Y`` on the rows selected by ``train_mask``,
    back-propagates and applies one ``optimizer`` step.

    Returns:
        The loss value (Python float) computed before the parameter update.
    """
    model.train()
    # Clear stale gradients before the new backward pass accumulates into them.
    optimizer.zero_grad()
    logits = model(H, X)
    train_loss = F.cross_entropy(logits[train_mask], Y[train_mask])
    train_loss.backward()
    optimizer.step()
    return train_loss.item()


def evaluate(model, H, X, Y, val_mask, test_mask, num_classes):
    """Compute validation and test accuracy of ``model`` on the full graph.

    Args:
        model: module called as ``model(H, X)``; it is switched to eval mode.
        H: incidence matrix passed through to the model.
        X: input features passed through to the model.
        Y: target class index per row of the model output.
        val_mask: selector for the validation rows.
        test_mask: selector for the test rows.
        num_classes: unused; kept so existing call sites keep working.

    Returns:
        ``(val_acc, test_acc)`` as computed by ``accuracy``.
    """
    model.eval()
    # Inference only: disable autograd so no graph is built for this pass.
    with torch.no_grad():
        Y_hat = model(H, X)
    val_acc = accuracy(Y_hat[val_mask], Y[val_mask])
    test_acc = accuracy(Y_hat[test_mask], Y[test_mask])
    return val_acc, test_acc


def load_data():
    """Load Cora via DGL and build the hypergraph inputs.

    The incidence matrix is the sparse matrix of the citation edges plus the
    identity, so every node also has an entry on its own diagonal position.

    Returns:
        ``(H, X, Y, num_classes, train_mask, val_mask, test_mask)`` where
        ``X``, ``Y`` and the masks are read from the graph's node data
        (``feat``, ``label``, ``train_mask``, ``val_mask``, ``test_mask``).
    """
    dataset = CoraGraphDataset()
    graph = dataset[0]
    node_data = graph.ndata

    edge_index = torch.stack(graph.edges())
    incidence = dglsp.spmatrix(edge_index)
    incidence = incidence + dglsp.identity(incidence.shape)

    return (
        incidence,
        node_data["feat"],
        node_data["label"],
        dataset.num_classes,
        node_data["train_mask"],
        node_data["val_mask"],
        node_data["test_mask"],
    )


# Build the data, the two-layer attention model and its optimizer.
H, X, Y, num_classes, train_mask, val_mask, test_mask = load_data()
model = Net(in_size=X.shape[1], out_size=num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Two epochs only; each epoch is one train step followed by an evaluation,
# with the latest metrics shown on the progress bar.
with tqdm.trange(2) as tq:
    for epoch in tq:
        loss = train(model, optimizer, H, X, Y, train_mask)
        val_acc, test_acc = evaluate(model, H, X, Y, val_mask, test_mask, num_classes)
        tq.set_postfix(
            {
                "Loss": format(loss, ".5f"),
                "Val acc": format(val_acc, ".5f"),
                "Test acc": format(test_acc, ".5f"),
            },
            refresh=False,
        )

# Report the test accuracy from the last epoch.
print(f"Test acc: {test_acc:.3f}")

