# CSE 881 Project
**Group Names: Edmond Anderson, Sarah Bradford, Lacey Hamilton**

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import json
from scipy import sparse
from pathlib import Path

In [3]:
path = Path('data_2024')
adj_matrix = sparse.load_npz(path/'adj.npz')
feat  = np.load(path/'features.npy')
labels = np.load(path/'labels.npy')
splits = json.load(open(path/'splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

In [14]:
feat

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0]])

In [3]:
adj_matrix = sparse.csr_matrix((adj['data'], adj['indices'], adj['indptr']), shape=adj['shape'])

NameError: name 'adj' is not defined

In [5]:
adj_matrix = torch.FloatTensor(adj_matrix.toarray())
node_features = torch.FloatTensor(feat)
labels = torch.LongTensor(labels)

In [6]:
class GraphDataset(Dataset):
    def __init__(self, adj_matrix, node_features, labels):
        self.adj_matrix = adj_matrix
        self.node_features = node_features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.adj_matrix[idx], self.node_features[idx], self.labels[idx]

In [7]:
print(splits.keys())

dict_keys(['idx_train', 'idx_test'])


In [7]:
idx_train = splits['idx_train']
idx_test = splits['idx_test']
idx_train = [idx for idx in idx_train if idx < len(labels)]
idx_test = [idx for idx in idx_test if idx < len(labels)]
# idx_train.sort()
# idx_test.sort()

In [8]:
train_dataset = GraphDataset(adj_matrix[idx_train], node_features[idx_train], labels[idx_train])
test_dataset = GraphDataset(adj_matrix[idx_test], node_features[idx_test], labels[idx_test])
#val_dataset = GraphDataset(adj_matrix[idx_val], node_features[idx_val], labels[idx_val])

In [9]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
#val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

In [10]:
# defining the GAT layer
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GATLayer, self).__init__()
        self.fc = nn.Linear(in_features, out_features)
        self.attn_fc = nn.Linear(2 * out_features, 1)

    def forward(self, adj_matrix, node_features):
        h = self.fc(node_features)
        N = h.size()[0]

        # Compute attention scores
        attn_scores = torch.zeros(N, N)
        for i in range(N):
            for j in range(N):
                if adj_matrix[i, j] == 1:
                    attn_input = torch.cat([h[i], h[j]], dim=0)
                    attn_scores[i, j] = self.attn_fc(attn_input).squeeze()

        # Compute attention coefficients
        attn_coefficients = nn.functional.softmax(attn_scores, dim=1)

        # Compute output features
        h_prime = torch.zeros(N, h.size()[1])
        for i in range(N):
            for j in range(N):
                h_prime[i] += attn_coefficients[i, j] * h[j]

        return h_prime
# defining th GAT model
class GAT(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(GAT, self).__init__()
        self.layer1 = GATLayer(in_features, hidden_features)
        self.layer2 = GATLayer(hidden_features, out_features)

    def forward(self, adj_matrix, node_features):
        h = self.layer1(adj_matrix, node_features)
        h = torch.relu(h)
        h = self.layer2(adj_matrix, h)
        return h
# GAT model
model = GAT(in_features=node_features.size()[1], hidden_features=8, out_features=7)



In [11]:
# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# train the model
model.train()
for epoch in range(50):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        adj_matrix, node_features, labels = data
        optimizer.zero_grad()
        outputs = model(adj_matrix, node_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / len(train_loader)))
# model testing
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        adj_matrix, node_features, labels = data
        outputs = model(adj_matrix, node_features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy: %d %%' % (100 * correct / total))


[1] loss: 1.895
[2] loss: 1.478
[3] loss: 1.005
[4] loss: 0.588
[5] loss: 0.374
[6] loss: 0.278
[7] loss: 0.234
[8] loss: 0.203
[9] loss: 0.188
[10] loss: 0.169
[11] loss: 0.168
[12] loss: 0.120
[13] loss: 0.103
[14] loss: 0.079
[15] loss: 0.080
[16] loss: 0.069
[17] loss: 0.065
[18] loss: 0.066
[19] loss: 0.055
[20] loss: 0.052
[21] loss: 0.062
[22] loss: 0.033
[23] loss: 0.035
[24] loss: 0.032
[25] loss: 0.038
[26] loss: 0.029
[27] loss: 0.025
[28] loss: 0.023
[29] loss: 0.022
[30] loss: 0.022
[31] loss: 0.021
[32] loss: 0.023
[33] loss: 0.020
[34] loss: 0.018
[35] loss: 0.023
[36] loss: 0.027
[37] loss: 0.017
[38] loss: 0.013
[39] loss: 0.013
[40] loss: 0.012
[41] loss: 0.012
[42] loss: 0.011
[43] loss: 0.011
[44] loss: 0.010
[45] loss: 0.010
[46] loss: 0.009
[47] loss: 0.009
[48] loss: 0.008
[49] loss: 0.008
[50] loss: 0.008
Accuracy: 24 %


In [12]:
# improve the accuracy by adding more layers
class ImprovedGAT(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(ImprovedGAT, self).__init__()
        self.layer1 = GATLayer(in_features, hidden_features)
        self.layer2 = GATLayer(hidden_features, hidden_features)
        self.layer3 = GATLayer(hidden_features, out_features)

    def forward(self, adj_matrix, node_features):
        h = self.layer1(adj_matrix, node_features)
        h = torch.relu(h)
        h = self.layer2(adj_matrix, h)
        h = torch.relu(h)
        h = self.layer3(adj_matrix, h)
        return h
# Improved GAT model
model = ImprovedGAT(in_features=node_features.size()[1], hidden_features=8, out_features=7)
# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# train the model
model.train()
for epoch in range(100):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        adj_matrix, node_features, labels = data
        optimizer.zero_grad()
        outputs = model(adj_matrix, node_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / len(train_loader)))
# model testing
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        adj_matrix, node_features, labels = data
        outputs = model(adj_matrix, node_features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy: %d %%' % (100 * correct / total))


[1] loss: 1.877
[2] loss: 1.544
[3] loss: 1.101
[4] loss: 0.814
[5] loss: 0.733
[6] loss: 0.659
[7] loss: 0.548
[8] loss: 0.468
[9] loss: 0.454
[10] loss: 0.487
[11] loss: 0.378
[12] loss: 0.332
[13] loss: 0.301
[14] loss: 0.268
[15] loss: 0.239
[16] loss: 0.228
[17] loss: 0.222
[18] loss: 0.228
[19] loss: 0.225
[20] loss: 0.234
[21] loss: 0.242
[22] loss: 0.222
[23] loss: 0.222
[24] loss: 0.220
[25] loss: 0.220
[26] loss: 0.221
[27] loss: 0.217
[28] loss: 0.220
[29] loss: 0.221
[30] loss: 0.217
[31] loss: 0.234
[32] loss: 0.314
[33] loss: 0.452
[34] loss: 0.204
[35] loss: 0.198
[36] loss: 0.199
[37] loss: 0.189
[38] loss: 0.186
[39] loss: 0.192
[40] loss: 0.191
[41] loss: 0.186
[42] loss: 0.184
[43] loss: 0.185
[44] loss: 0.182
[45] loss: 0.182
[46] loss: 0.187
[47] loss: 0.184
[48] loss: 0.181
[49] loss: 0.184
[50] loss: 0.181
[51] loss: 0.181
[52] loss: 0.181
[53] loss: 0.182
[54] loss: 0.263
[55] loss: 0.185
[56] loss: 0.181
[57] loss: 0.180
[58] loss: 0.183
[59] loss: 0.185
[60] l

In [13]:
pred = model(test_loader,node_features)

TypeError: 'DataLoader' object is not subscriptable

In [27]:
preds = pred[idx_test]
np.savetxt('submission.txt', preds, fmt='%d')

NameError: name 'pred' is not defined