In [4]:
import pandas as pd

labels_df  = pd.read_csv('data/SS-Butterfly_labels.tsv', sep='\t')
weights_df  = pd.read_csv('data/SS-Butterfly_weights.tsv', sep='\t')

In [5]:
labels_df  = labels_df .rename(columns={'# Node_ID': 'Node_ID', 'Species': 'species'})
labels_df .head()

Unnamed: 0,Node_ID,species
0,0,1
1,1,1
2,2,1
3,3,1
4,4,1


In [6]:
weights_df  = weights_df .rename(columns={'# NodeID1': 'NodeID1', 'NodeID2': 'NodeID2', 'Image similarity weight': 'Image similarity weight'})
weights_df .head()


Unnamed: 0,NodeID1,NodeID2,Image similarity weight
0,0,4,0.029663
1,0,5,0.250146
2,0,7,0.052237
3,0,10,0.096204
4,0,12,0.10205


In [7]:
import dgl

# Create a DGL graph from the weights DataFrame
graph = dgl.graph((weights_df["NodeID1"], weights_df["NodeID2"]))

In [9]:
import torch

# Add the species labels as node features to the graph
species = torch.tensor(labels_df['species'].values)
graph.ndata['species'] = species

In [11]:
import torch.nn as nn

# Define the model
class GCN(nn.Module):
    def __init__(self, in_feats, hidden_size, num_classes):
        super(GCN, self).__init__()
        self.conv1 = dgl.nn.GraphConv(in_feats, hidden_size)
        self.conv2 = dgl.nn.GraphConv(hidden_size, num_classes)

    def forward(self, g, inputs):
        h = self.conv1(g, inputs)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

In [12]:
# Set the model parameters
in_feats = 3 # number of input features per node (i.e., the number of dimensions in your node features tensor)
hidden_size = 16
num_classes = len(labels_df['species'].unique()) # number of unique species labels in the dataset


In [13]:
# Create the model
model = GCN(in_feats, hidden_size, num_classes)

In [14]:
# Define the loss function
loss_fn = nn.CrossEntropyLoss()

In [15]:
# Set the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    logits = model(graph, feature_tensor.float())
    loss = loss_fn(logits[train_mask], label_tensor[train_mask])

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Evaluate the model on the validation set
    with torch.no_grad():
        val_logits = model(graph, feature_tensor.float())
        val_loss = loss_fn(val_logits[val_mask], label_tensor[val_mask])
        val_acc = (val_logits[val_mask].argmax(dim=1) == label_tensor[val_mask]).float().mean()

    print(f"Epoch {epoch+1}: train_loss={loss.item():.4f} val_loss={val_loss.item():.4f} val_acc={val_acc.item():.4f}")


In [4]:
import networkx as nx

G = nx.Graph()

# add nodes to the graph
for node_id, species in node_df[['Node_ID', 'species']].values:
    G.add_node(node_id, species=species)

# add edges to the graph
for node1, node2, weight in edge_df[['NodeID1', 'NodeID2', 'Image similarity weight']].values:
    G.add_edge(node1, node2, weight=weight)


In [29]:
print(G)

Graph with 832 nodes and 86528 edges


In [14]:
# If you have installed dgl-cuXX package, please uninstall it first.
!pip install  dgl -f https://data.dgl.ai/wheels/repo.html
!pip install  dglgo -f https://data.dgl.ai/wheels-test/repo.html

Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl
  Downloading dgl-1.1.0-cp39-cp39-win_amd64.whl (3.2 MB)
     ---------------------------------------- 3.2/3.2 MB 9.0 MB/s eta 0:00:00
Installing collected packages: dgl
Successfully installed dgl-1.1.0



[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: C:\Users\laura\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Looking in links: https://data.dgl.ai/wheels-test/repo.html
Collecting dglgo
  Downloading dglgo-0.0.2-py3-none-any.whl (63 kB)
     ---------------------------------------- 63.5/63.5 kB 1.1 MB/s eta 0:00:00
Collecting numpydoc>=1.1.0
  Downloading numpydoc-1.5.0-py3-none-any.whl (52 kB)
     ---------------------------------------- 52.4/52.4 kB 2.6 MB/s eta 0:00:00
Collecting typer>=0.4.0
  Downloading typer-0.9.0-py3-none-any.whl (45 kB)
     ---------------------------------------- 45.9/45.9 kB 2.2 MB/s eta 0:00:00
Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp39-cp39-win_amd64.whl (20.5 MB)
     ---------------------------------------- 20.5/20.5 MB 4.4 MB/s eta 0:00:00
Collecting ruamel.yaml>=0.17.20
  Downloading ruamel.yaml-0.17.22-py3-none-any.whl (107 kB)
     -------------------------------------- 107.8/107.8 kB 2.1 MB/s eta 0:00:00
Collecting isort>=5.10.1
  Downloading isort-5.12.0-py3-none-any.whl (91 kB)
     -------------------------------------- 91.2/91.2 kB 

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandas-profiling 3.1.1 requires markupsafe~=2.0.1, but you have markupsafe 2.1.2 which is incompatible.

[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: C:\Users\laura\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [32]:
import dgl
import torch

dgl_G = dgl.from_networkx(G)
feature_tensor = torch.rand((dgl_G.number_of_nodes(), 10))
dgl_G.ndata['species'] = feature_tensor

In [33]:
import torch.nn as nn
import dgl.nn as dglnn

class GNN(nn.Module):
    def __init__(self, in_feats, hidden_feats, out_feats):
        super().__init__()
        self.conv1 = dglnn.GraphConv(in_feats, hidden_feats)
        self.conv2 = dglnn.GraphConv(hidden_feats, out_feats)
        
    def forward(self, graph):
        x = graph.ndata['species']
        x = self.conv1(graph, x)
        x = nn.functional.relu(x)
        x = self.conv2(graph, x)
        graph.ndata['h'] = x
        hg = dgl.mean_nodes(graph, 'h')
        return hg


In [34]:
import torch.optim as optim

model = GNN(in_feats=1, hidden_feats=16, out_feats=2)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

num_epochs = 1

for epoch in range(num_epochs):
    # forward pass
    logits = model(dgl_G)
    loss = criterion(logits, dgl_G.ndata['species'])
    
    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


  assert input.numel() == input.storage().size(), (


RuntimeError: mat1 and mat2 shapes cannot be multiplied (832x10 and 1x16)

In [35]:
import matplotlib.pyplot as plt

# Convert the DGL graph to a NetworkX graph
nx_graph = dgl_G.to_networkx()

# Draw the graph using NetworkX
nx.draw(nx_graph, with_labels=True)

# Show the plot
plt.show()


KeyboardInterrupt: 