In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

In [3]:
def read_xyz(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    atom_count = int(lines[0].strip())
    comment = lines[1].strip()
    
    atom_types = []
    coordinates = []
    for line in lines[2:]:
        parts = line.split()
        if len(parts) == 4:
            atom_types.append(parts[0])
            coordinates.append([float(parts[1]), float(parts[2]), float(parts[3])])
    
    return atom_types, np.array(coordinates)

# Example usage
file_path = '/Users/victorsu-ortiz/Desktop/Fe-Boryl_complexes/data/xyz_molsimp/monosubstituted_0001.xyz'
atom_types, coordinates = read_xyz(file_path)
tensor = torch.tensor(coordinates)

print("Atom Types:", atom_types)
print("Coordinates Tensor:", tensor)


Atom Types: ['Fe', 'B', 'N', 'C', 'C', 'N', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H']
Coordinates Tensor: tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  2.1000, -0.0000],
        [-1.3158,  2.8750,  0.2655],
        [-1.6295,  3.7201, -0.8956],
        [-2.4111,  1.9197,  0.4865],
        [ 1.3158,  2.8750, -0.2655],
        [ 2.4111,  1.9198, -0.4865],
        [ 1.1548,  3.7201, -1.4574],
        [-1.7468,  3.1050, -1.7632],
        [-2.5379,  4.2551, -0.7124],
        [-0.8323,  4.4154, -1.0565],
        [-2.5283,  1.3046, -0.3811],
        [-2.1827,  1.3046,  1.3317],
        [-3.3194,  2.4547,  0.6698],
        [ 2.1827,  1.3046, -1.3317],
        [ 3.3194,  2.4548, -0.6698],
        [ 2.5283,  1.3046,  0.3811],
        [ 0.9264,  3.1050, -2.3026],
        [ 0.3575,  4.4154, -1.2966],
        [ 2.0631,  4.2551, -1.6407],
        [ 2.0800, -0.0000, -0.0000],
        [ 2.71

In [4]:
from scipy.spatial.distance import pdist, squareform

def create_graph(coordinates, threshold=1.5):
    num_atoms = coordinates.shape[0]
    edge_index = []
    
    # Calculate pairwise distances
    distances = squareform(pdist(coordinates))
    
    # Create edges based on the distance threshold
    for i in range(num_atoms):
        for j in range(i + 1, num_atoms):
            if distances[i, j] <= threshold:
                edge_index.append([i, j])
                edge_index.append([j, i])
    
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    return edge_index

# Create edge index
edge_index = create_graph(coordinates)
print("Edge Index:", edge_index)
print("Edge Index Shape:", edge_index.shape)

Edge Index: tensor([[ 2,  3,  2,  4,  3,  8,  3,  9,  3, 10,  4, 11,  4, 12,  4, 13,  5,  6,
          5,  7,  6, 14,  6, 15,  6, 16,  7, 17,  7, 18,  7, 19, 10, 18, 11, 26,
         11, 27, 12, 28, 14, 22, 16, 20, 16, 21, 20, 21, 20, 22, 23, 24, 23, 25,
         26, 27, 26, 28, 29, 30, 29, 31, 32, 33, 32, 34],
        [ 3,  2,  4,  2,  8,  3,  9,  3, 10,  3, 11,  4, 12,  4, 13,  4,  6,  5,
          7,  5, 14,  6, 15,  6, 16,  6, 17,  7, 18,  7, 19,  7, 18, 10, 26, 11,
         27, 11, 28, 12, 22, 14, 20, 16, 21, 16, 21, 20, 22, 20, 24, 23, 25, 23,
         27, 26, 28, 26, 30, 29, 31, 29, 33, 32, 34, 32]])
Edge Index Shape: torch.Size([2, 66])


In [10]:
import torch
from torch_geometric.data import Data

# Assuming `data` is the Data object created previously
def save_graph_data(data, file_path):
    torch.save(data, file_path)
    print(f"Graph data saved to {file_path}")

# Example usage
save_path = 'graph_data.pth'
save_graph_data(edge_index, save_path)



Graph data saved to graph_data.pth


In [5]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Example usage
num_node_features = 3  # Assuming 3 features per node (x, y, z coordinates)
hidden_channels = 16
num_classes = 3  # Example number of output classes, modify as needed

model = GCN(num_node_features, hidden_channels, num_classes)
print(model)


GCN(
  (conv1): GCNConv(3, 16)
  (conv2): GCNConv(16, 3)
)


In [7]:
from torch_geometric.data import Data
# Convert coordinates to tensor
coordinates_tensor = torch.tensor(coordinates, dtype=torch.float)

# Create the graph data object
data = Data(x=coordinates_tensor, edge_index=edge_index)

# Training the model (example)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Example training loop
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    # Assuming labels are available (for simplicity, using dummy labels here)
    labels = torch.randint(0, num_classes, (coordinates_tensor.size(0),))
    loss = criterion(out, labels)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')


Epoch 0, Loss: 1.1623022556304932
Epoch 10, Loss: 1.1320455074310303
Epoch 20, Loss: 1.0984742641448975
Epoch 30, Loss: 1.1089043617248535
Epoch 40, Loss: 1.1205363273620605
Epoch 50, Loss: 1.1085790395736694
Epoch 60, Loss: 1.0914461612701416
Epoch 70, Loss: 1.0596809387207031
Epoch 80, Loss: 1.1232807636260986
Epoch 90, Loss: 1.1249876022338867
Epoch 100, Loss: 1.1060377359390259
Epoch 110, Loss: 1.1225318908691406
Epoch 120, Loss: 1.0868167877197266
Epoch 130, Loss: 1.1160945892333984
Epoch 140, Loss: 1.0989574193954468
Epoch 150, Loss: 1.1189559698104858
Epoch 160, Loss: 1.1121286153793335
Epoch 170, Loss: 1.0843645334243774
Epoch 180, Loss: 1.107606053352356
Epoch 190, Loss: 1.1289457082748413
