In [9]:
import os
import csv
import torch
import torch.nn.functional as F
import numpy as np
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

In [10]:
def read_xyz(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    atom_count = int(lines[0].strip())
    comment = lines[1].strip()
    
    atom_types = []
    coordinates = []
    for line in lines[2:]:
        parts = line.split()
        if len(parts) == 4:
            atom_types.append(parts[0])
            coordinates.append([float(parts[1]), float(parts[2]), float(parts[3])])
    
    return atom_types, np.array(coordinates)

# Example usage
file_path = '/Users/victorsu-ortiz/Desktop/Fe-Boryl_complexes/data/xyz_molsimp/monosubstituted_0001.xyz'
atom_types, coordinates = read_xyz(file_path)
tensor = torch.tensor(coordinates)

print("Atom Types:", atom_types)
print("Coordinates Tensor:", tensor)


Atom Types: ['Fe', 'B', 'N', 'C', 'C', 'N', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H', 'O', 'H', 'H']
Coordinates Tensor: tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  2.1000, -0.0000],
        [-1.3158,  2.8750,  0.2655],
        [-1.6295,  3.7201, -0.8956],
        [-2.4111,  1.9197,  0.4865],
        [ 1.3158,  2.8750, -0.2655],
        [ 2.4111,  1.9198, -0.4865],
        [ 1.1548,  3.7201, -1.4574],
        [-1.7468,  3.1050, -1.7632],
        [-2.5379,  4.2551, -0.7124],
        [-0.8323,  4.4154, -1.0565],
        [-2.5283,  1.3046, -0.3811],
        [-2.1827,  1.3046,  1.3317],
        [-3.3194,  2.4547,  0.6698],
        [ 2.1827,  1.3046, -1.3317],
        [ 3.3194,  2.4548, -0.6698],
        [ 2.5283,  1.3046,  0.3811],
        [ 0.9264,  3.1050, -2.3026],
        [ 0.3575,  4.4154, -1.2966],
        [ 2.0631,  4.2551, -1.6407],
        [ 2.0800, -0.0000, -0.0000],
        [ 2.71

In [16]:
from scipy.spatial.distance import pdist, squareform

def create_graph(coordinates, threshold=1.5):
    num_atoms = coordinates.shape[0]
    edge_index = []
    
    # Calculate pairwise distances
    distances = squareform(pdist(coordinates))
    
    # Create edges based on the distance threshold
    for i in range(num_atoms):
        for j in range(i + 1, num_atoms):
            if distances[i, j] <= threshold:
                edge_index.append([i, j])
                edge_index.append([j, i])
    
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    return edge_index

# Create edge index
edge_index = create_graph(coordinates)
print("Edge Index:", edge_index)
print("Edge Index Shape:", edge_index.shape)

Edge Index: tensor([[ 2,  3,  2,  4,  3,  8,  3,  9,  3, 10,  4, 11,  4, 12,  4, 13,  5,  6,
          5,  7,  6, 14,  6, 15,  6, 16,  7, 17,  7, 18,  7, 19, 10, 18, 11, 26,
         11, 27, 12, 28, 14, 22, 16, 20, 16, 21, 20, 21, 20, 22, 23, 24, 23, 25,
         26, 27, 26, 28, 29, 30, 29, 31, 32, 33, 32, 34],
        [ 3,  2,  4,  2,  8,  3,  9,  3, 10,  3, 11,  4, 12,  4, 13,  4,  6,  5,
          7,  5, 14,  6, 15,  6, 16,  6, 17,  7, 18,  7, 19,  7, 18, 10, 26, 11,
         27, 11, 28, 12, 22, 14, 20, 16, 21, 16, 21, 20, 22, 20, 24, 23, 25, 23,
         27, 26, 28, 26, 30, 29, 31, 29, 33, 32, 34, 32]])
Edge Index Shape: torch.Size([2, 66])


In [17]:
import torch
from torch_geometric.data import Data

# Assuming `data` is the Data object created previously
def save_graph_data(data, file_path):
    torch.save(data, file_path)
    print(f"Graph data saved to {file_path}")

# Example usage
save_path = './data/torch_processed/graph_data.pth'
save_graph_data(edge_index, save_path)



Graph data saved to ./data/torch_processed/graph_data.pth


In [11]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Example usage
num_node_features = 3  # Assuming 3 features per node (x, y, z coordinates)
hidden_channels = 16
num_classes = 3  # Example number of output classes, modify as needed

model = GCN(num_node_features, hidden_channels, num_classes)
print(model)


GCN(
  (conv1): GCNConv(3, 16)
  (conv2): GCNConv(16, 3)
)


In [14]:
from torch_geometric.data import Data
# Convert coordinates to tensor
coordinates_tensor = torch.tensor(coordinates, dtype=torch.float)

# Create the graph data object
data = Data(x=coordinates_tensor, edge_index=edge_index)

# Training the model (example)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

val_losses = []

# Example training loop
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    # Assuming labels are available (for simplicity, using dummy labels here)
    labels = torch.randint(0, num_classes, (coordinates_tensor.size(0),))
    loss = criterion(out, labels)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')
    val_losses.append([epoch+1, loss.item()])


Epoch 0, Loss: 1.0882610082626343
Epoch 10, Loss: 1.147044062614441
Epoch 20, Loss: 1.1178487539291382
Epoch 30, Loss: 1.1612259149551392
Epoch 40, Loss: 1.0923107862472534
Epoch 50, Loss: 1.073821783065796
Epoch 60, Loss: 1.0721259117126465
Epoch 70, Loss: 1.0980325937271118
Epoch 80, Loss: 1.0845186710357666
Epoch 90, Loss: 1.1043436527252197
Epoch 100, Loss: 1.086771011352539
Epoch 110, Loss: 1.103747844696045
Epoch 120, Loss: 1.083693265914917
Epoch 130, Loss: 1.0696842670440674
Epoch 140, Loss: 1.1011320352554321
Epoch 150, Loss: 1.0985819101333618
Epoch 160, Loss: 1.1353219747543335
Epoch 170, Loss: 1.1184213161468506
Epoch 180, Loss: 1.0772175788879395
Epoch 190, Loss: 1.1302759647369385


In [22]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, global_mean_pool
import torch.nn.functional as F

# Example graph neural network model
class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Load data from .pth file
data_path = '/Users/victorsu-ortiz/Desktop/Fe-Boryl_complexes/data/torch_processed/graph_data.pth'
data = torch.load(data_path)
edge_index = data

# Convert coordinates and labels to tensor
coordinates_tensor = torch.tensor(coordinates, dtype=torch.float)
labels_tensor = torch.tensor(labels, dtype=torch.long)

# Create the graph data object
data = Data(x=coordinates_tensor, edge_index=edge_index, y=labels_tensor)

# Initialize model, loss function, and optimizer
num_node_features = coordinates_tensor.size(1)
num_classes = len(set(labels))
model = GCN(num_node_features, num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

val_losses = []

# Training loop
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')
    
    val_losses.append([epoch+1, loss.item()])

# Save model
torch.save(model.state_dict(), 'model.pth')

# Print validation losses
print("Validation Losses:", val_losses)


Epoch 0, Loss: 3.7778289318084717
Epoch 10, Loss: 2.2074599266052246
Epoch 20, Loss: 1.2938276529312134
Epoch 30, Loss: 0.9356461763381958
Epoch 40, Loss: 0.8730111718177795
Epoch 50, Loss: 0.8396367430686951
Epoch 60, Loss: 0.8275805711746216
Epoch 70, Loss: 0.8168777823448181
Epoch 80, Loss: 0.8089335560798645
Epoch 90, Loss: 0.8011478781700134
Epoch 100, Loss: 0.7924127578735352
Epoch 110, Loss: 0.7834661602973938
Epoch 120, Loss: 0.7728803753852844
Epoch 130, Loss: 0.7564015984535217
Epoch 140, Loss: 0.7386890649795532
Epoch 150, Loss: 0.724384069442749
Epoch 160, Loss: 0.7102684378623962
Epoch 170, Loss: 0.6959591507911682
Epoch 180, Loss: 0.6804864406585693
Epoch 190, Loss: 0.6644735336303711
Validation Losses: [[1, 3.7778289318084717], [2, 3.595705270767212], [3, 3.4188263416290283], [4, 3.2470011711120605], [5, 3.080409049987793], [6, 2.919478416442871], [7, 2.7642438411712646], [8, 2.614863872528076], [9, 2.4720184803009033], [10, 2.3361003398895264], [11, 2.2074599266052246],

  labels_tensor = torch.tensor(labels, dtype=torch.long)


In [15]:
os.makedirs(f'./val_losses/gcn_noxyz', exist_ok=True)
with open(f'./val_losses/gcn_noxyz/val_losses.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Epoch', 'Validation Loss'])
    writer.writerows(val_losses)

In [23]:
import re

file_name = "monosubstituted_0001.xyz"

# Define the regex pattern to extract the number before the file extension
pattern = r'_(\d+)\.'

# Search for the pattern in the file name
match = re.search(pattern, file_name)

# Extract the number if found
if match:
    number = match.group(1)
    print(number)
else:
    print("No number found in the file name.")


0001


In [24]:
def save_graph_data(data, file_path):
    torch.save(data, file_path)
    print(f"Graph data saved to {file_path}")

save_graph_data({"id": "001", "tensor": edge_index}, "sample_0001")

Graph data saved to sample_0001
