In [1]:
import os
from torch_geometric.datasets import QM9

dataset_path = 'data/QM9'

if os.path.exists(dataset_path) and os.listdir(dataset_path):
    print("Dataset sudah ada, langsung load.")
else:
    print("Dataset tidak ditemukan, akan download terlebih dahulu.")

dataset = QM9(root=dataset_path)



Dataset sudah ada, langsung load.


In [2]:
import pandas as pd

df = pd.read_csv(os.path.join(dataset_path, "raw", "gdb9.sdf.csv"))
mol_id_to_u0 = {row['mol_id']: row['u0'] for _, row in df.iterrows()}
mol_id_to_u0

{'gdb_1': -40.47893,
 'gdb_2': -56.525887,
 'gdb_3': -76.404702,
 'gdb_4': -77.308427,
 'gdb_5': -93.411888,
 'gdb_6': -114.483613,
 'gdb_7': -79.764152,
 'gdb_8': -115.679136,
 'gdb_9': -116.609549,
 'gdb_10': -132.71815,
 'gdb_11': -153.787612,
 'gdb_12': -169.860788,
 'gdb_13': -119.052475,
 'gdb_14': -154.972731,
 'gdb_15': -154.960361,
 'gdb_16': -117.824798,
 'gdb_17': -153.742562,
 'gdb_18': -193.08834,
 'gdb_19': -209.159302,
 'gdb_20': -225.221461,
 'gdb_21': -158.342346,
 'gdb_22': -194.267232,
 'gdb_23': -153.459846,
 'gdb_24': -169.557758,
 'gdb_25': -185.648533,
 'gdb_26': -190.624631,
 'gdb_27': -206.721858,
 'gdb_28': -227.798785,
 'gdb_29': -155.908941,
 'gdb_30': -155.897345,
 'gdb_31': -172.006141,
 'gdb_32': -188.042067,
 'gdb_33': -191.810916,
 'gdb_34': -207.916786,
 'gdb_35': -193.075202,
 'gdb_36': -209.144909,
 'gdb_37': -229.013797,
 'gdb_38': -228.992613,
 'gdb_39': -158.340943,
 'gdb_40': -194.261089,
 'gdb_41': -194.254127,
 'gdb_42': -230.183076,
 'gdb_43':

In [3]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
torch.cuda.is_available()

True

In [5]:
torch.__version__

'2.5.1'

In [6]:
def get_u0(batch):
    return torch.tensor([mol_id_to_u0[mol_id] for mol_id in batch.name], dtype=torch.float).to(device).unsqueeze(1)

In [23]:
from torch_geometric.loader import DataLoader

train_dataset = dataset[:100000]
val_dataset = dataset[100000:110000]
test_dataset = dataset[110000:]

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [24]:

import torch.nn.functional as F
from torch_geometric.nn import SchNet

model = SchNet(hidden_channels=128, num_filters=128, num_interactions=6, num_gaussians=50, cutoff=10.0)
model = model.to(device)

In [25]:
train_loader.batch_size

64

In [27]:
from torch.optim import Adam

optimizer = Adam(model.parameters(), lr=0.001)
criterion = torch.nn.L1Loss()

def train(epoch):
    model.train()
    # print(f'Starting epoch {epoch}')  # Debug print
    for i, batch in enumerate(train_loader):
        batch = batch.to(device)
        optimizer.zero_grad()
        pred = model(batch.z, batch.pos, batch.batch)
        u0 = get_u0(batch)
        loss = criterion(pred, u0)
        loss.backward()
        optimizer.step()
    # print(f'Completed epoch {epoch}')  # Debug print

def validate():
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            pred = model(batch.z, batch.pos, batch.batch)
            u0 = get_u0(batch)
            loss = criterion(pred, u0)
            total_loss += loss.item()
    return total_loss / len(val_loader)

for epoch in range(1, 11):
    print(f'Epoch #{epoch}', end=" ")
    train(epoch)
    val_loss = validate()
    print(f', Validation Loss: {val_loss:.4f}')

Epoch #1 , Validation Loss: 1.3183
Epoch #2 , Validation Loss: 0.9978
Epoch #3 , Validation Loss: 0.8506
Epoch #4 , Validation Loss: 0.9264
Epoch #5 , Validation Loss: 1.0904
Epoch #6 , Validation Loss: 1.1781
Epoch #7 , Validation Loss: 0.7436
Epoch #8 , Validation Loss: 0.3383
Epoch #9 , Validation Loss: 0.5226
Epoch #10 , Validation Loss: 1.7867
