In [10]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold
import numpy as np
import matplotlib.pyplot as plt

from datasets import *
from models import *
from train import *

# Main
# Runs 5-fold cross-validation of a GCN encoder + MLP head on the molecular
# dataset and, for every fold, writes the per-epoch MSE table (CSV), the GCN
# weights (.pth) and a train/validation MSE plot (PNG).
if __name__ == "__main__":
    import os  # local import: used only to create the output directories

    # Hyperparameters
    input_dim = 61  # Feature size
    hidden_dim_gcn = 1
    hidden_dim_mlp = 3
    output_dim = 1
    batch_size = 15
    epochs = 1
    learning_rate = 0.0001

    # Output locations. Created up front so a missing directory fails fast
    # instead of raising only after a whole fold has been trained.
    results_dir = "C:/Users/user/Downloads/VAE_GCN/VAE_GCN_results"
    mse_dir = f"{results_dir}/MSE"
    models_dir = f"{results_dir}/models"
    for out_dir in (mse_dir, models_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Load dataset
    dataset_path = "C:/Users/user/Downloads/VAE_GCN/CSD_EES_DB_with_ST_split.csv"
    dataset = MolecularDataset(dataset_path, target_col="ST_split")
    sample = dataset[0]  # Get the first sample
    # NOTE(review): the recorded run of this cell printed a length of 4 and
    # then failed with "not enough values to unpack (expected 5, got 4)".
    # Confirm how many items MolecularDataset.__getitem__ returns and make the
    # code that unpacks samples agree with it.
    print("Sample length:", len(sample))
    print("Sample content:", sample)

    # K-Fold Cross-Validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    # Resolved once; the value cannot change between folds.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Shared x-axis / "Epoch" column for every fold.
    epoch_axis = list(range(1, epochs + 1))

    for fold, (train_index, val_index) in enumerate(kf.split(dataset), start=1):
        print(f"Starting Fold {fold}...")

        train_subset = Subset(dataset, train_index)
        val_subset = Subset(dataset, val_index)

        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        # Initialize model, loss, and optimizer (fresh weights for every fold)
        gcn_model = GCN(input_dim, hidden_dim_gcn).to(device)
        mlp_model = MLP(hidden_dim_gcn, hidden_dim_mlp, output_dim).to(device)
        # NOTE(review): `criterion` is created but never passed on from here;
        # presumably train_and_validate defines its own loss - confirm.
        criterion = nn.MSELoss()
        # A single optimizer updates the parameters of both networks.
        optimizer = optim.Adam(
            list(gcn_model.parameters()) + list(mlp_model.parameters()),
            lr=learning_rate,
        )

        # Train and Validate
        # NOTE(review): `device` is not forwarded to train_and_validate;
        # presumably it resolves the device itself - confirm.
        train_mse_list, val_mse_list = train_and_validate(
            gcn_model,
            mlp_model,
            train_loader,
            val_loader,
            optimizer,
            epochs,
            fold,
            low_energy_weight=1.0,
        )

        # Save MSE values to CSV
        mse_df = pd.DataFrame({
            "Epoch": epoch_axis,
            "Train_MSE": train_mse_list,
            "Validation_MSE": val_mse_list,
        })
        mse_csv_path = f"{mse_dir}/mse_train_validation_fold_{fold}.csv"
        mse_df.to_csv(mse_csv_path, index=False)
        print(f"MSE values for Fold {fold} saved to '{mse_csv_path}'")

        # Save the trained model
        # NOTE(review): only the GCN weights are persisted; the MLP head's
        # weights are not written anywhere in this script - confirm intended.
        model_path = f"{models_dir}/gcn_model_fold_{fold}.pth"
        torch.save(gcn_model.state_dict(), model_path)
        print(f"Model for Fold {fold} saved to '{model_path}'")

        # Plot train/validation MSE for this fold
        fig = plt.figure(figsize=(10, 5))
        plt.plot(epoch_axis, train_mse_list, label="Train MSE")
        plt.plot(epoch_axis, val_mse_list, label="Validation MSE")
        plt.xlabel("Epochs")
        plt.ylabel("Mean Squared Error (eV$^{2}$)")
        plt.title(f"Train and Validation MSE Over Epochs (Fold {fold})")
        plt.legend()
        plot_path = f"{mse_dir}/train_val_mse_fold_{fold}.png"
        plt.savefig(plot_path)
        # Release the figure; otherwise one open figure accumulates per fold.
        plt.close(fig)
        print(f"Train and Validation MSE plot for Fold {fold} saved as '{plot_path}'")




[15:22:04] Explicit valence for atom # 20 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 31 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 26 B, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 26 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 7 B, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 10 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 10 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 10 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 8 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 12 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 12 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 12 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 12 N, 4, is greater than permitted
[15:22:04] Explicit valence for atom # 2

Sample length: 4
Sample content: (tensor([[0.6667, 0.2887, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.2887, 0.5000, 0.2041,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.2041, 0.3333,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0000]]), tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), tensor(0.9112), tensor(36))
Using device: cpu
Starting Fold 1...




Epoch [1/1], Train Loss: 2.1366, Val Loss: 1.7992


ValueError: not enough values to unpack (expected 5, got 4)