Size-extensive neural net

this neural net takes in molecules-embs of different sizes makes a proposition about each atomistic contribution to a size-extensive property


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
import pandas as pd


# Create the neural network instance
class atomwise_nn(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(atomwise_nn, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Define layers
        self.fc1 = nn.Linear(input_size, hidden_size).double()
        self.fc2 = nn.Linear(hidden_size, output_size).double()

    def forward(self, x):
        mp = 0
        for each_atomemb in range(len(x)):
            # Forward pass through the network
            emb = x[each_atomemb]
            emb = F.relu(self.fc1(emb))
            emb = self.fc2(emb)

            mp = mp + emb

        return mp

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define hyperparameters
input_size = 128
hidden_size = 200
output_size = 1
learning_rate = 0.001
num_epochs = 10000
num_train_samples = 450
num_val_samples = 100

model = atomwise_nn(input_size, hidden_size, output_size)

# Define the loss function (criterion) and the optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)



In [3]:

embspath = '../data/datasets/embsMP/embslayer5.csv'
mps_path = '../data/datasets/embsMP//mps.csv'
embs = pd.read_csv(embspath)
mps_true = pd.read_csv(mps_path)

embs_128 = embs.iloc[:,0:128].values
normalize_embs_128 = nn.BatchNorm1d(128).double()
embs_128norm = normalize_embs_128(torch.tensor(embs_128))

embs_norm = np.hstack((embs_128norm.detach().numpy(),embs.iloc[:,128:].values))

In [4]:
print(np.shape(embs_norm))

(18733, 138)


In [5]:


# Training loop
for epoch in range(num_epochs):
    total_train_loss = 0.0
    model.train()
    train_loss = []
    val_loss = []
    for each_molecule in range(num_train_samples):
        # Generate random data for each batch
        X = embs_norm[embs_norm[:,128] == each_molecule]
        X = X[:,0:128]
        y = mps_true.iloc[each_molecule].values
        
        # Convert data to PyTorch tensors
        X_tensor = torch.tensor(X)
        y_tensor = torch.tensor(y)

        # Zero gradients, forward pass, backward pass, and update weights
        optimizer.zero_grad()
        output = model(X_tensor)

        loss = criterion(output, y_tensor)
        loss.backward()
        optimizer.step()

        # Accumulate loss for this epoch
        total_train_loss += loss.item()

    model.eval()
    outputs = []
    truths = []
    total_val_loss = 0.0
    with torch.no_grad():
        for each_molecule in range(num_val_samples):
            # Generate random data for each batch
            X = embs_norm[embs_norm[:,128] == each_molecule]
            X = X[:,0:128]
            y = mps_true.iloc[each_molecule].values
            
            # Convert data to PyTorch tensors
            X_tensor = torch.tensor(X)
            y_tensor = torch.tensor(y)

            # Zero gradients, forward pass, backward pass, and update weights
            optimizer.zero_grad()
            output = model(X_tensor)

            loss = criterion(output, y_tensor)

            # Accumulate loss for this epoch
            total_val_loss += loss.item()            

            outputs.append(output)
            truths.append(y)

    # Print the average loss for this epoch
    average_train_loss = total_train_loss / num_train_samples
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {average_train_loss:.4f}")

    # Print the average loss for this epoch
    average_val_loss = total_val_loss / num_val_samples
    print(f"Epoch {epoch+1}/{num_epochs}, Val Loss: {average_val_loss:.4f}")
    
    train_loss.append([epoch,average_train_loss])
    val_loss.append([epoch,average_val_loss])

Epoch 1/10000, Train Loss: 9266.9003
Epoch 1/10000, Val Loss: 4582.9909
Epoch 2/10000, Train Loss: 5647.0260
Epoch 2/10000, Val Loss: 4025.1713
Epoch 3/10000, Train Loss: 5052.5312
Epoch 3/10000, Val Loss: 3671.9804
Epoch 4/10000, Train Loss: 4700.9276
Epoch 4/10000, Val Loss: 3437.6505
Epoch 5/10000, Train Loss: 4457.1018
Epoch 5/10000, Val Loss: 3272.9053
Epoch 6/10000, Train Loss: 4266.0710
Epoch 6/10000, Val Loss: 3161.3833
Epoch 7/10000, Train Loss: 4108.9616
Epoch 7/10000, Val Loss: 3085.4127
Epoch 8/10000, Train Loss: 3973.0461
Epoch 8/10000, Val Loss: 3030.2388
Epoch 9/10000, Train Loss: 3853.3538
Epoch 9/10000, Val Loss: 2994.3806
Epoch 10/10000, Train Loss: 3746.0587
Epoch 10/10000, Val Loss: 2968.5624
Epoch 11/10000, Train Loss: 3654.6674
Epoch 11/10000, Val Loss: 2945.7042
Epoch 12/10000, Train Loss: 3565.5008
Epoch 12/10000, Val Loss: 2926.2675
Epoch 13/10000, Train Loss: 3489.6496
Epoch 13/10000, Val Loss: 2903.6343
Epoch 14/10000, Train Loss: 3415.9077
Epoch 14/10000, Va