In [1]:
import torch
from torch_geometric.data import InMemoryDataset, download_url, Data, Batch
from torch import nn
from torch.nn import functional as F
import os
import pandas as pd
import numpy as np
import pickle
import itertools
import jax
from jax import numpy as jnp
import networkx as nx
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import MinMaxScaler
import mendeleev
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler, HyperBandForBOHB
from ray.tune.suggest.bohb import TuneBOHB
import os

In [2]:
import os
from torch.utils.data import TensorDataset, DataLoader

In [3]:
files_train = os.listdir("/mnt/10edb508-27ad-4f92-9467-37a536784b53/temp/train/")

In [4]:
data = []
labels = []
directory = "/mnt/10edb508-27ad-4f92-9467-37a536784b53/temp/train/"
files_train = os.listdir(directory)
for file in np.sort(files_train):
    path = os.path.join(directory, file)
    if file[0] == "d":
        data.append(pd.read_hdf(path))
    else:
        labels.append(pd.read_hdf(path))

In [5]:
y_train = pd.concat(labels)
X_train = pd.concat(data)

In [6]:
data = []
labels = []
directory = "/mnt/10edb508-27ad-4f92-9467-37a536784b53/temp/test/"
files_test = os.listdir(directory)
for file in np.sort(files_test):
    path = os.path.join(directory, file)
    if file[0] == "d":
        data.append(pd.read_hdf(path))
    else:
        labels.append(pd.read_hdf(path))

In [7]:
X_temp_train = X_train[(y_train[0] == 0).to_numpy()]
y_temp_train = y_train[(y_train[0] == 0).to_numpy()]

In [8]:
y_test = pd.concat(labels)
X_test = pd.concat(data)

In [9]:
X_temp_test = X_test[(y_test[0] == 0).to_numpy()]
y_temp_test = y_test[(y_test[0] == 0).to_numpy()]

In [10]:
train_dataset = TensorDataset(torch.Tensor(X_temp_train.to_numpy()),torch.Tensor(y_temp_train.to_numpy())) # create your datset
train_dataloader = DataLoader(train_dataset, num_workers=12, batch_size=4096, shuffle=True, drop_last=True)
test_dataset = TensorDataset(torch.Tensor(X_temp_test.to_numpy()),torch.Tensor(y_temp_test.to_numpy())) # create your datset
test_dataloader = DataLoader(test_dataset, num_workers=12, batch_size=4096, shuffle=True)

In [11]:
del X_train, X_test, data, labels

In [12]:
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.metrics import mean_absolute_error

In [13]:
# reg = RandomForestRegressor(n_estimators=8, max_depth = 16)

In [14]:
# reg.fit(X_temp_train,y_temp_train)
# y_pred = reg.predict(X_temp_test)
# print(mean_absolute_error(y_pred, y_temp_test))

In [15]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

class ResNetGated(nn.Module):
    def __init__(self, init_dim, hidden_dim, layers, p_dropout):
        super().__init__()
        self.p_dropout = p_dropout
        assert layers > 0
        self.layers = nn.ModuleList([nn.Sequential(nn.Linear(init_dim, hidden_dim),
                             nn.ReLU(),
                             nn.BatchNorm1d(hidden_dim),
                             nn.Linear( hidden_dim, init_dim)) for i in range(layers)])
        self.gates = nn.Parameter(torch.Tensor(layers))
        self.layers.apply(init_weights)
    def forward(self, x):
        range_gates = torch.sigmoid(self.gates)
        for i, layer in enumerate(self.layers):
            x = F.relu(x)
            x = (range_gates[i])*layer(x) + (1-range_gates[i])*x
        return x

    
class Net(torch.nn.Module):
    def __init__(self, init_dim, hidden_dim, hidden_width, layers, p_dropout):
        super().__init__()
        self.dr = nn.Sequential(nn.Linear(init_dim, 512),
                               nn.ReLU(),
                               nn.Dropout(p=p_dropout),
                               nn.Linear(512, hidden_dim))
        # self.ad = nn.Linear(init_dim, hidden_dim)
        self.fc = ResNetGated(hidden_dim, hidden_width, layers, p_dropout)
        self.reg = nn.Linear(hidden_dim, 1)
        self.fc.apply(init_weights)
        self.reg.apply(init_weights)
        self.dr.apply(init_weights)
    def forward(self, x):
        x = self.dr(x)
        x = self.fc(x)
        x = self.reg(x)
        return x

In [19]:
def train(model, device, data ,loss_fn, optimizer):
    model.train()
    train_losses = []
    optimizer.zero_grad()
    for i, batch in enumerate(data):
        data, labels = batch[0].to(device), batch[1][:, 1].to(device)
        logits = model(data)
        loss = loss_fn()
        output=loss(logits.squeeze(), labels.squeeze())
        output.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 3)
        optimizer.step()
        train_loss = output.data.cpu().numpy()
        train_losses.append(train_loss)    
    return np.mean(train_losses)

### Testing function
def test(model, device, data, loss_fn):
    # Set evaluation mode for encoder and decoder
    model.eval()
    test_losses = []
    with torch.no_grad(): # No need to track the gradients
        for i, batch in enumerate(data):
            data, labels = batch[0].to(device), batch[1][:, 1].to(device)
            logits = model(data)
            loss = loss_fn()
            output=loss(logits.squeeze(), labels.squeeze())
            test_loss = output.data.cpu().numpy()
            test_losses.append(test_loss)  
    return np.mean(test_losses)

In [22]:
### Define the loss function
loss_fn = nn.MSELoss

lr= 0.0001
weight_decay = 0.00001
hidden_dim = 256
hidden_width=4096
layers=2
p_dropout = 0.05
### Set the random seed for reproducible results
torch.manual_seed(0)

model = Net(776, hidden_dim, hidden_width, layers, p_dropout=p_dropout)
params_to_optimize = [
    {'params': model.parameters()}
]

optim = torch.optim.Adam(params_to_optimize, lr=lr, weight_decay=weight_decay)
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')

# Move both the encoder and the decoder to the selected device
model.to(device)

Selected device: cuda


Net(
  (dr): Sequential(
    (0): Linear(in_features=776, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.05, inplace=False)
    (3): Linear(in_features=512, out_features=256, bias=True)
  )
  (fc): ResNetGated(
    (layers): ModuleList(
      (0): Sequential(
        (0): Linear(in_features=256, out_features=4096, bias=True)
        (1): ReLU()
        (2): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Linear(in_features=4096, out_features=256, bias=True)
      )
      (1): Sequential(
        (0): Linear(in_features=256, out_features=4096, bias=True)
        (1): ReLU()
        (2): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Linear(in_features=4096, out_features=256, bias=True)
      )
    )
  )
  (reg): Linear(in_features=256, out_features=1, bias=True)
)

In [31]:
import datetime
date = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M:%S')

RUN = date

In [23]:
best_loss = 1000
num_epochs = 100
diz_loss = {'train_loss':[],'val_loss':[]}
decay = 0.98
for epoch in range(num_epochs):
    if epoch == 0:
        for par in optim.param_groups:
            par["lr"] = 0.0001
    elif epoch< 10:
        for par in optim.param_groups:
            par["lr"] = par["lr"] * 1.2
    else:
        for par in optim.param_groups:
            par["lr"] = par["lr"] * decay
    train_loss = train(model, device, train_dataloader, loss_fn, optim)
    test_loss = test(model, device, test_dataloader, loss_fn)
    print('\n EPOCH {}/{} \t train loss {} \t \t val loss {}'.format(epoch + 1, num_epochs, train_loss, test_loss))
    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(test_loss)
    if test_loss < best_loss:
        best_loss = test_loss
        torch.save(model.state_dict(), "./saved_models/type0_{}.pth".format(RUN))


 EPOCH 1/100 	 train loss 947.40966796875 	 	 val loss 12.255552291870117

 EPOCH 2/100 	 train loss 11.25273323059082 	 	 val loss 23.75988006591797

 EPOCH 3/100 	 train loss 10.218965530395508 	 	 val loss 16.986724853515625

 EPOCH 4/100 	 train loss 11.1924409866333 	 	 val loss 8.492461204528809

 EPOCH 5/100 	 train loss 9.576630592346191 	 	 val loss 16.53786277770996

 EPOCH 6/100 	 train loss 12.072429656982422 	 	 val loss 7.526257514953613

 EPOCH 7/100 	 train loss 11.48862075805664 	 	 val loss 6.685426235198975

 EPOCH 8/100 	 train loss 12.567367553710938 	 	 val loss 22.32969093322754

 EPOCH 9/100 	 train loss 13.027607917785645 	 	 val loss 6.411657333374023

 EPOCH 10/100 	 train loss 12.463861465454102 	 	 val loss 5.2877631187438965

 EPOCH 11/100 	 train loss 10.283299446105957 	 	 val loss 9.738157272338867

 EPOCH 12/100 	 train loss 10.022004127502441 	 	 val loss 4.517904281616211

 EPOCH 13/100 	 train loss 10.027518272399902 	 	 val loss 8.715250015258789


In [24]:
train_dataloader = DataLoader(train_dataset, num_workers=12, batch_size=9192, shuffle=True, drop_last=True)

In [32]:
diz_loss = {'train_loss':[],'val_loss':[]}
decay = 0.98
num_epochs = 200
size = 4096
for epoch in range(num_epochs):
    if epoch%50 == 0:
        for par in optim.param_groups:
            par["weight_decay"] = par["weight_decay"]/2
            size = size * 2
            train_dataloader = DataLoader(train_dataset, num_workers=12, batch_size=size, shuffle=True, drop_last=True)
    train_loss = train(model, device, train_dataloader, loss_fn, optim)
    test_loss = test(model, device, test_dataloader, loss_fn)
    print('\n EPOCH {}/{} \t train loss {} \t \t val loss {}'.format(epoch + 1, num_epochs, train_loss, test_loss))
    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(test_loss)
    if test_loss < best_loss:
        best_loss = test_loss
        torch.save(model.state_dict(), "./saved_models/type0_{}.pth".format(RUN))


 EPOCH 1/200 	 train loss 1.0806183815002441 	 	 val loss 1.3378956317901611

 EPOCH 2/200 	 train loss 1.122038722038269 	 	 val loss 1.2559125423431396

 EPOCH 3/200 	 train loss 1.1041810512542725 	 	 val loss 1.6684787273406982

 EPOCH 4/200 	 train loss 1.1207603216171265 	 	 val loss 1.3649799823760986

 EPOCH 5/200 	 train loss 1.0891923904418945 	 	 val loss 1.2943757772445679

 EPOCH 6/200 	 train loss 1.0935049057006836 	 	 val loss 1.2516168355941772

 EPOCH 7/200 	 train loss 1.1176207065582275 	 	 val loss 1.5039525032043457

 EPOCH 8/200 	 train loss 1.0759211778640747 	 	 val loss 1.6000818014144897

 EPOCH 9/200 	 train loss 1.1012513637542725 	 	 val loss 1.4496575593948364

 EPOCH 10/200 	 train loss 1.1134144067764282 	 	 val loss 1.2922626733779907

 EPOCH 11/200 	 train loss 1.0981831550598145 	 	 val loss 1.493839144706726

 EPOCH 12/200 	 train loss 1.1291348934173584 	 	 val loss 1.2377830743789673

 EPOCH 13/200 	 train loss 1.1203107833862305 	 	 val loss 1.3

RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 11.75 GiB total capacity; 2.58 GiB already allocated; 24.75 MiB free; 2.68 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [33]:
decay = 0.98
num_epochs = 500
size = 4096
for epoch in range(num_epochs):
    # if epoch%50 == 0:
    #     for par in optim.param_groups:
    #         par["weight_decay"] = par["weight_decay"]/2
    #         size = size * 2
    #         train_dataloader = DataLoader(train_dataset, num_workers=12, batch_size=size, shuffle=True, drop_last=True)
    train_loss = train(model, device, train_dataloader, loss_fn, optim)
    test_loss = test(model, device, test_dataloader, loss_fn)
    print('\n EPOCH {}/{} \t train loss {} \t \t val loss {}'.format(epoch + 1, num_epochs, train_loss, test_loss))
    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(test_loss)
    if test_loss < best_loss:
        best_loss = test_loss
        torch.save(model.state_dict(), "./saved_models/type0_{}.pth".format(RUN))


 EPOCH 1/500 	 train loss 0.8321484327316284 	 	 val loss 1.1664091348648071

 EPOCH 2/500 	 train loss 0.9137967824935913 	 	 val loss 1.196007490158081

 EPOCH 3/500 	 train loss 0.8796329498291016 	 	 val loss 1.2180395126342773

 EPOCH 4/500 	 train loss 0.8323535919189453 	 	 val loss 1.187269926071167

 EPOCH 5/500 	 train loss 0.8880458474159241 	 	 val loss 1.1900079250335693

 EPOCH 6/500 	 train loss 0.8844610452651978 	 	 val loss 1.1753665208816528

 EPOCH 7/500 	 train loss 1.0223983526229858 	 	 val loss 1.5502612590789795

 EPOCH 8/500 	 train loss 0.9309446215629578 	 	 val loss 1.3624950647354126

 EPOCH 9/500 	 train loss 0.8570320010185242 	 	 val loss 1.2701430320739746

 EPOCH 10/500 	 train loss 0.8738130331039429 	 	 val loss 1.1969883441925049

 EPOCH 11/500 	 train loss 1.0300955772399902 	 	 val loss 1.6159136295318604

 EPOCH 12/500 	 train loss 0.8816038370132446 	 	 val loss 1.2026729583740234

 EPOCH 13/500 	 train loss 0.8881822824478149 	 	 val loss 1.1