In [1]:
import os

import numpy as np
import pandas as pd

import torch
import torch.nn.functional as F
import torch.optim as optim


from datetime import datetime
from torch import nn
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import MessagePassing
from torch_scatter import scatter
from torch.utils.tensorboard import SummaryWriter

from DataClasses import lmdb_dataset, Dataset
from ModelFunctions import train, evaluate, inference

In [2]:
import sys
# Put the OCP models directory on the import path so that `spinconv` can be
# imported as a top-level module below.
# NOTE(review): the path is relative, so it presumably resolves against the
# notebook's working directory — confirm. It has no leading `~`, so
# expanduser leaves it unchanged.
sys.path.append(os.path.expanduser('../../ocp/ocpmodels/models'))

from spinconv import spinconv

In [3]:
def preprocessing(system):
    """Wrap one dataset item into a torch_geometric ``Data`` object.

    Called every time the dataset yields an element (a system). Each network
    is expected to supply its own version of this function to build the atom
    feature matrix, the edge list (``edge_index``) and the edge feature
    matrix; this version simply forwards every entry of ``system`` to
    ``Data`` as a keyword argument.

    Args:
        system: mapping of field name to value for a single system.

    Returns:
        The ``Data`` object built from the entries of ``system``.
    """
    graph = Data(**system)
    return graph

In [4]:
# config
# batch_size and num_workers are passed to both the training and the validation DataLoader.
batch_size = 50
num_workers = 0

# Feature column names handed to Dataset for both the training and the validation set.
features_cols = ['feature_1']

# target_col is the target field handed to Dataset, lr is the AdamW learning rate,
# and epochs is the number of train/evaluate rounds run by the training loop.
target_col = 'y_relaxed'
lr = 0.001
epochs = 20

In [5]:
# Make newly created tensors default to CUDA float tensors when a GPU is available
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print('cuda')

In [6]:
# Select the device used for the model and the loss: CUDA when available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [7]:
# Build the training dataset and the loader that iterates over it in batches
train_dataset_file_path = os.path.expanduser(
    "../../ocp_datasets/data/is2re/10k/train/data_mod2.lmdbz"
)

training_set = Dataset(
    train_dataset_file_path,
    features_cols,
    target_col,
    preprocessing=preprocessing,
)
training_generator = DataLoader(
    training_set,
    batch_size=batch_size,
    num_workers=num_workers,
)

{'map_addr': 0, 'map_size': 3100106752, 'last_pgno': 756861, 'last_txnid': 10000, 'max_readers': 1000, 'num_readers': 0}


In [8]:
# Build the validation dataset and the loader that iterates over it in batches
val_dataset_file_path = os.path.expanduser(
    "../../ocp_datasets/data/is2re/all/val_ood_both/data_mod2.lmdbz"
)

valid_set = Dataset(
    val_dataset_file_path,
    features_cols,
    target_col,
    preprocessing=preprocessing,
)
valid_generator = DataLoader(
    valid_set,
    batch_size=batch_size,
    num_workers=num_workers,
)

{'map_addr': 0, 'map_size': 8066416640, 'last_pgno': 1969339, 'last_txnid': 24987, 'max_readers': 1000, 'num_readers': 0}


In [9]:
# Best-effort summary of the training LMDB (the saved output lists the fields
# of item 0). A failure here must not stop the notebook, but it is reported
# instead of being silently discarded, and KeyboardInterrupt is no longer caught.
try:
    lmdb_dataset(train_dataset_file_path).describe()
except Exception as exc:
    print(f'could not describe dataset: {type(exc).__name__}: {exc}')

{'map_addr': 0, 'map_size': 3100106752, 'last_pgno': 756861, 'last_txnid': 10000, 'max_readers': 1000, 'num_readers': 0}
item: 0
atomic_numbers:...........      [86]
cell:..................... [1, 3, 3]
cell_offsets:............. [2964, 3]
cell_offsets_new:......... [1214, 3]
contact_solid_angles:.....    [1214]
direct_neighbor:..........    [1214]
distances:................    [2964]
distances_new:............    [1214]
edge_angles:..............       607
edge_index:............... [2, 2964]
edge_index_new:........... [2, 1214]
fixed:....................      [86]
force:....................   [86, 3]
natoms:...................        86
pos:......................   [86, 3]
pos_relaxed:..............   [86, 3]
sid:......................   2472718
spherical_domain_radii:...      [86]
tags:.....................      [86]
voronoi_surface_areas:....      [86]
voronoi_volumes:..........      [86]
y_init:...................    6.2825
y_relaxed:................   -0.0256


In [10]:
# Model: moved to the target device (CUDA if available) before the optimizer
# is created, as the torch.optim documentation recommends, so the optimizer is
# built from the parameters that are actually trained.
model = spinconv(None, None, 1, otf_graph=True, regress_forces=False)
model = model.to(device)

# Optimizer and loss (L1 / mean absolute error), the loss also placed on the device
optimizer = optim.AdamW(model.parameters(), lr=lr)
criterion = nn.L1Loss().to(device)

In [11]:
# Run identifier: current local time formatted as YYYY-MM-DD-HH-MM-SS
timestamp = f"{datetime.now():%Y-%m-%d-%H-%M-%S}"

print(timestamp)

2021-09-21-16-55-30


In [12]:
# TensorBoard writer. Original author's note: on the first run the log folder
# has to be created by hand.

# server
#log_folder_path = "../../ocp_results/logs/tensorboard/out_base_model"

# colab
# log_folder_path = "/content/drive/MyDrive/ocp_results/logs/tensorboard/out_base_model"

# user-specific
log_file_path = "../logs/tensorboard_airi"

# Each run logs into its own sub-folder named after the run timestamp
writer = SummaryWriter(f"{log_file_path}/{timestamp}")

In [13]:
%%time
# Run configuration; written to TensorBoard as text under the run timestamp.
logfile_str = {
    "train_dataset_file_path": train_dataset_file_path,
    "val_dataset_file_path": val_dataset_file_path,
    "features_cols": features_cols,
    "target_col": target_col,
    "batch_size": batch_size,
    "num_workers": num_workers,
    "epochs": epochs,
    "lr": lr
}

# Model graph (optional). No example input is prepared in this notebook, so
# graph logging is skipped explicitly rather than by relying on a NameError
# for `trace_system` being swallowed by a bare `except`.
# To enable it, assign an example model input here, e.g. the earlier attempt:
#trace_system = dict(list(next(iter(training_generator))[0]))
trace_system = None
if trace_system is None:
    print('no graph')
else:
    try:
        writer.add_graph(model, trace_system)
    except Exception as exc:
        # Graph logging is best-effort: report why it failed and carry on.
        print(f'no graph ({type(exc).__name__}: {exc})')
writer.add_text(timestamp, str(logfile_str))

no graph
CPU times: user 400 µs, sys: 107 µs, total: 507 µs
Wall time: 476 µs


## Training

In [None]:
%%time
# Per-epoch history of the values returned by train() and evaluate()
loss, loss_eval = [], []

print(timestamp)
print(f'Start training model {model!s}')
for epoch in range(epochs):
    # One pass over the training loader, then one pass over the validation loader
    train_loss = train(
        model, training_generator, optimizer, criterion,
        epoch=epoch, writer=writer, device=device,
    )
    loss.append(train_loss)

    valid_loss = evaluate(
        model, valid_generator, criterion,
        epoch=epoch, writer=writer, device=device,
    )
    loss_eval.append(valid_loss)

2021-09-21-16-55-30
Start training model spinconv(
  (act): Swish()
  (distance_expansion_forces): GaussianSmearing()
  (embeddingblock2): EmbeddingBlock(
    (act): Swish()
    (fc1): Linear(in_features=200, out_features=200, bias=True)
    (fc2): Linear(in_features=200, out_features=1600, bias=True)
    (fc3): Linear(in_features=200, out_features=32, bias=True)
    (source_embedding): Embedding(90, 32)
    (target_embedding): Embedding(90, 32)
    (embed_fc1): Linear(in_features=64, out_features=8, bias=True)
    (softmax): Softmax(dim=1)
  )
  (distfc1): Linear(in_features=200, out_features=200, bias=True)
  (distfc2): Linear(in_features=200, out_features=200, bias=True)
  (dist_block): DistanceBlock(
    (distance_expansion): GaussianSmearing()
    (dist_scalar): Embedding(8100, 1)
    (dist_offset): Embedding(8100, 1)
    (fc1): Linear(in_features=200, out_features=200, bias=True)
  )
  (message_blocks): ModuleList(
    (0): MessageBlock(
      (act): Swish()
      (spinconvblock)