# IC (Inductive Conformal) Testing

In [2]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"]="1,2"
import argparse
import time
import csv
import sys
import json
import random
import numpy as np
import pprint
import yaml

import torch
import torch.multiprocessing as mp

import ray
from ray import tune

from matdeeplearn import models, process, training

# Location of the YAML job configuration, relative to the working directory.
config_path = 'config.yml'

# Raise explicitly instead of using ``assert`` so the check is not stripped
# when Python runs with -O.
if not os.path.exists(config_path):
    raise FileNotFoundError("Config file not found in " + config_path)

# NOTE(review): FullLoader is kept as in the original; prefer yaml.safe_load
# if this file can ever come from an untrusted source.
with open(config_path, "r") as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

# Narrow the "Job" and "Models" sections down to the entries this notebook uses.
config["Job"] = config["Job"]['Inductive_Conformal']
# Index directly so a missing "MPNN_demo" entry raises KeyError here, rather
# than storing None and failing in a later cell.
config["Models"] = config["Models"]["MPNN_demo"]

# Number of CUDA devices visible to this process.
world_size = torch.cuda.device_count()
print(world_size)

# Point the processing step at the dataset directory used for this experiment.
config["Processing"]["data_path"] = "data/pt_data/pt_data_2"

3


In [3]:
# Override the train/val/test split fractions in one call (0.7 + 0.1 + 0.2 = 1.0).
config['Training'].update(
    train_ratio=0.7,
    val_ratio=0.1,
    test_ratio=0.2,
)


In [4]:
# Echo the resolved configuration dictionary for inspection.
config

{'Job': {'job_name': 'my_conformal_job',
  'error_model_path': 'error_model.pth',
  'reprocess': 'True',
  'model': 'MEGNet_demo',
  'load_model': 'False',
  'save_model': 'True',
  'model_path': 'my_model.pth',
  'write_output': 'True',
  'parallel': 'True',
  'seed': 98},
 'Processing': {'dataset_type': 'inmemory',
  'data_path': 'data/pt_data/pt_data_2',
  'target_path_errors': 'error_targets.csv',
  'target_path': 'targets.csv',
  'dictionary_source': 'default',
  'dictionary_path': 'atom_dict.json',
  'data_format': 'json',
  'verbose': 'True',
  'graph_max_radius': 8.0,
  'graph_max_neighbors': 12,
  'voronoi': 'False',
  'edge_features': 'True',
  'graph_edge_length': 50,
  'SM_descriptor': 'False',
  'SOAP_descriptor': 'False',
  'SOAP_rcut': 8.0,
  'SOAP_nmax': 6,
  'SOAP_lmax': 4,
  'SOAP_sigma': 0.3},
 'Training': {'target_index': 0,
  'loss': 'l1_loss',
  'train_ratio': 0.7,
  'val_ratio': 0.1,
  'test_ratio': 0.2,
  'verbosity': 1},
 'Models': {'model': 'MPNN',
  'dim1': 1

In [5]:
# ``rank`` is a device string here, not a DDP process index; later cells skip
# their distributed branches when it is "cpu" or "cuda".
rank = 'cuda'
print(world_size)

# Bind the configuration sections to the names used by the following cells.
processing_args = config['Processing']
data_path = config["Processing"]["data_path"]
job_parameters, training_parameters, model_parameters = (
    config["Job"],
    config["Training"],
    config["Models"],
)

3


In [6]:
# Echo the training section of the configuration for inspection.
training_parameters

{'target_index': 0,
 'loss': 'l1_loss',
 'train_ratio': 0.7,
 'val_ratio': 0.1,
 'test_ratio': 0.2,
 'verbosity': 1}

In [7]:
##General imports
import csv
import os
import time
from datetime import datetime
import shutil
import copy
import numpy as np
from functools import partial
import platform
import random
import pandas as pd

##Torch imports
import torch.nn.functional as F
import torch
from torch_geometric.data import DataLoader, Dataset
from torch_geometric.nn import DataParallel
import torch_geometric.transforms as T
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel
import torch.distributed as dist
import torch.multiprocessing as mp

##Matdeeplearn imports
from matdeeplearn import models
import matdeeplearn.process as process
import matdeeplearn.training as training
from matdeeplearn.models.utils import model_summary

In [8]:
##DDP
training.ddp_setup(rank, world_size)

# Any rank other than the "cpu"/"cuda" device strings takes the distributed path.
is_distributed = rank not in ("cpu", "cuda")

##some issues with DDP learning rate
if is_distributed:
    scaled_lr = model_parameters["lr"] * world_size
    model_parameters["lr"] = scaled_lr

##Get dataset
dataset = process.get_dataset(
    data_path,
    training_parameters["target_index"],
    True,
    processing_args=config['Processing'],
)

print('Done Processing')

if is_distributed:
    dist.barrier()

##Set up loader
# loader_setup returns seven objects: three loaders, the train sampler and
# the three dataset splits, unpacked below in that order.
loaders_and_splits = training.loader_setup(
    training_parameters["train_ratio"],
    training_parameters["val_ratio"],
    training_parameters["test_ratio"],
    model_parameters["batch_size"],
    dataset,
    rank,
    job_parameters["seed"],
    world_size,
)
(
    train_loader,
    val_loader,
    test_loader,
    train_sampler,
    train_dataset,
    val_dataset,
    test_dataset,
) = loaders_and_splits

##Set up model
model = training.model_setup(
    rank,
    model_parameters["model"],
    model_parameters,
    dataset,
    job_parameters["load_model"],
    job_parameters["model_path"],
    model_parameters.get("print_model", True),
)

##Set-up optimizer & scheduler
# Both classes are looked up by name from the model configuration.
optimizer_cls = getattr(torch.optim, model_parameters["optimizer"])
optimizer = optimizer_cls(
    model.parameters(),
    lr=model_parameters["lr"],
    **model_parameters["optimizer_args"]
)
scheduler_cls = getattr(torch.optim.lr_scheduler, model_parameters["scheduler"])
scheduler = scheduler_cls(optimizer, **model_parameters["scheduler_args"])



Done Processing
train length: 13860 val length: 1980 test length: 3960 unused length: 1 seed : 98




--------------------------------------------------------------------------
               Layer.Parameter    Param Tensor Shape              Param #
--------------------------------------------------------------------------
         pre_lin_list.0.weight            [100, 114]                11400
           pre_lin_list.0.bias                 [100]                  100
              conv_list.0.bias                 [100]                  100
       conv_list.0.nn.0.weight             [100, 50]                 5000
         conv_list.0.nn.0.bias                 [100]                  100
       conv_list.0.nn.2.weight          [10000, 100]              1000000
         conv_list.0.nn.2.bias               [10000]                10000
        conv_list.0.lin.weight            [100, 100]                10000
              conv_list.1.bias                 [100]                  100
       conv_list.1.nn.0.weight             [100, 50]                 5000
         conv_list.1.nn.0.bias      

In [None]:
##Start training
model = training.trainer(
    rank,
    world_size,
    model,
    optimizer,
    scheduler,
    training_parameters["loss"],
    train_loader,
    val_loader,
    train_sampler,
    model_parameters["epochs"],
    training_parameters["verbosity"],
    "my_model_temp.pth",
)

if rank in (0, "cpu", "cuda"):

    train_error = val_error = test_error = float("NaN")

    ##workaround to get training output in DDP mode
    ##outputs are slightly different, could be due to dropout or batchnorm?
    # Rebuild all three loaders unshuffled with one shared set of options so
    # the evaluation passes below use identical loader settings.
    eval_loader_kwargs = dict(
        batch_size=model_parameters["batch_size"],
        shuffle=False,
        num_workers=0,
        pin_memory=True,
    )
    train_loader = DataLoader(train_dataset, **eval_loader_kwargs)
    test_loader = training.DataLoader(test_dataset, **eval_loader_kwargs)
    val_loader = training.DataLoader(val_dataset, **eval_loader_kwargs)

    ##Get train error in eval mode
    train_error, train_out = training.evaluate(
        train_loader, model, training_parameters["loss"], rank, out=True
    )
    print(f"Train Error: {train_error:.5f}")

    ##Get val error
    if val_loader is not None:
        val_error, val_out = training.evaluate(
            val_loader, model, training_parameters["loss"], rank, out=True
        )
        print(f"Val Error: {val_error:.5f}")

    ##Get test error
    if test_loader is not None:
        test_error, test_out = training.evaluate(
            test_loader, model, training_parameters["loss"], rank, out=True
        )
        print(f"Test Error: {test_error:.5f}")


##Save model
# The distributed and single-device branches wrote exactly the same
# checkpoint, so a single save covers both cases.
if job_parameters["save_model"] == "True":
    torch.save(
        {
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "scheduler_state_dict": scheduler.state_dict(),
            "full_model": model,
        },
        job_parameters["model_path"],
    )


In [None]:
# Scratch cell: reset the error/timing trackers and run a single ``train`` call.
# NOTE(review): ``train``, ``loss`` and ``coeff`` are not defined anywhere in
# the visible notebook; unless they come from elsewhere this cell fails with
# NameError - confirm their origin before running.
train_error = val_error = test_error = epoch_time = float("NaN")
train_start = time.time()
best_val_error = 1e10
# Alias of ``model`` (same object), not a copy.
model_best = model
##Start training over epochs loop
train_error = train(model, optimizer, train_loader, loss, rank=rank, my_coeff = coeff)

In [9]:
# Smoke test: switch the model to training mode and run one forward pass on
# the first batch of the training loader, then stop.
model.train()
loss_all = count = 0
for first_batch in train_loader:
    data = first_batch.to(rank)
    optimizer.zero_grad()
    output = model(data)
    break

In [10]:
# Inspect the model output computed for the first training batch.
output

tensor([0.0485, 0.0465, 0.0502, 0.0466, 0.0484, 0.0464, 0.0482, 0.0450, 0.0448,
        0.0502, 0.0466, 0.0485, 0.0448, 0.0498, 0.0484, 0.0482, 0.0447, 0.0486,
        0.0486, 0.0464, 0.0465, 0.0465, 0.0496, 0.0485, 0.0480, 0.0463, 0.0483,
        0.0466, 0.0482, 0.0485, 0.0449, 0.0498, 0.0449, 0.0450, 0.0464, 0.0502,
        0.0449, 0.0465, 0.0448, 0.0498, 0.0467, 0.0463, 0.0461, 0.0487, 0.0499,
        0.0485, 0.0482, 0.0497, 0.0484, 0.0487, 0.0482, 0.0502, 0.0463, 0.0497,
        0.0498, 0.0472, 0.0483, 0.0500, 0.0497, 0.0485, 0.0498, 0.0483, 0.0467,
        0.0461, 0.0500, 0.0486, 0.0502, 0.0499, 0.0464, 0.0503, 0.0452, 0.0500,
        0.0485, 0.0466, 0.0449, 0.0447, 0.0466, 0.0499, 0.0453, 0.0451, 0.0502,
        0.0473, 0.0496, 0.0477, 0.0465, 0.0482, 0.0496, 0.0447, 0.0487, 0.0485,
        0.0465, 0.0464, 0.0465, 0.0465, 0.0449, 0.0499, 0.0485, 0.0464, 0.0468,
        0.0447], device='cuda:0', grad_fn=<ViewBackward>)

In [None]:
# Record which split every dataset index was assigned to. Each frame is tagged
# with its own split name so the "set" column actually distinguishes
# train / val / test rows.
tr_ind = pd.DataFrame({
    'index': train_dataset.indices,
    'set': ["train"] * len(train_dataset.indices),
})
v_ind = pd.DataFrame({
    'index': val_dataset.indices,
    'set': ["val"] * len(val_dataset.indices),
})
tst_ind = pd.DataFrame({
    'index': test_dataset.indices,
    'set': ["test"] * len(test_dataset.indices),
})

dset = pd.concat([tr_ind, v_ind, tst_ind])

# Written to the current working directory with a header row.
dset.to_csv(os.path.join(os.getcwd(), 'IC_mod1_test.csv'), index=False, header=True)

In [None]:
import pandas as pd

In [None]:
# Debug aid: print every batch object yielded by the training loader.
for data in train_loader:
    print(data)

In [None]:
# Persist each loader to its own file; the test loader gets a dedicated file
# name so it no longer overwrites the saved train loader.
torch.save(train_loader, 'train_loader.pth')
torch.save(val_loader, 'val_loader.pth')
torch.save(test_loader, 'test_loader.pth')

In [None]:
# Gather the (index, target, predicted) rows from evaluating the primary model
# on each split and derive the absolute prediction error per sample.
result_columns = ['index', 'target', 'predicted']
target_train = pd.DataFrame(train_out, columns=result_columns)
target_val = pd.DataFrame(val_out, columns=result_columns)
target_test = pd.DataFrame(test_out, columns=result_columns)
target_errors = pd.concat(
    [target_train, target_val, target_test], axis=0, ignore_index=True
)
target_errors['index'] = target_errors['index'].apply(int)
target_errors['error'] = np.absolute(
    target_errors['target'].apply(float) - target_errors['predicted'].apply(float)
)

# IDs listed in the first column of targets.csv (the file has no header row).
my_ind = pd.read_csv(os.path.join(os.getcwd(), data_path, 'targets.csv'), header=None)
all_ind = my_ind[0].to_list()
indices_list = target_errors['index'].to_list()

# IDs in targets.csv that ended up in none of the splits. Every such ID gets a
# zero-error placeholder row so the error file covers each listed sample.
# This handles zero, one or several unused samples.
main_list = sorted(set(all_ind) - set(indices_list))
if main_list:
    new_df = pd.DataFrame(
        {"index": main_list, "target": 0, 'predicted': 0, "error": 0}
    )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    target_errors = pd.concat([target_errors, new_df], ignore_index=True)

target_errors = target_errors.sort_values('index').reset_index(drop=True)

# Written as headerless (index, error) pairs next to targets.csv.
# NOTE(review): presumably the file process.get_dataset_error reads - confirm.
target_errors[['index', 'error']].to_csv(
    os.path.join(os.getcwd(), data_path, 'error_targets.csv'),
    index=False,
    header=False,
)

new_data = process.get_dataset_error(
    data_path, training_parameters["target_index"], False, processing_args
)

# Reuse the primary model's split indices so both models see the same samples
# in train / val / test.
error_train_subset = torch.utils.data.Subset(new_data, train_dataset.indices)
error_val_subset = torch.utils.data.Subset(new_data, val_dataset.indices)
error_test_subset = torch.utils.data.Subset(new_data, test_dataset.indices)

# One shared set of loader options for the three error-model loaders.
error_loader_kwargs = dict(
    batch_size=model_parameters["batch_size"],
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)
train_loader_e = training.DataLoader(error_train_subset, **error_loader_kwargs)
val_loader_e = training.DataLoader(error_val_subset, **error_loader_kwargs)
test_loader_e = training.DataLoader(error_test_subset, **error_loader_kwargs)



In [None]:
# Build the error model with the same model configuration as the primary one.
# NOTE(review): load_model / model_path are the primary model's job settings -
# confirm they are also intended for the error model.
model_errors = training.model_setup(
    rank,
    model_parameters["model"],
    model_parameters,
    new_data,
    job_parameters["load_model"],
    job_parameters["model_path"],
    model_parameters.get("print_model", True),
)

# These rebind the global optimizer/scheduler to the error model's parameters.
optimizer = getattr(torch.optim, model_parameters["optimizer"])(
    model_errors.parameters(),
    lr=model_parameters["lr"],
    **model_parameters["optimizer_args"]
)
scheduler = getattr(torch.optim.lr_scheduler, model_parameters["scheduler"])(
    optimizer, **model_parameters["scheduler_args"]
)

##Start training
model_errors = training.trainer(
    rank,
    world_size,
    model_errors,
    optimizer,
    scheduler,
    training_parameters["loss"],
    train_loader_e,
    val_loader_e,
    train_sampler,
    model_parameters["epochs"],
    training_parameters["verbosity"],
    "my_model_error_temp.pth",
)

train_error_e, train_out_e = training.evaluate(
    train_loader_e, model_errors, training_parameters["loss"], rank, out=True)

val_error_e, val_out_e = training.evaluate(
    val_loader_e, model_errors, training_parameters["loss"], rank, out=True)

test_error_e, test_out_e = training.evaluate(
    test_loader_e, model_errors, training_parameters["loss"], rank, out=True)


error_columns = ['index', 'target_error', 'predicted_error']
target_train_e = pd.DataFrame(train_out_e, columns=error_columns)
target_val_e = pd.DataFrame(val_out_e, columns=error_columns)
target_test_e = pd.DataFrame(test_out_e, columns=error_columns)
target_errors_e = pd.concat([target_train_e, target_val_e, target_test_e], axis=0)
target_errors_e = target_errors_e.sort_values(list(target_errors_e), ascending=True)
target_errors_e['error_2'] = np.absolute(
    target_errors_e['target_error'].apply(float)
    - target_errors_e['predicted_error'].apply(float)
)

# Calibration: absolute residual of the error model on the validation split.
target_val_e_2 = target_val_e.copy()
target_val_e_2['target_error'] = target_val_e_2['target_error'].apply(float)
target_val_e_2['predicted_error'] = target_val_e_2['predicted_error'].apply(float)
target_val_e_2['alpha'] = np.abs(
    target_val_e_2['target_error'] - target_val_e_2['predicted_error']
)
target_val_e_2 = target_val_e_2.sort_values(['alpha'], axis=0, ascending=True)

# Half-width of the interval: the chosen percentile of the validation residuals.
CONFIDENCE_PERCENTILE = 95
alpha = np.percentile(target_val_e_2['alpha'], CONFIDENCE_PERCENTILE)

# Attach predicted_error +/- alpha bounds to every split.
for frame in (target_train_e, target_val_e, target_test_e):
    frame['predicted_error'] = frame['predicted_error'].apply(float)
    frame['lower_error_confidence_level'] = frame['predicted_error'] - alpha
    frame['upper_error_confidence_level'] = frame['predicted_error'] + alpha

# Written to the current working directory with a header row.
target_train_e.to_csv(os.path.join(os.getcwd(), 'error_prediction_conf_train.csv'), index=False, header=True)
target_val_e.to_csv(os.path.join(os.getcwd(), 'error_prediction_conf_val.csv'), index=False, header=True)
target_test_e.to_csv(os.path.join(os.getcwd(), 'error_prediction_conf_test.csv'), index=False, header=True)

##Write outputs
if job_parameters["write_output"] == "True":

    training.write_results(
        train_out, str(job_parameters["job_name"]) + "_train_outputs.csv"
    )
    if val_loader is not None:
        training.write_results(
            val_out, str(job_parameters["job_name"]) + "_val_outputs.csv"
        )
    if test_loader is not None:
        training.write_results(
            test_out, str(job_parameters["job_name"]) + "_test_outputs.csv"
        )

if rank not in ("cpu", "cuda"):
    dist.destroy_process_group()

##Write out model performance to file
# NOTE(review): relies on the three primary-model errors exposing ``.cpu()``
# (i.e. not the float NaN placeholders) - confirm.
error_values = np.array((train_error.cpu(), val_error.cpu(), test_error.cpu()))
if job_parameters.get("write_error") == "True":
    np.savetxt(
        job_parameters["job_name"] + "_errorvalues.csv",
        error_values[np.newaxis, ...],
        delimiter=",",
    )

In [None]:
# End-to-end error-model pipeline in a single cell: load the error dataset,
# train the error model, derive the interval half-width and write the results.
new_data = process.get_dataset_error(
    data_path, training_parameters["target_index"], False, processing_args
)

# Reuse the primary model's split indices so both models see the same samples
# in train / val / test.
error_train_subset = torch.utils.data.Subset(new_data, train_dataset.indices)
error_val_subset = torch.utils.data.Subset(new_data, val_dataset.indices)
error_test_subset = torch.utils.data.Subset(new_data, test_dataset.indices)

# One shared set of loader options for the three error-model loaders.
error_loader_kwargs = dict(
    batch_size=model_parameters["batch_size"],
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)
train_loader_e = training.DataLoader(error_train_subset, **error_loader_kwargs)
val_loader_e = training.DataLoader(error_val_subset, **error_loader_kwargs)
test_loader_e = training.DataLoader(error_test_subset, **error_loader_kwargs)

# NOTE(review): load_model / model_path are the primary model's job settings -
# confirm they are also intended for the error model.
model_errors = training.model_setup(
    rank,
    model_parameters["model"],
    model_parameters,
    new_data,
    job_parameters["load_model"],
    job_parameters["model_path"],
    model_parameters.get("print_model", True),
)

# These rebind the global optimizer/scheduler to the error model's parameters.
optimizer = getattr(torch.optim, model_parameters["optimizer"])(
    model_errors.parameters(),
    lr=model_parameters["lr"],
    **model_parameters["optimizer_args"]
)
scheduler = getattr(torch.optim.lr_scheduler, model_parameters["scheduler"])(
    optimizer, **model_parameters["scheduler_args"]
)

##Start training
model_errors = training.trainer(
    rank,
    world_size,
    model_errors,
    optimizer,
    scheduler,
    training_parameters["loss"],
    train_loader_e,
    val_loader_e,
    train_sampler,
    model_parameters["epochs"],
    training_parameters["verbosity"],
    "my_model_error_temp.pth",
)

train_error_e, train_out_e = training.evaluate(
    train_loader_e, model_errors, training_parameters["loss"], rank, out=True)

val_error_e, val_out_e = training.evaluate(
    val_loader_e, model_errors, training_parameters["loss"], rank, out=True)

test_error_e, test_out_e = training.evaluate(
    test_loader_e, model_errors, training_parameters["loss"], rank, out=True)


error_columns = ['index', 'target_error', 'predicted_error']
target_train_e = pd.DataFrame(train_out_e, columns=error_columns)
target_val_e = pd.DataFrame(val_out_e, columns=error_columns)
target_test_e = pd.DataFrame(test_out_e, columns=error_columns)
target_errors_e = pd.concat([target_train_e, target_val_e, target_test_e], axis=0)
target_errors_e = target_errors_e.sort_values(list(target_errors_e), ascending=True)
target_errors_e['error_2'] = np.absolute(
    target_errors_e['target_error'].apply(float)
    - target_errors_e['predicted_error'].apply(float)
)

# Calibration: absolute residual of the error model on the validation split.
target_val_e_2 = target_val_e.copy()
target_val_e_2['target_error'] = target_val_e_2['target_error'].apply(float)
target_val_e_2['predicted_error'] = target_val_e_2['predicted_error'].apply(float)
target_val_e_2['alpha'] = np.abs(
    target_val_e_2['target_error'] - target_val_e_2['predicted_error']
)
target_val_e_2 = target_val_e_2.sort_values(['alpha'], axis=0, ascending=True)

# Half-width of the interval: the chosen percentile of the validation residuals.
CONFIDENCE_PERCENTILE = 95
alpha = np.percentile(target_val_e_2['alpha'], CONFIDENCE_PERCENTILE)

# Attach predicted_error +/- alpha bounds to every split.
for frame in (target_train_e, target_val_e, target_test_e):
    frame['predicted_error'] = frame['predicted_error'].apply(float)
    frame['lower_error_confidence_level'] = frame['predicted_error'] - alpha
    frame['upper_error_confidence_level'] = frame['predicted_error'] + alpha

# Written to the current working directory WITHOUT a header row.
target_train_e.to_csv(os.path.join(os.getcwd(), 'error_prediction_conf_train.csv'), index=False, header=False)
target_val_e.to_csv(os.path.join(os.getcwd(), 'error_prediction_conf_val.csv'), index=False, header=False)
target_test_e.to_csv(os.path.join(os.getcwd(), 'error_prediction_conf_test.csv'), index=False, header=False)

##Write outputs
if job_parameters["write_output"] == "True":

    training.write_results(
        train_out, str(job_parameters["job_name"]) + "_train_outputs.csv"
    )
    if val_loader is not None:
        training.write_results(
            val_out, str(job_parameters["job_name"]) + "_val_outputs.csv"
        )
    if test_loader is not None:
        training.write_results(
            test_out, str(job_parameters["job_name"]) + "_test_outputs.csv"
        )

if rank not in ("cpu", "cuda"):
    dist.destroy_process_group()

##Write out model performance to file
# NOTE(review): relies on the three primary-model errors exposing ``.cpu()``
# (i.e. not the float NaN placeholders) - confirm.
error_values = np.array((train_error.cpu(), val_error.cpu(), test_error.cpu()))
if job_parameters.get("write_error") == "True":
    np.savetxt(
        job_parameters["job_name"] + "_errorvalues.csv",
        error_values[np.newaxis, ...],
        delimiter=",",
    )