Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 106 additions & 44 deletions dpgen/database/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import os
import time
import json
from uuid import uuid4
from threading import Thread
from glob import glob
Expand All @@ -13,31 +14,43 @@
from dpgen.database.vasp import VaspInput
from dpdata import System,LabeledSystem
from monty.serialization import loadfn,dumpfn
import numpy as np
import traceback

OUTPUT=SHORT_CMD+'_db.json'
SUPPORTED_CACULATOR=['vasp','pwscf','siesta','gaussian']
SUPPORTED_CACULATOR=['vasp','pwscf','gaussian']
ITERS_PAT="iter.*/02.fp/task*"
INIT_PAT="init/*/02.md/sys-*/scale-*/*"

def db_run(args):
    """Entry point for ``dpgen db``: collect labeled data into a database file.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed CLI arguments; only ``args.PARAM`` (path to the JSON
        parameter file) is used — all collection options live in that file.
    """
    dlog.info("collecting data")
    _main(args.PARAM)
    dlog.info("finished")

def _main(path,calculator,output,id_prefix):
def _main(param):
    """Dispatch data collection according to a JSON parameter file.

    Parameters
    ----------
    param : str
        Path to a JSON file with keys ``calculator``, ``path``, ``output``,
        ``config_info_dict`` (mapping config-name -> list of sys indices),
        ``id_prefix`` and, optionally, ``skip_init``.
    """
    with open(param, "r") as fp:
        jdata = json.load(fp)
    calculator = jdata["calculator"]
    path = jdata["path"]
    output = jdata["output"]
    config_info_dict = jdata["config_info_dict"]
    id_prefix = jdata["id_prefix"]
    # "skip_init" is optional; by default the init data is collected too.
    skip_init = jdata.get("skip_init", False)
    assert calculator.lower() in SUPPORTED_CACULATOR
    dlog.info('data collection from: %s' % path)
    if calculator == "vasp":
        parsing_vasp(path, config_info_dict, skip_init, output, id_prefix)
    elif calculator == 'gaussian':
        parsing_gaussian(path, output)
    elif calculator == "siesta":
        # NOTE(review): "siesta" is no longer in SUPPORTED_CACULATOR, so the
        # assert above makes this branch unreachable — confirm intent.
        parsing_siesta(path, output)
    else:
        parsing_pwscf(path, output)

def parsing_vasp(path,output=OUTPUT,id_prefix=None):
def parsing_vasp(path,config_info_dict, skip_init, output=OUTPUT,id_prefix=None):

fp_iters=os.path.join(path,ITERS_PAT)
dlog.debug(fp_iters)
Expand All @@ -46,54 +59,103 @@ def parsing_vasp(path,output=OUTPUT,id_prefix=None):
fp_init=os.path.join(path,INIT_PAT)
dlog.debug(fp_init)
f_fp_init=glob(fp_init)
dlog.info("len initialization data: %s"%len(f_fp_init))
entries=_parsing_vasp(f_fp_init,id_prefix,iters=False)
entries.extend(_parsing_vasp(f_fp_iters,id_prefix))
dlog.info("len collected data: %s"%len(entries))

if skip_init:
entries = _parsing_vasp(f_fp_iters,config_info_dict, id_prefix)
dlog.info("len collected data: %s"%len(entries))
else:
dlog.info("len initialization data: %s"%len(f_fp_init))
entries=_parsing_vasp(f_fp_init,config_info_dict, id_prefix,iters=False)
entries.extend(_parsing_vasp(f_fp_iters,config_info_dict, id_prefix))
dlog.info("len collected data: %s"%len(entries))
#print(output)
#print(entries)
dumpfn(entries,output,indent=4)

def _parsing_vasp(paths,id_prefix,iters=True):
def _parsing_vasp(paths,config_info_dict, id_prefix,iters=True):
entries=[]
icount=0
if iters:
iter_record = []
iter_record_new = []
try:
with open ("record.database", "r") as f_record:
iter_record = [i.split()[0] for i in f_record.readlines()]
iter_record.sort()
dlog.info("iter_record")
dlog.info(iter_record)
except:
pass
for path in paths:
try:
f_outcar = os.path.join(path,'OUTCAR')
f_job = os.path.join(path,'job.json')

try:
vi = VaspInput.from_directory(path)
if os.path.isfile(f_job):
attrib=loadfn(f_job)
else:
attrib={}
tmp_iter = path.split('/')[-3]
if (tmp_iter in iter_record) and (tmp_iter != iter_record[-1]):
continue
if tmp_iter not in iter_record_new:
iter_record_new.append(tmp_iter)
vi = VaspInput.from_directory(path)
if os.path.isfile(f_job):
attrib=loadfn(f_job)
else:
attrib={}

if iters and attrib:
tmp_=path.split('/')[-1]
iter_info=tmp_.split('.')[1]
task_info=tmp_.split('.')[-1]
attrib['iter_info']=iter_info
attrib['task_info']=task_info
else:
pass
comp=vi['POSCAR'].structure.composition
ls = LabeledSystem(f_outcar)
lss=ls.to_list()
for ls in lss:
if id_prefix:
eid=id_prefix+"_"+str(icount)
else:
eid = str(uuid4())
entry=Entry(comp,'vasp',vi.as_dict(),ls.as_dict(),attribute=attrib,entry_id=eid)
entries.append(entry)
icount+=1
except:
dlog.info("failed here : %s"%path)
if iters and attrib:
# generator/Cu/iter.000031/02.fp/task.007.000000
tmp_=path.split('/')[-1]
#config_info=tmp_.split('.')[1]
task_info=tmp_.split('.')[-1]
tmp_iter = path.split('/')[-3]
iter_info = tmp_iter.split('.')[-1]
sys_info = path.split('/')[-4]
config_info_int = int(tmp_.split('.')[1])
for (key, value) in config_info_dict.items():
if config_info_int in value:
config_info = key
attrib['config_info']=config_info
attrib['task_info']=task_info
attrib['iter_info']=iter_info
attrib['sys_info']=sys_info
with open(f_outcar , "r") as fin_outcar:
infile_outcar = fin_outcar.readlines()
for line in infile_outcar:
if "running on" in line:
attrib["core"] = int(line.split()[2])
if "Elapse" in line:
attrib["wall_time"] = float(line.split()[-1])
if "executed on" in line:
attrib["date"] = line.split()[-2]
attrib["clocktime"] = line.split()[-1]
dlog.info("Attrib")
dlog.info(attrib)
comp=vi['POSCAR'].structure.composition
ls = LabeledSystem(f_outcar)
lss=ls.to_list()
for ls in lss:
if id_prefix:
eid=id_prefix+"_"+str(icount)
else:
eid = str(uuid4())
entry=Entry(comp,'vasp',vi.as_dict(),ls.as_dict(),attribute=attrib,entry_id=eid)
entries.append(entry)
icount+=1
except Exception:
#dlog.info(str(Exception))
dlog.info("failed for %s"%(path))
#pass
if iters:
iter_record.sort()
iter_record_new.sort()
with open("record.database" , "w") as fw:
for line in iter_record:
fw.write(line + "\n")
for line in iter_record_new:
fw.write(line + "\n")
return entries

def parsing_pwscf(path,output=OUTPUT):
    """Collect PWSCF (Quantum ESPRESSO) results under *path* into *output*; not implemented yet."""
    pass
def parsing_siesta(path,output=OUTPUT):
    """Collect SIESTA results under *path* into *output*; not implemented yet."""
    pass

def parsing_gaussian(path,output=OUTPUT):
    """Collect Gaussian results under *path* into *output*; not implemented yet."""
    pass

14 changes: 4 additions & 10 deletions dpgen/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,10 @@ def main():
# db
parser_db = subparsers.add_parser(
"db",
help="Collecting data from Deep Generator.")
parser_db.add_argument('PATH', type=str,
help="root path for dpgen modeling")
parser_db.add_argument('ENGINE', type=str,
help="engine used for labeling: vasp/pwscf/cp2k/gaussian/siesta")
parser_db.add_argument('OUTPUT', type=str,
help="output filename : file.json/file.yaml")
parser_db.add_argument("ID_PREFIX", type=str, default=None,
nargs="?",
help="prefix of an entry id")
help="Collecting data from DP-GEN.")

parser_db.add_argument('PARAM', type=str,
help="parameter file, json format")

parser_db.set_defaults(func=db_run)

Expand Down
19 changes: 19 additions & 0 deletions examples/database/param_Ti.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"path" : "/path/to/Ti",
"calculator" : "vasp",
"_comment" : "Current only support VASP",
"output" : "./db_Ti.json",
"id_prefix" : "",
"config_info_dict" : {
"fcc-bulk" : [0,1,2,3,4,5,6,7],
"hcp-bulk" : [8,9,10,11,12,13,14,15],
"bcc-bulk" : [16,17,18,19,20,21,22,23],
"fcc-surf-100" : [24,25,26,27,28,29,30,31],
"fcc-surf-111" : [32,33,34,35,36,37,38,39],
"fcc-surf-110" : [40,41,42,43,44,45,46,47],
"hcp-surf-001" : [48,49,50,51,52,53,54,55],
"hcp-surf-100" : [56,57,58,59,60,61,62,63],
"hcp-surf-110" : [64,65,66,67,68,69,70,71]
},
"skip_init" : true
}
Binary file modified tests/database/data.tar.gz
Binary file not shown.
19 changes: 19 additions & 0 deletions tests/database/param_Al.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"path" : "./",
"calculator" : "vasp",
"_comment" : "vasp/pwscf/gaussian",
"output" : "dpgen_db.json",
"id_prefix" : "",
"config_info_dict" : {
"fcc-bulk" : [0,1,2,3,4,5,6,7],
"hcp-bulk" : [8,9,10,11,12,13,14,15],
"bcc-bulk" : [16,17,18,19,20,21,22,23],
"fcc-surf-100" : [24,25,26,27,28,29,30,31],
"fcc-surf-111" : [32,33,34,35,36,37,38,39],
"fcc-surf-110" : [40,41,42,43,44,45,46,47],
"hcp-surf-001" : [48,49,50,51,52,53,54,55],
"hcp-surf-100" : [56,57,58,59,60,61,62,63],
"hcp-surf-110" : [64,65,66,67,68,69,70,71]
},
"skip_init" : true
}
27 changes: 20 additions & 7 deletions tests/database/test_db_vasp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os,sys,shutil
import unittest
import json
import numpy as np
import tarfile
from glob import glob
Expand Down Expand Up @@ -42,6 +43,11 @@ def setUp(self):
self.ref_entries=loadfn(os.path.join(self.cwd,'data/entries.json'))
self.init_path=sorted(glob(os.path.join(self.r_init_path,init_pat)))
self.iter_path=sorted(glob(os.path.join(self.r_iter_path,iter_pat)))
with open("param_Al.json", "r") as fr:
jdata = json.load(fr)
self.config_info_dict = jdata["config_info_dict"]
self.skip_init = jdata["skip_init"]
self.output = jdata["output"]

def testDPPotcar(self):

Expand Down Expand Up @@ -111,12 +117,15 @@ def testEntry(self):
self.assertEqual(ret0.entry_id,'pku-0')

def testParsingVasp(self):
parsing_vasp(self.cwd,id_prefix=dpgen.SHORT_CMD)
try:
Potcar(['Al'])
ref=os.path.join(self.cwd,'data/all_data_pp.json')
except:
ref=os.path.join(self.cwd,'data/all_data.json')
parsing_vasp(self.cwd, self.config_info_dict, self.skip_init,self.output, id_prefix=dpgen.SHORT_CMD )
#try:
# Potcar(['Al'])
# ref=os.path.join(self.cwd,'data/all_data_pp.json')
#except:
# ref=os.path.join(self.cwd,'data/all_data.json')
#Potcar(['Al'])
ref=os.path.join(self.cwd,'data/all_data_pp.json')

ret=os.path.join(self.cwd,'dpgen_db.json')

retd=loadfn(ret)
Expand All @@ -134,10 +143,14 @@ def testParsingVasp(self):
self.assertEqual(len(i.composition),len(j.composition))
self.assertEqual(len(i.attribute),len(j.attribute))
os.remove(os.path.join(self.cwd,'dpgen_db.json'))


def tearDown(self):
for path in [self.r_init_path, self.r_iter_path, self.data]:
if os.path.isdir(path) :
shutil.rmtree(path)
if os.path.isfile("dpgen.log"):
os.remove("dpgen.log")
if os.path.isfile("record.database"):
os.remove("record.database")