This notebook uses multiple databases that are not part of the ferroelectric_dataset:  
ferro_launchpad_merge  
ferro_vasp_merge  
ferro_distort  
deform_check  

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pymongo import MongoClient
from bson import ObjectId
# Single client shared by every database handle created below. It is built
# with no arguments, so pymongo's default connection settings apply
# (localhost:27017 according to the pymongo documentation).
M = MongoClient()

In [3]:
# Workflow collection of the merged launchpad database.
launchpad = M["ferro_launchpad_merge"]["workflows"]
# Two separately curated distortion databases exist; both have to be checked
# when looking up a connection id.
ferrodistort = M["ferro_distort"]["distortions"]
deformcheck = M["deform_check"]["distortions"]

In [4]:
# Collect, for every workflow in the launchpad, the first tag that contains
# 'cid' (the connection id tag).
workflows = launchpad.find()
cids = [[tag for tag in wf['metadata']['tags'] if 'cid' in tag][0]
        for wf in workflows]
print(len(cids))

# Connection ids that have no matching distortion in either database.
# The first four characters of each tag are dropped to get the ObjectId string.
not_cid = []
for cid_tag in cids:
    oid = ObjectId(cid_tag[4:])
    # deform_check is consulted first; ferro_distort only when that misses.
    match = deformcheck.find_one({"_id": oid}) or ferrodistort.find_one({"_id": oid})
    if not match:
        not_cid.append(cid_tag)
print(len(not_cid))

432
0


In [5]:
# Target database of this notebook and the two collections filled in below.
nds_db = M["nds_revised_20180326"]
nds_workflow = nds_db["workflow_data"]
nds_distort = nds_db["distortions"]

In [6]:
from pymatgen.symmetry.groups import SpaceGroup, SYMM_DATA, sg_symbol_from_int_number

Pymatgen will drop Py2k support from v2019.1.1. Pls consult the documentation
at https://www.pymatgen.org for more details.
  at https://www.pymatgen.org for more details.""")


In [7]:
# Point group label of each of the 230 space groups, exactly as pymatgen
# records it in SYMM_DATA.
point_groups = [
    SYMM_DATA['space_group_encoding'][sg_symbol_from_int_number(number)]['point_group']
    for number in range(1, 231)
]

# pymatgen yields 40 distinct labels rather than 32.
# (Single-argument print: the "%s %d" template reproduces the separating
# space that a two-item print statement would emit.)
print("%s %d" % ("Number of point groups denoted in pymatgen: ", len(set(point_groups))))

# The surplus comes from several conventions being used for the same point
# group. This mapping folds the alternate labels onto one standard label.
point_group_conv = {
    '321': '32',
    '312': '32',
    '3m1': '3m',
    '31m': '3m',
    '-3m1': '-3m',
    '-31m': '-3m',
    '-4m2': '-42m',
    '-62m': '-6m2',
}

# Normalise every label; labels without an entry in the mapping are kept as-is.
corrected_point_groups = []
for label in point_groups:
    corrected_point_groups.append(point_group_conv.get(label, label))
# After normalisation the expected 32 point groups remain.
print("%s %d" % ("Number of point groups in conventional notation: ", len(set(corrected_point_groups))))

Number of point groups denoted in pymatgen:  40
Number of point groups in conventional notation:  32


In [8]:
# The 10 polar point groups in conventional notation. Alternate settings such
# as '3m1' and '31m' do not need to be listed: each label is passed through
# point_group_conv before the membership test below.
polar_point_groups = ['1', '2', 'm', 'mm2', '4', '4mm', '3', '3m', '6', '6mm']

# A space group is polar when its point group is polar. Walk all 230 space
# groups and keep the numbers of the polar ones.
sg_encoding = SYMM_DATA['space_group_encoding']
polar_spacegroups = []
for number in range(1, 231):
    raw_label = sg_encoding[sg_symbol_from_int_number(number)]['point_group']
    conventional_label = point_group_conv.get(raw_label, raw_label)
    if conventional_label not in polar_point_groups:
        continue
    polar_spacegroups.append(number)

# 68 of the 230 spacegroups are polar.
# (Single-argument print: the "%s %d" template reproduces the separating
# space that a two-item print statement would emit.)
print("%s %d" % ("Number of polar spacegroups: ", len(polar_spacegroups)))

Number of polar spacegroups:  68


In [9]:
# DO NOT OVERWRITE
# Copy into nds_distort every distortion that is referenced by a workflow and
# passes the acceptance checks in _is_valid_distortion. Connection ids already
# present in nds_distort are skipped, so existing documents are never replaced.


def _is_valid_distortion(entry, polar_sgs, dmax_cutoff=1.5):
    """Return True when a distortion document passes every acceptance check.

    entry       -- distortion document (dict) read from a source database
    polar_sgs   -- collection of space group numbers classified as polar
    dmax_cutoff -- largest accepted entry['distortion']['dmax']

    The checks, evaluated in this order with short-circuiting:
      * dmax does not exceed the cutoff;
      * the stored polar / nonpolar space groups equal the Bilbao ones;
      * the polar space group is in polar_sgs and the nonpolar one is not
        (double check against misclassification caused by differing point
        group notations);
      * the 'low_symm' and 'high_low_setting' structures have the same
        number of sites.
    """
    return (float(entry['distortion']['dmax']) <= dmax_cutoff and
            entry['polar_spacegroup'] == entry['bilbao_polar_spacegroup'] and
            entry['nonpolar_spacegroup'] == entry['bilbao_nonpolar_spacegroup'] and
            int(entry['polar_spacegroup']) in polar_sgs and
            int(entry['nonpolar_spacegroup']) not in polar_sgs and
            len(entry['distortion']['low_symm']['sites']) ==
            len(entry['distortion']['high_low_setting']['sites']))


for c in cids:
    # The first four characters of the tag are dropped to get the ObjectId.
    oid = ObjectId(c[4:])
    if nds_distort.find_one({"_id": oid}):
        continue  # already copied; never overwrite

    # deform_check takes precedence: when the id exists there, ferro_distort
    # is not consulted even if the document fails validation.
    r = deformcheck.find_one({"_id": oid})
    if r:
        if _is_valid_distortion(r, polar_spacegroups):
            nds_distort.insert_one(r)
        continue

    r = ferrodistort.find_one({"_id": oid})
    if r:
        if _is_valid_distortion(r, polar_spacegroups):
            # Drop the two bookkeeping fields before copying. A default is
            # given so a document lacking either field is still copied
            # instead of raising KeyError and aborting a half-finished run.
            r.pop('checked', None)
            r.pop('deformable', None)
            nds_distort.insert_one(r)
        continue

    # Found in neither database. Guard against recording the same id twice
    # (not_cid may already hold it from the earlier lookup or a re-run).
    if c not in not_cid:
        not_cid.append(c)

In [10]:
# Number of workflows with valid distortions.
# Rebuild the "cid_<ObjectId>" tag of every document stored in nds_distort.
cids_good = []
for doc in nds_distort.find():
    cids_good.append("cid_" + str(doc["_id"]))

# Workflows carrying one of those tags, reduced to their first 'wfid' tag.
workflows = launchpad.find({'metadata.tags': {"$in": cids_good}})
wfids_good = []
for wf in workflows:
    wfid_tags = [tag for tag in wf['metadata']['tags'] if "wfid" in tag]
    wfids_good.append(wfid_tags[0])
print(len(wfids_good))

414


In [11]:
# Workflow dictionary: wfid tag -> {'state': workflow state,
#                                   'cid': first tag containing "cid"}.
# Each workflow document is fetched with a single query and reused for both
# fields, rather than querying the launchpad once per field.
wf_dict = {}
for wfid in wfids_good:
    wf = launchpad.find_one({'metadata.tags': wfid})
    wf_dict[wfid] = {
        'state': wf['state'],
        'cid': [tag for tag in wf['metadata']['tags'] if "cid" in tag][0],
    }

In [12]:
# DO NOT OVERWRITE
# Create subset of VASP database: copy every task of ferro_vasp_merge whose
# tags include one of the valid workflow ids.
vasp = nds_db.vasp

vaspruns = M.ferro_vasp_merge.tasks.find({'tags': {"$in": wfids_good}})
for v in vaspruns:
    # Skip tasks that were already copied (same _id). This mirrors the
    # skip-if-present checks used for nds_distort and nds_workflow, so
    # re-running the cell resumes instead of stopping on a duplicate _id.
    if not vasp.find_one({'_id': v['_id']}):
        vasp.insert_one(v)

In [13]:
from process_vasp_runs import *

In [14]:
# Build and store a workflow_data entry for every valid workflow that does
# not have one yet; workflows already present are left untouched.
total = len(wf_dict)
for i, wfid in enumerate(wf_dict):
    if nds_workflow.find_one({'wfid': wfid}):
        continue
    # Progress marker: position in wf_dict / number of workflows.
    print(str(i) + "/" + str(total))
    info = wf_dict[wfid]
    d = make_database_entry_dictionary(wfid, info['cid'], info['state'], nds_db)
    nds_workflow.insert_one(d)

0/414
wfid_1476040938.410118
1/414
wfid_1476041147.43703
2/414
wfid_1476040094.757061
3/414
wfid_1484694921.619765
4/414
wfid_1476040445.577528
5/414
wfid_1476040837.97019
6/414
wfid_1476040879.269934
7/414
wfid_1476040196.242976
8/414
wfid_1476040956.832394
9/414
wfid_1476040378.192845
10/414
wfid_1476040820.373005
11/414
wfid_1476041131.850447
12/414
wfid_1476040944.53976
13/414
wfid_1484445057.763994
14/414
wfid_1484445157.56323
15/414
wfid_1476040384.215156
16/414
wfid_1476041135.279468
17/414
wfid_1476040456.859630
18/414
wfid_1484444932.874533
19/414
wfid_1476040766.163640
20/414
wfid_1476040134.799732
21/414
wfid_1484445160.765248
22/414
wfid_1476040854.772923
23/414
wfid_1476040882.228351
24/414
wfid_1484694862.160232
25/414
wfid_1484445273.712041
26/414
wfid_1484445079.765713
27/414
wfid_1476040112.283926
28/414
wfid_1476040149.329165
29/414
wfid_1484694898.10818
30/414
wfid_1484445233.032543
31/414
wfid_1476040262.06436
32/414
wfid_1484694924.357683
33/414
wfid_1476041095.479

270/414
wfid_1484445287.593799
271/414
wfid_1476040776.284586
272/414
wfid_1484445209.371670
273/414
wfid_1476040282.74358
274/414
wfid_1476040300.667232
275/414
wfid_1484444922.915617
276/414
wfid_1484445253.438371
277/414
wfid_1476040971.018097
278/414
wfid_1476040922.712643
279/414
wfid_1484694842.890785
280/414
wfid_1476040496.278929
281/414
wfid_1476040894.532948
282/414
wfid_1476040851.179557
283/414
wfid_1484445076.837767
284/414
wfid_1484445054.820602
285/414
wfid_1476040288.221865
286/414
wfid_1476040350.947385
287/414
wfid_1476040153.114278
288/414
wfid_1476040875.673476
289/414
wfid_1476040174.338651
290/414
wfid_1484445126.957709
291/414
wfid_1484445186.170228
292/414
wfid_1476040489.133921
293/414
wfid_1476040452.983312
294/414
wfid_1476040291.620895
295/414
wfid_1484445136.247224
296/414
wfid_1476041030.232078
297/414
wfid_1484694929.994404
298/414
wfid_1476040331.097043
299/414
wfid_1476040272.256172
300/414
wfid_1484445154.456250
301/414
wfid_1484445301.869271
302/414
w