In [1]:
import iprPy
import duplicates
from copy import deepcopy

import numpy as np
import pandas as pd

In [2]:
# Record style used below to load the existing records, the template record,
# and the comparison terms.
record_style = 'calculation_relax_static'

Load database and records of a given style

In [3]:
# Alternative kept for reference: load a database by name
# (presumably a saved database configuration called 'master' -- confirm).
#database = iprPy.load_database('master')
database = iprPy.load_database(style='local', host='database')

# Existing records of the chosen style as a DataFrame; flat=True / full=False
# are passed through to iprPy (presumably flattened, abbreviated content --
# confirm against the iprPy documentation).
records_df = database.get_records_df(style=record_style, flat=True, full=False)

Get record-specific terms

In [4]:
# An empty record of this style supplies the lists of comparison terms.
record = iprPy.load_record(style=record_style)

# Terms handed to the duplicate check as-is
# (presumably compared for exact equality -- confirm).
dterms = record.compare_terms

# Every float comparison term is mapped to the same value, 1e-4
# (presumably the tolerance used by the duplicate check -- confirm).
fterms = dict.fromkeys(record.compare_fterms, 1e-4)

Create candidate calculation records, some of which may duplicate existing records

In [5]:
# Use the first existing record as the template for every candidate record.
baserecord = database.get_record(style=record_style, name=records_df.key[0])
basedict = baserecord.todict(full=False, flat=True)

# Candidate rows are collected as dicts and converted to a DataFrame once.
test_rows = []

# Vary size multipliers: set the upper multiplier of all three axes to 1..5
for mult in range(1, 6):
    row = deepcopy(basedict)
    for mult_key in ('a_mult2', 'b_mult2', 'c_mult2'):
        row[mult_key] = mult
    test_rows.append(row)

# Vary temperatures: 0, 50, ..., 200, each shifted by a small 1e-5 offset
# (presumably to exercise the float-tolerance comparison -- confirm).
for step in range(5):
    row = deepcopy(basedict)
    row['temperature'] = step * 50.0 + 1e-5
    test_rows.append(row)

tests = pd.DataFrame(test_rows)

tests

Unnamed: 0,LAMMPS_version,a_mult1,a_mult2,b_mult1,b_mult2,c_mult1,c_mult2,error,family,iprPy_version,...,pressure_xx,pressure_xy,pressure_xz,pressure_yy,pressure_yz,pressure_zz,script,status,symbols,temperature
0,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
1,22 Aug 2018,0,2,0,2,0,2,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
2,22 Aug 2018,0,3,0,3,0,3,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
3,22 Aug 2018,0,4,0,4,0,4,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
4,22 Aug 2018,0,5,0,5,0,5,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
5,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,1e-05
6,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,50.00001
7,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,100.00001
8,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,150.00001
9,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,200.00001


Identify test records that are not duplicates of existing records

In [6]:
def new_calculations(old, test, dterms, fterms):
    """
    Select the rows of test that are not flagged as duplicates.

    The old and test records are stacked (old first), derived ``*_mult``
    columns are added when requested in dterms, and the combined frame is
    passed to ``duplicates.duplicates_allclose``.  The number of flagged
    rows in each part is printed.

    Parameters
    ----------
    old : pandas.DataFrame
        Existing records.
    test : pandas.DataFrame
        Candidate records to check.
    dterms : list
        Comparison terms passed to ``duplicates.duplicates_allclose``.  If it
        contains 'a_mult', 'b_mult' or 'c_mult', that column is computed as
        ``<x>_mult2 - <x>_mult1`` before the comparison.
    fterms : dict
        Float comparison terms passed to ``duplicates.duplicates_allclose``
        (presumably term name -> tolerance -- confirm against that helper).

    Returns
    -------
    pandas.DataFrame
        The rows of test that were not flagged as duplicates.
    """
    old_count = len(old)

    # concat builds a new frame, so neither input is modified below
    allrecords = pd.concat([old, test], ignore_index=True)

    # Derived columns: difference between the upper and lower multipliers
    for axis in ('a', 'b', 'c'):
        term = axis + '_mult'
        if term in dterms:
            allrecords[term] = allrecords[term + '2'] - allrecords[term + '1']

    # Plain boolean array so the slices below are always positional
    isdup = np.asarray(
        duplicates.duplicates_allclose(allrecords, dterms, fterms),
        dtype=bool,
    )
    print(np.sum(isdup[:old_count]), 'duplicates found in old records')
    print(np.sum(isdup[old_count:]), 'duplicates found in new records')

    # Index the test argument itself, not a same-named notebook global
    isnew = ~isdup[old_count:]
    return test[isnew]

In [7]:
# Show the candidate records that were not flagged as duplicates.
new_calculations(records_df, tests, dterms, fterms)

0 duplicates found in old records
2 duplicates found in new records


Unnamed: 0,LAMMPS_version,a_mult1,a_mult2,b_mult1,b_mult2,c_mult1,c_mult2,error,family,iprPy_version,...,pressure_xx,pressure_xy,pressure_xz,pressure_yy,pressure_yz,pressure_zz,script,status,symbols,temperature
1,22 Aug 2018,0,2,0,2,0,2,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
2,22 Aug 2018,0,3,0,3,0,3,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
3,22 Aug 2018,0,4,0,4,0,4,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
4,22 Aug 2018,0,5,0,5,0,5,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,0.0
6,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,50.00001
7,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,100.00001
8,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,150.00001
9,22 Aug 2018,0,1,0,1,0,1,,A1--Cu--fcc,0.8.3,...,0.0,0.0,0.0,0.0,0.0,0.0,calc_relax_static,finished,Ti,200.00001
