In [8]:
# read list of nuclear charges, give energy predictions up to nth order

In [174]:
import numpy as np
import pandas as pd
import qml
import scipy.spatial as scs
import scipy.interpolate as sci
import functools
%load_ext line_profiler

## Code section

In [178]:
c = qml.Compound('../../test/c20.xyz')
def _get_nn(refsite):
    distances = np.linalg.norm(c.coordinates - c.coordinates[refsite], axis=1)
    return np.argsort(distances)[1:4]
def build_reindexing_1_merged(refsite, ontosite):
    cog = np.mean(c.coordinates, axis=0)
    valid = False
    for Ann in _get_nn(refsite):
        for Bnn in _get_nn(ontosite):
            A = c.coordinates[[refsite, Ann]]
            B = c.coordinates[[ontosite, Bnn]]
            rot = scs.transform.Rotation.match_vectors(A, B)[0]
            transformed = rot.apply(c.coordinates)
            found = []
            for site in range(len(c.coordinates)):
                ds = np.linalg.norm(transformed - c.coordinates[site], axis=1)
                if min(ds) < 1e-5:
                    found.append(np.argmin(ds))
            if set(found) == set([_ for _ in range(len(c.coordinates))]) and found[refsite] == ontosite:
                valid = True
                break
        if valid:
            break
    if not valid:
        raise ValueError('no solution')
    return found

def read_DENSITY(fn):
    with open(fn, 'r') as fh:
        _ = np.fromfile(fh, 'i4')
        q = _[3:-1].view(np.float64)
        ccdensity = q.reshape((-1, 10))
    ccdensity = ccdensity[:, 1:6]
    return ccdensity[:, :3], ccdensity[:, 3], ccdensity[:, 4]

def read_grid_first_order():
    changed_site = 0
    delta = 0.1
    
    upgrid, upweight, updens = read_DENSITY_cached('c20-data/derivatives/order-1/site-0-up/DENSITY')
    dngrid, dnweight, dndens = read_DENSITY_cached('c20-data/derivatives/order-1/site-0-dn/DENSITY')
    
    if not np.allclose(upgrid, dngrid):
        raise ValueError('Grid?')
        
    if not np.allclose(upweight, dnweight):
        raise ValueError('Grid?')
    
    return changed_site, upgrid, ((updens - dndens) / delta)*upweight
def get_nucnuc(zs):
    ds = scs.distance.squareform(scs.distance.pdist(c.coordinates))*1.8897259885789
    q = np.outer(zs, zs)/ds
    np.fill_diagonal(q, 0)
    return q.sum()/2    
def get_deriv(i, j):
    """ Returns 
    t_i : atom index of i after rotation
    t_j : atom index of j after rotation
    deriv_pair : the density to be integrated after pairwise rotation
    deriv_single : density to be integrated after single rotation """
    
    d = np.linalg.norm(c.coordinates[i] - c.coordinates[j])
    geo = np.argmin(np.abs(np.array(sorted(set(np.round(scs.distance.pdist(c.coordinates), 2)))) - d))
    sites = (0, (1, 2, 8, 10, 16)[geo])
    i, j = sites
    delta = 0.1
    
    assert i == 0
    midgrid, midweight, middens = read_DENSITY_cached('c20-data/derivatives/order-0/site-all-cc/DENSITY')
    # prefill output
    deriv_single = np.zeros(middens.shape)
    deriv_pair = np.zeros(middens.shape)
    
    iupgrid, iupweight, iupdens = read_DENSITY_cached('c20-data/derivatives/order-1/site-0-up/DENSITY')
    idngrid, idnweight, idndens = read_DENSITY_cached('c20-data/derivatives/order-1/site-0-dn/DENSITY')
    
    if i == j:
        deriv_single = (iupdens + idndens - 2 * middens)/(delta**2)
    else:
        rhojup = iupdens
        rhojdn = idndens
        upgrid, upweight, updens = read_DENSITY_cached('c20-data/derivatives/order-2/site-0-%d-up/DENSITY' % j)
        dngrid, dnweight, dndens = read_DENSITY_cached('c20-data/derivatives/order-2/site-0-%d-dn/DENSITY' % j)
        
        deriv_pair = (updens + dndens + 2 * middens - iupdens - idndens) / (2 * delta**2)
        deriv_single = (- rhojup - rhojdn) / (2 * delta**2)
    
    return i, j, deriv_pair, deriv_single
def build_reindexing_2_merged(refsite1, refsite2, ontosite1, ontosite2):
    if refsite1 == ontosite1 and refsite2 == ontosite2:
        return list(range(20))
    for inverse in (True, False):
        for asc in (True, False):
            for mirror in (True, False):
                for mirrorafter in (True, False):
                    for noflip in (True, False):
                        for rotate60 in (True, False):
                            for rotate90 in (True, False):
                                for reflectrotate in (True, False):
                                    for rotate120 in (True, False):
                                        try:
                                            return do_it(refsite1, refsite2, ontosite1, ontosite2, inverse, asc, mirror, mirrorafter, noflip, rotate60, rotate90, reflectrotate, rotate120)
                                        except ValueError:
                                            continue
    raise ValueError('No luck.')
def do_it(refsite1, refsite2, ontosite1, ontosite2, inverse, asc, mirror, mirrorafter, noflip, rotate60, rotate90, reflectrotate,rotate120):
    #print (inverse, asc, mirror, mirrorafter, noflip,rotate60, rotate90)
    valid = False
    if inverse:
        coordinates = np.copy(c.coordinates)*(-1)
    else:
        coordinates = np.copy(c.coordinates)
    
    A = c.coordinates[[refsite1, refsite2]]
    B = coordinates[[ontosite1, ontosite2]]
    
    if rotate60:
        a = B[0] -B[1]
        a = a/np.linalg.norm(a)*(np.pi/3)
        rot = scs.transform.Rotation.from_rotvec(a)
        coordinates = rot.apply(coordinates)
    if rotate90:
        a = B[0] -B[1]
        a = a/np.linalg.norm(a)*(np.pi/4)
        rot = scs.transform.Rotation.from_rotvec(a)
        coordinates = rot.apply(coordinates)
    if rotate120:
        a = B[0] -B[1]
        a = a/np.linalg.norm(a)*(2*np.pi/3)
        rot = scs.transform.Rotation.from_rotvec(a)
        coordinates = rot.apply(coordinates)
    if mirror:
        ax1 = A.sum(axis=0)
        ax2 = B.sum(axis=0)
        a = ax1 - ax2
        for site in range(20):
            v = coordinates[site].copy()
            coordinates[site] = v- 2*a*np.dot(v, a) / np.dot(a, a)
        transformed = coordinates
    else:
        A = c.coordinates[[refsite1, refsite2]]
        B = coordinates[[ontosite1, ontosite2]]
        #print (np.linalg.norm(c.coordinates[[refsite1, refsite2]] - coordinates[[ontosite1, ontosite2]] , axis=1))
        if asc:
            index = 0
        else:
            index = 1

        # rotate first
        a = np.cross(A[index], (0,0,1))
        b = np.cross(B[index], (0, 0, 1))
        rot = scs.transform.Rotation.match_vectors([A[index], a], [B[index], b])[0]
        transformed = rot.apply(c.coordinates)
        #print (np.linalg.norm(c.coordinates[[refsite1, refsite2]] - transformed[[ontosite1, ontosite2]] , axis=1))

        # rotate second
        A = c.coordinates[[refsite1, refsite2]]
        B = transformed[[ontosite1, ontosite2]]
        rot = scs.transform.Rotation.match_vectors(A, B)[0]
        transformed2 = rot.apply(transformed)
        transformed = transformed2
        #print (np.linalg.norm(c.coordinates[[refsite1, refsite2]] - transformed[[ontosite1, ontosite2]] , axis=1))
        if max(np.linalg.norm(c.coordinates[[refsite1, refsite2]] - transformed[[ontosite1, ontosite2]] , axis=1)) > 1e-5:
            raise ValueError('no rotation')
    
    if mirrorafter:
        a = transformed[ontosite1] - transformed[ontosite2]
        for site in range(20):
            v = transformed[site].copy()
            transformed[site] = v- 2*a*np.dot(v, a) / np.dot(a, a)
    if noflip:      
        a = transformed[ontosite1] + transformed[ontosite2]
        a = a/np.linalg.norm(a)*np.pi
        rot = scs.transform.Rotation.from_rotvec(a)
        transformed = rot.apply(transformed)
        #print (np.linalg.norm(c.coordinates[[refsite1, refsite2]] - transformed[[ontosite1, ontosite2]] , axis=1))
    
    if reflectrotate:
        a = transformed[ontosite1] - transformed[ontosite2]
        for site in range(20):
            v = transformed[site].copy()
            transformed[site] = v- 2*a*np.dot(v, a) / np.dot(a, a)
        a = a/np.linalg.norm(a)*(np.pi)
        rot = scs.transform.Rotation.from_rotvec(a)
        transformed = rot.apply(transformed)
    found = []
    for site in range(len(c.coordinates)):
        ds = np.linalg.norm(transformed - c.coordinates[site], axis=1)
        if min(ds) < 1e-5:
            #print (site, np.argmin(ds))
            found.append(np.argmin(ds))
    #try:
    #    print (set(found))#, found[refsite1], ontosite1, found[refsite2], ontosite2)
    #except:
    #    pass
    if set(found) == set([_ for _ in range(len(c.coordinates))]) and found[refsite1] == ontosite1 and found[refsite2] == ontosite2:
        pass
    else:
        raise ValueError('no solution')
    return found

## Caches

In [153]:
@functools.lru_cache(maxsize=20*20*20*20)
def build_reindexing_2_cached(a, b, c, d):
    return build_reindexing_2_merged(a, b, c,d)
@functools.lru_cache(maxsize=20*20*20*20)
def build_reindexing_1_cached(a, b):
    return build_reindexing_1_merged(a, b)
@functools.lru_cache(200)
def read_DENSITY_cached(fn):
    return read_DENSITY(fn)
@functools.lru_cache(30)
def get_grid_ds(j):
    return 1/(np.linalg.norm(grid_points - c.coordinates[j], axis=1)*1.8897259885789)
@functools.lru_cache(maxsize=20*20)
def get_deriv_cached(i, j):
    return get_deriv(i, j)
changed_site, grid_points, grid_densweight = read_grid_first_order()

In [143]:
# warm caches
def test_all_pairs(n):
    ds = scs.distance.squareform(scs.distance.pdist(c.coordinates))
    dvals = np.unique(np.round(ds, 2))
    xs, ys = np.where(abs(ds - dvals[n])< 0.1)
    tosites = (0,1, 2, 8, 10, 16)[n]
    for i, j in zip(xs, ys):
        if i == j:
            continue
        try:
            build_reindexing_2_merged(i, j, 0, tosites)
        except:
            print (i, j, 0, tosites)
for i in range(6):
    print (i)
    test_all_pairs(i)

SyntaxError: unexpected EOF while parsing (<ipython-input-143-e6b92fe9211f>, line 2)

## Analysis

In [183]:
def get_predictions(comb = [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 7]):
    #rho, dsingle, dneigh1, dneigh2, dneigh3, dneigh4, dneigh5 = read_densities()
        
    E =0 #-758.072029908548 # base energy
    deltaZ = np.array(comb) - 6
        
    zs = np.array([int(_) for _ in comb])
    zsref = np.zeros(20) + 6
    
    E -= get_nucnuc(zsref)
    E += get_nucnuc(zs)
    
    # 0-th order, no rotation necessary, should be hard zero
    #ds = np.linalg.norm(grid_points - c.coordinates[0], axis=1)
    #es = (rho * grid_weights / ds).sum()
    #for idx, Z in enumerate(comb):
    #    if deltaZ[idx] == 0:
    #        continue
    #    E += np.sum(deltaZ[idx] * rho * grid_weights / ds)
    
    # 1st order
    changed_site, grid_points, grid_densweight = read_grid_first_order()
    dV = np.zeros(grid_densweight.shape)
    for idx, Z in enumerate(comb):
        if deltaZ[idx] == 0:
            continue
        mapping = build_reindexing_1_cached(idx, changed_site)
        
        for j in range(20): 
            if deltaZ[mapping[j]] == 0:
                continue
            ds = get_grid_ds(j)
            dV += deltaZ[idx] * deltaZ[mapping[j]]* ds
            #E += np.sum(deltaZ[idx] * deltaZ[mapping[j]] * grid_densweight / ds)/2
    E += np.sum(dV * grid_densweight)/2
    
    # 2nd order
    dV *= 0
    for idx_i, Z_i in enumerate(comb):
        if deltaZ[idx_i] == 0:
            continue
        for idx_j, Z_j in enumerate(comb):
            if deltaZ[idx_j] == 0:
                continue
            
            # t_i: target for idx_i after rotation
            # deriv_pair: part of derivative after pair-mapping
            # deriv_single: part of derivative after single-mapping
            t_i, t_j, deriv_pair, deriv_single = get_deriv_cached(idx_i, idx_j)
            
            # pairwise mapping
            if idx_i != idx_j:
                try:
                    mapping = build_reindexing_2_cached(idx_i, idx_j, t_i, t_j)
                except:
                    print (idx_i, idx_j, t_i, t_j)
                for j in range(20): 
                    if deltaZ[mapping[j]] == 0:
                        continue
                    ds = get_grid_ds(j)
                    #E += np.sum(deltaZ[idx] * deltaZ[mapping[j]] * grid_densweight / ds) / 6
                    dV += deltaZ[idx] * deltaZ[mapping[j]] * ds
            
            # single mapping
            mapping = build_reindexing_1(idx_j, 0)
            for j in range(20): 
                if deltaZ[mapping[j]] == 0:
                    continue
                ds = get_grid_ds(j)
                #E += np.sum(deltaZ[idx] * deltaZ[mapping[j]] * grid_densweight / ds) / 6
                dV += deltaZ[idx] * deltaZ[mapping[j]] * ds
    E += np.sum(dV * grid_densweight)/6
            
    return E
%lprun -f get_predictions get_predictions([int(_) for _ in '57766576666555776675'])



Timer unit: 1e-06 s

Total time: 5.57754 s
File: <ipython-input-183-cf0e87111acf>
Function: get_predictions at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def get_predictions(comb = [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 7]):
     2                                               #rho, dsingle, dneigh1, dneigh2, dneigh3, dneigh4, dneigh5 = read_densities()
     3                                                   
     4         1          4.0      4.0      0.0      E =0 #-758.072029908548 # base energy
     5         1         31.0     31.0      0.0      deltaZ = np.array(comb) - 6
     6                                                   
     7         1         20.0     20.0      0.0      zs = np.array([int(_) for _ in comb])
     8         1         18.0     18.0      0.0      zsref = np.zeros(20) + 6
     9                                               
    10         1        497.0    497.

In [177]:
%lprun -f get_deriv get_predictions([int(_) for _ in '57766576666555776675'])



(552844,)


Timer unit: 1e-06 s

Total time: 6.16006 s
File: <ipython-input-157-b8d355666068>
Function: get_deriv at line 55

Line #      Hits         Time  Per Hit   % Time  Line Contents
    55                                           def get_deriv(i, j):
    56                                               """ Returns 
    57                                               t_i : atom index of i after rotation
    58                                               t_j : atom index of j after rotation
    59                                               deriv_pair : the density to be integrated after pairwise rotation
    60                                               deriv_single : density to be integrated after single rotation """
    61                                               
    62       144      10665.0     74.1      0.2      d = np.linalg.norm(c.coordinates[i] - c.coordinates[j])
    63       144      39788.0    276.3      0.6      geo = np.argmin(np.abs(np.array(sorted(set(np.round(s