In [1]:
from ase.geometry import get_distances
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

## Bootleg Wigner-Seitz

In [2]:
from sklearn.neighbors import KNeighborsClassifier

In [16]:
def dist(x, y): 
    _, D = get_distances(x, y, cell=np.eye(3), pbc=True)
    return D

In [18]:
knn = KNeighborsClassifier(1, metric=dist)

In [19]:
from ase.io import read

In [20]:
ref = read('../0_setup/initial-geometries/si-perfect-2x2x2.vasp').get_scaled_positions()

In [24]:
ref[:3]

array([[0.   , 0.   , 0.   ],
       [0.125, 0.125, 0.125],
       [0.   , 0.25 , 0.25 ]])

In [22]:
knn.fit(ref, np.arange(ref.shape[0]))

In [23]:
knn.predict(ref)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])

Sanity check the PBC: 

In [27]:
knn.predict([[-0.125, -0.125, -0.125]])

array([44])

In [30]:
traj = read('runs/runs/si-vacancy-2x2x2-temp=2073.0-method=lda-blend=5-b9b03ba2/md.traj', 
            index=':')

In [35]:
from joblib import delayed, Parallel

In [41]:
f = delayed(knn.predict)
p = Parallel(n_jobs=-4,verbose=1)

In [42]:
p(f(a.get_scaled_positions()) for a in traj)

[Parallel(n_jobs=-4)]: Using backend LokyBackend with 17 concurrent workers.
[Parallel(n_jobs=-4)]: Done  16 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-4)]: Done 166 tasks      | elapsed:   11.8s
[Parallel(n_jobs=-4)]: Done 416 tasks      | elapsed:   31.8s
[Parallel(n_jobs=-4)]: Done 766 tasks      | elapsed:   58.5s
[Parallel(n_jobs=-4)]: Done 1216 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-4)]: Done 1766 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-4)]: Done 2416 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-4)]: Done 3166 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-4)]: Done 4016 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-4)]: Done 4966 tasks      | elapsed:  6.4min
[Parallel(n_jobs=-4)]: Done 6016 tasks      | elapsed:  7.9min
[Parallel(n_jobs=-4)]: Done 7166 tasks      | elapsed:  9.6min
[Parallel(n_jobs=-4)]: Done 8416 tasks      | elapsed: 11.5min
[Parallel(n_jobs=-4)]: Done 9766 tasks      | elapsed: 13.6min
[Parallel(n_jobs=-4)]: Done 11216 tasks      

[array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32