In [1]:
import numpy as np
from ase.io import read
from rascal.representations import SphericalInvariants as SOAP
from helpers import filter_by_status


In [12]:
filename = "./make_tensor_data/train_tensor/CSD-3k+S546_shift_tensors.xyz"
frames = read(filename,format="extxyz",index=":")
frames = filter_by_status(frames, status="PASSING")
for frame in frames:
    frame.wrap(eps=1e-12)

In [16]:
HYPERS = {
    'interaction_cutoff': 6.3,
    'max_radial': 8,
    'max_angular': 5,
    'gaussian_sigma_type': 'Constant',
    'gaussian_sigma_constant': 0.05,
    'cutoff_smooth_width': 0.3,
    'radial_basis': 'GTO'
}


In [17]:
def get_nice():
    return StandardSequence([
        StandardBlock(ThresholdExpansioner(num_expand=150),
                      CovariantsPurifierBoth(max_take=10),
                      IndividualLambdaPCAsBoth(n_components=50),
                      ThresholdExpansioner(num_expand=300, mode='invariants'),
                      InvariantsPurifier(max_take=50),
                      InvariantsPCA(n_components=200)),
        StandardBlock(ThresholdExpansioner(num_expand=150),
                      CovariantsPurifierBoth(max_take=10),
                      IndividualLambdaPCAsBoth(n_components=50),
                      ThresholdExpansioner(num_expand=300, mode='invariants'),
                      InvariantsPurifier(max_take=50),
                      InvariantsPCA(n_components=200)),
        StandardBlock(None, None, None,
                      ThresholdExpansioner(num_expand=300, mode='invariants'),
                      InvariantsPurifier(max_take=50),
                      InvariantsPCA(n_components=200))
    ],
                            initial_scaler=InitialScaler(
                                mode='signal integral', individually=True))

In [18]:

train_structures = frames[::2]

test_structures = frames[1::2]

all_species = get_all_species(train_structures + test_structures)
print("all species: ", all_species)
train_coefficients = get_spherical_expansion(train_structures, HYPERS,
                                             all_species)

test_coefficients = get_spherical_expansion(test_structures, HYPERS,
                                            all_species)



all species:  [ 1  6  7  8 16]


100%|██████████| 18/18 [00:09<00:00,  1.97it/s]
100%|██████████| 5/5 [00:00<00:00,  8.11it/s]
100%|██████████| 18/18 [00:09<00:00,  1.96it/s]
100%|██████████| 5/5 [00:00<00:00,  8.31it/s]


In [49]:
train_coefficients[8].shape

(21504, 40, 6, 11)

In [39]:
train_structures

[Atoms(symbols='N4O4C20H20', pbc=True, cell=[13.564047596, 5.79502231063, 5.60401339238], cs_iso=..., cs_tensor=...),
 Atoms(symbols='C28H16N32O32', pbc=True, cell=[[12.722757232, 0.0, 0.0], [0.0, 11.8045558426, 0.0], [-0.879600543992, 0.0, 8.09690265277]], cs_iso=..., cs_tensor=...),
 Atoms(symbols='C32H32N16O16', pbc=True, cell=[[13.24907966, 0.0, 0.0], [0.0, 11.1910736183, 0.0], [-0.519960131257, 0.0, 6.25747407802]], cs_iso=..., cs_tensor=...),
 Atoms(symbols='C40H40N24O32', pbc=True, cell=[9.092423678, 11.8043299642, 12.7518332508], cs_iso=..., cs_tensor=...),
 Atoms(symbols='C48H48O12', pbc=True, cell=[[8.550067096, 0.0, 0.0], [0.0, 15.6651251309, 0.0], [-4.01069112359, 0.0, 7.38984864127]], cs_iso=..., cs_tensor=...),
 Atoms(symbols='C84H64O8', pbc=True, cell=[9.86206307, 17.4021132522, 18.3081212623], cs_iso=..., cs_tensor=...),
 Atoms(symbols='C32H64N32O16', pbc=True, cell=[[13.865098098, 0.0, 0.0], [0.0, 5.69203872217, 0.0], [-3.27880453312, 0.0, 17.2207568699]], cs_iso=..., 

In [20]:
environments_for_fitting = 5000

In [27]:
nice = {}
for key in train_coefficients.keys():
    nice[key] = get_nice()

In [28]:
for key in train_coefficients.keys():
    nice[key].fit(train_coefficients[key][:environments_for_fitting])

In [38]:
len(train_structures)


1715

In [30]:
train_features = {}
for specie in all_species:
    train_features[specie] = nice[specie].transform(
        train_coefficients[specie], return_only_invariants=True)



In [42]:
train_features[1]

{1: array([[ 9.57851497e-01, -1.23857951e-01, -8.42696588e-02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 9.57851416e-01, -1.23858172e-01, -8.42698818e-02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 9.57851368e-01, -1.23858172e-01, -8.42699922e-02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        ...,
        [ 9.57018420e-01, -3.60323245e-02,  2.46932694e-02, ...,
          1.55534728e-03, -7.72205500e-04,  5.65154485e-03],
        [ 9.59619700e-01, -4.02954397e-02, -1.27915508e-02, ...,
         -2.16324298e-03, -7.35481547e-05,  1.12706746e-02],
        [ 9.59619684e-01, -4.02955185e-02, -1.27908842e-02, ...,
         -2.16313244e-03, -7.35874038e-05,  1.12706470e-02]]),
 2: array([[-2.89071616e-02,  7.64519442e-03,  7.84126747e-03, ...,
         -1.01700445e-17, -1.33020924e-18,  2.38524478e-18],
        [-2.89066477e-02,  7.64997258e-03,  7.84420803e-03, ...,
         -3.43032051e-18,  7.402

In [53]:
ind = 8

for key in train_features[ind].keys():
    print("{} : {}".format(key, train_features[ind][key].shape))



1 : (21504, 40)
2 : (21504, 200)
3 : (21504, 200)
4 : (21504, 200)


In [54]:
train_features_c = make_structural_features(train_features, train_structures,
                                          all_species)
test_features_c = make_structural_features(test_features, test_structures,
                                         all_species)

100%|██████████| 1715/1715 [00:00<00:00, 4768.13it/s]


AttributeError: 'numpy.ndarray' object has no attribute 'keys'

In [26]:
train_features.shape

(1715, 3200)

In [4]:
import tqdm
from nice.blocks import *
from nice.utilities import *
from matplotlib import pyplot as plt
from sklearn.linear_model import BayesianRidge