In [4]:
from helpers import *
from rascal.representations import SphericalCovariants 
from rascal.neighbourlist.structure_manager import mask_center_atoms_by_species #change later
from rascal.representations import SphericalExpansion
from rascal.utils import ClebschGordanReal, compute_lambda_soap, spherical_expansion_reshape, xyz_to_spherical
from rascal.representations import SphericalInvariants
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [5]:
# Representation hyperparameters with a 6-radial / 4-angular basis.
hypers = dict(
    interaction_cutoff=3,
    radial_basis="GTO",
    max_radial=6,
    max_angular=4,
    gaussian_sigma_constant=0.3,
    gaussian_sigma_type="Constant",
    cutoff_function_type="ShiftedCosine",
    cutoff_smooth_width=0.5,
    compute_gradients=False,
    cutoff_function_parameters={"rate": 1, "scale": 3.5, "exponent": 4},
)

# Power-spectrum variant: larger 9-radial / 9-angular basis with normalization on.
hypers_soap = dict(
    soap_type="PowerSpectrum",
    interaction_cutoff=3,
    radial_basis="GTO",
    max_radial=9,
    max_angular=9,
    gaussian_sigma_constant=0.3,
    gaussian_sigma_type="Constant",
    cutoff_function_type="ShiftedCosine",
    cutoff_smooth_width=0.5,
    normalize=True,
    compute_gradients=False,
    cutoff_function_parameters={"rate": 1, "scale": 3.5, "exponent": 4},
)

In [133]:
def build_lambda(structures,hypers):
    """Build lambda=2 features for ``structures`` from a spherical expansion.

    Parameters
    ----------
    structures : passed unchanged to ``SphericalExpansion.transform``.
    hypers : dict
        Keyword arguments for ``SphericalExpansion``; also forwarded to
        ``spherical_expansion_reshape``. ``hypers["max_angular"]`` sets the
        ``lmax`` of the Clebsch-Gordan helper.

    Returns
    -------
    The output of ``compute_lambda_soap`` reshaped to ``(n, 5, -1)``, where
    ``n`` is the length of its first axis.

    NOTE(review): the final step is a plain reshape, not a transpose. If
    ``compute_lambda_soap`` puts the 2*lambda+1 axis last, this would mix
    components -- confirm the layout it returns.
    """
    feat_scaling = 1e6  # just a scaling to make coefficients O(1)
    sel_lambda = 2
    expansion = SphericalExpansion(**hypers)
    managers = expansion.transform(structures)
    raw_feats = managers.get_features(expansion)
    ref_feats = feat_scaling*spherical_expansion_reshape(raw_feats, **hypers)
    cg_helper = ClebschGordanReal(lmax=hypers["max_angular"])
    # The trailing positional 1 is presumably the parity selector -- TODO confirm.
    lambda_soap = compute_lambda_soap(ref_feats, cg_helper, sel_lambda, 1)
    n_rows = lambda_soap.shape[0]
    return lambda_soap.reshape(n_rows, 5, -1)

def build_soap(structures,hypers):
    """Return the ``SphericalInvariants`` feature matrix for ``structures``.

    ``hypers`` is expanded as keyword arguments to ``SphericalInvariants``;
    the features are read from the managers produced by ``transform``.
    """
    calculator = SphericalInvariants(**hypers)
    managers = calculator.transform(structures)
    return managers.get_features(calculator)

def tensvec_to_irrep(tensor,order=2):
    """Convert a 3x3 tensor into a 6-component vector.

    Output layout:
      [0]    trace / sqrt(3)
      [1:4]  sqrt(2) * (t01, t12, t02)
      [4]    sqrt(2) * (2*t22 - t00 - t11) / (2*sqrt(3))
      [5]    sqrt(2) * (t00 - t11) / 2

    ``order`` is accepted but not used by the body.
    """
    xx, yy, zz = tensor[0,0], tensor[1,1], tensor[2,2]
    irrep = np.zeros((6))
    irrep[0] = np.trace(tensor)/np.sqrt(3)
    # off-diagonal entries, in the order xy, yz, xz
    for slot, (row, col) in enumerate(((0,1), (1,2), (0,2)), start=1):
        irrep[slot] = tensor[row,col]
    irrep[4] = 1/(2*np.sqrt(3)) * (2*zz-xx-yy)
    irrep[5] = 0.5 * (xx-yy)
    # every component except the trace term carries an extra sqrt(2)
    irrep[1:] *= np.sqrt(2)
    return irrep
    
def irrep_to_tensvec(b):
    """Invert the 6-component irrep encoding back to Cartesian tensor entries.

    Parameters
    ----------
    b : array_like of length 6
        Vector laid out as ``[trace/sqrt(3), sqrt(2)*xy, sqrt(2)*yz,
        sqrt(2)*xz, sqrt(2)*(2zz-xx-yy)/(2*sqrt(3)), sqrt(2)*(xx-yy)/2]``.
        The input is not modified.

    Returns
    -------
    numpy.ndarray of length 6 ordered as ``(axx, ayy, azz, axy, ayz, axz)``.
    """
    # Rows express (trace, xy, yz, xz, 2zz-xx-yy, xx-yy) in terms of
    # the unknowns (axx, ayy, azz, axy, ayz, axz).
    A = np.array([[1,1,1,0,0,0],[0,0,0,1,0,0],[0,0,0,0,1,0],[0,0,0,0,0,1],[-1,-1,2,0,0,0],[1,-1,0,0,0,0]])
    # Work on a floating-point copy: the caller's array stays untouched and
    # integer input is promoted instead of being truncated by the scaling.
    rhs = np.asarray(b)
    rhs = rhs.astype(np.result_type(rhs.dtype, np.float64))
    # Undo the normalization factors so rhs matches the rows of A.
    rhs[0] = rhs[0] * np.sqrt(3)
    rhs[1:] = rhs[1:] * 1/np.sqrt(2)
    rhs[4] = rhs[4] * 2*np.sqrt(3)
    rhs[5] = rhs[5] * 2
    return np.linalg.solve(A, rhs)

In [2]:
# Build the Clebsch-Gordan helper with lmax=2.
# NOTE(review): the NameError recorded below matches this cell's execution count (2) being
# lower than the import cell's (4), i.e. it ran before the imports on that kernel.
cg = ClebschGordanReal(lmax=2)

NameError: name 'ClebschGordanReal' is not defined

In [3]:
# Couple the first training tensor (reshaped to 3x3 and transposed) and read entry [(1,1)][2].
# NOTE(review): needs `cg` and `tensors_train` from other cells; the recorded NameError shows
# `cg` did not exist when this cell ran.
cg.couple(xyz_to_spherical(tensors_train[0].reshape(3,3).T))[(1,1)][2]

NameError: name 'cg' is not defined

In [86]:
# Convert the first training tensor with xyz_to_spherical (no reshape applied here).
# Depends on `tensors_train`, which is assigned in the data-loading cell.
xx_spherical = xyz_to_spherical(tensors_train[0])



In [55]:
def f2v(vec):
    """Reshape ``vec`` so each axis of length ``s`` becomes two axes ``(s//5, 5)``.

    For example, shape ``(10, 5)`` becomes ``(2, 5, 1, 5)``. The reshape raises
    if any axis length is not a multiple of 5.
    """
    blocked_shape = tuple(dim for s in vec.shape for dim in (s//5, 5))
    return vec.reshape(blocked_shape)

def v2f(vec):
    """Flatten the blocked layout produced by ``f2v`` back into merged axes.

    The target shape takes every second axis length (indices 0, 2, ...,
    excluding the last axis) multiplied by 5. When the number of dimensions
    is odd, a trailing ``-1`` lets numpy infer the remaining axis.
    """
    dims = vec.shape
    merged = [s*5 for s in dims[:-1:2]]
    if len(dims) % 2 != 0:  # odd number of dimensions: keep a free trailing axis
        merged.append(-1)
    return vec.reshape(tuple(merged))

def train_model(X_train, y_train, sigma=1e-08):
    """Fit kernel-ridge weights on the blocked (f2v/v2f) feature layout.

    Parameters
    ----------
    X_train : array accepted by ``v2f``; flattened to 2-D to build the
        linear kernel ``K = X @ X.T``.
    y_train : array accepted by ``v2f``; flattened targets.
    sigma : float
        Ridge strength added to the diagonal of the kernel.

    Returns
    -------
    The least-squares solution of ``(K + sigma * I) w = y``, passed through
    ``f2v``.

    The regularizer is ``sigma * I``. The earlier form ``K + sigma * K`` only
    rescaled the system by ``(1 + sigma)`` and gave no regularization; this
    form matches ``train_soap``.
    """
    KMM = f2v(v2f(X_train) @ v2f(X_train).T)
    kernel = v2f(KMM)
    ridge = sigma * np.eye(kernel.shape[0])
    weights = f2v( np.linalg.lstsq(kernel + ridge, v2f(y_train), rcond=None)[0] )
    return weights

def train_soap(X_train,y_train):
    """Fit ridge weights on a linear kernel of ``X_train``.

    Builds ``K = (X_train @ X_train.T) ** 1``, adds ``1e-3`` to the diagonal,
    and returns the least-squares solution of the regularized system for
    ``y_train``.
    """
    gram = (X_train @ X_train.T) ** 1
    ridge = 1e-3 * np.eye(gram.shape[0])
    solution = np.linalg.lstsq(gram + ridge, y_train, rcond=None)
    return solution[0]

def predict_soap(X_test,X_train,weights):
    """Predict with the linear kernel between ``X_test`` and ``X_train``.

    Returns ``((X_test @ X_train.T) ** 1) @ weights``.
    """
    cross_kernel = (X_test @ X_train.T) ** 1
    predictions = cross_kernel @ weights
    return predictions

def predict(X_test, X_train, weights):
    """Predict in the blocked (f2v/v2f) layout with a linear kernel.

    Flattens test and training features with ``v2f``, forms their product,
    round-trips it through ``f2v``/``v2f``, multiplies by the flattened
    ``weights`` and returns the result passed through ``f2v``.
    """
    flat_test = v2f(X_test)
    flat_train = v2f(X_train)
    lKM = f2v(flat_test @ flat_train.T)
    flat_pred = v2f(lKM) @ v2f(weights)
    return f2v(flat_pred)

In [85]:
#load_data
# End-to-end cell: load the train/test sets, mask centers, gather the target tensors,
# transform them, and train/predict the lambda=2 model. Several steps are commented out.

PATH_TRAIN = "./train_tensor/CSD-3k+S546_shift_tensors.xyz"
PATH_TEST = "./test_tensor/CSD-500+104-7_shift_tensors.xyz"

# load_CSD_data is not defined in this notebook; presumably it comes from the `helpers` star import.
# NOTE(review): the training set is randomly subsampled and no seed is visible here, so results
# may differ between runs -- confirm how load_CSD_data draws the subsample.
structures_train, tensors_train = load_CSD_data(PATH_TRAIN,prop_string="cs_tensor",random_subsample=100)
structures_test, tensors_test = load_CSD_data(PATH_TEST,prop_string="cs_tensor")#,random_subsample=20)


#-----mask----
# species_select=[1] presumably means atomic number 1 (hydrogen) -- TODO confirm.
for structure in structures_train: mask_center_atoms_by_species(structure,species_select=[1])
for structure in structures_test: mask_center_atoms_by_species(structure,species_select=[1])

#-----select tensors-----
# Collect the "cs_tensor" rows of the masked centers from every structure into one array.
train_tensors = np.array([tensor for structure in structures_train for tensor in structure.arrays["cs_tensor"][structure.arrays["center_atoms_mask"]]])
test_tensors = np.array([tensor for structure in structures_test for tensor in structure.arrays["cs_tensor"][structure.arrays["center_atoms_mask"]]])
#train_shift = np.array([tensor for structure in structures_train for tensor in structure.arrays["cs_iso"][structure.arrays["center_atoms_mask"]]])
#test_shift = np.array([tensor for structure in structures_test for tensor in structure.arrays["cs_iso"][structure.arrays["center_atoms_mask"]]])

#-----transform tensors-----
#train_tensors_trans = np.vstack([tensvec_to_irrep(tens)  for tens in train_tensors.reshape(-1,3,3)])
#test_tensors_trans  = np.vstack([tensvec_to_irrep(tens)  for tens in test_tensors.reshape(-1,3,3)])

# NOTE(review): reshape(3,3) only works when train_tensors holds exactly 9 values. A later
# output in this notebook shows train_tensors.shape == (3690, 9), so this likely needs
# reshape(-1,3,3) -- confirm that xyz_to_spherical and cg.couple accept a batch.
cg = ClebschGordanReal(lmax=2)
train_tensors_trans = cg.couple(xyz_to_spherical(train_tensors.reshape(3,3)))


#------build_data------

###-----lambda=2-------
#X_train = build_lambda(structures_train,hypers)
#X_test = build_lambda(structures_test,hypers)
###-----lambda=0-------
#X_train_soap = build_soap(structures_train,hypers_soap)
#X_test_soap = build_soap(structures_test,hypers_soap)

#-----train model for lambda=2------
# NOTE(review): X_train / X_test are only assigned in the commented-out lines above, so this
# raises NameError on a fresh kernel. Also, train_tensors_trans[:,1:] assumes an array, while
# an earlier cell indexes the cg.couple(...) result as [(1,1)][2] -- confirm the return type.
weights = train_model(X_train, train_tensors_trans[:,1:])
y_pred = predict(X_test,X_train,weights)

#-----train model for lambda=0------
#
#weights_soap = train_soap(X_train_soap,train_tensors_trans[:,0])
#y_pred_soap = predict_soap(X_test_soap,X_train_soap,weights_soap)

#-----train model for lambda=0, for shift------
#weights_soap_shift = train_soap(X_train_soap,train_shift)
#y_pred_soap_shift = predict_soap(X_test_soap,X_train_soap,weights_soap_shift)

#-----evaluate error--------
#print(mean_squared_error(test_shift,y_pred_soap_shift))


In [65]:
from rascal.utils import from_dict, to_dict, CURFilter, FPSFilter, dump_obj, load_obj

In [76]:
# Pick n_sparse_env entries from the training structures with an FPS filter.
n_sparse_env = 50
# NOTE(review): this representation is built from `hypers`, not `hypers_soap` -- confirm intended.
soap = SphericalInvariants(**hypers)
managers = soap.transform(structures_train)
# act_on='sample' presumably selects samples (environments) rather than features -- verify in the FPSFilter docs.
sample_compressor = FPSFilter(soap, n_sparse_env, act_on='sample')
X_sparse = sample_compressor.select_and_filter(managers)

In [80]:
# Inspect the `selected_sample_ids` attribute of the FPS filter (the recorded output has 50 entries).
sample_compressor.selected_sample_ids

array([   0,  148,  581, 1623,  724, 2820, 1063, 2052,  864,  441,  691,
       2533, 1850, 1194, 1832, 2785,  972,  166, 1665, 2367,  642, 1251,
       3653, 3369,  747, 3375,  488, 1511,  687,  777, 1620,  375,  451,
       1139, 3696, 1131,  846,  733, 1546,  174, 1888,   11, 1532, 2397,
       2983,   88,  249, 3077, 1566, 1614])

In [69]:
# Inspect the `selected_ids` attribute: a list of lists in the recorded output,
# presumably one list of selected indices per structure -- TODO confirm.
sample_compressor.selected_ids

[[0, 11],
 [],
 [],
 [12],
 [],
 [],
 [0, 18, 26],
 [],
 [],
 [1],
 [],
 [],
 [3],
 [],
 [17, 27],
 [],
 [4],
 [],
 [],
 [1],
 [83, 34, 79],
 [20, 29],
 [7, 37],
 [],
 [2],
 [0],
 [],
 [],
 [0],
 [],
 [],
 [3],
 [],
 [11, 3],
 [2],
 [],
 [7],
 [],
 [],
 [],
 [],
 [31, 52],
 [10],
 [0],
 [9, 51, 6, 0],
 [],
 [],
 [],
 [40, 22],
 [6],
 [],
 [],
 [],
 [22],
 [],
 [],
 [],
 [],
 [],
 [55],
 [],
 [1],
 [],
 [],
 [25],
 [],
 [],
 [],
 [5],
 [],
 [12],
 [],
 [],
 [],
 [],
 [],
 [25],
 [],
 [],
 [23],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [11],
 [5],
 [],
 [],
 [],
 [],
 [],
 [],
 [23],
 [10],
 []]

In [60]:
# NOTE(review): `y_pred_soap_shift` is only assigned in a commented-out line of the training
# cell, so this relies on leftover kernel state and fails on a fresh run.
y_pred_soap_shift

array([26.12058508, 26.12059182, 26.1205717 , ..., 23.86020068,
       23.86464609, 23.86016068])

In [61]:
# Mean squared error of the SOAP shift predictions against `test_shift`.
# NOTE(review): both names are only assigned in commented-out lines of the training cell.
mean_squared_error(test_shift,y_pred_soap_shift)

1.0781622872684926

In [18]:
# Shape check of the fitted SOAP weights (the assignment is commented out in the training cell).
weights_soap_shift.shape

(3833,)

In [7]:
# Shape check of the SOAP training features. This is execution count 7; the row count differs
# from the same check run at count 42, presumably because the training data was re-sampled.
X_train_soap.shape

(15501, 12150)

In [62]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.pairwise import polynomial_kernel

# Kernel ridge regression with a degree-1 polynomial kernel (coef0=0) and alpha=1e-06.
# NOTE(review): X_train_soap and train_shift are only assigned in commented-out lines of the
# training cell, so this relies on leftover kernel state.
clf = KernelRidge(alpha=1e-06,kernel='poly',degree=1.,coef0=0)
clf.fit(X_train_soap, train_shift)

KernelRidge(alpha=1e-06, coef0=0, degree=1.0, kernel='poly')

In [63]:
# Predict the test targets with the fitted KernelRidge model.
y_predicted = clf.predict(X_test_soap)

In [64]:
# Mean squared error of the KernelRidge predictions on the test set.
# `squared=True` was the default and the parameter is deprecated in scikit-learn 1.4
# (removed in 1.6), so it is dropped; the returned value is unchanged.
mean_squared_error(test_shift, y_predicted)

0.9721504568406262

In [41]:
# Same evaluation as the previous cell, recorded at an earlier execution count (41 vs 64),
# presumably with a different training subsample.
# `squared=True` was the default and the parameter is deprecated in scikit-learn 1.4
# (removed in 1.6), so it is dropped; the returned value is unchanged.
mean_squared_error(test_shift, y_predicted)

4.9797490692489275

In [14]:
# Shape check of the test targets (`test_shift` is only assigned in a commented-out line of the training cell).
test_shift.shape

(35289,)

In [42]:
# Shape check of the SOAP training features. This is execution count 42; the row count matches
# the train_tensors shape check that follows.
X_train_soap.shape

(3690, 12150)

In [45]:
# Shape check of the training tensors: one row of 9 values per selected center.
train_tensors.shape

(3690, 9)

In [None]:
#object_ only store features
#build KN_pred_N_find and return features