In [1]:
import copy
import os

import numpy as np
import matplotlib.pyplot as plt
import sklearn.model_selection
from sklearn.metrics import mean_squared_error

import ase.io
import equistore.io


# from utils.librascal import RascalSphericalExpansion
from utils.rascaline import RascalineSphericalExpansion
from utils.models.soap import compute_power_spectrum

from utils.models.linear import LinearModel
from utils.models.full_kernel import FullKernelGap
from utils.models.sparse_kernel import SparseKernelGap
from utils.models.sparse_kernel import select_support_points, SparseKernelGap
from utils.pylode import PyLODESphericalExpansion

In [2]:
def framesSplittor(frames, atomsMax = 1000, framesMin = 5):
    '''
    Split a sequence of frames into consecutive, non-overlapping subsets.

    Frames are consumed in order. The subset being built is closed as soon as
    it holds at least ``framesMin`` frames AND its total atom count is at
    least ``atomsMax``. Whatever is left when the input runs out becomes the
    final subset.

    INPUTS:
    frames: Sequence of frames to split. It must support ``len()``, integer
        indexing and slicing, and ``len(frame)`` must give the number of
        atoms/environments in a frame.
    atomsMax: Atom-count threshold at which a subset may be closed. Note that
        this is a lower bound on a closed subset, not a hard cap: a subset is
        only closed once its total reaches or exceeds this value.
    framesMin: The minimum number of frames a subset must hold before it may
        be closed. The final subset holds the remainder and can therefore be
        shorter than this.

    OUTPUTS:
    A list containing the subsets (``frames[start:stop]`` slices) in their
    original order. An empty input yields an empty list.
    '''
    n_frames = len(frames)
    framesList = []
    start = 0            # index of the first frame of the subset being built
    atoms_in_subset = 0  # running total, so atoms are not re-summed per frame
    for i in range(n_frames):
        atoms_in_subset += len(frames[i])
        frames_in_subset = i - start + 1
        is_last_frame = (i == n_frames - 1)
        subset_is_full = (frames_in_subset >= framesMin
                          and atoms_in_subset >= atomsMax)
        # The last frame always closes the current subset, full or not.
        if is_last_frame or subset_is_full:
            framesList.append(frames[start:i + 1])
            start = i + 1
            atoms_in_subset = 0

    return framesList

In [3]:
# Load every configuration of the fitting set (index ":" selects all frames).
frames = ase.io.read("P_GAP_20_fitting_data.xyz", index=":")

In [4]:
# Partition the loaded frames into subsets using the splitter's default limits.
spf = framesSplittor(frames=frames)

In [5]:
# Write each subset to its own .xyz file, named by its position in `spf`.
# Make sure the output directory exists first so a fresh run does not fail
# on a missing folder.
os.makedirs('./splittedFrames', exist_ok=True)
for ss, subset in enumerate(spf):
    ase.io.write('./splittedFrames/subset_{}.xyz'.format(ss), subset)



In [6]:
### IMPORTANT!
### The files are named as follows:
### split subsets: ./splittedFrames/subset_{}.xyz

### For each subset, there is a dedicated Python script and a job submission file on the HPC,
### named ./splittedCalculations/computeFeature_{}.py
### and ./splittedCalculations/job_{}.py

### The computed feature matrices will be stored as:
### ./splittedFeatureMatrix/tensorMap_{}.npz

### Finally, 4.2_read_splitted_tensorMap_and_combine reads these .npz
### files and combines the feature matrices.