In [20]:
import numpy as np
import subprocess
import os
import re
import pandas as pd
import argparse

## Cython definitions

In [21]:
%load_ext Cython
#Don't merge this cell with the next one: the next cell contains Cython code and therefore has to start with %%cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [22]:
%%cython

# -*- coding: utf-8 -*-
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# Copyright Guglielmo Saggiorato 2018
""" An "efficient" reader of LAMMPS dump files in cython.

In this notebook the reader is the generator ``read_traj`` defined further
down in this cell; it yields four values per frame:
```
for time, column_names, data, box in read_traj(filepath):
  pass
```
Usage as given in the original docstring (presumably for the standalone
module this cell was adapted from -- the names below are not defined here):
```import pyximport
pyximport.install()
from load_lammps import read_lammps
for time, column_names,data in read_lammps(filepath):
  pass
```
"""
# Metadata carried over with the reader code (authorship, license, contact).
__author__ = "Guglielmo Saggiorato" #Modified by Ivan Palaia
__copyright__ = "Copyright 2018, Guglielmo Saggiorato"
__credits__ = ["Guglielmo Saggiorato",]
__license__ = "GPLv3"
__version__ = "1.0"
__maintainer__ = "Guglielmo Saggiorato"
__email__ = "astyonax@gmail.com"
__status__ = "Production"

import numpy as np
cimport numpy as np
import pandas as pd
cimport cython
from libc.stdio cimport FILE, fopen, fwrite, fscanf, fclose, fprintf, fseek, ftell, SEEK_SET, rewind, fread

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def read_traj(str fname):
    """Iterate over the frames of a LAMMPS text dump file.

    The ``ITEM:`` header lines are parsed through a Python file object, while
    the numeric blocks (box bounds and per-atom rows) are read with C
    ``fscanf`` on a second handle opened on the same file. The two handles are
    kept in sync with ``tell``/``seek`` before and after every numeric block.

    In:
        fname [str] -- path of the dump file

    Yields, once per ``ITEM: ATOMS`` section:
        T [int]                 -- value read after ``ITEM: TIMESTEP``
        columns [list of str]   -- column names taken from the ``ITEM: ATOMS`` line
        q [pandas.DataFrame]    -- N x len(columns) float64 table of per-atom values
        qbox [pandas.DataFrame] -- 3 x 2 table with columns 'lo' and 'hi'
                                   (None if no ``ITEM: BOX BOUNDS`` section was
                                   seen before the first ATOMS section)

    Notes:
        * Every per-atom column is read with ``%le``, so all columns must be numeric.
        * Exactly 3 x 2 numbers are read after ``ITEM: BOX BOUNDS``.
        * Iteration ends silently at the first frame that declares zero atoms
          or zero columns.

    Raises:
        IOError    -- if the C-level ``fopen`` fails
        ValueError -- if a numeric block is truncated or contains a non-numeric field

    Usage:
    ```
    for time, column_names, data, box in read_traj(filepath):
        pass
    ```
    """
    cdef str line
    cdef int i, N, T, j, cln
    cdef list columns
    cdef list columnsbox
    cdef int Nbox, clnbox
    cdef double[:,:] data
    cdef double[:,:] box
    cdef double tmp
    cdef FILE * ptr_r

    # Defined up front so the yield below never hits an unbound name.
    qbox = None
    # Keep the encoded name alive in a variable while fopen reads it.
    fname_bytes = fname.encode('utf-8')

    # `with` + `finally` guarantee both handles are released, also when the
    # consumer stops iterating early and the generator gets closed.
    with open(fname,'r') as fin:
        ptr_r = fopen(fname_bytes, "r")
        if ptr_r == NULL:
            raise IOError("Could not open '{:s}' for reading".format(fname))
        try:
            line = fin.readline()
            while line:
                if 'ITEM: TIMESTEP' in line:
                    # begin new timestep
                    T = int(fin.readline())
                    N = 0
                    columns = []
                if 'ITEM: NUMBER OF ATOMS' in line:
                    N = int(fin.readline())
                if 'ITEM: BOX BOUNDS' in line:
                    Nbox = 3
                    columnsbox = ['lo','hi']
                    clnbox = len(columnsbox)
                    box = np.zeros((Nbox,clnbox),dtype='float64')

                    # Move the C handle to where the Python handle stopped.
                    fseek(ptr_r,int(fin.tell()),SEEK_SET)
                    # loop over x, y and z coordinate
                    for i in range(Nbox):
                        for j in range(clnbox):
                            if fscanf(ptr_r,"%le",&tmp) != 1:
                                raise ValueError("Malformed BOX BOUNDS section in '{:s}'".format(fname))
                            box[i,j] = tmp

                    # Hand the position back to the Python handle.
                    fin.seek(ftell(ptr_r))
                    qbox = pd.DataFrame(np.asarray(box),columns=columnsbox)
                if 'ITEM: ATOMS' in line:
                    columns = line.split()[2:]
                    cln = len(columns)
                    if not (N and cln):
                        # Plain return ends the generator; `raise StopIteration`
                        # inside a generator turns into RuntimeError (PEP 479).
                        return
                    data = np.zeros((N,cln),dtype='float64')

                    fseek(ptr_r,int(fin.tell()),SEEK_SET)
                    # loop over particles
                    for i in range(N):
                        for j in range(cln):
                            if fscanf(ptr_r,"%le",&tmp) != 1:
                                raise ValueError("Malformed or truncated ATOMS section (timestep {:d}) in '{:s}'".format(T, fname))
                            data[i,j] = tmp

                    fin.seek(ftell(ptr_r))
                    q = pd.DataFrame(np.asarray(data),columns=columns)
                    yield T,columns,q,qbox

                line = fin.readline()
        finally:
            fclose(ptr_r)

## Export largest cluster from pre-run trajectory as configuration file for new simulation

In [29]:
from ovito.io import *
from ovito.modifiers import *
from ovito.data import *
from ovito.pipeline import *
from ovito.vis import *
import PySide6.QtCore
import os.path
#del dataparticlesidentifiers
#del dataparticlescluster
#del dataparticlesmolecules
#del dataparticlespositions

# Run parameters: they select which pre-run trajectory file and frame are used.
dens=0.40   # density as written in the file name (box side below is sqrt(nA/dens))
eT=3.0      # value of the 'eT' field in the file name
real=202    # trailing integer field of the file name
TrajLogFilePath='/Users/ivan/Documents/SpinningClusters/Simulations/Test_2/Results_qA5_dp0.50_dens{:.3f}_eT{:.2f}/TrajLog_qA5_dp0.50_dens{:.2f}_eT{:.2f}_nA10000_rp0.15_ra0.15_ep-10.0_ea20.0_T1.0_{:d}.xyz'.format(dens,eT,dens,eT,real)
timestep=10000000     # frame (LAMMPS timestep) to export
GenerateDataFlag=0    # set to 1 by the cluster analysis once a suitable cluster is found


# Define string managers
r1 = re.compile('[_]')                                # field separator in the file name
r2 = re.compile(r'(([-+]?\d+\.\d+)|([-+]?\d+))')      # first float or integer inside a field

# Derive the file pattern (file name without 'TrajLog_' prefix and '.xyz'
# suffix) and the simulation root folder (everything before 'Results_...').
# Done in pure Python: no shell, so paths with spaces or shell metacharacters
# are handled and the code does not depend on `sed` being installed.
filepattern = os.path.basename(TrajLogFilePath)
filepattern = re.sub(r'^TrajLog_', '', filepattern)
filepattern = re.sub(r'\.xyz$', '', filepattern)
Test2Path = TrajLogFilePath.split('Results_', 1)[0].rstrip('/')
#ClustersListFilePath = '{:s}/Analysis/ClustersFiles/ClustersList_{:s}_ts{:d}.dat'.format(Test2Path, filepattern, timestep)
#ClustersListDf = pd.read_csv(ClustersListFilePath, sep=' ')
#AvgRg = ClustersListDf['Rg'].mean()

# Get nA and dens, compute box length
for s in r1.split(filepattern):
    if s.startswith('nA'):
        nA = int(r2.split(s)[1])
    if s.startswith('qA'):
        qA = int(r2.split(s)[1])
    if s.startswith('dens'):
        dens = float(r2.split(s)[1])
    # A field made only of digits is the trailing integer of the file name.
    # str.isdigit() is False for empty fields, so '__' in a name cannot crash.
    if s.isdigit():
        real = int(s)
TwoL=np.sqrt(nA/dens)   # side of the square box
Lxlo=-0.5*TwoL
Lxhi=0.5*TwoL
    
    
# Analyse cluster with Ovito
# Build the Ovito pipeline: keep only type-1 particles (types 2 and 3 are
# deleted) and group them into clusters.
pipeline = import_file(TrajLogFilePath, multiple_frames=True)

# Select type:
pipeline.modifiers.append(SelectTypeModifier(types={2, 3}))

# Delete selected:
pipeline.modifiers.append(DeleteSelectedModifier())

# Cluster analysis:
pipeline.modifiers.append(ClusterAnalysisModifier(
    cutoff=1.15,
    sort_by_size=True,
    unwrap_particles=True,  # needs to stay true, otherwise omega&AngMom per cluster will be wrong
    compute_com=False,
    compute_gyration=False,
    cluster_coloring=False))

# Look for the requested timestep and extract its largest cluster.
# All frames are scanned (no early exit), same as the config-writing cell.
for ThisFrame in range(pipeline.source.num_frames):
    data = pipeline.compute(frame=ThisFrame)
    if data.attributes['Timestep'] != timestep:
        continue
    print(timestep)

    # check that no particle went missing
    if data.particles.count != nA:
        print("Missing atoms (only {:d} present) in timestep {:d}, file\n {:s}".format(data.particles.count, data.attributes['Timestep'], TrajLogFilePath))
        break

    ClustersOvito = data.tables['clusters']
    # With sort_by_size=True cluster 1 is the largest; the code below relies
    # on the IDs being the plain sequence 1..n.
    assert (ClustersOvito.x[:] == np.arange(1, len(ClustersOvito.x[:]) + 1, 1)).all(), "ClusterIDs are weird... Add additional check in the ClustersRotation computation (AngMom and Omega) "
    ClustersDataDf = pd.DataFrame(ClustersOvito.xy(), columns=['ClusterID', 'Size'])

    if ClustersDataDf['Size'].max()>0.9*nA:
        # The largest cluster holds more than 90% of the particles: collect
        # the per-particle data and the molecule IDs belonging to cluster 1.
        dataparticlesidentifiers=np.array(data.particles.identifiers[:])
        dataparticlesmolecules=np.array(data.particles['Molecule Identifier'][:])
        dataparticlescluster=np.array(data.particles.cluster[:])
        dataparticlespositions=np.array(data.particles.positions[:])
        dataparticlesDf=[
            [pid, molid, cluid, pos]
            for pid, molid, cluid, pos in zip(dataparticlesidentifiers, dataparticlesmolecules, dataparticlescluster, dataparticlespositions)
        ]
        dataparticlesDf=pd.DataFrame(dataparticlesDf,columns=['id','MolID','ClusterID','position'])
        LargestClusterMolecules=dataparticlesDf[dataparticlesDf['ClusterID']==1].MolID.values
        GenerateDataFlag=1
    else:
        print("Probably not percolated")

10000000


In [30]:
# Only continue if the cluster analysis found a suitable largest cluster.
assert GenerateDataFlag==1, "No suitable cluster was extracted: run the cluster analysis cell first"
folder= '/Users/ivan/Documents/SpinningClusters/Simulations/Test_8/Input/Configurations/ConfigCluFrom_{:s}_ts{:d}/'.format(filepattern,timestep)
# exist_ok=True tolerates an already existing folder, while real failures
# (permissions, invalid path) are reported instead of being silently ignored.
os.makedirs(folder, exist_ok=True)


def unwrap_patch(xp,xc,Lxlo,Lxhi):
    """Return the periodic image of a patch coordinate that lies next to its centre.

    Parameters
    ----------
    xp : float
        Patch coordinate along one axis (possibly wrapped by the periodic box).
    xc : float
        Coordinate of the central atom along the same axis.
    Lxlo, Lxhi : float
        Lower and upper box bounds along that axis.

    Returns
    -------
    float
        ``xp`` itself when it is already within a distance of 1.0 from ``xc``;
        otherwise ``xp`` shifted by a whole number of box lengths so that it is.

    Raises
    ------
    ValueError
        If the box length is not positive, or if no periodic image of ``xp``
        lies within 1.0 of ``xc`` (repeated one-box shifts would never
        terminate in that case).
    """
    if np.fabs(xp-xc)<=1.0:
        return xp

    box_length = Lxhi-Lxlo
    if box_length<=0:
        raise ValueError("Box length must be positive, got {}".format(box_length))

    # Minimum-image shift: subtract the nearest whole number of box lengths
    # in one step instead of looping.
    xp = xp - box_length*np.round((xp-xc)/box_length)
    if np.fabs(xp-xc)>1.0:
        raise ValueError("No periodic image of patch at {} lies within 1.0 of centre {}".format(xp, xc))
    return xp
        
def wrap_atom(x,Lxlo,Lxhi):
    """Fold a coordinate back into the periodic interval [Lxlo, Lxhi].

    The coordinate is first raised by whole box lengths while it lies below
    ``Lxlo``, then lowered by whole box lengths while it lies above ``Lxhi``.
    Values already inside the interval (bounds included) are returned as they are.
    """
    period = Lxhi-Lxlo
    # Phase 1: bring up anything that sits below the lower bound.
    while x<Lxlo:
        x = x+period
    # Phase 2: bring down anything that sits above the upper bound.
    while x>Lxhi:
        x = x-period
    return x


# Write the largest cluster of the selected frame as a LAMMPS data file.
# Each molecule is written as: central atom (type 1), main patch (type 2) and
# qA-1 further patches (type 3), i.e. qA+1 atoms per molecule.
atom_line_fmt = " {:d} {:d} {:d} 0 {:.5f} {:.5f} 0 \n"  # atom-ID molecule-ID atom-type q x y z

for t, column_names, data, box in read_traj(TrajLogFilePath):
    if t!=timestep:
        continue
    print(t)
    Lxlo, Lxhi, Lylo, Lyhi = box['lo'].iloc[0], box['hi'].iloc[0], box['lo'].iloc[1], box['hi'].iloc[1]

    # Keep only atoms of molecules in the largest cluster. astype() returns a
    # new frame, so no column is assigned on a filtered slice.
    data=data[data['mol'].isin(LargestClusterMolecules)].astype({'mol': int, 'id': int, 'type': int})
    # Group once instead of rescanning the whole frame for every molecule.
    atoms_by_mol={mol: grp for mol, grp in data.groupby('mol')}

    configfile='{:s}Config_C1.dat'.format(folder)
    with open(configfile, 'w') as f:
        f.write("""LAMMPS Description 

{:d} atoms 
0 bonds 
0 angles 
0 dihedrals 
0 impropers 

3 atom types 
0 bond types 
0 angle types 
0 dihedral types 
0 improper types 

{:f} {:f} xlo xhi
{:f} {:f} ylo yhi 
-0.05 0.05 zlo zhi

Masses 

1 0.60000 
2 0.08000 
3 0.08000 

Atoms

""".format(  len(LargestClusterMolecules)*(qA+1), Lxlo, Lxhi, Lylo, Lyhi)  )
        inew=0      # running atom ID in the new file
        molnew=0    # running molecule ID in the new file
        for molold in LargestClusterMolecules:
            molnew+=1
            atomsdata=atoms_by_mol[molold]

            # central atom
            inew+=1
            thisatomdata=atomsdata[atomsdata['type']==1]
            xc, yc = thisatomdata['x'].iloc[0], thisatomdata['y'].iloc[0]
            f.write(atom_line_fmt.format(inew, molnew, thisatomdata['type'].iloc[0], xc, yc))

            # main patch
            inew+=1
            thisatomdata=atomsdata[atomsdata['type']==2]
            xp, yp = thisatomdata['x'].iloc[0], thisatomdata['y'].iloc[0]
            f.write(atom_line_fmt.format(inew, molnew, thisatomdata['type'].iloc[0], xp, yp))

            # other patches
            patchesdata=atomsdata[atomsdata['type']==3]
            if len(patchesdata)==qA-1:
                # All secondary patches are in the trajectory: copy them as they are.
                for i in range(len(patchesdata)):
                    inew+=1
                    f.write(atom_line_fmt.format(inew, molnew, patchesdata['type'].iloc[i], patchesdata['x'].iloc[i], patchesdata['y'].iloc[i]))
            else:
                # Secondary patches are not (all) available: rebuild them by
                # rotating the centre-to-main-patch vector in steps of 2*pi/qA.
                xp=unwrap_patch(xp,xc,Lxlo,Lxhi)
                yp=unwrap_patch(yp,yc,Lylo,Lyhi)
                rprime = [xp-xc, yp-yc]
                for j in range(1,qA):
                    inew+=1
                    typ=3
                    theta = 2*np.pi*j/qA
                    RotMatrix = np.array([[np.cos(theta),np.sin(theta)],[-np.sin(theta),np.cos(theta)]])
                    x,y = np.array([xc,yc]) + np.dot(RotMatrix,rprime)   # CM already removed
                    # The rotated patch may fall outside the box: fold it back in.
                    x,y = wrap_atom(x,Lxlo,Lxhi), wrap_atom(y,Lylo,Lyhi)
                    f.write(atom_line_fmt.format(inew, molnew, typ, x, y))
        # The number of written atoms must match the count declared in the header.
        assert inew==len(LargestClusterMolecules)*(qA+1)

10000000
