In [None]:
#read build.json
import utils as F
import json
# Load the build parameters that drive the whole mapping.
with open('../data/CG/build.json', 'r') as build_file:
    params = json.load(build_file)

build_degree = params["build_degree"]
build_number = params["build_number"]

# Atom count read from "num_atoms_for_smiles", wrapped in a one-element list
# because the mapping loop reads it as num_atom_for_bead[0].
num_atoms_for_smiles = params["num_atoms_for_smiles"]
num_atom_for_bead = [num_atoms_for_smiles]

# Labels for the mapping scheme (not read anywhere else in the visible cells).
pos_map_method = "backbone"
force_map_method = "sum"

print("num_atoms:", num_atom_for_bead)

# Input (all-atom) and output (coarse-grained) locations.
AAMD_dir = "../data/AA"
CGMD_dir = "../data/CG"

AA_dump_path = f"{AAMD_dir}/AA.lammpstrj"
AA_data_path = f"{AAMD_dir}/AA.data"
CG_dump_path = f"{CGMD_dir}/CG.lammpstrj"
CG_data_path = f"{CGMD_dir}/CG.data"

num_atoms, num_atom_types, box_size, mass_map = F.parse_aa_data(AA_data_path)

# Row selectors (within one bead's atoms) used when averaging bead positions.
index_for_backbone_A = params["index_for_backbone_A"]
index_for_backbone_B = params["index_for_backbone_B"]

# Two beads (A and B) per repeat unit.
num_beads = build_degree * build_number * 2
print("number_of_beads: ", num_beads)

#-------------------------------------------------------
# cg_map: bead index -> list of atom indices (used later as row indices into
# the per-frame atom table). Beads come in (A, B) pairs, one pair per repeat
# unit; the ids listed in params are shifted by -1 and by the running offset.
ids_in_bead_A = params["id_for_bead_A"]
ids_in_bead_B = params["id_for_bead_B"]
atoms_per_unit = num_atom_for_bead[0]

cg_map = {}
atom_id = 0
for bead_A in range(0, num_beads, 2):
    bead_B = bead_A + 1
    unit_in_system = bead_A // 2

    cg_map[bead_A] = [atom_id + i - 1 for i in ids_in_bead_A]
    cg_map[bead_B] = [atom_id + i - 1 for i in ids_in_bead_B]
    atom_id += atoms_per_unit

    # Last repeat unit of a chain: two extra atoms follow it, one assigned to
    # the chain's first bead and one to this unit's A bead.
    is_chain_end = unit_in_system % build_degree == build_degree - 1
    if is_chain_end:
        chain_first_bead = bead_A - 2 * build_degree + 2
        cg_map[chain_first_bead].append(atom_id)
        cg_map[bead_A].append(atom_id + 1)
        atom_id += 2

#-------------------------------------------------------
# Build the bond / angle / dihedral arrays for the A-B bead chains.
bonds_array, angles_array, dihedrals_array = (
    make_array(build_degree, build_number)
    for make_array in (
        F.generate_bonds_array_AB,
        F.generate_angles_array_AB,
        F.generate_dihedrals_array_AB,
    )
)
num_bonds, num_angles, num_dihedrals = (
    len(bonds_array),
    len(angles_array),
    len(dihedrals_array),
)

# Write the PSF structure file describing the CG topology.
psf_path = '../data/CG/structure.psf'
psf_content = F.generate_psf_file_AB(
    build_degree,
    build_number,
    params["mass_A"],
    params["mass_B"],
)
with open(psf_path, "w") as psf_file:
    psf_file.write(psf_content)



In [None]:
# Open the all-atom dump for reading and the CG dump for writing.
# Both handles intentionally outlive this cell: the trajectory-conversion cell
# reads from `input_dump`, writes to `output_dump`, and closes them.
# Note that opening with 'w' truncates any existing CG dump immediately.
input_dump = open(AA_dump_path, "r")
output_dump = open(CG_dump_path, 'w')

# Timestep bookkeeping from the last two frames of the all-atom dump.
last_two_timesteps, atom_attr = F.read_last_timesteps(AA_dump_path)
second_last_timestep, last_timestep = (
    int(last_two_timesteps[0]),
    int(last_two_timesteps[1]),
)
ts_interval = int(last_timestep - second_last_timestep)

# NOTE(review): this counts last_timestep / interval frames; if the dump also
# contains a frame at timestep 0 the final frame is not included — confirm.
num_ts = int(last_timestep / ts_interval)
print(num_ts)

In [None]:
import pickle
import numpy as np
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm
from multiprocessing import Manager

# Recompute the timestep bookkeeping so this cell does not rely on stale values.
last_two_timesteps, atom_attr = F.read_last_timesteps(AA_dump_path)
second_last_timestep, last_timestep = (
    int(last_two_timesteps[0]),
    int(last_two_timesteps[1]),
)
ts_interval = int(last_timestep - second_last_timestep)
# NOTE(review): yields last_timestep / interval frames; a dump that also holds
# a frame at timestep 0 would have one more — confirm the intended count.
num_ts = int(last_timestep / ts_interval)
print(num_ts)

# Whole all-atom trajectory, one list entry per text line
# (`input_dump` is the handle opened in the previous cell).
lines = input_dump.readlines()

# The atom type with the largest mass is reported as the backbone type.
backbone_atom_type = max(mass_map, key=lambda atom_type: mass_map[atom_type])
print("backbone_atom_type:", backbone_atom_type)

# Sizes: atoms per frame come from the first frame header, beads from the map.
num_atoms = int(lines[3])
num_beads = len(cg_map)

# Shared, manager-backed list with one (num_beads x num_attributes) array per
# frame; worker processes store their per-frame result into it.
manager = Manager()
frame_shape = (num_beads, len(atom_attr))
beads_info = manager.list([np.zeros(frame_shape) for _ in range(num_ts)])

# Attribute name -> column position in a dump atom line.
atom_attr_index = {name: column for column, name in enumerate(atom_attr)}

# Process a single timestep
def process_timestep(ts):
    """Coarse-grain frame ``ts`` of the all-atom dump held in the global ``lines``.

    Every frame is taken to span ``num_atoms + 9`` lines: 9 header lines
    followed by one line per atom. The atom rows are sorted by their ``id``
    column before ``cg_map`` is applied, because ``cg_map`` addresses atoms by
    row position and a dump is only usable that way when its rows are in id
    order (for an already-sorted dump the sort is a no-op).

    Per bead the following columns are filled (columns are looked up by name
    through ``atom_attr_index``):

    * ``id``   -- ``bead_id + 1``
    * ``mol``  -- value of the first atom mapped to the bead
    * ``type`` -- ``1`` for even ``bead_id``, ``2`` for odd
    * ``q``    -- ``0``
    * ``xu``/``yu``/``zu`` -- mean over the rows selected by
      ``index_for_backbone_A`` (even beads) or ``index_for_backbone_B`` (odd)
    * ``fx``/``fy``/``fz`` -- sum over all atoms of the bead
    * ``mass`` -- sum over all atoms of the bead

    Parameters
    ----------
    ts : int
        Zero-based frame index into ``lines``.

    Returns
    -------
    list of str
        The CG frame as dump lines: the first three header lines of the source
        frame, the bead count, source header lines 4-7, a rebuilt
        ``ITEM: ATOMS`` line, then one line per bead (first four columns as
        integers, the rest with six decimals).

    Side effects
    ------------
    Stores the frame's bead array in the shared ``beads_info[ts]``.
    """
    frame_len = num_atoms + 9
    ts_lines = lines[ts * frame_len:(ts + 1) * frame_len]

    # Header: keep the source header but substitute the bead count.
    result = []
    result.extend(ts_lines[:3])
    result.append(str(num_beads) + "\n")
    result.extend(ts_lines[4:8])
    result.append("ITEM: ATOMS " + " ".join(atom_attr) + "\n")

    # Column lookups are loop-invariant, so resolve them once.
    id_index = atom_attr_index["id"]
    mol_index = atom_attr_index["mol"]
    type_index = atom_attr_index["type"]
    q_index = atom_attr_index["q"]
    mass_index = atom_attr_index["mass"]
    position_indices = [atom_attr_index[name] for name in ("xu", "yu", "zu")]
    force_indices = [atom_attr_index[name] for name in ("fx", "fy", "fz")]

    atoms_info = np.array([line.split() for line in ts_lines[9:9 + num_atoms]], dtype=float)
    # cg_map holds row positions; put the rows in atom-id order so those
    # positions refer to the same atoms in every frame.
    atoms_info = atoms_info[np.argsort(atoms_info[:, id_index], kind="stable")]

    bead_info = np.zeros((num_beads, len(atom_attr)))

    for bead_id in range(num_beads):
        atom_ids = cg_map[bead_id]
        atom_ids_info = atoms_info[atom_ids, :]

        bead_info[bead_id, id_index] = bead_id + 1
        bead_info[bead_id, mol_index] = atom_ids_info[0, mol_index]
        bead_info[bead_id, type_index] = bead_id % 2 + 1
        bead_info[bead_id, q_index] = 0

        # Position: mean over the backbone rows of this bead's atoms.
        backbone_rows = index_for_backbone_A if bead_id % 2 == 0 else index_for_backbone_B
        for attr_index in position_indices:
            bead_info[bead_id, attr_index] = np.mean(atom_ids_info[backbone_rows, attr_index])

        # Force and mass: plain sums over every atom in the bead.
        for attr_index in force_indices:
            bead_info[bead_id, attr_index] = np.sum(atom_ids_info[:, attr_index])
        bead_info[bead_id, mass_index] = np.sum(atom_ids_info[:, mass_index])

        bead_info_str = " ".join(
            [f"{int(bead_info[bead_id, i])}" for i in range(4)] +
            [f"{bead_info[bead_id, j]:.6f}" for j in range(4, len(atom_attr))]
        )
        result.append(bead_info_str + "\n")

    beads_info[ts] = bead_info
    return result

# Process all timesteps in parallel, then write the frames in order.
# The dump handles are closed in `finally` so they are released even when a
# worker raises or the write fails part-way.
try:
    with ProcessPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_timestep, range(num_ts)), total=num_ts))

    # Write results to the output dump file
    for result in results:
        output_dump.writelines(result)
finally:
    # Close input and output dump files
    input_dump.close()
    output_dump.close()

# Copy the manager-backed list into a plain numpy array, then stop the
# manager's server process, which is no longer needed once the data is local.
beads_info = np.array(beads_info)
manager.shutdown()


print("done")
print(atom_attr)


In [10]:
# #This script is used to generate the ".data" file from the ".lammpstrj" file for CG simulation
# Read the CG dump; `lines` is kept because the data-file cell reads the box
# bounds and the first frame's bead lines from it by absolute position.
with open(CG_dump_path, "r") as dump:
    lines = dump.readlines()

# All offsets below are relative to the start of the file, so insist that the
# file really begins with a frame header instead of silently keeping stale
# values of num_atoms / num_chains.
if not lines or not lines[0].startswith("ITEM: TIMESTEP"):
    raise ValueError(f"{CG_dump_path} does not start with an 'ITEM: TIMESTEP' header")

# Line 3 of a frame holds the entry count (beads, in the CG dump).
num_atoms = int(lines[3])
# Second column of the last bead line of the first frame.
num_chains = int((lines[8 + num_atoms]).split()[1])


In [11]:
# Bead masses: last attribute column of the first two beads in frame 0.
bead_mass_A = beads_info[0, 0, -1]
bead_mass_B = beads_info[0, 1, -1]


def _topology_record(serial, type_id, members):
    """Format one Bonds/Angles/Dihedrals line; member indices are shifted by +1."""
    shifted = [int(member + 1) for member in members]
    return ' '.join([str(serial), str(type_id)] + [str(value) for value in shifted]) + "\n"


#modify here for polymers
with open(CG_data_path, "w") as output_data:
    # Title and counts section.
    output_data.write("LAMMPS data file via do_cg_map.py\n\n")
    count_lines = (
        str(num_beads) + " atoms\n",
        "2 atom types\n",
        str(num_bonds) + " bonds\n",
        "2 bond types\n",
        str(num_angles) + " angles\n",
        "2 angle types\n",
        str(num_dihedrals) + " dihedrals\n",
        "3 dihedral types\n",
        "\n",
    )
    for count_line in count_lines:
        output_data.write(count_line)

    # Box bounds: reuse dump lines 5-7 (minus their last character) with labels.
    for row, label in ((5, " xlo xhi\n"), (6, " ylo yhi\n"), (7, " zlo zhi\n\n")):
        output_data.write(lines[row][:-1] + label)

    output_data.write("Masses\n")
    output_data.write("\n")
    output_data.write(f"1 {bead_mass_A}\n")
    output_data.write(f"2 {bead_mass_B}\n")

    # Atoms: first seven columns of every bead line of the first frame.
    output_data.write("\nAtoms # full\n\n")
    for bead_line in lines[9:9 + num_atoms]:
        output_data.write(' '.join(bead_line.split()[:7]) + "\n")

    # Bonds: type 2 when the two indices differ by an even amount, else type 1.
    output_data.write("\nBonds\n\n")
    for serial, bond in enumerate(bonds_array, start=1):
        same_parity = (int(bond[0]) - int(bond[1])) % 2 == 0
        bond_type = 2 if same_parity else 1
        output_data.write(_topology_record(serial, bond_type, (bond[0], bond[1])))

    # Angles: type 1 when the three indices sum to an even number, else type 2.
    output_data.write("\nAngles\n\n")
    for serial, angle in enumerate(angles_array, start=1):
        index_sum = np.sum([int(angle[0]), int(angle[1]), int(angle[2])])
        angle_type = 1 if index_sum % 2 == 0 else 2
        output_data.write(_topology_record(serial, angle_type, (angle[0], angle[1], angle[2])))

    # Dihedrals: every entry is written as type 1.
    output_data.write("\nDihedrals\n\n")
    for serial, dihedral in enumerate(dihedrals_array, start=1):
        dihedral_type = 1
        output_data.write(
            _topology_record(
                serial,
                dihedral_type,
                (dihedral[0], dihedral[1], dihedral[2], dihedral[3]),
            )
        )


In [13]:
import mdtraj as md
# Box geometry from the all-atom data file bounds.
box_size = np.array(box_size)
box_bounds = box_size.reshape(3, 2)          # one (lo, hi) pair per axis
dim = box_bounds[:, 1] - box_bounds[:, 0]
cutoff = (box_size[1] - box_size[0]) / 2

# Trajectory dimensions: frames x beads.
num_ts = len(beads_info)
num_beads = len(beads_info[0])

#extract the data
# Columns are addressed by fixed position here.
# NOTE(review): column 0 is read as `mol_ids`; if the attribute order is
# "id mol type q ..." this is the id column — confirm against atom_attr.
mol_ids = beads_info[:, :, 0]
types = beads_info[:, :, 2]
positions = beads_info[:, :, 4:7]
forces = beads_info[:, :, 7:10]

# Per-frame box lengths from the CG trajectory.
# NOTE(review): the factor 10 presumably converts mdtraj's nm to Angstrom — confirm.
CG_traj = md.load(CG_dump_path, top=psf_path, stride=1)
box = CG_traj.unitcell_lengths * 10

# Persist the arrays.
_npy_outputs = (
    ('../data/CG/pos.npy', positions),
    ('../data/CG/force.npy', forces),
    ('../data/CG/atom_types.npy', types),
    ('../data/CG/box.npy', box),
)
for _npy_path, _npy_array in _npy_outputs:
    np.save(_npy_path, _npy_array)

