In [None]:
import sys
import numpy as np
import pandas as pd
import simtk.openmm
import os
import glob
import shutil
import time
import MDAnalysis as mda
import math
pd.set_option("display.precision", 10)

# Root directory of the CA_SBM_3SPN2C_OPENMM checkout.
# Set the CA_SBM_3SPN_OPENMM_PATH environment variable to point at a different
# checkout; the previous hard-coded location is kept as the default.
ca_sbm_3spn_openmm_path = os.environ.get('CA_SBM_3SPN_OPENMM_PATH',
                                         '/Users/administrator/Documents/Projects/CA_SBM_3SPN2C_OPENMM')
# Put the checkout first on sys.path so the openSMOG3SPN2 imports that follow can be resolved from it.
sys.path.insert(0, ca_sbm_3spn_openmm_path)

import openSMOG3SPN2.open3SPN2.ff3SPN2 as ff3SPN2
import openSMOG3SPN2.calphaSMOG.ffCalpha as ffCalpha
import openSMOG3SPN2.openFiber as openFiber
import openSMOG3SPN2.rigid

# ---- global parameters -------------------------------------------------------
n_nucl_each_fiber = 12     # number of nucleosomes in each chromatin fiber
n_fibers = 2               # number of chromatin fibers
scale_factor = 2.5         # scale factor applied to all the SBM related potentials
apply_rigid_body = False   # whether rigid bodies are set up (section 2.3)
compare_with_lammps = True # selects which protein exclusion list is loaded
run_sim = False            # whether minimization and dynamics are run (section 2.4)

# ---- input / output locations ------------------------------------------------
# Name fragments shared by several of the paths below.
_single_fiber_tag = 'chromatin-%dmer' % n_nucl_each_fiber
_multi_fibers_tag = 'chromatin-%dx%dmers' % (n_fibers, n_nucl_each_fiber)
_single_fiber_data_dir = '/'.join([ca_sbm_3spn_openmm_path, 'data', _single_fiber_tag])

ffCalpha_xml_path = '/'.join([ca_sbm_3spn_openmm_path, 'openSMOG3SPN2', 'calphaSMOG', 'ffCalpha.xml'])
single_fiber_dcd_path = '/'.join([_single_fiber_data_dir, _single_fiber_tag + '-snapshots', 'relaxed1.dcd'])
# group_rigid.txt file with atom index starts from 1 (lammps format)
single_fiber_group_rigid_txt_path = '/'.join([_single_fiber_data_dir, _single_fiber_tag + '-rigid-group', 'group_rigid.txt'])
# the main output directory for a single chromatin fiber
single_fiber_main_output_dir = '/'.join([ca_sbm_3spn_openmm_path, 'output-files', _single_fiber_tag])
# the main output directory for multiple chromatin fibers
multi_fibers_main_output_dir = '/'.join([ca_sbm_3spn_openmm_path, 'output-files', _multi_fibers_tag])
# smog output directory for single chromatin fiber
single_fiber_smog_output_dir = single_fiber_main_output_dir + '/smog'
openmm_files_dir = multi_fibers_main_output_dir + '/openmm-files'

platform_name = 'CPU' # 'Reference', 'CPU', 'CUDA', 'OpenCL'
sim_output_dir = '/'.join([openmm_files_dir, 'sim-test-' + platform_name])

# Input directories: later cells read the single-fiber CSV and smog files from
# these locations, so only report when one is missing (nothing is created here).
for required_dir in (single_fiber_main_output_dir, single_fiber_smog_output_dir):
    if not os.path.exists(required_dir):
        print('%s does not exist!' % required_dir)

# Output directories: create them if needed. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of a separate os.path.exists test.
for output_dir in ('%s/cg-fibers' % multi_fibers_main_output_dir, sim_output_dir):
    os.makedirs(output_dir, exist_ok=True)

# 1 Build multiple fiber system from single fiber

## 1.1 Load the structure of single chromatin fiber

In [None]:
# Read the two single-fiber tables (one with unique chainID, one without)
# from the cg-fiber folder of the single-fiber output directory.
single_cg_fiber_dir = '%s/cg-fiber' % single_fiber_main_output_dir
single_cg_fiber_unique_chainID = pd.read_csv('%s/cg_fiber_unique_chainID.csv' % single_cg_fiber_dir)
single_cg_fiber = pd.read_csv('%s/cg_fiber.csv' % single_cg_fiber_dir)

# number of rows (CG atoms) in one fiber; used later to offset per-fiber indices
n_cg_atoms_each_fiber = len(single_cg_fiber_unique_chainID.index)

## 1.2 Build the structure for multiple chromatin fibers

In [None]:
# Assemble the multi-fiber tables from shifted copies of the single fiber.
# Two tables are kept in parallel:
#   * cg_fibers_unique_chainID - with unique chainID and resSeq
#   * cg_fibers                - without unique chainID or resSeq; written out as
#                                a pdb file that is later loaded by openmm
def _translated_copy(fiber_df, offset_xyz):
    """Return a copy of fiber_df whose 'x', 'y', 'z' columns are shifted by offset_xyz."""
    shifted = fiber_df.copy()
    for axis, offset in zip(('x', 'y', 'z'), offset_xyz):
        shifted[axis] += offset
    return shifted

# displacement between consecutive fibers
# (presumably in the same length unit as the x/y/z columns - TODO confirm)
delta_x, delta_y, delta_z = -180, 100, 0

cg_fibers_unique_chainID = single_cg_fiber_unique_chainID.copy()
cg_fibers = single_cg_fiber.copy()
for i in range(1, n_fibers):
    # fiber i is the single fiber displaced by i times the base displacement
    fiber_offset = (i*delta_x, i*delta_y, i*delta_z)
    cg_fiber_i_unique_chainID = _translated_copy(single_cg_fiber_unique_chainID, fiber_offset)
    cg_fibers_unique_chainID = openFiber.combine_molecules(cg_fibers_unique_chainID, cg_fiber_i_unique_chainID, add_resSeq=False)
    cg_fiber_i = _translated_copy(single_cg_fiber, fiber_offset)
    cg_fibers = openFiber.combine_molecules(cg_fibers, cg_fiber_i, add_serial=False, add_resSeq=False)

# move center to (0, 0, 0)
cg_fibers = openFiber.move_complex_to_center(cg_fibers)
cg_fibers_unique_chainID = openFiber.move_complex_to_center(cg_fibers_unique_chainID)

cg_fibers_unique_chainID = openFiber.change_unique_chainID(cg_fibers_unique_chainID)

# renumber the rows of both tables as 0..N-1
cg_fibers_unique_chainID.index = list(range(cg_fibers_unique_chainID.shape[0]))
cg_fibers.index = list(range(cg_fibers.shape[0]))

# total number of CG atoms in the multi-fiber system
n_cg_atoms = cg_fibers.shape[0]

# replace NaN with ''
cg_fibers_unique_chainID = cg_fibers_unique_chainID.fillna('')
cg_fibers = cg_fibers.fillna('')

# write the pdb file (from the table without unique chainID) and the csv file
# (from the table with unique chainID) into the cg-fibers output folder
cg_fibers_pdb_path = '%s/cg-fibers/cg_fibers.pdb' % multi_fibers_main_output_dir
ffCalpha.writePDB(cg_fibers, cg_fibers_pdb_path)
cg_fibers_unique_chainID.to_csv('%s/cg-fibers/cg_fibers_unique_chainID.csv' % multi_fibers_main_output_dir, index=False)

# 2 Set up OpenMM simulations

## 2.1 Set up the system, protein and dna objects

In [None]:
# work inside the cg-fibers output folder from here on
os.chdir('%s/cg-fibers' % multi_fibers_main_output_dir)

# topology of the multi-fiber system, read from the pdb file written above
pdb = simtk.openmm.app.PDBFile(cg_fibers_pdb_path)
top = pdb.getTopology()

# get position from dcd file
# start from single fiber coordinate
single_cg_fiber_pdb_path = '%s/cg-fiber/cg_fiber.pdb' % single_fiber_main_output_dir
single_fiber_coord = openFiber.load_coord_from_dcd(single_cg_fiber_pdb_path, single_fiber_dcd_path)

# extend single fiber coordinate to multiple fibers
# NOTE(review): the code that extends single_fiber_coord to all fibers was missing,
# so fibers_coord_openmm (used when writing the xyz file, creating rigid bodies and
# setting the context positions) was never defined and a fresh kernel failed with
# NameError. Until the DCD-based extension is restored, take the coordinates of the
# multi-fiber structure from the pdb file; they match the topology `top`
# atom-for-atom. These are NOT the relaxed DCD coordinates - confirm before
# comparing energies against other codes.
fibers_coord_openmm = pdb.getPositions(asNumpy=True)
print('fibers_coord_openmm is taken from %s (relaxed dcd coordinates are not applied)' % cg_fibers_pdb_path)

# build the OpenMM system from the C-alpha and 3SPN2 force field definitions
forcefield = simtk.openmm.app.ForceField(ffCalpha_xml_path, ff3SPN2.xml)
s = forcefield.createSystem(top)

In [None]:
# ---- DNA object ----------------------------------------------------------------
# The DNA is created with compute_topology=False, so bonds, angles, stackings and
# dihedrals are not taken from this constructor call. They are instead loaded from
# the single chromatin fiber csv files and added for every fiber below.
# (Per the original author's note: letting open3SPN2 build the topology treats all
# bases as one long DNA, which is convenient but may lead to boundary effects.)
dna = ff3SPN2.DNA.fromCoarsePandasDataFrame(pd_df=cg_fibers_unique_chainID, dna_type='B_curved', compute_topology=False, parse_config=True)

# single-fiber DNA topology tables
single_fiber_dna_bonds = pd.read_csv('%s/cg-fiber/dna_bonds.csv' % single_fiber_main_output_dir)
single_fiber_dna_angles = pd.read_csv('%s/cg-fiber/dna_angles.csv' % single_fiber_main_output_dir)
single_fiber_dna_stackings = pd.read_csv('%s/cg-fiber/dna_stackings.csv' % single_fiber_main_output_dir)
single_fiber_dna_dihedrals = pd.read_csv('%s/cg-fiber/dna_dihedrals.csv' % single_fiber_main_output_dir)
single_fiber_dna_topo_dict = {
    'bond': single_fiber_dna_bonds,
    'angle': single_fiber_dna_angles,
    'stacking': single_fiber_dna_stackings,
    'dihedral': single_fiber_dna_dihedrals,
}
# add the single-fiber topology to the dna object for all n_fibers fibers
openFiber.add_topo_to_fibers_from_single_fiber_dna(dna, single_fiber_dna_topo_dict, n_fibers, n_cg_atoms_each_fiber)

# ---- protein object ------------------------------------------------------------
# The protein sequence of one fiber is the first line of protein_seq.txt; the
# multi-fiber sequence is that string repeated once per fiber.
single_fiber_protein_seq_path = '%s/cg-fiber/protein_seq.txt' % single_fiber_main_output_dir
with open(single_fiber_protein_seq_path, 'r') as seq_file:
    first_seq_line = seq_file.readlines()[0]
single_fiber_protein_seq = first_seq_line.rstrip()
multi_fibers_protein_seq = n_fibers*single_fiber_protein_seq

protein = ffCalpha.Protein.fromCoarsePandasDataFrame(pd_df=cg_fibers_unique_chainID, sequence=multi_fibers_protein_seq)

# neither object uses periodic boundary conditions
dna.periodic = False
protein.periodic = False


In [None]:
# Dump the multi-fiber coordinates to an xyz file in the cg-fibers output folder,
# so that the system can be visualized with vmd.
xyz_file = '/'.join([multi_fibers_main_output_dir, 'cg-fibers', 'fibers_coord_openmm.xyz'])
openFiber.write_openmm_coord_xyz(fibers_coord_openmm, cg_fibers, xyz_file)

In [None]:
# create rigid identity list for the fiber
if apply_rigid_body:
    # The rigid identity list for multiple fibers has not been written yet. Fail here
    # with a clear message instead of leaving fibers_rigid_identity undefined, which
    # would only surface later as a NameError when the forces are added.
    raise NotImplementedError('apply_rigid_body=True is not supported yet: '
                              'the rigid identity list for multiple fibers is not implemented')
else:
    # no rigid bodies: every CG atom gets the identity None
    fibers_rigid_identity = [None]*n_cg_atoms

# get exclusions list
single_fiber_dna_exclusions_list = openFiber.load_exclusions_list('%s/cg-fiber/dna_exclusions.dat' % single_fiber_main_output_dir)

# pick the protein exclusion file that matches the comparison mode, then load it once
if compare_with_lammps:
    # if compare with lammps, then openmm needs to compute electrostatic and MJ potential for protein native pairs
    print('Compare with lammps, so we need to compute electrostatic and MJ potential for protein native pairs')
    single_fiber_protein_exclusions_list_file = '%s/cg-fiber/protein_exclusions_compare_with_lammps.dat' % single_fiber_main_output_dir
else:
    single_fiber_protein_exclusions_list_file = '%s/cg-fiber/protein_exclusions.dat' % single_fiber_main_output_dir
single_fiber_protein_exclusions_list = openFiber.load_exclusions_list(single_fiber_protein_exclusions_list_file)

# since there are exclusions between W-C paired basepairs, we cannot simply generalize
# exclusions from single fiber DNA exclusions; build them from the multi-fiber dna object
fibers_dna_exclusions_list = ff3SPN2.buildDNANonBondedExclusionsList(dna)
# protein exclusions are extended from the single-fiber list to all fibers
fibers_protein_exclusions_list = openFiber.extend_exclusions(single_fiber_protein_exclusions_list, n_fibers, n_cg_atoms_each_fiber)

print('total number of exclusions between DNA atoms is %d' % len(fibers_dna_exclusions_list))
print('total number of exclusions between protein atoms is %d' % len(fibers_protein_exclusions_list))


## 2.2 Set up forces for histones and dna

In [None]:
# dictionary that add_protein_dna_forces receives together with the system
forces = {}

# For each smog term: locate the single-fiber data file in the smog output
# directory, load it, and extend it from one fiber to n_fibers fibers.

# bonds
single_fiber_smog_bonds_file_path = '%s/bonds.dat' % single_fiber_smog_output_dir
single_fiber_smog_bonds_data = openFiber.load_smog_bonds(single_fiber_smog_bonds_file_path)
fibers_smog_bonds_data = openFiber.extend_single_fiber_to_multi_fibers_bonds(single_fiber_smog_bonds_data, n_fibers, n_cg_atoms_each_fiber)

# angles
single_fiber_smog_angles_file_path = '%s/angles.dat' % single_fiber_smog_output_dir
single_fiber_smog_angles_data = openFiber.load_smog_angles(single_fiber_smog_angles_file_path)
fibers_smog_angles_data = openFiber.extend_single_fiber_to_multi_fibers_angles(single_fiber_smog_angles_data, n_fibers, n_cg_atoms_each_fiber)

# dihedrals (IDR removed file)
single_fiber_smog_dihedrals_file_path = '%s/dihedrals_IDR_removed.dat' % single_fiber_smog_output_dir
single_fiber_smog_dihedrals_data = openFiber.load_smog_dihedrals(single_fiber_smog_dihedrals_file_path)
fibers_smog_dihedrals_data = openFiber.extend_single_fiber_to_multi_fibers_dihedrals(single_fiber_smog_dihedrals_data, n_fibers, n_cg_atoms_each_fiber)

# exclusions (IDR removed file); loaded and extended, but not part of fibers_smog_data
single_fiber_smog_exclusions_file_path = '%s/exclusions_IDR_removed.dat' % single_fiber_smog_output_dir
single_fiber_smog_exclusions_data = openFiber.load_smog_exclusions(single_fiber_smog_exclusions_file_path)
fibers_smog_exclusions_data = openFiber.extend_single_fiber_to_multi_fibers_exclusions(single_fiber_smog_exclusions_data, n_fibers, n_cg_atoms_each_fiber)

# pairs (IDR removed file)
single_fiber_smog_pairs_file_path = '%s/pairs_IDR_removed.dat' % single_fiber_smog_output_dir
single_fiber_smog_pairs_data = openFiber.load_smog_pairs(single_fiber_smog_pairs_file_path)
fibers_smog_pairs_data = openFiber.extend_single_fiber_to_multi_fibers_pairs(single_fiber_smog_pairs_data, n_fibers, n_cg_atoms_each_fiber)

# multi-fiber smog terms handed to the force builder
fibers_smog_data = {
    'bonds': fibers_smog_bonds_data,
    'angles': fibers_smog_angles_data,
    'dihedrals': fibers_smog_dihedrals_data,
    'pairs': fibers_smog_pairs_data,
}

# add dna, protein interactions
openFiber.add_protein_dna_forces(s, forces, dna, protein, fibers_smog_data, fibers_dna_exclusions_list, fibers_protein_exclusions_list, fibers_rigid_identity, scale_factor)


## 2.3 Set up rigid body

In [None]:
if apply_rigid_body:
    # one (initially empty) member list per rigid body; there are
    # n_nucl_each_fiber*n_fibers of them in total
    rigid_body_list = [[] for _ in range(n_nucl_each_fiber*n_fibers)]

    # fibers_rigid_identity[i] is the rigid body index of atom i, or None when
    # atom i does not belong to any rigid body
    for atom_index, body_index in enumerate(fibers_rigid_identity):
        if body_index is not None:
            rigid_body_list[body_index].append(atom_index)

    # call the rigid module through the package imported at the top of the notebook
    # (import openSMOG3SPN2.rigid) rather than relying on openFiber re-exporting it
    openSMOG3SPN2.rigid.createRigidBodies(s, fibers_coord_openmm, rigid_body_list)

## 2.4 Run the simulation

In [None]:
# Langevin dynamics settings
temperature = 300*simtk.openmm.unit.kelvin
friction_coeff = 1/simtk.openmm.unit.picosecond
time_step = 10*simtk.openmm.unit.femtoseconds

integrator = simtk.openmm.LangevinIntegrator(temperature, friction_coeff, time_step)
platform = simtk.openmm.Platform.getPlatformByName(platform_name)

# platform properties are only passed for CUDA, where double precision is requested
simulation_args = [top, s, integrator, platform]
if platform_name == 'CUDA':
    properties = {'Precision':'double'}
    simulation_args.append(properties)
simulation = simtk.openmm.app.Simulation(*simulation_args)

simulation.context.setPositions(fibers_coord_openmm)

# total potential energy of the initial configuration, reported in kcal/mol
energy_unit = simtk.openmm.unit.kilocalories_per_mole
state = simulation.context.getState(getEnergy=True)
energy = state.getPotentialEnergy().value_in_unit(energy_unit)
print("The overall energy is %.6f %s" % (energy, energy_unit.get_symbol()))

# potential energy of each force group listed in forces_name_group.csv
# (used to double check SBM pair, nonbonded, and electrostatic interactions)
df_forces_name_group = pd.read_csv('%s/forces_name_group.csv' % single_fiber_main_output_dir)
for _, force_row in df_forces_name_group.iterrows():
    group = int(force_row['group'])
    force_name = force_row['name']
    state = simulation.context.getState(getEnergy=True, groups={group})
    energy = state.getPotentialEnergy().value_in_unit(energy_unit)
    print('Group: %d, force name: %s, energy = %.6f %s' % (group, force_name, energy, energy_unit.get_symbol()))

In [None]:
if run_sim:
    # ---- energy minimization, timed ------------------------------------------
    minimization_start = time.time()
    simulation.minimizeEnergy()
    delta_time = time.time() - minimization_start
    print('energy minimization takes %.2f seconds' % delta_time)

    # total potential energy after minimization, in kcal/mol
    energy_unit = simtk.openmm.unit.kilocalories_per_mole
    state = simulation.context.getState(getEnergy=True)
    energy = state.getPotentialEnergy().value_in_unit(energy_unit)
    print('energy minimized')
    print("The overall energy is %.6f %s" % (energy, energy_unit.get_symbol()))

    # per force group potential energy after minimization
    for _, force_row in df_forces_name_group.iterrows():
        group = int(force_row['group'])
        force_name = force_row['name']
        state = simulation.context.getState(getEnergy=True, groups={group})
        energy = state.getPotentialEnergy().value_in_unit(energy_unit)
        print('Group: %d, force name: %s, energy = %.6f %s' % (group, force_name, energy, energy_unit.get_symbol()))

    # ---- dynamics --------------------------------------------------------------
    # draw initial velocities at the simulation temperature
    simulation.context.setVelocitiesToTemperature(temperature)

    # reporters: trajectory to output.dcd and state data to stdout, both every 1000 steps
    dcd_reporter = simtk.openmm.app.DCDReporter('%s/output.dcd' % sim_output_dir, 1000)
    energy_reporter = simtk.openmm.app.StateDataReporter(sys.stdout, 1000, step=True, time=True,
                                                         potentialEnergy=True, kineticEnergy=True,
                                                         totalEnergy=True, temperature=True, speed=True)
    simulation.reporters.append(dcd_reporter)
    simulation.reporters.append(energy_reporter)

    # run and time the production steps
    n_steps = 10000
    run_start = time.time()
    simulation.step(n_steps)
    delta_time = time.time() - run_start
    print('%d steps takes %.2f seconds' % (n_steps, delta_time))
