Data source: https://www.encodeproject.org/files/ENCFF780PGS/

In [1]:
import pandas as pd
import numpy as np
import numpy as np
from utils import *
import openmm as mm
import openmm.unit as u
from tqdm import tqdm
from sys import stdout
from mdtraj.reporters import HDF5Reporter
from openmm.app import PDBFile, PDBxFile, ForceField, Simulation, PDBReporter, PDBxReporter, DCDReporter, StateDataReporter, CharmmPsfFile,  DCDFile
import random
import pyvista as pv
import mdtraj as md
from initial_structures_defs import *

In [8]:
df = pd.read_csv("..\data\ENCFF780PGS.bedpe", 
                 sep="\t", 
                 header= None)
df.columns = ["chrom1", "start1", "end1", "chrom2","start2", "end2", "score"]

In [9]:
df.head()

Unnamed: 0,chrom1,start1,end1,chrom2,start2,end2,score
0,chr10,73579,74148,chr10,205258,205891,4
1,chr10,76245,76897,chr10,454409,455026,3
2,chr10,72811,73533,chr10,122997924,122998466,5
3,chr10,88651,89594,chr10,109368,110589,3
4,chr10,113948,114691,chr10,179975,180632,3


In [10]:
df_chr1 = df[df['chrom1'] == 'chr1']

In [11]:
df_chr1['middle1'] = (df_chr1['end1'] + df_chr1['start1'])/2
df_chr1['middle2'] = (df_chr1['end2'] + df_chr1['start2'])/2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chr1['middle1'] = (df_chr1['end1'] + df_chr1['start1'])/2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chr1['middle2'] = (df_chr1['end2'] + df_chr1['start2'])/2


Scaling for 10 connections

In [12]:
middle_points = df_chr1[['middle1', 'middle2']].reset_index(drop=True)
middle_points


Unnamed: 0,middle1,middle2
0,605183.5,942849.5
1,779228.5,825798.0
2,828688.0,869860.0
3,832697.5,858533.5
4,865574.5,905542.5
...,...,...
117707,248899548.0,248924561.5
117708,248909067.5,248922497.5
117709,248915281.5,248923600.5
117710,248915281.5,248924765.0


In [13]:
def scale_value(x, original_min, original_max, new_min, new_max):
    return new_min + ((x - original_min) * (new_max - new_min) / (original_max - original_min))


def scale_bead_chain(og_middle_points, nr_connections, new_min=1, new_max=200):
    trimmed_df = og_middle_points.head(nr_connections)
    original_min = np.min(trimmed_df)
    original_max = np.max(trimmed_df)
    df_scaled = trimmed_df.map(lambda x: scale_value(x, original_min, original_max, new_min, new_max))
    df_scaled = df_scaled.map(lambda x: int(x))
    
    return df_scaled


In [14]:
df_scaled = scale_bead_chain(middle_points, 10)

In [15]:
df_scaled

Unnamed: 0,middle1,middle2
0,1,178
1,92,117
2,118,140
3,120,134
4,138,159
5,137,177
6,140,146
7,140,152
8,139,186
9,140,200


In [16]:
# 0. Generate some initial structure
N_beads=200
points = helisa(N_beads)
write_mmcif(points,'init_struct.cif')
generate_psf(N_beads,'LE_init_struct.psf')

# 1. Define System
pdb = PDBxFile('init_struct.cif')
forcefield = ForceField('forcefields/classic_sm_ff.xml')
system = forcefield.createSystem(pdb.topology, nonbondedCutoff=1*u.nanometer)
integrator = mm.LangevinIntegrator(310, 0.05, 100 * mm.unit.femtosecond)

# 2. Define the forcefield
# 2.1. Harmonic bond borce between succesive beads
bond_force = mm.HarmonicBondForce()
system.addForce(bond_force)
for i in range(system.getNumParticles() - 1):
    bond_force.addBond(i, i + 1, 0.1, 300000.0)

# Connecting selected beads
for i in range(len(df_scaled)):
    middle1, middle2 = df_scaled.iloc[i,:]
    bond_force.addBond(middle1-1, middle2-1, 0.001, 0.001)


#2.2. Harmonic angle force between successive beads so as to make chromatin rigid
angle_force = mm.HarmonicAngleForce()
system.addForce(angle_force)
for i in range(system.getNumParticles() - 2):
    angle_force.addAngle(i, i + 1, i + 2, np.pi, 0.001)
    
# 3. Minimize energy
simulation = Simulation(pdb.topology, system, integrator)
simulation.reporters.append(StateDataReporter(stdout, 10, step=True, totalEnergy=True, potentialEnergy=True, temperature=True))
simulation.reporters.append(DCDReporter('stochastic_LE.dcd', 10))
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy(tolerance=0.001)
state = simulation.context.getState(getPositions=True)
PDBxFile.writeFile(pdb.topology, state.getPositions(), open('minimized.cif', 'w')) # save minimized file

# 4. Run md simulation
simulation.context.setVelocitiesToTemperature(310, 0)
simulation.step(10000)
state = simulation.context.getState(getPositions=True)
PDBxFile.writeFile(pdb.topology, state.getPositions(), open('after_sim.cif', 'w')) # save minimized file
PDBFile.writeFile(pdb.topology, state.getPositions(), open('after_sim.pdb', 'w')) # save minimized file
df = DCDFile(open("after_sim.dcd", "wb"),pdb.topology, dt = 100 * mm.unit.femtosecond)
df.writeModel(state.getPositions(), pdb.topology.getUnitCellDimensions(), pdb.topology.getPeriodicBoxVectors())

#"Step","Potential Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)"
10,117.9885025024414,609.0196300977841,197.84747107262032
20,112.80096435546875,631.3728998508304,208.94428120968763
30,108.62446594238281,636.7140223048627,212.7791444461771
40,119.1517333984375,661.3708298616111,218.47225353690095
50,149.4442138671875,640.5616970732808,197.88226568795073
60,157.43914794921875,667.2900134329684,205.43036620536915
70,144.11688232421875,676.6776399780065,214.58069188060145
80,155.4456024169922,717.5222159996629,226.47329323292496
90,145.4854736328125,750.6479461574927,243.83355361481497
100,188.32542419433594,767.2665578685701,233.26838719633434
110,195.15150451660156,781.0373233705759,236.06655684992788
120,191.35897827148438,780.9563167244196,237.56201146619006
130,216.58917236328125,788.7463914677501,230.5349956328022
140,194.79074096679688,817.9755189940333,251.0951453270157
150,203.87435913085938,830.5767958536744,252.51248903071343
160,229.632568359375,872.744016449898

In [17]:
original_min = 604886
original_max = 983769
new_min = 1
new_max = 200


df_scaled = middle_points.map(lambda x: scale_value(x, original_min, original_max, new_min, new_max))

In [18]:
df_scaled


Unnamed: 0,middle1,middle2
0,1.156255,178.507929
1,92.569581,117.029191
2,118.547100,140.171792
3,120.653002,134.222796
4,137.920927,158.913244
...,...,...
117707,130412.334734,130425.472527
117708,130417.334643,130424.388456
117709,130420.598410,130424.967783
117710,130420.598410,130425.579411


In [19]:
df_scaled

Unnamed: 0,middle1,middle2
0,1.156255,178.507929
1,92.569581,117.029191
2,118.547100,140.171792
3,120.653002,134.222796
4,137.920927,158.913244
...,...,...
117707,130412.334734,130425.472527
117708,130417.334643,130424.388456
117709,130420.598410,130424.967783
117710,130420.598410,130425.579411


Modeling

In [22]:
traj = md.load("after_sim.cif")

positions = traj.xyz

mesh = pv.PolyData(positions[0])

# Create PyVista plotter
plotter = pv.Plotter(notebook=True)

# Add mesh to the plotter
plotter.add_mesh(mesh, color="blue", point_size=5)

# Create lines between consecutive points
lines = pv.lines_from_points(positions[0])

# Add lines to the plotter
plotter.add_mesh(lines, color="red", line_width=2)

# Show the plotter using Trame's notebook backend
plotter.show(jupyter_backend='trame')

Widget(value='<iframe src="http://localhost:58118/index.html?ui=P_0x18faad076d0_2&reconnect=auto" class="pyvis…