Data source: https://www.encodeproject.org/files/ENCFF780PGS/

In [77]:
import pandas as pd
import numpy as np
import numpy as np
from utils import *
import openmm as mm
import openmm.unit as u
from tqdm import tqdm
from sys import stdout
from mdtraj.reporters import HDF5Reporter
from openmm.app import PDBFile, PDBxFile, ForceField, Simulation, PDBReporter, PDBxReporter, DCDReporter, StateDataReporter, CharmmPsfFile,  DCDFile
import random
import pyvista as pv
import mdtraj as md
from initial_structures_defs import *

In [78]:
df = pd.read_csv("..\data\LHG0052H.e500.clusters.cis.BE3", 
                 sep="\t", 
                 header= None)
df.columns = ["chrom1", "start1", "end1", "chrom2","start2", "end2", "score"]

In [79]:
df.head()

Unnamed: 0,chrom1,start1,end1,chrom2,start2,end2,score
0,chr10,73579,74148,chr10,205258,205891,4
1,chr10,76245,76897,chr10,454409,455026,3
2,chr10,72811,73533,chr10,122997924,122998466,5
3,chr10,88651,89594,chr10,109368,110589,3
4,chr10,113948,114691,chr10,179975,180632,3


In [80]:
df_chr1 = df[df['chrom1'] == 'chr1']

In [81]:
df_chr1['middle1'] = (df_chr1['end1'] + df_chr1['start1'])/2
df_chr1['middle2'] = (df_chr1['end2'] + df_chr1['start2'])/2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chr1['middle1'] = (df_chr1['end1'] + df_chr1['start1'])/2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chr1['middle2'] = (df_chr1['end2'] + df_chr1['start2'])/2


Scaling for 10 connections

In [84]:
middle_points = df_chr1[['middle1', 'middle2']].reset_index(drop=True)
middle_points


Unnamed: 0,middle1,middle2
0,605183.5,942849.5
1,779228.5,825798.0
2,828688.0,869860.0
3,832697.5,858533.5
4,865574.5,905542.5
...,...,...
117707,248899548.0,248924561.5
117708,248909067.5,248922497.5
117709,248915281.5,248923600.5
117710,248915281.5,248924765.0


In [85]:
def scale_value(x, original_min, original_max, new_min, new_max):
    return new_min + ((x - original_min) * (new_max - new_min) / (original_max - original_min))


def scale_bead_chain(og_middle_points, nr_connections, new_min=1, new_max=200):
    trimmed_df = og_middle_points.head(nr_connections)
    original_min = np.min(trimmed_df)
    original_max = np.max(trimmed_df)
    df_scaled = middle_points.map(lambda x: scale_value(x, original_min, original_max, new_min, new_max))
    df_scaled = df_scaled.map(lambda x: int(x))
    
    return df_scaled


In [86]:
df_scaled = scale_bead_chain(middle_points, 10)

In [67]:
# 0. Generate some initial structure
N_beads=200
points = helisa(N_beads)
write_mmcif(points,'init_struct.cif')
generate_psf(N_beads,'LE_init_struct.psf')

# 1. Define System
pdb = PDBxFile('init_struct.cif')
forcefield = ForceField('forcefields/classic_sm_ff.xml')
system = forcefield.createSystem(pdb.topology, nonbondedCutoff=1*u.nanometer)
integrator = mm.LangevinIntegrator(310, 0.05, 100 * mm.unit.femtosecond)

# 2. Define the forcefield
# 2.1. Harmonic bond borce between succesive beads
bond_force = mm.HarmonicBondForce()
system.addForce(bond_force)
for i in range(system.getNumParticles() - 1):
    bond_force.addBond(i, i + 1, 0.1, 300000.0)

# Connecting selected beads
for i in range(len(df_scaled)):
    middle1, middle2 = df_scaled.iloc[i,:]
    bond_force.addBond(middle1, middle2, 0.001, 0.001)


#2.2. Harmonic angle force between successive beads so as to make chromatin rigid
angle_force = mm.HarmonicAngleForce()
system.addForce(angle_force)
for i in range(system.getNumParticles() - 2):
    angle_force.addAngle(i, i + 1, i + 2, np.pi, 0.001)
    
# 3. Minimize energy
simulation = Simulation(pdb.topology, system, integrator)
simulation.reporters.append(StateDataReporter(stdout, 10, step=True, totalEnergy=True, potentialEnergy=True, temperature=True))
simulation.reporters.append(DCDReporter('stochastic_LE.dcd', 10))
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy(tolerance=0.001)
state = simulation.context.getState(getPositions=True)
PDBxFile.writeFile(pdb.topology, state.getPositions(), open('minimized.cif', 'w')) # save minimized file

# 4. Run md simulation
simulation.context.setVelocitiesToTemperature(310, 0)
simulation.step(10000)
state = simulation.context.getState(getPositions=True)
PDBxFile.writeFile(pdb.topology, state.getPositions(), open('after_sim.cif', 'w')) # save minimized file
PDBFile.writeFile(pdb.topology, state.getPositions(), open('after_sim.pdb', 'w')) # save minimized file
df = DCDFile(open("after_sim.dcd", "wb"),pdb.topology, dt = 100 * mm.unit.femtosecond)
df.writeModel(state.getPositions(), pdb.topology.getUnitCellDimensions(), pdb.topology.getPeriodicBoxVectors())

#"Step","Potential Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)"
10,130.61795043945312,567.0350657068193,175.84225874090103
20,102.52273559570312,575.1601669210941,190.43623767788765
30,115.8750228881836,627.7189462129027,206.23341397846718
40,137.51690673828125,657.0874116225168,209.3466272467846
50,154.79017639160156,700.0909359753132,219.71392482988395
60,153.725830078125,739.9323547035456,236.19577640908244
70,162.05880737304688,736.2983855521306,231.37402487886095
80,163.00022888183594,720.141011044383,224.48453588286748
90,162.01417541503906,710.3190846964717,220.9243642236722
100,160.1739044189453,712.6232206374407,222.59423887216784
110,178.7578125,719.5294617339969,217.88904453504782
120,180.45460510253906,750.9606778621674,229.86971168166602
130,215.63351440429688,799.8216613046825,235.38252668594765
140,189.67828369140625,811.5297822877765,250.55793709559094
150,192.95144653320312,819.0439941138029,252.26675099570767
160,191.33941650390625,810.1944580376148,24

In [45]:
original_min = 604886
original_max = 983769
new_min = 1
new_max = 200


df_scaled = middle_points.map(lambda x: scale_value(x, original_min, original_max, new_min, new_max))

In [48]:
df_scaled


In [49]:
df_scaled

Unnamed: 0,middle1,middle2
0,1,178
1,92,117
2,118,140
3,120,134
4,137,158
5,137,177
6,140,146
7,139,152
8,139,185
9,140,199


Modeling

In [68]:
traj = md.load("after_sim.cif")

positions = traj.xyz

mesh = pv.PolyData(positions[0])

# Create PyVista plotter
plotter = pv.Plotter(notebook=True)

# Add mesh to the plotter
plotter.add_mesh(mesh, color="blue", point_size=5)

# Create lines between consecutive points
lines = pv.lines_from_points(positions[0])

# Add lines to the plotter
plotter.add_mesh(lines, color="red", line_width=2)

# Show the plotter using Trame's notebook backend
plotter.show(jupyter_backend='trame')

Widget(value='<iframe src="http://localhost:49797/index.html?ui=P_0x1d6846284c0_0&reconnect=auto" class="pyvis…