In [2]:
#this notebook prepares blocks for CVTrajectory in order to reduce memory usage

In [3]:
import bussilab
import scipy
from scipy.optimize import minimize
import cudamat as cm
import numpy as np
import matplotlib.pyplot as plt
import re
import concurrent.futures
# Thermal energy used as temperature argument for WHAM and to convert weights to energies.
kBT=0.6 #kBT in kcal/mol
# Seed NumPy's legacy global random generator so any np.random draws are repeatable.
np.random.seed(1995)
import os
# Working directory at the moment this cell is executed.
curr_dir=os.getcwd()

In [4]:
def concatenate_simulation_data(files):
    """Load several ``.npy`` files and join them along the first axis.

    Parameters
    ----------
    files : iterable of str
        Paths handed to ``np.load``; the data appear in the result in
        the same order as the paths.

    Returns
    -------
    numpy.ndarray
        A newly allocated array with the contents of all files
        concatenated along axis 0.

    Raises
    ------
    ValueError
        If ``files`` is empty (previously this surfaced as an
        ``UnboundLocalError`` on the return statement).
    """
    # Load every file first and concatenate once: concatenating inside the
    # loop re-copies the growing result for each additional file.
    chunks=[np.load(t) for t in files]
    if not chunks:
        raise ValueError("concatenate_simulation_data: no input files given")
    return np.concatenate(chunks)

def read_TLs(files):
    """Parse whitespace-separated numeric text files into one array.

    Every non-blank line of every file becomes one row of floats; the
    rows are collected in file order and returned as ``np.array(rows)``.
    """
    rows=[]
    for path in files:
        with open(path, "r") as handle:
            for raw_line in handle:
                fields=raw_line.split()
                # blank (or whitespace-only) lines carry no data
                if not fields:
                    continue
                rows.append(np.array(list(map(float,fields))))
    return np.array(rows)

In [5]:
def calculate_TL_bias(Sequence,key,prune,skip,trajGAGA,FFprefactorsGAGA,trajUUCG,FFprefactorsUUCG):
    """Build the bias matrix for one tetraloop system.

    The result is ``matmul(traj, prefactors.T)`` plus ``kBT*log(w)``, where
    ``w`` are the weights read from four ``weights.rep0`` files under
    ``./data_loaded/<Sequence>/``. Uses the module-level ``kBT``.

    Parameters
    ----------
    Sequence : str
        ``'GAGA'`` or ``'UUCG'``; selects which trajectory/prefactor pair is
        used and which data sub-directory is read.
    key : str
        ``'reference'`` uses the prefactors as given; ``'proposed'`` first
        subtracts the second prefactor row (index 1) from every row.
    prune : bool
        If true, only every ``skip``-th row of the log-weight term is kept.
    skip : int
        Stride applied when ``prune`` is true.
    trajGAGA, trajUUCG : numpy.ndarray
        2-D trajectories; only the one matching ``Sequence`` is used.
        # assumes rows are ordered like the concatenated weight files -- TODO confirm
    FFprefactorsGAGA, FFprefactorsUUCG : sequence of 1-D arrays or 2-D array
        One row of prefactors per force field; only the one matching
        ``Sequence`` is used.
        # presumably two rows, since the log-weight term has two columns -- verify

    Returns
    -------
    numpy.ndarray
        The bias matrix described above.

    Raises
    ------
    ValueError
        If ``Sequence`` or ``key`` is not one of the supported values
        (previously this ended in an ``UnboundLocalError`` after all files
        had already been read).
    """
    systems={'GAGA':(trajGAGA,FFprefactorsGAGA),
             'UUCG':(trajUUCG,FFprefactorsUUCG)}
    # Validate before touching the disk so a typo fails fast and clearly.
    if Sequence not in systems:
        raise ValueError("Sequence must be 'GAGA' or 'UUCG', got %r" % (Sequence,))
    if key not in ('reference','proposed'):
        raise ValueError("key must be 'reference' or 'proposed', got %r" % (key,))

    traj,prefactors=systems[Sequence]
    prefactors=np.asarray(prefactors)

    directories=["nh-n_0.5_nh-o_0.5_oh-bo-nbO_-0.5_dumping", 
                 "nh-n_1.0_nh-o_0.0_oh-bo-nbO_-0.5_dumping_bias-from-half-half",
                 "nh-n_0.5_nh-o_0.5_oh-bo-nbO_-0.5_dumping_bias-from-one-zero",
                 "nh-n_1.0_nh-o_0.0_oh-bo-nbO_-0.5_dumping"
                 ]

    # One float per line in every weights file.
    collection_weights=[]
    for d in directories:
        with open("./data_loaded/%s/%s/weights.rep0" %(Sequence,d) ,"r") as fp:
            collection_weights.append(np.array([float(line) for line in fp]))

    # Column 0 joins files 0 and 1, column 1 joins files 2 and 3.
    MetadPot=np.c_[np.concatenate((collection_weights[0],collection_weights[1])),
                   np.concatenate((collection_weights[2],collection_weights[3]))]
    MetadPot=kBT*np.log(MetadPot)

    if prune:
        MetadPot=MetadPot[::skip,:]

    if key=='proposed':
        # express every force field relative to the second one (row index 1)
        prefactors=prefactors-prefactors[1]

    return np.matmul(traj,prefactors.T)+MetadPot

In [7]:
#get the names of the different simulations and their force-field prefactors
trajectoryNamesGACC=[]
trajectoryNamesGAGA=[]
trajectoryNamesUUCG=[]
FFprefactorsGACC=[]
FFprefactorsGAGA=[]
FFprefactorsUUCG=[]

# coefficients.dat: first column is the trajectory base name, the remaining
# columns are the prefactors of that simulation.
with open("./data_loaded/coefficients.dat","r") as fp:
    for line in fp:
        # skip comment lines and blank lines (a blank line used to raise
        # IndexError on l[0]; the tetraloop loop below already ignored them)
        if line[0]=='#' or not line.strip():
            continue
        l=line.split()
        file=l[0]+".skip.npy"
        trajectoryNamesGACC.append(file)
        FFprefactorsGACC.append(np.array(l[1:],dtype='float'))

# coefficients_TLs.dat: same layout; a line is assigned to a tetraloop when
# it contains the substring 'gaga' or 'uucg'.
with open("./data_loaded/coefficients_TLs.dat","r") as fp:
    for line in fp:
        if line[0]=='#' or not line.strip():
            continue
        l=line.split()
        if 'gaga' in line:
            FFprefactorsGAGA.append(np.array(l[1:],dtype='float'))
        if 'uucg' in line:
            FFprefactorsUUCG.append(np.array(l[1:],dtype='float'))

# Directory names of the two force fields simulated for each tetraloop (TL).
firstFF='nh-n_0.5_nh-o_0.5_oh-bo-nbO_-0.5_dumping/'
secondFF='nh-n_1.0_nh-o_0.0_oh-bo-nbO_-0.5_dumping/'
for _ff in (firstFF,secondFF):
    trajectoryNamesGAGA.append('./data_loaded/GAGA/'+_ff+'gHBfix-parameters_state.rep0')
    trajectoryNamesUUCG.append('./data_loaded/UUCG/'+_ff+'gHBfix-parameters_state.rep0')


def _load_GACC(observable=None):
    """Concatenate the GACC files of all simulations.

    With ``observable=None`` the names in ``trajectoryNamesGACC`` are used
    unchanged; otherwise "HBfix-energy" in each name is replaced by
    ``observable`` first.
    """
    if observable is None:
        names=trajectoryNamesGACC
    else:
        names=[re.sub("HBfix-energy",observable,x) for x in trajectoryNamesGACC]
    return concatenate_simulation_data(['./data_loaded/GACC/' + x for x in names])


def _cos_deg(angle):
    """Cosine of an angle array given in degrees."""
    return np.cos(angle*np.pi/180)


trajGACC=_load_GACC()
backbone1=_load_GACC("jcop_backbone-1")
backbone2=_load_GACC("jcop_backbone-2")
sugar=_load_GACC("jcop_sugar")
noe=_load_GACC("noe")
unoe=_load_GACC("unoe")

#apply forward models
# angles -> a*cos^2 - b*cos (+ c); loaded noe/unoe values -> value**-6
sugar=9.67*_cos_deg(sugar)**2 - 2.03*_cos_deg(sugar)
backbone1=9.7*_cos_deg(backbone1)**2 - 1.8*_cos_deg(backbone1)
backbone2=15.3*_cos_deg(backbone2)**2 - 6.1*_cos_deg(backbone2)+1.6
noe,unoe=noe**-6,unoe**-6
print('There are 2 simulations per TL.')

# The TL files hold the trajectory in the first 12 columns and the
# population in the last column.
trajGAGA=read_TLs(list(trajectoryNamesGAGA))
trajUUCG=read_TLs(list(trajectoryNamesUUCG))
populationGAGA,trajGAGA=trajGAGA[:,-1].reshape(-1,1),trajGAGA[:,:12]
populationUUCG,trajUUCG=trajUUCG[:,-1].reshape(-1,1),trajUUCG[:,:12]

#to obtain the same parameters as published set prune=False
prune=False
# stride used for thinning; 0 is only a placeholder when pruning is off
skip=5000 if prune else 0
if prune:
    print("Pruning data, skip")
    # keep every skip-th frame of every per-frame array
    (trajGACC,backbone1,backbone2,sugar,noe,unoe,
     populationGAGA,populationUUCG,trajGAGA,trajUUCG)=[
        _arr[::skip,:] for _arr in (trajGACC,backbone1,backbone2,sugar,noe,unoe,
                                    populationGAGA,populationUUCG,trajGAGA,trajUUCG)]

print("Check shape of arrays:")
for _arr in (trajGACC,backbone1,backbone2,sugar,noe,unoe,
             populationGAGA,populationUUCG,trajGAGA,trajUUCG):
    print(_arr.shape)

def _wham_weights(bias):
    """Run bussilab's WHAM on a bias matrix and return exp(logW)."""
    return np.exp(bussilab.wham.wham(bias,threshold=1e-20,T=kBT).logW)


# Both tetraloop biases are built from the same set of inputs.
_TL_inputs=(prune,skip,trajGAGA,FFprefactorsGAGA,trajUUCG,FFprefactorsUUCG)
biasGAGA=calculate_TL_bias('GAGA','proposed',*_TL_inputs)
biasUUCG=calculate_TL_bias('UUCG','proposed',*_TL_inputs)

# NOTE(review): the GACC prefactors are taken relative to FFprefactorsGAGA[1],
# not to a GACC row -- presumably the reference force field has the same
# prefactors for every system; confirm.
weightsGACC=_wham_weights(np.matmul(trajGACC,np.transpose(np.asarray(FFprefactorsGACC)-FFprefactorsGAGA[1])))
weightsGAGA=_wham_weights(biasGAGA)
weightsUUCG=_wham_weights(biasUUCG)

nblocks=5


def _split_and_save(data,path):
    """Split ``data`` into ``nblocks`` pieces along axis 0, save and return them.

    NOTE(review): np.save receives the list of blocks as a whole, which
    presumably requires equally sized blocks -- confirm before changing
    nblocks or the number of frames.
    """
    blocks=np.array_split(data,nblocks)
    np.save(path,blocks)
    return blocks


#split the trajectories into nblocks and store every set of blocks on disk
weightsGACC_blocks=_split_and_save(weightsGACC,'./data_loaded/weightsGACC_blocks.npy')
trajGACC_blocks=_split_and_save(trajGACC,'./data_loaded/trajGACC_blocks.npy')
noe_blocks=_split_and_save(noe,'./data_loaded/noe_blocks.npy')
unoe_blocks=_split_and_save(unoe,'./data_loaded/unoe_blocks.npy')
backbone1_blocks=_split_and_save(backbone1,'./data_loaded/backbone1_blocks.npy')
backbone2_blocks=_split_and_save(backbone2,'./data_loaded/backbone2_blocks.npy')
sugar_blocks=_split_and_save(sugar,'./data_loaded/sugar_blocks.npy')

weightsGAGA_blocks=_split_and_save(weightsGAGA,'./data_loaded/weightsGAGA_blocks.npy')
trajGAGA_blocks=_split_and_save(trajGAGA,'./data_loaded/trajGAGA_blocks.npy')
populationGAGA_blocks=_split_and_save(populationGAGA,'./data_loaded/populationGAGA_blocks.npy')
biasGAGA_blocks=_split_and_save(biasGAGA,'./data_loaded/biasGAGA_blocks.npy')

weightsUUCG_blocks=_split_and_save(weightsUUCG,'./data_loaded/weightsUUCG_blocks.npy')
trajUUCG_blocks=_split_and_save(trajUUCG,'./data_loaded/trajUUCG_blocks.npy')
populationUUCG_blocks=_split_and_save(populationUUCG,'./data_loaded/populationUUCG_blocks.npy')
biasUUCG_blocks=_split_and_save(biasUUCG,'./data_loaded/biasUUCG_blocks.npy')

There are 2 simulations per TL.
Check shape of arrays:
(1400000, 12)
(1400000, 8)
(1400000, 9)
(1400000, 12)
(1400000, 20)
(1400000, 285)
(1000000, 1)
(1000000, 1)
(1000000, 12)
(1000000, 12)
