In [51]:
# import required packages
# NOTE(review): the %cd target below is an absolute path on one user's machine.
# The relative data/ paths used later in this notebook are resolved against
# this working directory, so point it at your local MDSubSampler checkout
# before running.
%cd /Users/namir_oues/Documents/PhD/MDSubSampler
import mdss.protein_data as prot
import mdss.geometrical_property as gp
import mdss.sampler as s
import mdss.dissimilarity as d
import mdss.utilities as u
import tempfile
import os
from IPython.display import Image, display

/Users/namir_oues/Documents/PhD/MDSubSampler


In [52]:
# --- input files (relative paths, resolved against the working directory) ---
trajectory_file = "data/input.xtc"
topology_file = "data/input.gro"

# --- atom selection strings: one global, one local ---
atom_selection_global = "protein"
atom_selection_local = "resid 120:160"

# --- sampling settings ---
strata_number = 200  # handed to the uniform sampler
size = 10  # sample size, used below as a percentage of the trajectory frames
dissimilarity_measure = d.Bhattacharyya

# --- scratch directory that receives every output file ---
temp_dir = tempfile.mkdtemp()

In [53]:
# build the protein data object from the trajectory/topology pair
p_data = prot.ProteinData(
    trajectory_file,
    topology_file,
    config_parameters=None,
)

In [54]:
# build one RMSD property object per atom selection (both with fit=True)
p_property_global = gp.RMSD(
    p_data,
    atom_selection_global,
    fit=True,
)
p_property_local = gp.RMSD(
    p_data,
    atom_selection_local,
    fit=True,
)

In [55]:
# compute the reference property over the full trajectory, global first then local
for reference_property in (p_property_global, p_property_local):
    reference_property.calculate_property()

In [56]:
# keyword arguments that both samplers receive unchanged
shared_sampler_kwargs = {
    "protein_data": p_data,
    "output_folder": temp_dir,
    "file_prefix": 'adv_sc1',
    "dissimilarity_measure": dissimilarity_measure,
}

# uniform sampler operating on the global-selection property
sampler_uniform = s.UniformSampler(
    protein_property=p_property_global,
    strata_number=strata_number,
    **shared_sampler_kwargs,
)

# weighted sampler operating on the local-selection property; no weights are
# supplied (weights_vector=None) and the seed is fixed for repeatability
sampler_weighted = s.WeightedSampler(
    protein_property=p_property_local,
    weights_vector=None,
    seed_number=1999,
    **shared_sampler_kwargs,
)

Weights not provided. They will be estimated from discretized property vector.


In [57]:
# turn the percentage `size` into a frame count once, then draw both samples
n_sampled_frames = round(int(size) * p_data.n_frames / 100)
sampled_property_global = sampler_uniform.sample(n_sampled_frames)
sampled_property_local = sampler_weighted.sample(n_sampled_frames)

In [58]:
# write the output files for each selection into the temporary directory;
# each job is (file prefix, full-trajectory property, sampled property)
output_jobs = (
    ('adv_sc1_global', p_property_global, sampled_property_global),
    ('adv_sc1_local', p_property_local, sampled_property_local),
)
for prefix, full_property, sampled_property in output_jobs:
    u.write_output_files(
        output_folder=temp_dir,
        file_prefix=prefix,
        p_prop=full_property,
        s_prop=sampled_property,
        p_data=p_data,
        p=size,
        machine_learning=None,
    )

In [59]:
# check the output files generated in the temporary directory
# (os.listdir returns entries in arbitrary order, so sort them to get a
# listing that is stable across runs and machines)
print(sorted(os.listdir(temp_dir)))

['adv_sc1_global_10_RMSD.npy', 'adv_sc1_global_10_RMSD.xtc', 'adv_sc1_local_10_RMSD.dat', 'adv_sc1_local_10_RMSD.xtc', 'adv_sc1_global_10_RMSD.dat', 'adv_sc1_local_10_RMSD.npy']


In [62]:
# display every distribution plot (.png) found in the temporary directory
# NOTE(review): the recorded directory listing in this notebook contains no
# .png files, so this cell may have nothing to show — confirm whether a
# plotting step is expected to run before it.
png_files = sorted(
    fname
    for fname in os.listdir(temp_dir)
    if os.path.splitext(fname)[1] == '.png'
)

# make the "nothing to display" case explicit instead of silently doing nothing
if not png_files:
    print(f"No .png plots found in {temp_dir}")

# sorted order keeps the displayed sequence stable between runs
for fname in png_files:
    print(fname)
    display(Image(filename=os.path.join(temp_dir, fname)))

In [63]:
# print every property table (.dat) found in the temporary directory
separator = '-----------------------'

# os.listdir order is arbitrary; sort so tables always print in the same order
for fname in sorted(os.listdir(temp_dir)):
    if os.path.splitext(fname)[1] != '.dat':
        continue
    # read the whole table, then close the file before printing
    with open(os.path.join(temp_dir, fname), 'r') as file:
        contents = file.read()
    print(fname)
    print(separator)
    print(contents)
    print(separator)

adv_sc1_local_10_RMSD.dat
-----------------------
307 2.0151080866421576
248 2.3868307257523176
412 1.9688256506479869
918 2.6083669450571216
345 2.4636256813173834
732 2.210431223292762
325 2.157693993927635
468 2.0427716460073846
854 2.632512478614645
643 2.3276311783629153
215 2.1705472593469213
453 2.1096623850370073
124 2.191769656522033
429 1.9988661299141806
256 2.2876449523384554
892 2.6934580683605414
599 1.8445309781849653
983 2.8291343752403235
72 2.1553105793606133
642 2.1270888212182606
398 2.101592589012668
288 2.2781128807400233
475 2.302180349617692
950 2.4643095236439745
716 2.350684488722074
315 2.096072420869091
359 2.0063140492246783
245 1.945689941708607
557 1.9459264393770446
678 2.4838418039396983
473 2.3359774763235785
749 2.4849653731091976
735 2.07080612557819
304 2.099213536827684
351 1.8653270464959066
127 2.2442320976797983
382 1.8286458673439172
568 1.7944523508044405
723 2.476520107474809
769 2.3426698675482176
746 2.3833594176399724
412 1.968825650647986