In [None]:
# import required packages
# NOTE(review): the %cd target below is an absolute path on one specific machine.
# The relative "data/..." paths used later in this notebook are resolved against
# it, so point it at your own MDSubSampler checkout before running.
%cd /Users/namir_oues/Documents/PhD/MDSubSampler
import mdss.protein_data as prot
import mdss.geometrical_property as gp
import mdss.sampler as s
import mdss.dissimilarity as d
import tempfile
from IPython.display import Image, display
import os

In [None]:
# set paths and all required arguments
# trajectory/topology paths are relative to the current working directory
trajectory_file = "data/input.xtc" 
topology_file = "data/input.gro" 
# atom selection strings: one global selection and one residue-range selection
atom_selection_global = "protein"
atom_selection_local = "resid 120:160"
# forwarded to the uniform sampler as its strata_number argument
strata_number = 200
# sample size as a percentage of the trajectory frames
# (the sampling cell computes size * n_frames / 100)
size = 10
# dissimilarity measure handed to both samplers
dissimilarity_measure = d.Bhattacharyya

# set a temporary directory to save all output files
# (tempfile.mkdtemp() creates the directory; it is not removed automatically)
temp_dir = tempfile.mkdtemp()

In [None]:
# Build the ProteinData object from the trajectory and topology paths;
# no extra configuration parameters are supplied.
p_data = prot.ProteinData(
    trajectory_file,
    topology_file,
    config_parameters=None,
)

In [None]:
# create one RMSD property object per atom selection (both with fit=True):
#   - p_property_global uses atom_selection_global
#   - p_property_local  uses atom_selection_local
# The local property previously reused the global selection, which made the
# two objects identical and left atom_selection_local unused.
p_property_global = gp.RMSD(p_data, atom_selection_global, fit=True)
p_property_local = gp.RMSD(p_data, atom_selection_local, fit=True)

In [None]:
# calculate reference property for full trajectory
# The calculation is run on both property objects (global and local) before
# they are used by the samplers created in the next cell.
p_property_global.calculate_property()
p_property_local.calculate_property()

In [None]:
# Create the two sampler objects. Both receive the same protein data, output
# folder, file prefix and dissimilarity measure; they differ in the property
# they sample and in their sampler-specific arguments.
# NOTE(review): both samplers share file_prefix 'adv_sc1' and output folder —
# confirm that their output files cannot overwrite each other.

# uniform sampler on the global property, configured with strata_number
sampler_uniform = s.UniformSampler(
    protein_data=p_data,
    protein_property=p_property_global,
    strata_number=strata_number,
    dissimilarity_measure=dissimilarity_measure,
    output_folder=temp_dir,
    file_prefix='adv_sc1',
)

# weighted sampler on the local property, with a fixed seed and no explicit weights
sampler_weighted = s.WeightedSampler(
    protein_data=p_data,
    protein_property=p_property_local,
    weights_vector=None,
    seed_number=1999,
    dissimilarity_measure=dissimilarity_measure,
    output_folder=temp_dir,
    file_prefix='adv_sc1',
)

In [None]:
# Translate the requested sample size (a percentage of the trajectory) into a
# number of frames, then draw a sample of that size with each sampler.
n_sampled_frames = round(int(size) * p_data.n_frames / 100)
sampled_property_global = sampler_uniform.sample(n_sampled_frames)
sampled_property_local = sampler_weighted.sample(n_sampled_frames)

In [None]:
# check the output files generated in the temporary directory
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# printed listing reproducible between runs.
print(sorted(os.listdir(temp_dir)))

In [None]:
# display every distribution plot (.png file) found in the temporary directory
# Entries are sorted because os.listdir() returns them in arbitrary order;
# sorting keeps the displayed sequence reproducible between runs.
for fname in sorted(os.listdir(temp_dir)):
    # indexing the splitext result avoids rebinding `_`, which IPython uses
    # for the last cell output
    if os.path.splitext(fname)[1] != '.png':
        continue
    fpath = os.path.join(temp_dir, fname)
    print(fname)
    display(Image(filename=fpath))

In [None]:
# print the contents of every property table (.dat file) found in the
# temporary directory
# Entries are sorted because os.listdir() returns them in arbitrary order;
# sorting keeps the printed sequence reproducible between runs.
for fname in sorted(os.listdir(temp_dir)):
    # indexing the splitext result avoids rebinding `_`, which IPython uses
    # for the last cell output
    if os.path.splitext(fname)[1] != '.dat':
        continue
    fpath = os.path.join(temp_dir, fname)
    # read the whole file first so it is closed before anything is printed
    with open(fpath, 'r') as file:
        contents = file.read()
    print(fname)
    print('-----------------------')
    print(contents)
    print('-----------------------')