# ont_rdb_explorer

## Import packages
Also set the path to ont_rdb and the name of your snakemake profile for processing purposes.

In [1]:
import informant_class

# Adjust this to the directory storing the ont_rdb package.
ont_rdb_path = "/home/cfrankston/Projects/ont_rdb/ont_rdb"

# Set the snakemake profile you wish to use:
snakemake_profile = 'mamba'

from IPython.display import display, clear_output
import ipywidgets as widgets
import os
import importlib
import sys

## Select and import ontology module from drop-down menu.


In [2]:

selected_module_name = None
imported_module = None

def list_files_in_folder(folder_path):
    """Lists files in the given folder path."""
    files = [f for f in os.listdir(folder_path) if (os.path.isfile(os.path.join(folder_path, f)) and f.endswith("_ontology.py"))]
    return files

ontologies_folder = ont_rdb_path + '/ontologies'
file_list = list_files_in_folder(ontologies_folder)
dropdown_menu = widgets.Dropdown(options=file_list, description='Files:', disabled=False)
display(dropdown_menu)

button = widgets.Button(description="Import Selected Ontology Module", layout=widgets.Layout(width='auto'))
display(button)

output = widgets.Output()

@button.on_click
def button_on_click(b):
    global selected_module_name
    global imported_module
    with output:
        clear_output()
        selected_file = dropdown_menu.value
        module_name = selected_file[:-3]  # Remove the '.py' extension
        import_path = f'ontologies.{module_name}'
        
        try:
            # Dynamically import the selected module
            imported_module = importlib.import_module(import_path)
            # Optionally, add the imported module to sys.modules
            sys.modules[module_name] = imported_module
            print(f"Successfully imported {module_name}.")
            selected_module_name = module_name
        except Exception as e:
            print(f"Failed to import {module_name}: {e}.")

display(output)



Dropdown(description='Files:', options=('hic_January_24_2024_ontology.py',), value='hic_January_24_2024_ontolo…

Button(description='Import Selected Ontology Module', layout=Layout(width='auto'), style=ButtonStyle())

Output()

### Import informant classes from ontology module and construct or access its associated digraph dataframe.
If necessary, modify the ``command`` to use appropriate snakemake configurations.

For example:
* ``use-conda: true``
* ``conda-frontend: mamba``
* ``cores: 1``

In [3]:
import pandas as pd
import subprocess
import snakemake
command = f"snakemake --profile {snakemake_profile} ontology_dataframes/{selected_module_name}_dataframe.pkl"
result = subprocess.run(command, capture_output=True, text=True, shell=True)
# Check if the command was successful
if result.returncode == 0:
    # Print the standard output of the command
    print(f"{selected_module_name} dataframe is constructed.", result.stdout)
else:
    # Print the standard error if the command failed
    print("Command failed with error:", result.stderr)

ontology_dataframe = pd.read_pickle(ont_rdb_path + '/ontology_dataframes/' + selected_module_name + '_dataframe.pkl')

selected_module = importlib.import_module(selected_module_name)
for name in dir(selected_module):
    if not name.startswith('_'):  # Skip internal names
        globals()[name] = getattr(selected_module, name)


hic_January_24_2024_ontology dataframe is constructed. 


## Explore your ontology and create your database.
Construct and save informants and informant dataframes to organize  objects in the context of your ontology. 

In [4]:
ontology_dataframe

Unnamed: 0,informant_subclass_name,informant_subclass,direct_parent_indices,direct_child_indices,is_sink,source_depth,sink_depth,to_nearest_sink
0,Informant,<class 'informant_class.Informant'>,[],"[1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 36]",0,0,1,"[3, 4, 11]"
1,Directory_Informant,<class 'informant_class.Directory_Informant'>,[0],"[2, 7, 22]",0,1,1,[22]
2,File_Informant,<class 'informant_class.File_Informant'>,[1],[23],0,2,3,[23]
3,Informant_Dataframe,<class 'informant_class.Informant_Dataframe'>,[0],[],1,1,0,[]
4,DataBase,<class 'ontologies.hic_January_24_2024_ontolog...,[0],[],1,1,0,[]
5,Algorithm,<class 'ontologies.hic_January_24_2024_ontolog...,[0],[17],0,1,4,[17]
6,Bio_Source,<class 'ontologies.hic_January_24_2024_ontolog...,[0],"[19, 20]",0,1,1,"[19, 20]"
7,File_Set,<class 'ontologies.hic_January_24_2024_ontolog...,[1],[24],0,2,4,[24]
8,Institution,<class 'ontologies.hic_January_24_2024_ontolog...,[0],"[13, 14]",0,1,1,"[13, 14]"
9,Article,<class 'ontologies.hic_January_24_2024_ontolog...,[0],[16],0,1,1,[16]


In [5]:
# Initialize an empty informant dataframe object
my_informant_dataframe = informant_class.Informant_Dataframe()


In [6]:

# Initialize a default BedPe_File informant
bedpe_inf = BedPe_File()
                           
# Use the object's dictionary to see default, characteristic attributes/fields for this class of informant as defined in the ontology.
print(bedpe_inf.__dict__)

# Populate the fields for this informant by updating its dictionary.
bedpe_inf.__dict__.update({'name':"ENCFF661SAZ.bedpe",
'description': 'Basic loops file from ENCODE.',
'species': 'homo_sapiens',
'location': "/home/cfrankston/Projects/Auxiliaries/bedpe_tools/bedpe_data/ENCFF661SAZ.bedpe",
'genome_assembly_name': "GRCh38",
'gz':False})

# Observe that the fields have been populated
bedpe_inf.__dict__

# Append this informant to the empty informant dataframe
my_informant_dataframe.append([bedpe_inf])

# Observe that the informant dataframe now contains the informant
my_informant_dataframe.df

# Test filtering capabilities of the informant dataframe.
my_informant_dataframe.filter("(@genome_assembly_name == 'GRCh38') & (@gz == False)")

{'name': None, 'description': None, 'tags': [], 'reference_informant_names': [], 'informant_class': 'BedPe_File', 'reference_informant_name_redundancy_values': {}, 'source_depth': 5, 'species': None, 'location': None, 'external_locations': None, 'file_type': '.bedpe', 'genome_assembly_name': None, 'aliases': None, 'gz': None}


Unnamed: 0,name,informant,entry_time,verification_status
0,ENCFF661SAZ.bedpe,<ontologies.hic_January_24_2024_ontology.BedPe...,06_25_2024,pending


In [7]:
my_informant_dataframe.df.iloc[0]['informant'].__dict__['location']

'/home/cfrankston/Projects/Auxiliaries/bedpe_tools/bedpe_data/ENCFF661SAZ.bedpe'

In [8]:
this_informant_df = informant_class.Informant_Dataframe()

informants_list = []
informants_list.append(HiC_File(name='EZH2_KO_Merge.hic',
                  description='Bulk HiC data from the laboratory of Ted Braun at OHSU of hematopoietic stem cells after an EZH2 CRISPR knockout, merged from three technical replicates by the HiCkory authored by PhD. student Benjamin Skubi in the Yardimci Lab.',
                  tags=['Braun_Lab', 'EZH2_Knockout', 'hematopoietic_stem_cell', 'HiCkory'],
                  species='homo_sapiens',
                  location='/home/cfrankston/Projects/hic_scope/hic_files/KO.hic',
                  genome_assembly_name='GRCh38',
                  hic_type='in_situ'))

informants_list.append(HiC_File(name='EZH2_Mock_Merge.hic',
                  description='Bulk HiC data from the laboratory of Ted Braun at OHSU of hematopoietic stem cells controlling against an EZH2 CRISPR knockout, merged from three technical replicates by the HiCkory authored by PhD. student Benjamin Skubi in the Yardimci Lab.',
                  tags=['Braun_Lab', 'EZH2_Knockout', 'hematopoietic_stem_cell', 'HiCkory'],
                  species='homo_sapiens',
                  location='/home/cfrankston/Projects/hic_scope/hic_files/Mock.hic',
                  genome_assembly_name='GRCh38',
                  hic_type='in_situ'))

#print(KO_hic.__dict__)
#print('\n')

informants_list.append(hicstraw_getMatrixZoomData_Parameters(name='mzd_setting_1',
                                                      parameters={'chr1':1,
                                                                  'chr2':1,
                                                                  'obs_type':'observed',
                                                                  'norm':'VC_SQRT',
                                                                  'resolution_units':'BP',
                                                                  'res':10000}))
#print(my_mzd_params.__dict__)
#(hicstraw_getMatrixZoomData.__dict__)

Mustache = Algorithm()

loop_bedpes_list = informant_class.create_file_informant_list_from_folder(root_folder='/home/cfrankston/Projects/hic_scope/bedpe_files', use_location=True, attribute_sequence=['name'],
                                                                              informant_class=HiC_Loops_File, reference_informant_names=['Mustache'], description='observed_VC_SQRT_5000bp_diff_fdr2_0.05_mustache_fdr1_0.2_results_folder_February_21_2024',
                                                                              genome_assembly_name='GRCh38', gz=False, species='homo_sapiens')

informants_list += (loop_bedpes_list)

this_informant_df.append(informants_list)

this_informant_df.append([HiC_Loops_File(name='KO_VSMock.diffloop1.consensus.bedpe', description='Preliminary consensus loops produced between two Mustache loop calls at different normalizations and fdr rates and an arbitrary consensus score threshold at 10kbp resolution.', reference_informant_names=['Mustache'], tags=['consensus_features', 'EZH2_KO', 'hematopoietic_stem_cells'], genome_assembly_name='GRCh38', gz=False, species='homo_sapiens', location = '/home/cfrankston/Projects/consensus_features/consensus_features/results/KO_VS_Mock.diffloop1.consensus.bedpe')])

print(this_informant_df.df.loc[6]['informant'].__dict__)
this_informant_df.df

{'name': 'KO_VS_Mock.diffloop2', 'description': 'observed_VC_SQRT_5000bp_diff_fdr2_0.05_mustache_fdr1_0.2_results_folder_February_21_2024', 'tags': [], 'reference_informant_names': ['Mustache'], 'informant_class': 'HiC_Loops_File', 'reference_informant_name_redundancy_values': {'Mustache': None}, 'source_depth': 6, 'species': 'homo_sapiens', 'location': '/home/cfrankston/Projects/hic_scope/bedpe_files/KO_VS_Mock.diffloop2', 'external_locations': None, 'file_type': '.bedpe', 'genome_assembly_name': 'GRCh38', 'aliases': None, 'hic_file': None, 'feature_type': 'HiC_Loop', 'gz': False}


Unnamed: 0,name,informant,entry_time,verification_status
0,EZH2_KO_Merge.hic,<ontologies.hic_January_24_2024_ontology.HiC_F...,06_25_2024,pending
1,EZH2_Mock_Merge.hic,<ontologies.hic_January_24_2024_ontology.HiC_F...,06_25_2024,pending
2,mzd_setting_1,<ontologies.hic_January_24_2024_ontology.hicst...,06_25_2024,pending
3,KO_VS_Mock.loop2,<ontologies.hic_January_24_2024_ontology.HiC_L...,06_25_2024,pending
4,KO_VS_Mock.diffloop1,<ontologies.hic_January_24_2024_ontology.HiC_L...,06_25_2024,pending
5,KO_VS_Mock.loop1,<ontologies.hic_January_24_2024_ontology.HiC_L...,06_25_2024,pending
6,KO_VS_Mock.diffloop2,<ontologies.hic_January_24_2024_ontology.HiC_L...,06_25_2024,pending
7,KO_VSMock.diffloop1.consensus.bedpe,<ontologies.hic_January_24_2024_ontology.HiC_L...,06_25_2024,pending


In [9]:
this_informant_df.df['verification_status'] = True
this_informant_df.filter('isinstance(@informant, HiC_Loops_File)', additional_context={'HiC_Loops_File':HiC_Loops_File})

this_informant_df.save_df(df_pkl_path='/home/cfrankston/Projects/hic_scope/informant_dataframes/hic_scope_test_1.pkl')

Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1
Error evaluating expression: isinstance expected 2 arguments, got 1


In [12]:
consensus_trial_informant_df = informant_class.Informant_Dataframe()

putative_loops_inf_df = informant_class.create_file_informant_list_from_folder(root_folder = "/home/cfrankston/Projects/CEDAR_Projects/2024-02-21_EZH2-knockout-hic/data/data_March_13_2024",
use_location=True, attribute_sequence=['description','name'],informant_class=HiC_Loops_File, reference_informant_names=['Mustache'],
                                                                              genome_assembly_name='GRCh38', gz=False, species='homo_sapiens')

consensus_trial_informant_df.append(putative_loops_inf_df)
consensus_trial_informant_df.df

consensus_trial_informant_df.save_df(df_pkl_path='/home/cfrankston/Projects/consensus_features/consensus_features/informant_dataframes/loop_rep_infs_df')


In [11]:
consensus_trial_informant_df.df['informant'][0].__dict__

{'name': 'rep1_KO_VS_Mock.diffloop2',
 'description': 'observed_KR_10000bp_diff_fdr2_0.05_mustache_fdr1_0.15_results_folder_March_7_2024',
 'tags': [],
 'reference_informant_names': ['Mustache'],
 'informant_class': 'HiC_Loops_File',
 'reference_informant_name_redundancy_values': {'Mustache': None},
 'source_depth': 6,
 'species': 'homo_sapiens',
 'location': '/home/cfrankston/Projects/CEDAR_Projects/2024-02-21_EZH2-knockout-hic/data/observed_KR_10000bp_diff_fdr2_0.05_mustache_fdr1_0.15_results_folder_March_7_2024/KO_VS_Mock/rep1_KO_VS_Mock.diffloop2',
 'external_locations': None,
 'file_type': '.bedpe',
 'genome_assembly_name': 'GRCh38',
 'aliases': None,
 'hic_file': None,
 'feature_type': 'HiC_Loop',
 'gz': False}

## Launch Project using Imported Ontology and Desired Informant Dataframe

In [14]:
import pandas as pd
import subprocess
import snakemake

project_name = "ont_rdb_toy_project"
informant_class_path = "/home/cfrankston/Projects/ont_rdb/ont_rdb/informant_class.py"
ontology_script_path = "/home/cfrankston/Projects/ont_rdb/ont_rdb/ontologies/hic_January_24_2024_ontology.py"
informant_dataframe_path = "/home/cfrankston/Projects/consensus_features/consensus_features/informant_dataframes/loop_rep_infs_df"
base_directory = "/home/cfrankston/Projects/"

command = f"python  launch_project.py {project_name} {informant_class_path} {ontology_script_path} {informant_dataframe_path} {base_directory}"
result = subprocess.run(command, capture_output=True, text=True, shell=True)
# Check if the command was successful
if result.returncode == 0:
    # Print the standard output of the command
    print(f"Project named {project_name} is constructed.", result.stdout)
else:
    # Print the standard error if the command failed
    print("Command failed with error:", result.stderr)

Project named ont_rdb_toy_project is constructed. 
