In [None]:
import os
import sys
import yaml
import tools.objects as objs

# Load the project configuration from its hard-coded absolute path.
# The file is opened explicitly as UTF-8 so that decoding does not depend on
# the platform's locale default encoding. yaml.safe_load is used rather than
# yaml.load so that only standard YAML tags are constructed from the file.
with open("/home/jovyan/work/input_data/config/project_config.yml", encoding="utf-8") as file:
    config = yaml.safe_load(file)

In [None]:
# Initialize the project object from the parsed configuration.
# `config` is the value returned by yaml.safe_load in the first cell.
project = objs.Project(config)

In [None]:
# Create datasets (which hold raw data and metadata), one per data type.
# NOTE(review): TX / MX presumably stand for transcriptomics / metabolomics — confirm in tools.objects.
# NOTE(review): overwrite=True is passed here while most later pipeline steps pass
# overwrite=False; presumably this recreates the dataset objects on every run — confirm intended.
tx_dataset = objs.TX(project, overwrite=True)
mx_dataset = objs.MX(project, overwrite=True)

In [None]:
# Create the analysis (a collection of datasets and the methods for performing integration).
# It is built from the project plus both dataset objects, in the order [TX, MX].
# NOTE(review): overwrite=True presumably discards any previously stored analysis — confirm in tools.objects.
analysis = objs.Analysis(project, datasets=[tx_dataset, mx_dataset], overwrite=True)

In [None]:
# Get metadata and data for each dataset from disk, cache, or generate from source.
# For each dataset the raw metadata is requested before the raw data.
# NOTE(review): overwrite=False presumably means existing results are reused rather than
# regenerated — confirm against the method implementations in tools.objects.
tx_dataset.get_raw_metadata(overwrite=False)
tx_dataset.get_raw_data(overwrite=False)
mx_dataset.get_raw_metadata(overwrite=False)
mx_dataset.get_raw_data(overwrite=False)

In [None]:
# Link the analysis datasets by finding corresponding samples and metadata.
# Metadata is linked first, then data.
# NOTE(review): whether link_data depends on link_metadata having run is not visible here — keep this order.
analysis.link_metadata(overwrite=False)
analysis.link_data(overwrite=False)

In [None]:
# Filter out rare features from the analysis datasets, based on a minimum observed value
# or on the proportion of missing values across samples.
# NOTE(review): no thresholds are passed here; presumably they come from the project config — confirm.
analysis.filter_all_datasets(overwrite=False)

In [None]:
# Filter out features from the analysis datasets that were not impacted by experimentation,
# identified by low variance across samples.
# NOTE(review): the variance cutoff is not passed here; presumably it comes from the project config — confirm.
analysis.devariance_all_datasets(overwrite=False)

In [None]:
# Scale features in all analysis datasets to a shared, normalized distribution.
# NOTE(review): the scaling method is not specified here; presumably it is set in the project config — confirm.
analysis.scale_all_datasets(overwrite=False)

In [None]:
# Filter out features from the analysis datasets that show low within-replicate reproducibility.
# NOTE(review): this runs after scaling; whether it relies on the scaled values is not visible here — confirm.
analysis.replicability_test_all_datasets(overwrite=False)

In [None]:
# Check the data distributions after all normalization steps (filtering, devariancing,
# scaling, and the replicability test above).
# show_plot=True presumably renders the figure inline in the notebook — confirm in tools.objects.
analysis.plot_dataset_distributions(show_plot=True)

In [None]:
# Check dimension-reduction (PCA) plots for every dataset after the data normalization steps.
# NOTE(review): overwrite=False presumably reuses previously generated plots if present — confirm.
analysis.plot_pca_all_datasets(overwrite=False)

In [None]:
# Perform dataset integration of the linked metadata and data.
# Metadata is integrated first, then data, mirroring the order used when linking.
analysis.integrate_metadata(overwrite=False)
analysis.integrate_data(overwrite=False)

In [None]:
# Subset features using statistical tests.
# NOTE(review): which tests and thresholds are applied is not visible here; presumably
# they are defined in the project config — confirm.
analysis.perform_feature_selection(overwrite=False)

In [None]:
# Calculate correlations between features.
# NOTE(review): the original note mentions "custom parameters", but the only argument passed
# is overwrite=True; any custom settings presumably come from the project config — confirm.
# overwrite=True (unlike most steps above) presumably forces recomputation on every run.
analysis.calculate_correlated_features(overwrite=True)

In [None]:
# Plot the correlation network.
# overwrite=True matches the correlation step above; presumably the plot is regenerated
# each run so it stays in sync with freshly computed correlations — confirm.
analysis.plot_correlation_network(overwrite=True)

In [None]:
# Run multi-omics factor analysis (MOFA2) on the analysis.
# NOTE(review): overwrite=False presumably reuses an existing MOFA2 result if one exists — confirm.
analysis.run_mofa2_analysis(overwrite=False)