In [None]:
import sys
import os
import subprocess
from pathlib import Path

# Make the directory two levels above the current working directory importable
# (presumably the repository root that holds the `gchia` package - confirm).
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# Guarded so that re-running this cell does not keep adding duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)


# CTCF

In [None]:
# Root folder for generated figures; setup_output_dir() creates
# <OUTPUT_DIR>/<cell line>/<protein>/<N>kb beneath it.
OUTPUT_DIR = "../../gchia/Figure"

# Genomic region shown in the visualization panel
CHROMOSOME = "chr10"
START = 21000000
END = 23000000

# Base paths, kept identical to the other configuration cells of this notebook.
# They must be non-empty: input paths are built as f'{BASE}/...', so an empty
# base would resolve at the filesystem root (e.g. '/GM12878/...').
EXPERIMENT_BASE = "../../results"
# '~' is expanded here because the paths are later assembled with plain f-strings.
DEEPCHIA_BASE = os.path.expanduser("~/PaperCode/DeepChIA-PET/results")
DATA_BASE = "../../data"
# Not referenced by any function visible in this notebook section.
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Key: (cell line, protein, resolution in bp).  Per-entry fields:
#   experiment_dir      run folder under EXPERIMENT_BASE (predictions, logs, metrics)
#   deepchia_dir        baseline run folder under DEEPCHIA_BASE
#   experiment_version  N in logs/lightning_logs/version_N/metrics.csv
#   deepchia_version    N in checkpoints/logs/version_N/metrics.csv
#   loss_y_limit        passed to plot_val_loss as y_limit
#   hic_file            file under DATA_BASE/<cell line>/<protein>_ChIA-PET/
#   bigwig_files        track label -> file under DATA_BASE/<cell line>/bigWig_files/
CURRENT_EXPERIMENTS = {
    ('GM12878', 'CTCF', 10000): {
        'experiment_dir': '250710_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'deepchia_dir': '250211_10000_NONE',
        'experiment_version': 1,
        'deepchia_version': 0,
        'loss_y_limit': 0.25,
        'hic_file': 'ENCFF379AWZ.hic',
        'bigwig_files': {
            'CTCF': 'GM12878_ctcf.bw'
        }
    },
    ('GM12878', 'CTCF', 5000): {
        'experiment_dir': '250711_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'deepchia_dir': '250330_5000_NONE',
        'experiment_version': 0,
        'deepchia_version': 9,
        'loss_y_limit': 0.125,
        'hic_file': 'ENCFF379AWZ.hic',
        'bigwig_files': {
            'CTCF': 'GM12878_ctcf.bw'
        }
    }
    # Add more experiments here as needed
}

# Which experiments to run (a None or empty filter means "no restriction")
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['CTCF']       # or None for all
TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
# 'all', or any of 'visualization', 'loss', 'scc'
TARGET_PLOT_TYPES = ['all']

In [None]:
from gchia.Metrics.lossplot import plot_val_loss
from gchia.Metrics.SCCplot import plot_scc
from gchia.Metrics.visualize import load_and_visualize_region

def setup_output_dir(cell_line, protein, resolution):
    """Ensure the figure folder for one experiment exists and return it as a string.

    The folder is ``OUTPUT_DIR/<cell_line>/<protein>/<resolution // 1000>kb``.
    Missing parent folders are created; an existing folder is reused.
    """
    kb_label = f"{resolution//1000}kb"
    target = Path(OUTPUT_DIR).joinpath(cell_line, protein, kb_label)
    target.mkdir(parents=True, exist_ok=True)
    return str(target)

def generate_visualization(cell_line, protein, resolution):
    """Render the contact-map panel for one configured experiment.

    Looks up ``(cell_line, protein, resolution)`` in ``CURRENT_EXPERIMENTS`` and
    calls ``load_and_visualize_region`` for ``CHROMOSOME:START-END`` with the
    configured ``.hic`` file, the configured bigWig tracks and the saved
    prediction matrix.  The figure is written to ``visualization.png`` inside
    the folder returned by ``setup_output_dir``.

    Args:
        cell_line: Cell-line part of the experiment key, e.g. ``'GM12878'``.
        protein: Protein part of the experiment key, e.g. ``'CTCF'``.
        resolution: Bin size in base pairs (third part of the experiment key).

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``load_and_visualize_region`` is caught and printed so that
        the remaining panels can still be produced.
    """
    key = (cell_line, protein, resolution)
    kb = resolution // 1000
    if key not in CURRENT_EXPERIMENTS:
        print(f"❌ No configuration found for {cell_line} {protein} {kb}kb")
        return

    config = CURRENT_EXPERIMENTS[key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"🎨 Generating visualization for {cell_line} {protein} {kb}kb...")

    # Contact-map track: label -> {'path', 'resolution'}
    hic_config = {
        f'{protein} ChIA-PET {kb}kb': {
            'path': f'{DATA_BASE}/{cell_line}/{protein}_ChIA-PET/{config["hic_file"]}',
            'resolution': resolution
        }
    }

    # Signal tracks: one entry per configured bigWig file
    bigwig_config = {
        track_name: f'{DATA_BASE}/{cell_line}/bigWig_files/{filename}'
        for track_name, filename in config["bigwig_files"].items()
    }

    # Saved prediction matrix for exactly this region
    pred_config = {
        'label': f'Predicted {kb}kb',
        'path': f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/predictions/pred_matrix_{CHROMOSOME}_{START}_{END}.npy',
        'log1p': True,
        'insert_at_start': True
    }

    output_file = f'{output_dir}/visualization.png'

    try:
        load_and_visualize_region(
            chrom=CHROMOSOME,
            start=START,
            end=END,
            hic_files_config=hic_config,
            bigwig_files_config=bigwig_config,
            pred_file_config=pred_config,
            output=output_file,
            fig_width=4.5,
            colorbar=False,
            interval_between_hic=0,
            interval_after_hic_block=0.05,
            interval_between_epi=0.05
        )
        print(f"✅ Generated visualization: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate visualization: {e}")

def generate_loss_plot(cell_line, protein, resolution):
    """Plot validation-loss curves of GraphChIAr and DeepChIA-PET for one experiment.

    Looks up ``(cell_line, protein, resolution)`` in ``CURRENT_EXPERIMENTS``,
    builds the two ``metrics.csv`` paths (under ``EXPERIMENT_BASE`` and
    ``DEEPCHIA_BASE``) and passes them to ``plot_val_loss``.  The figure is
    written to ``loss.svg`` inside the folder returned by ``setup_output_dir``.

    Args:
        cell_line: Cell-line part of the experiment key.
        protein: Protein part of the experiment key.
        resolution: Bin size in base pairs (third part of the experiment key).

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``plot_val_loss`` is caught and printed.
    """
    key = (cell_line, protein, resolution)
    kb = resolution // 1000
    if key not in CURRENT_EXPERIMENTS:
        print(f"❌ No configuration found for {cell_line} {protein} {kb}kb")
        return

    config = CURRENT_EXPERIMENTS[key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"📈 Generating loss plot for {cell_line} {protein} {kb}kb...")

    graphchiar_log = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/logs/lightning_logs/version_{config["experiment_version"]}/metrics.csv'
    deepchia_log = f'{DEEPCHIA_BASE}/{config["deepchia_dir"]}/checkpoints/logs/version_{config["deepchia_version"]}/metrics.csv'

    # The base paths may start with '~' (DEEPCHIA_BASE does in this notebook);
    # expand it here because file APIs do not expand '~' themselves.
    file_dict = {
        os.path.expanduser(graphchiar_log): 'GraphChIAr',
        os.path.expanduser(deepchia_log): 'DeepChIA-PET'
    }

    output_file = f'{output_dir}/loss.svg'

    try:
        plot_val_loss(
            file_dict,
            y_limit=config['loss_y_limit'],
            smoothing_window=15,
            smoothing_polyorder=4,
            max_epoch=35,
            output_path=output_file
        )
        print(f"✅ Generated loss plot: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate loss plot: {e}")

def generate_scc_plot(cell_line, protein, resolution):
    """Plot the SCC comparison of GraphChIAr and DeepChIA-PET for one experiment.

    Looks up ``(cell_line, protein, resolution)`` in ``CURRENT_EXPERIMENTS``,
    builds the two metric-file paths (under ``EXPERIMENT_BASE`` and
    ``DEEPCHIA_BASE``) and passes them to ``plot_scc``.  The figure is written
    to ``scc.svg`` inside the folder returned by ``setup_output_dir``.

    Args:
        cell_line: Cell-line part of the experiment key.
        protein: Protein part of the experiment key.
        resolution: Bin size in base pairs; also forwarded to ``plot_scc``.

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``plot_scc`` is caught and printed.
    """
    key = (cell_line, protein, resolution)
    kb = resolution // 1000
    if key not in CURRENT_EXPERIMENTS:
        print(f"❌ No configuration found for {cell_line} {protein} {kb}kb")
        return

    config = CURRENT_EXPERIMENTS[key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"📊 Generating SCC plot for {cell_line} {protein} {kb}kb...")

    graphchiar_metrics = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/metrics.npy'
    deepchia_metrics = f'{DEEPCHIA_BASE}/{config["deepchia_dir"]}/{cell_line}/metrics/scc_metrics.npy'

    # The base paths may start with '~' (DEEPCHIA_BASE does in this notebook);
    # expand it here because file APIs do not expand '~' themselves.
    file_paths = {
        os.path.expanduser(graphchiar_metrics): 'GraphChIAr',
        os.path.expanduser(deepchia_metrics): 'DeepChIA-PET'
    }

    output_file = f'{output_dir}/scc.svg'

    try:
        plot_scc(
            file_paths=file_paths,
            resolution=resolution,
            output_path=output_file
        )
        print(f"✅ Generated SCC plot: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate SCC plot: {e}")



def run_experiments():
    """Generate the selected panels for every experiment that passes the filters.

    Iterates over the keys of ``CURRENT_EXPERIMENTS`` and keeps those accepted
    by ``TARGET_CELL_LINES``, ``TARGET_PROTEINS`` and ``TARGET_RESOLUTIONS``
    (a ``None`` or empty filter accepts everything).  For each kept experiment
    it calls ``generate_visualization``, ``generate_loss_plot`` and
    ``generate_scc_plot`` when ``TARGET_PLOT_TYPES`` contains ``'all'`` or the
    matching name (``'visualization'``, ``'loss'``, ``'scc'``).

    Returns:
        None.  Progress is reported with ``print``.
    """
    def _accepted(value, allowed):
        # A falsy filter (None / empty list) places no restriction.
        return not allowed or value in allowed

    def _wanted(plot_type):
        return 'all' in TARGET_PLOT_TYPES or plot_type in TARGET_PLOT_TYPES

    print("🚀 Starting experiment execution...")
    print("=" * 60)

    # Determine which experiments to run
    experiments_to_run = [
        (cell_line, protein, resolution)
        for (cell_line, protein, resolution) in CURRENT_EXPERIMENTS.keys()
        if _accepted(cell_line, TARGET_CELL_LINES)
        and _accepted(protein, TARGET_PROTEINS)
        and _accepted(resolution, TARGET_RESOLUTIONS)
    ]

    if not experiments_to_run:
        print("❌ No experiments match the target criteria!")
        return

    print(f"📋 Will run {len(experiments_to_run)} experiments:")
    for cell_line, protein, resolution in experiments_to_run:
        print(f"   - {cell_line} {protein} {resolution//1000}kb")
    print()

    # Run experiments
    for cell_line, protein, resolution in experiments_to_run:
        print(f"🔬 Processing {cell_line} {protein} {resolution//1000}kb")
        print("-" * 40)

        if _wanted('visualization'):
            generate_visualization(cell_line, protein, resolution)

        if _wanted('loss'):
            generate_loss_plot(cell_line, protein, resolution)

        if _wanted('scc'):
            generate_scc_plot(cell_line, protein, resolution)

        print()

    print("🎉 All experiments completed!")
    print("=" * 60)



In [None]:

# Produce the CTCF panels selected by the configuration cell above.
run_experiments()

# RAD21

In [None]:
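# NOTE: inactive cell - everything below is commented out. It is an alternative
# RAD21 configuration pointing at the `efeaturesq_high` runs; the next cell
# holds the configuration that is actually executed.
# OUTPUT_DIR = "../../gchia/Figure"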

# # Genomic region
# CHROMOSOME = "chr10"
# START = 21000000
# END = 23000000

# # Base paths
# EXPERIMENT_BASE = "../../results"
# DEEPCHIA_BASE = "~/PaperCode/DeepChIA-PET/results"
# DATA_BASE = "../../data"
# REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# # Current experiment configuration - MODIFY THIS FOR EACH RUN
# CURRENT_EXPERIMENTS = {
#     ('GM12878', 'RAD21', 10000): {
#         'experiment_dir': '250901_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
#         'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_10000_NONE',
#         'experiment_version': 0,
#         'deepchia_version': 0,
#         'loss_y_limit': 0.14,
#         'hic_file': 'ENCFF416RNE.hic',
#         'bigwig_files': {
#             'RAD21': 'GM12878_RAD21.bw'
#         }
#     },
#     ('GM12878', 'RAD21', 5000): {
#         'experiment_dir': '250714_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
#         'deepchia_dir': '250826_GM12878_RAD21_to_HCT116_5000_NONE',
#         'experiment_version': 1,
#         'deepchia_version': 0,
#         'loss_y_limit': 0.06,
#         'hic_file': 'ENCFF416RNE.hic',
#         'bigwig_files': {
#             'RAD21': 'GM12878_RAD21.bw'
#         }
#     }
#     # Add more experiments here as needed
# }

# # Which experiments to run (set to None to run all in CURRENT_EXPERIMENTS)
# TARGET_CELL_LINES = ['GM12878']  # or None for all
# TARGET_PROTEINS = ['RAD21']       # or None for all  
# TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
# TARGET_PLOT_TYPES = ['all']



In [None]:
# Root folder for generated figures
OUTPUT_DIR = "../../gchia/Figure"

# Genomic region shown in the visualization panel
CHROMOSOME = "chr10"
START = 21000000
END = 23000000

# Base paths, kept identical to the other configuration cells of this notebook.
# They must be non-empty: input paths are built as f'{BASE}/...', so an empty
# base would resolve at the filesystem root (e.g. '/GM12878/...').
EXPERIMENT_BASE = "../../results"
# '~' is expanded here because the paths are later assembled with plain f-strings.
DEEPCHIA_BASE = os.path.expanduser("~/PaperCode/DeepChIA-PET/results")
DATA_BASE = "../../data"
# Not referenced by any function visible in this notebook section.
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Key: (cell line, protein, resolution in bp); the per-entry fields are read by
# generate_visualization / generate_loss_plot / generate_scc_plot.
CURRENT_EXPERIMENTS = {
    ('GM12878', 'RAD21', 10000): {
        'experiment_dir': '250904_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_10000_NONE',
        'experiment_version': 1,
        'deepchia_version': 0,
        'loss_y_limit': 0.14,
        'hic_file': 'ENCFF416RNE.hic',
        'bigwig_files': {
            'RAD21': 'GM12878_RAD21.bw'
        }
    },
    ('GM12878', 'RAD21', 5000): {
        'experiment_dir': '250905_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'deepchia_dir': '250826_GM12878_RAD21_to_HCT116_5000_NONE',
        'experiment_version': 0,
        'deepchia_version': 0,
        'loss_y_limit': 0.06,
        'hic_file': 'ENCFF416RNE.hic',
        'bigwig_files': {
            'RAD21': 'GM12878_RAD21.bw'
        }
    }
    # Add more experiments here as needed
}

# Which experiments to run (a None or empty filter means "no restriction")
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['RAD21']       # or None for all
TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
# 'all', or any of 'visualization', 'loss', 'scc'
TARGET_PLOT_TYPES = ['all']



In [None]:
# Produce the RAD21 panels.  This expects the run_experiments defined in the
# CTCF section; the ablation section further down redefines it, so re-run the
# CTCF definition cell first if that later cell has been executed in this kernel.
run_experiments()

# POLR2A

In [None]:
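# NOTE: inactive cell - everything below is commented out. It is an alternative
# POLR2A configuration pointing at the `efeaturesq_high` runs; the next cell
# holds the configuration that is actually executed.
# OUTPUT_DIR = "../../gchia/Figure"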

# # Genomic region
# CHROMOSOME = "chr10"
# START = 21000000
# END = 23000000

# # Base paths
# EXPERIMENT_BASE = "../../results"
# DEEPCHIA_BASE = "~/PaperCode/DeepChIA-PET/results"
# DATA_BASE = "../../data"
# REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# # Current experiment configuration - MODIFY THIS FOR EACH RUN
# CURRENT_EXPERIMENTS = {
#     ('GM12878', 'POLR2A', 10000): {
#         'experiment_dir': '250805_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
#         'deepchia_dir': '250828_GM12878_POLR2A_to_HCT116_10000_NONE',
#         'experiment_version': 0,
#         'deepchia_version': 1,
#         'loss_y_limit': 0.06,
#         'hic_file': 'ENCFF791SUY.hic',
#         'bigwig_files': {
#             'POLR2A': 'GM12878_POLR2A.bw'
#         }
#     },
#     ('GM12878', 'POLR2A', 5000): {
#         'experiment_dir': '250806_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
#         'deepchia_dir': '250902_GM12878_POLR2A_to_HCT116_5000_NONE',
#         'experiment_version': 0,
#         'deepchia_version': 2,
#         'loss_y_limit': 0.025,
#         'hic_file': 'ENCFF791SUY.hic',
#         'bigwig_files': {
#             'POLR2A': 'GM12878_POLR2A.bw'
#         }
#     }
#     # Add more experiments here as needed
# }

# # Which experiments to run (set to None to run all in CURRENT_EXPERIMENTS)
# TARGET_CELL_LINES = ['GM12878']  # or None for all
# TARGET_PROTEINS = ['POLR2A']       # or None for all  
# TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
# TARGET_PLOT_TYPES = ['all']



In [None]:
# Root folder for generated figures
OUTPUT_DIR = "../../gchia/Figure"

# Genomic region shown in the visualization panel
CHROMOSOME = "chr10"
START = 21000000
END = 23000000

# Base paths
EXPERIMENT_BASE = "../../results"
# '~' is expanded here because the paths are later assembled with plain
# f-strings, and file APIs do not expand '~' themselves.
DEEPCHIA_BASE = os.path.expanduser("~/PaperCode/DeepChIA-PET/results")
DATA_BASE = "../../data"
# Not referenced by any function visible in this notebook section.
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Key: (cell line, protein, resolution in bp); the per-entry fields are read by
# generate_visualization / generate_loss_plot / generate_scc_plot.
CURRENT_EXPERIMENTS = {
    ('GM12878', 'POLR2A', 10000): {
        'experiment_dir': '250904_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'deepchia_dir': '250828_GM12878_POLR2A_to_HCT116_10000_NONE',
        'experiment_version': 4,
        'deepchia_version': 1,
        'loss_y_limit': 0.06,
        'hic_file': 'ENCFF791SUY.hic',
        'bigwig_files': {
            'POLR2A': 'GM12878_POLR2A.bw'
        }
    },
    ('GM12878', 'POLR2A', 5000): {
        'experiment_dir': '250905_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'deepchia_dir': '250902_GM12878_POLR2A_to_HCT116_5000_NONE',
        'experiment_version': 0,
        'deepchia_version': 2,
        'loss_y_limit': 0.025,
        'hic_file': 'ENCFF791SUY.hic',
        'bigwig_files': {
            'POLR2A': 'GM12878_POLR2A.bw'
        }
    }
    # Add more experiments here as needed
}

# Which experiments to run (a None or empty filter means "no restriction")
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['POLR2A']       # or None for all
TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
# 'all', or any of 'visualization', 'loss', 'scc'
TARGET_PLOT_TYPES = ['all']



In [None]:
# Produce the POLR2A panels.  This expects the run_experiments defined in the
# CTCF section; the ablation section below redefines it, so re-run the CTCF
# definition cell first if that later cell has been executed in this kernel.
run_experiments()

# Ablation study

In [None]:
# Root folder for generated figures
OUTPUT_DIR = "../../gchia/Figure"
EXPERIMENT_BASE = "../../results"
#DEEPCHIA_BASE = "~/PaperCode/DeepChIA-PET/results"
DATA_BASE = "../../data"
# Not referenced by any function visible in this notebook section.
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Genomic region
CHROMOSOME = "chr10"
START = 21000000
END = 23000000

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Key: (cell line, protein, resolution in bp).  Ablation-specific fields:
#   ablation_dir      run folder of the ablated model under EXPERIMENT_BASE
#   ablation_version  N in logs/lightning_logs/version_N/metrics.csv
#   ablation_name     legend label used for the ablated model
CURRENT_EXPERIMENTS = {
    ('GM12878', 'CTCF', 10000): {
        'experiment_dir': '250710_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        # 'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_10000_NONE',
        'ablation_dir': "250824_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic",
        'experiment_version': 1,
        'ablation_version': 0,
        'ablation_name': "Ablated Model",
        'loss_y_limit': 0.25,
        'hic_file': 'ENCFF379AWZ.hic',
        'bigwig_files': {
            'CTCF': 'GM12878_ctcf.bw'
        }
    },
    ('GM12878', 'CTCF', 5000): {
        'experiment_dir': '250711_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        # 'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_5000_NONE',
        'ablation_dir': "250825_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic",
        'experiment_version': 0,
        'ablation_version': 0,
        'loss_y_limit': 0.12,
        'ablation_name': "Ablated Model",
        'hic_file': 'ENCFF379AWZ.hic',
        'bigwig_files': {
            'CTCF': 'GM12878_ctcf.bw'
        }
    }
    # Add more experiments here as needed
}

# Which experiments to run (a None or empty filter means "no restriction")
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['CTCF']       # or None for all
TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
TARGET_PLOT_TYPES = ['all']


In [None]:
from gchia.Metrics.lossplot import plot_val_loss
from gchia.Metrics.SCCplot import plot_scc
from gchia.Metrics.visualize import load_and_visualize_region

def setup_output_dir(cell_line, protein, resolution):
    """Ensure the ablation figure folder for one experiment exists and return it.

    The folder is ``OUTPUT_DIR/<cell_line>/<protein>/<resolution // 1000>kb/ablation``.
    Missing parent folders are created; an existing folder is reused.

    Note: this replaces the ``setup_output_dir`` defined earlier in the
    notebook, which has no ``ablation`` sub-folder.
    """
    parts = (cell_line, protein, f"{resolution//1000}kb", "ablation")
    target = Path(OUTPUT_DIR).joinpath(*parts)
    target.mkdir(parents=True, exist_ok=True)
    return str(target)


def generate_loss_plot(cell_line, protein, resolution):
    """Plot validation-loss curves of the full and the ablated model.

    Looks up ``(cell_line, protein, resolution)`` in ``CURRENT_EXPERIMENTS``,
    builds the two ``metrics.csv`` paths (both under ``EXPERIMENT_BASE``) and
    passes them to ``plot_val_loss``.  The figure is written to ``loss.svg``
    inside the folder returned by ``setup_output_dir``.

    Note: this replaces the ``generate_loss_plot`` defined earlier in the
    notebook, which compares against DeepChIA-PET instead.

    Args:
        cell_line: Cell-line part of the experiment key.
        protein: Protein part of the experiment key.
        resolution: Bin size in base pairs (third part of the experiment key).

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``plot_val_loss`` is caught and printed.
    """
    key = (cell_line, protein, resolution)
    kb = resolution // 1000
    if key not in CURRENT_EXPERIMENTS:
        print(f"❌ No configuration found for {cell_line} {protein} {kb}kb")
        return

    config = CURRENT_EXPERIMENTS[key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"📈 Generating loss plot for {cell_line} {protein} {kb}kb...")

    # metrics.csv path -> legend label
    file_dict = {
        f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/logs/lightning_logs/version_{config["experiment_version"]}/metrics.csv': 'Full Model',
        f'{EXPERIMENT_BASE}/{config["ablation_dir"]}/logs/lightning_logs/version_{config["ablation_version"]}/metrics.csv': f'{config["ablation_name"]}'
    }

    output_file = f'{output_dir}/loss.svg'

    try:
        plot_val_loss(
            file_dict,
            y_limit=config['loss_y_limit'],
            smoothing_window=15,
            smoothing_polyorder=4,
            max_epoch=35,
            output_path=output_file
        )
        print(f"✅ Generated loss plot: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate loss plot: {e}")

def generate_scc_plot(cell_line, protein, resolution):
    """Plot the SCC comparison of the full and the ablated model.

    Looks up ``(cell_line, protein, resolution)`` in ``CURRENT_EXPERIMENTS``,
    builds the two ``metrics.npy`` paths (both under ``EXPERIMENT_BASE``) and
    passes them to ``plot_scc``.  The figure is written to ``scc.svg`` inside
    the folder returned by ``setup_output_dir``.

    Note: this replaces the ``generate_scc_plot`` defined earlier in the
    notebook, which compares against DeepChIA-PET instead.

    Args:
        cell_line: Cell-line part of the experiment key.
        protein: Protein part of the experiment key.
        resolution: Bin size in base pairs; also forwarded to ``plot_scc``.

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``plot_scc`` is caught and printed.
    """
    key = (cell_line, protein, resolution)
    kb = resolution // 1000
    if key not in CURRENT_EXPERIMENTS:
        print(f"❌ No configuration found for {cell_line} {protein} {kb}kb")
        return

    config = CURRENT_EXPERIMENTS[key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"📊 Generating SCC plot for {cell_line} {protein} {kb}kb...")

    # metrics.npy path -> legend label
    file_paths = {
        f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/metrics.npy': 'Full Model',
        f'{EXPERIMENT_BASE}/{config["ablation_dir"]}/{cell_line}/metrics.npy': f'{config["ablation_name"]}'
    }

    output_file = f'{output_dir}/scc.svg'

    try:
        plot_scc(
            file_paths=file_paths,
            resolution=resolution,
            output_path=output_file
        )
        print(f"✅ Generated SCC plot: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate SCC plot: {e}")



def run_experiments():
    """Generate the ablation loss and SCC panels for every selected experiment.

    Iterates over the keys of ``CURRENT_EXPERIMENTS`` and keeps those accepted
    by ``TARGET_CELL_LINES``, ``TARGET_PROTEINS`` and ``TARGET_RESOLUTIONS``
    (a ``None`` or empty filter accepts everything).  For each kept experiment
    it calls ``generate_loss_plot`` and ``generate_scc_plot`` when
    ``TARGET_PLOT_TYPES`` contains ``'all'`` or the matching name (``'loss'``,
    ``'scc'``).

    Note: this replaces the ``run_experiments`` defined earlier in the
    notebook; unlike that one it never produces the visualization panel.

    Returns:
        None.  Progress is reported with ``print``.
    """
    def _accepted(value, allowed):
        # A falsy filter (None / empty list) places no restriction.
        return not allowed or value in allowed

    def _wanted(plot_type):
        return 'all' in TARGET_PLOT_TYPES or plot_type in TARGET_PLOT_TYPES

    print("🚀 Starting experiment execution...")
    print("=" * 60)

    # Determine which experiments to run
    experiments_to_run = [
        (cell_line, protein, resolution)
        for (cell_line, protein, resolution) in CURRENT_EXPERIMENTS.keys()
        if _accepted(cell_line, TARGET_CELL_LINES)
        and _accepted(protein, TARGET_PROTEINS)
        and _accepted(resolution, TARGET_RESOLUTIONS)
    ]

    if not experiments_to_run:
        print("❌ No experiments match the target criteria!")
        return

    print(f"📋 Will run {len(experiments_to_run)} experiments:")
    for cell_line, protein, resolution in experiments_to_run:
        print(f"   - {cell_line} {protein} {resolution//1000}kb")
    print()

    # Run experiments
    for cell_line, protein, resolution in experiments_to_run:
        print(f"🔬 Processing {cell_line} {protein} {resolution//1000}kb")
        print("-" * 40)

        if _wanted('loss'):
            generate_loss_plot(cell_line, protein, resolution)

        if _wanted('scc'):
            generate_scc_plot(cell_line, protein, resolution)

        print()

    print("🎉 All experiments completed!")
    print("=" * 60)



In [None]:
# Produce the CTCF ablation panels (loss + SCC) with the ablation
# run_experiments defined in the cell above.
run_experiments()

In [None]:
# Root folder for generated figures
OUTPUT_DIR = "../../gchia/Figure"
EXPERIMENT_BASE = "../../results"
#DEEPCHIA_BASE = "~/PaperCode/DeepChIA-PET/results"
DATA_BASE = "../../data"
# Not referenced by any function visible in this notebook section.
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Genomic region
CHROMOSOME = "chr10"
START = 21000000
END = 23000000

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Key: (cell line, protein, resolution in bp).  Ablation-specific fields:
#   ablation_dir      run folder of the ablated model under EXPERIMENT_BASE
#   ablation_version  N in logs/lightning_logs/version_N/metrics.csv
#   ablation_name     legend label used for the ablated model
CURRENT_EXPERIMENTS = {
    ('GM12878', 'RAD21', 10000): {
        'experiment_dir': '250904_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        # 'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_10000_NONE',
        'ablation_dir': "250830_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic",
        'experiment_version': 1,
        'ablation_version': 0,
        'ablation_name': "Ablated Model",
        'loss_y_limit': 0.14,
        'hic_file': 'ENCFF416RNE.hic',
        'bigwig_files': {
            'RAD21': 'GM12878_RAD21.bw'
        }
    },
    ('GM12878', 'RAD21', 5000): {
        'experiment_dir': '250905_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        # 'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_5000_NONE',
        'ablation_dir': "250830_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic",
        'experiment_version': 0,
        'ablation_version': 0,
        'loss_y_limit': 0.06,
        'ablation_name': "Ablated Model",
        'hic_file': 'ENCFF416RNE.hic',
        'bigwig_files': {
            'RAD21': 'GM12878_RAD21.bw'
        }
    },
    ('GM12878', 'POLR2A', 10000): {
        'experiment_dir': '250904_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        # 'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_10000_NONE',
        'ablation_dir': "250902_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeature_noseq_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic",
        'experiment_version': 4,
        'ablation_version': 0,
        'ablation_name': "Ablated Model",
        'loss_y_limit': 0.06,
        'hic_file': 'ENCFF791SUY.hic',
        'bigwig_files': {
            'POLR2A': 'GM12878_POLR2A.bw'
        }
    },
    ('GM12878', 'POLR2A', 5000): {
        'experiment_dir': '250905_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        # 'deepchia_dir': '250408_GM12878_RAD21_to_HCT116_5000_NONE',
        'ablation_dir': "250904_GM12878_5000_NONE_ChIAPETMatrixPredictor_efeature_noseq_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic",
        'experiment_version': 0,
        'ablation_version': 0,
        'loss_y_limit': 0.025,
        'ablation_name': "Ablated Model",
        'hic_file': 'ENCFF791SUY.hic',
        'bigwig_files': {
            'POLR2A': 'GM12878_POLR2A.bw'
        }
    }
    # Add more experiments here as needed
}

# Which experiments to run (a None or empty filter means "no restriction")
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['POLR2A','RAD21']       # or None for all
TARGET_RESOLUTIONS = [10000, 5000]  # or None for all
TARGET_PLOT_TYPES = ['all']


In [None]:
# Produce the RAD21 and POLR2A ablation panels.  This calls whichever
# run_experiments is currently defined in the kernel; after a top-to-bottom run
# that is the ablation variant (loss + SCC plots only).
run_experiments()

# Cross-cell-line prediction

In [None]:
# Root folder for generated figures
OUTPUT_DIR = "../../gchia/Figure"

# Genomic region shown in the visualization panels
CHROMOSOME = "chr10"
START = 15500000
END = 17500000

# Base paths
EXPERIMENT_BASE = "../../results"
# '~' is expanded here because paths are assembled with plain f-strings, and
# file APIs do not expand '~' themselves.
DEEPCHIA_BASE = os.path.expanduser("~/PaperCode/DeepChIA-PET/results")
DATA_BASE = "../../data"
# Not referenced by any function visible in this notebook section.
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Cross-cell-line experiment configuration - MODIFY THIS FOR EACH RUN
# Key: (source cell line, target cell line, protein, resolution in bp).
CROSS_CELL_EXPERIMENTS = {
    ('GM12878', 'IMR90', 'CTCF', 10000): {
        'experiment_dir': '250710_GM12878_10000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        'source_cell': 'GM12878',  # Training cell line
        'target_cell': 'IMR90',    # Prediction target cell line
        # ChIA-PET .hic file per cell line
        'hic_files': {
            'GM12878': 'ENCFF379AWZ.hic',
            'IMR90': 'ENCFF934OLI.hic'
        },
        # bigWig tracks per cell line: track label -> file name
        'bigwig_files': {
            'GM12878': {
                'CTCF': 'GM12878_ctcf.bw'
            },
            'IMR90': {
                'CTCF': 'IMR90_ctcf.bw'
            }
        },
        'hic_comparison_files': {
            'IMR90': '4DNFIH7TH4MF.hic'       # Only the IMR90 Hi-C file is needed for the comparison
        }
    }
    # Add more cross-cell-line experiments here as needed
}

# Which experiments to run (a None or empty filter means "no restriction")
TARGET_SOURCE_CELLS = ['GM12878']   # Source cell lines (training) or None for all
TARGET_TARGET_CELLS = ['IMR90']     # Target cell lines (prediction) or None for all
TARGET_PROTEINS = ['CTCF']          # or None for all
TARGET_RESOLUTIONS = [10000]        # or None for all
TARGET_PLOT_TYPES = ['all']         # 'all', 'intra_visualization', 'cross_visualization', 'cross_loop_analysis'

from pathlib import Path
import subprocess
from gchia.Metrics.visualize import load_and_visualize_region

def setup_output_dir(source_cell, target_cell, protein, resolution):
    """Ensure the figure folder for one cross-cell-line experiment exists and return it.

    The folder is
    ``OUTPUT_DIR/cross_cell/<source_cell>_to_<target_cell>/<protein>/<resolution // 1000>kb``.
    Missing parent folders are created; an existing folder is reused.

    Note: this replaces the three-argument ``setup_output_dir`` defined earlier
    in the notebook, so the earlier ``generate_*`` functions can no longer call
    it until their definition cells are re-run.
    """
    parts = (
        "cross_cell",
        f"{source_cell}_to_{target_cell}",
        protein,
        f"{resolution//1000}kb",
    )
    target = Path(OUTPUT_DIR).joinpath(*parts)
    target.mkdir(parents=True, exist_ok=True)
    return str(target)

def generate_intra_cell_visualization(source_cell, target_cell, protein, resolution):
    """Render the contact-map panel for the source (training) cell line.

    Looks up ``(source_cell, target_cell, protein, resolution)`` in
    ``CROSS_CELL_EXPERIMENTS`` and calls ``load_and_visualize_region`` for
    ``CHROMOSOME:START-END`` using the source cell line's ``.hic`` file, its
    bigWig tracks and the prediction matrix stored under the source cell line's
    folder.  The figure is written to ``intra_cell_visualization.svg`` inside
    the folder returned by ``setup_output_dir``.

    Args:
        source_cell: Cell line whose data and prediction are plotted.
        target_cell: Target cell line; used only for the configuration key and
            the output folder.
        protein: Protein part of the experiment key.
        resolution: Bin size in base pairs (last part of the experiment key).

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``load_and_visualize_region`` is caught and printed.
    """
    key = (source_cell, target_cell, protein, resolution)
    kb = resolution // 1000
    if key not in CROSS_CELL_EXPERIMENTS:
        print(f"❌ No configuration found for {source_cell}->{target_cell} {protein} {kb}kb")
        return

    config = CROSS_CELL_EXPERIMENTS[key]
    output_dir = setup_output_dir(source_cell, target_cell, protein, resolution)

    print(f"🎨 Generating intra-cell visualization for {source_cell} {protein} {kb}kb...")

    # Contact-map track of the source cell line
    hic_config = {
        f'{protein} ChIA-PET {kb}kb': {
            'path': f'{DATA_BASE}/{source_cell}/{protein}_ChIA-PET/{config["hic_files"][source_cell]}',
            'resolution': resolution
        }
    }

    # Signal tracks of the source cell line
    bigwig_config = {
        track_name: f'{DATA_BASE}/{source_cell}/bigWig_files/{filename}'
        for track_name, filename in config["bigwig_files"][source_cell].items()
    }

    # Prediction made for the same cell line the model was configured with
    pred_config = {
        'label': f'Predicted {kb}kb',
        'path': f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{source_cell}/predictions/pred_matrix_{CHROMOSOME}_{START}_{END}.npy',
        'log1p': True,
        'insert_at_start': True
    }

    output_file = f'{output_dir}/intra_cell_visualization.svg'

    try:
        load_and_visualize_region(
            chrom=CHROMOSOME,
            start=START,
            end=END,
            hic_files_config=hic_config,
            bigwig_files_config=bigwig_config,
            pred_file_config=pred_config,
            output=output_file,
            fig_width=4.5,
            colorbar=False,
            interval_between_hic=0,
            interval_after_hic_block=0.05,
            interval_between_epi=0.05
        )
        print(f"✅ Generated intra-cell visualization: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate intra-cell visualization: {e}")

def generate_cross_cell_visualization(source_cell, target_cell, protein, resolution):
    """Render the contact-map panel for the cross-cell-line prediction.

    Looks up ``(source_cell, target_cell, protein, resolution)`` in
    ``CROSS_CELL_EXPERIMENTS`` and calls ``load_and_visualize_region`` for
    ``CHROMOSOME:START-END`` using the target cell line's ``.hic`` file, its
    bigWig tracks and the prediction matrix stored under the
    ``<source_cell>to<target_cell>`` folder of the experiment.  The figure is
    written to ``cross_cell_visualization.svg`` inside the folder returned by
    ``setup_output_dir``.

    Args:
        source_cell: Source cell line (first part of the experiment key).
        target_cell: Cell line whose data and prediction are plotted.
        protein: Protein part of the experiment key.
        resolution: Bin size in base pairs (last part of the experiment key).

    Returns:
        None.  A missing configuration is reported with ``print``; an exception
        raised by ``load_and_visualize_region`` is caught and printed.
    """
    key = (source_cell, target_cell, protein, resolution)
    kb = resolution // 1000
    if key not in CROSS_CELL_EXPERIMENTS:
        print(f"❌ No configuration found for {source_cell}->{target_cell} {protein} {kb}kb")
        return

    config = CROSS_CELL_EXPERIMENTS[key]
    output_dir = setup_output_dir(source_cell, target_cell, protein, resolution)

    print(f"🎨 Generating cross-cell visualization for {source_cell}->{target_cell} {protein} {kb}kb...")

    # Contact-map track of the target cell line (ground truth)
    hic_config = {
        f'{protein} ChIA-PET {kb}kb': {
            'path': f'{DATA_BASE}/{target_cell}/{protein}_ChIA-PET/{config["hic_files"][target_cell]}',
            'resolution': resolution
        }
    }

    # Signal tracks of the target cell line
    bigwig_config = {
        track_name: f'{DATA_BASE}/{target_cell}/bigWig_files/{filename}'
        for track_name, filename in config["bigwig_files"][target_cell].items()
    }

    # Prediction for the target cell line, stored under '<source>to<target>'
    pred_config = {
        'label': f'Predicted {kb}kb',
        'path': f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{source_cell}to{target_cell}/predictions/pred_matrix_{CHROMOSOME}_{START}_{END}.npy',
        'log1p': True,
        'insert_at_start': True
    }

    output_file = f'{output_dir}/cross_cell_visualization.svg'

    try:
        load_and_visualize_region(
            chrom=CHROMOSOME,
            start=START,
            end=END,
            hic_files_config=hic_config,
            bigwig_files_config=bigwig_config,
            pred_file_config=pred_config,
            output=output_file,
            fig_width=4.5,
            colorbar=False,
            interval_between_hic=0,
            interval_after_hic_block=0.05,
            interval_between_epi=0.05
        )
        print(f"✅ Generated cross-cell visualization: {output_file}")
    except Exception as e:
        # Best effort: report the failure and let the caller continue.
        print(f"❌ Failed to generate cross-cell visualization: {e}")


def run_cross_cell_experiments():
    """Run every configured cross-cell-line experiment that passes the TARGET_* filters.

    Keys of CROSS_CELL_EXPERIMENTS are (source_cell, target_cell, protein,
    resolution) tuples. A key is kept when each of TARGET_SOURCE_CELLS,
    TARGET_TARGET_CELLS, TARGET_PROTEINS and TARGET_RESOLUTIONS is either
    falsy (filter disabled) or contains the matching element of the key.
    For each kept key the generators enabled through TARGET_PLOT_TYPES are
    called in a fixed order: intra-cell visualization, cross-cell
    visualization, cross-cell loop analysis.

    Returns:
        None. Progress is reported with print().
    """
    def _passes_filters(key):
        # A falsy TARGET_* list means "do not filter on this field".
        source_cell, target_cell, protein, resolution = key
        if TARGET_SOURCE_CELLS and source_cell not in TARGET_SOURCE_CELLS:
            return False
        if TARGET_TARGET_CELLS and target_cell not in TARGET_TARGET_CELLS:
            return False
        if TARGET_PROTEINS and protein not in TARGET_PROTEINS:
            return False
        if TARGET_RESOLUTIONS and resolution not in TARGET_RESOLUTIONS:
            return False
        return True

    def _plot_enabled(plot_type):
        # 'all' switches on every plot type.
        return 'all' in TARGET_PLOT_TYPES or plot_type in TARGET_PLOT_TYPES

    print("üöÄ Starting cross-cell-line experiment execution...")
    print("=" * 60)

    # Select the experiments to run, preserving configuration order.
    experiments_to_run = [
        key for key in CROSS_CELL_EXPERIMENTS.keys() if _passes_filters(key)
    ]

    if len(experiments_to_run) == 0:
        print("‚ùå No experiments match the target criteria!")
        return

    print(f"üìã Will run {len(experiments_to_run)} cross-cell experiments:")
    for source_cell, target_cell, protein, resolution in experiments_to_run:
        print(f"   - {source_cell} -> {target_cell} {protein} {resolution//1000}kb")
    print()

    # Produce the requested panels for each selected experiment.
    for source_cell, target_cell, protein, resolution in experiments_to_run:
        print(f"üî¨ Processing {source_cell} -> {target_cell} {protein} {resolution//1000}kb")
        print("-" * 40)

        if _plot_enabled('intra_visualization'):
            generate_intra_cell_visualization(source_cell, target_cell, protein, resolution)

        if _plot_enabled('cross_visualization'):
            generate_cross_cell_visualization(source_cell, target_cell, protein, resolution)

        if _plot_enabled('cross_loop_analysis'):
            generate_cross_cell_loop_analysis(source_cell, target_cell, protein, resolution)

        print()

    print("üéâ All cross-cell experiments completed!")
    print("=" * 60)

# Execute the experiments
# The guard is true when this code runs as the top-level script (or in a
# notebook kernel, where __name__ is "__main__"), so the run starts as soon as
# the cell is executed; it is skipped only if this code is imported as a module.
if __name__ == "__main__":
    run_cross_cell_experiments()

# 1kb

In [None]:
# Root directory for generated figures
OUTPUT_DIR = "../../gchia/Figure"

# Genomic window shown in the visualization panels
CHROMOSOME = "chr10"
START = 21500000
END = 22000000

# Base paths (GraphChIAr and CAESAR results share the same root)
EXPERIMENT_BASE = "../../results"
CAESAR_BASE = "../../results"
DATA_BASE = "../../data"
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Experiment table keyed by (cell_line, protein, resolution) - EDIT FOR EACH RUN.
# 'experiment_dir' / 'caesar_dir' are sub-directories of EXPERIMENT_BASE / CAESAR_BASE.
CURRENT_EXPERIMENTS = {
    ('GM12878', 'CTCF', 1000): dict(
        experiment_dir='250712_GM12878_1000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_CTCF_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        experiment_version=0,
        caesar_dir='251129_GM12878_1000_Caesar_pytorch_offset0',
        loss_y_limit=0.1,
        hic_file='ENCFF379AWZ.hic',
        bigwig_files={'CTCF': 'GM12878_ctcf.bw'},
    ),
    ('GM12878', 'RAD21', 1000): dict(
        experiment_dir='250624_GM12878_1000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_RAD21_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        experiment_version=0,
        caesar_dir='251130_GM12878_1000_Caesar_pytorch_offset0RAD21',
        loss_y_limit=0.1,
        hic_file='ENCFF416RNE.hic',
        bigwig_files={'RAD21': 'GM12878_RAD21.bw'},
    ),
    ('GM12878', 'POLR2A', 1000): dict(
        experiment_dir='250902_GM12878_1000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_POLR2A_ChIA-PET_log1p_true_hictype_hic_chiatype_hic',
        experiment_version=1,
        caesar_dir='251130_GM12878_1000_Caesar_pytorch_offset0',
        loss_y_limit=0.1,
        hic_file='ENCFF791SUY.hic',
        bigwig_files={'POLR2A': 'GM12878_POLR2A.bw'},
    ),
}

# Run filters: a falsy value (e.g. None) disables the corresponding filter
TARGET_CELL_LINES = ['GM12878']
TARGET_PROTEINS = ['CTCF', 'RAD21', 'POLR2A']
TARGET_RESOLUTIONS = [1000]
# Plot types understood by run_experiments: 'all', 'visualization', 'loss', 'scc'
TARGET_PLOT_TYPES = ['all']

In [None]:
from gchia.Metrics.lossplot import plot_val_loss
from gchia.Metrics.SCCplot import plot_scc
from gchia.Metrics.visualize import load_and_visualize_region

def setup_output_dir(cell_line, protein, resolution):
    """Ensure the figure directory for one experiment exists and return its path.

    The directory is OUTPUT_DIR/<cell_line>/<protein>/<resolution // 1000>kb;
    missing parents are created and an existing directory is reused.

    Returns:
        str: the directory path.
    """
    target_dir = Path(OUTPUT_DIR).joinpath(cell_line, protein, f"{resolution//1000}kb")
    target_dir.mkdir(parents=True, exist_ok=True)
    return str(target_dir)

def generate_visualization(cell_line, protein, resolution):
    """Render the ChIA-PET / prediction / bigWig panel for one experiment.

    Looks up (cell_line, protein, resolution) in CURRENT_EXPERIMENTS, builds
    the track configurations for the CHROMOSOME:START-END window and passes
    them to load_and_visualize_region. The image is written to
    <output dir>/_<START>_<END>visualization.png.

    Returns:
        None. A missing configuration or any exception raised while plotting
        is reported with print() instead of being propagated.
    """
    experiment_key = (cell_line, protein, resolution)
    if experiment_key not in CURRENT_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution//1000}kb")
        return

    config = CURRENT_EXPERIMENTS[experiment_key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"üé® Generating visualization for {cell_line} {protein} {resolution//1000}kb...")

    # Contact-map track read from the experiment's .hic file
    hic_track = {
        'path': f'{DATA_BASE}/{cell_line}/{protein}_ChIA-PET/{config["hic_file"]}',
        'resolution': resolution
    }
    hic_config = {f'{protein} ChIA-PET {resolution//1000}kb': hic_track}

    # One signal track per configured bigWig file
    bigwig_config = {
        track_name: f'{DATA_BASE}/{cell_line}/bigWig_files/{filename}'
        for track_name, filename in config["bigwig_files"].items()
    }

    # Predicted matrix saved for exactly this genomic window
    pred_config = dict(
        label=f'Predicted {resolution//1000}kb',
        path=f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/predictions/pred_matrix_{CHROMOSOME}_{START}_{END}.npy',
        log1p=True,
        insert_at_start=True,
    )

    output_file = f'{output_dir}/_{START}_{END}visualization.png'

    try:
        load_and_visualize_region(
            chrom=CHROMOSOME,
            start=START,
            end=END,
            hic_files_config=hic_config,
            bigwig_files_config=bigwig_config,
            pred_file_config=pred_config,
            output=output_file,
            fig_width=4.5,
            colorbar=False,
            interval_between_hic=0,
            interval_after_hic_block=0.05,
            interval_between_epi=0.05,
            maxperc=99
        )
    except Exception as e:
        print(f"‚ùå Failed to generate visualization: {e}")
    else:
        print(f"‚úÖ Generated visualization: {output_file}")

def generate_loss_plot(cell_line, protein, resolution):
    """Plot the validation-loss curve for one experiment.

    Reads the Lightning metrics.csv of the configured experiment version and
    hands it to plot_val_loss, labelled 'GraphChIAr'. The figure is written
    to <output dir>/loss.svg.

    Returns:
        None. A missing configuration or any exception raised while plotting
        is reported with print() instead of being propagated.
    """
    experiment_key = (cell_line, protein, resolution)
    if experiment_key not in CURRENT_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution//1000}kb")
        return

    config = CURRENT_EXPERIMENTS[experiment_key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"üìà Generating loss plot for {cell_line} {protein} {resolution//1000}kb...")

    # Maps metrics file -> legend label. No DeepChIA-PET curve is added here:
    # the experiment entries in this section carry no 'deepchia_dir' key.
    graphchiar_metrics = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/logs/lightning_logs/version_{config["experiment_version"]}/metrics.csv'
    file_dict = {graphchiar_metrics: 'GraphChIAr'}

    output_file = f'{output_dir}/loss.svg'

    try:
        plot_val_loss(
            file_dict,
            y_limit=config['loss_y_limit'],
            smoothing_window=15,
            smoothing_polyorder=4,
            max_epoch=35,
            output_path=output_file
        )
    except Exception as e:
        print(f"‚ùå Failed to generate loss plot: {e}")
    else:
        print(f"‚úÖ Generated loss plot: {output_file}")

def generate_scc_plot(cell_line, protein, resolution):
    """Plot the SCC comparison for one experiment.

    The GraphChIAr metrics.npy is always included; a CAESAR metrics.npy is
    added when the experiment configuration has a non-empty 'caesar_dir'.
    The figure is written to <output dir>/scc.svg.

    Returns:
        None. A missing configuration or any exception raised while plotting
        is reported with print() instead of being propagated.
    """
    experiment_key = (cell_line, protein, resolution)
    if experiment_key not in CURRENT_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution//1000}kb")
        return

    config = CURRENT_EXPERIMENTS[experiment_key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"üìä Generating SCC plot for {cell_line} {protein} {resolution//1000}kb...")

    # Maps metrics file -> legend label
    graphchiar_metrics = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/metrics.npy'
    file_paths = {graphchiar_metrics: 'GraphChIAr'}

    # Optional CAESAR baseline
    baseline_dir = config.get("caesar_dir")
    if baseline_dir:
        caesar_metrics = f'{CAESAR_BASE}/{baseline_dir}/{cell_line}/metrics.npy'
        file_paths[caesar_metrics] = 'CAESAR'

    output_file = f'{output_dir}/scc.svg'

    try:
        plot_scc(
            file_paths=file_paths,
            resolution=resolution,
            output_path=output_file,
            min_y=0
        )
    except Exception as e:
        print(f"‚ùå Failed to generate SCC plot: {e}")
    else:
        print(f"‚úÖ Generated SCC plot: {output_file}")

def run_experiments():
    """Run every experiment in CURRENT_EXPERIMENTS that passes the TARGET_* filters.

    A (cell_line, protein, resolution) key is kept when each of
    TARGET_CELL_LINES, TARGET_PROTEINS and TARGET_RESOLUTIONS is either falsy
    (filter disabled) or contains the matching element of the key. For each
    kept key the generators enabled through TARGET_PLOT_TYPES are called in
    the order visualization, loss, SCC.

    Returns:
        None. Progress is reported with print().
    """
    def _passes_filters(key):
        # A falsy TARGET_* list means "do not filter on this field".
        cell_line, protein, resolution = key
        if TARGET_CELL_LINES and cell_line not in TARGET_CELL_LINES:
            return False
        if TARGET_PROTEINS and protein not in TARGET_PROTEINS:
            return False
        if TARGET_RESOLUTIONS and resolution not in TARGET_RESOLUTIONS:
            return False
        return True

    def _plot_enabled(plot_type):
        # 'all' switches on every plot type.
        return 'all' in TARGET_PLOT_TYPES or plot_type in TARGET_PLOT_TYPES

    print("üöÄ Starting experiment execution...")
    print("=" * 60)

    # Select the experiments to run, preserving configuration order.
    experiments_to_run = [
        key for key in CURRENT_EXPERIMENTS.keys() if _passes_filters(key)
    ]

    if len(experiments_to_run) == 0:
        print("‚ùå No experiments match the target criteria!")
        return

    print(f"üìã Will run {len(experiments_to_run)} experiments:")
    for cell_line, protein, resolution in experiments_to_run:
        print(f"   - {cell_line} {protein} {resolution//1000}kb")
    print()

    # Produce the requested panels for each selected experiment.
    for cell_line, protein, resolution in experiments_to_run:
        print(f"üî¨ Processing {cell_line} {protein} {resolution//1000}kb")
        print("-" * 40)

        if _plot_enabled('visualization'):
            generate_visualization(cell_line, protein, resolution)

        if _plot_enabled('loss'):
            generate_loss_plot(cell_line, protein, resolution)

        if _plot_enabled('scc'):
            generate_scc_plot(cell_line, protein, resolution)

        print()

    print("üéâ All experiments completed!")
    print("=" * 60)



In [None]:
# Generate the panels enabled in TARGET_PLOT_TYPES for every experiment that
# passes the TARGET_* filters of the 1 kb configuration cell.
run_experiments()

# micro-c

In [None]:
# Root directory for generated figures
OUTPUT_DIR = "../../gchia/Figure"

# Genomic window shown in the visualization panel
CHROMOSOME = "chr10"
START = 21000000
END = 21500000

# Base paths (GraphChIAr and CAESAR results share the same root)
EXPERIMENT_BASE = "../../results"
CAESAR_BASE = "../../results"
DATA_BASE = "../../data"
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Micro-C experiment table keyed by (cell_line, protein, resolution) - EDIT FOR EACH RUN.
# Add further experiments as extra entries.
MICRO_C_EXPERIMENTS = {
    ('H1-hESC', 'Micro-C', 1000): dict(
        experiment_dir='250414_H1-hESC_1000_NONE_ChIAPETMatrixPredictor_efeaturesq_high_micro-c_log1p_true_hictype_hic_chiatype_hic_PREDICT_ONLY_hicres_5000',
        caesar_dir='250414_H1-hESC_1000_NONE_Caesar_pytorch_micro-c_log1p_true_hictype_hic_chiatype_hic_hicres_5000',
        experiment_version=0,
        caesar_version=0,
        loss_y_limit=0.25,
        micro_c_file='4DNFI2TK7L2F.hic',  # Micro-C reference file
        bigwig_files={'CTCF': 'H1-hESC_ctcf.bw'},
    ),
}

# Run filters: a falsy value (e.g. None) disables the corresponding filter
TARGET_CELL_LINES = ['H1-hESC']
TARGET_PROTEINS = ['Micro-C']
TARGET_RESOLUTIONS = [1000]
# Accepted plot types: 'all', 'visualization', 'loss', 'scc'
TARGET_PLOT_TYPES = ['all']

from pathlib import Path
import subprocess
from gchia.Metrics.lossplot import plot_val_loss
from gchia.Metrics.SCCplot import plot_scc
from gchia.Metrics.visualize import load_and_visualize_region

def setup_output_dir(cell_line, protein, resolution):
    """Ensure the figure directory for one experiment exists and return its path.

    The directory is OUTPUT_DIR/<cell_line>/<protein>/<resolution // 1000>kb;
    missing parents are created and an existing directory is reused.

    Returns:
        str: the directory path.
    """
    target_dir = Path(OUTPUT_DIR).joinpath(cell_line, protein, f"{resolution//1000}kb")
    target_dir.mkdir(parents=True, exist_ok=True)
    return str(target_dir)

def generate_visualization(cell_line, protein, resolution):
    """Render the Micro-C / prediction / bigWig panel for one experiment.

    Looks up (cell_line, protein, resolution) in MICRO_C_EXPERIMENTS, builds
    the track configurations for the CHROMOSOME:START-END window and passes
    them to load_and_visualize_region. The image is written to
    <output dir>/visualization.png.

    Returns:
        None. A missing configuration or any exception raised while plotting
        is reported with print() instead of being propagated.
    """
    experiment_key = (cell_line, protein, resolution)
    if experiment_key not in MICRO_C_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution}bp")
        return

    config = MICRO_C_EXPERIMENTS[experiment_key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"üé® Generating visualization for {cell_line} {protein} {resolution}bp...")

    # Contact-map track read from the Micro-C .hic file
    micro_c_track = {
        'path': f'{DATA_BASE}/{cell_line}/micro-C/{config["micro_c_file"]}',
        'resolution': resolution
    }
    hic_config = {f'{protein} {resolution//1000}kb': micro_c_track}

    # One signal track per configured bigWig file
    bigwig_config = {
        track_name: f'{DATA_BASE}/{cell_line}/bigWig_files/{filename}'
        for track_name, filename in config["bigwig_files"].items()
    }

    # Predicted matrix saved for exactly this genomic window
    pred_config = dict(
        label=f'Predicted {resolution//1000}kb',
        path=f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/predictions/pred_matrix_{CHROMOSOME}_{START}_{END}.npy',
        log1p=True,
        insert_at_start=True,
    )

    output_file = f'{output_dir}/visualization.png'

    try:
        load_and_visualize_region(
            chrom=CHROMOSOME,
            start=START,
            end=END,
            hic_files_config=hic_config,
            bigwig_files_config=bigwig_config,
            pred_file_config=pred_config,
            output=output_file,
            fig_width=4.5,
            colorbar=False,
            interval_between_hic=0,
            interval_after_hic_block=0.05,
            interval_between_epi=0.05,
            maxperc=98
        )
    except Exception as e:
        print(f"‚ùå Failed to generate visualization: {e}")
    else:
        print(f"‚úÖ Generated visualization: {output_file}")

def generate_loss_plot(cell_line, protein, resolution):
    """Plot GraphChIAr and CAESAR validation-loss curves for one Micro-C experiment.

    Both curves are read from the Lightning metrics.csv of the configured
    run versions. The figure is written to <output dir>/loss.svg.

    Returns:
        None. A missing configuration or any exception raised while plotting
        is reported with print() instead of being propagated.
    """
    experiment_key = (cell_line, protein, resolution)
    if experiment_key not in MICRO_C_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution}bp")
        return

    config = MICRO_C_EXPERIMENTS[experiment_key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"üìà Generating loss plot for {cell_line} {protein} {resolution}bp...")

    # Maps metrics file -> legend label
    graphchiar_metrics = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/logs/lightning_logs/version_{config["experiment_version"]}/metrics.csv'
    caesar_metrics = f'{CAESAR_BASE}/{config["caesar_dir"]}/logs/lightning_logs/version_{config["caesar_version"]}/metrics.csv'
    file_dict = {
        graphchiar_metrics: 'GraphChIAr',
        caesar_metrics: 'CAESAR',
    }

    output_file = f'{output_dir}/loss.svg'

    try:
        plot_val_loss(
            file_dict,
            y_limit=config['loss_y_limit'],
            smoothing_window=15,
            smoothing_polyorder=4,
            max_epoch=35,
            output_path=output_file
        )
    except Exception as e:
        print(f"‚ùå Failed to generate loss plot: {e}")
    else:
        print(f"‚úÖ Generated loss plot: {output_file}")

def generate_scc_plot(cell_line, protein, resolution):
    """Plot the GraphChIAr vs. CAESAR SCC comparison for one Micro-C experiment.

    Both metrics.npy files are passed to plot_scc; the figure is written to
    <output dir>/scc.svg.

    Returns:
        None. A missing configuration or any exception raised while plotting
        is reported with print() instead of being propagated.
    """
    experiment_key = (cell_line, protein, resolution)
    if experiment_key not in MICRO_C_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution}bp")
        return

    config = MICRO_C_EXPERIMENTS[experiment_key]
    output_dir = setup_output_dir(cell_line, protein, resolution)

    print(f"üìä Generating SCC plot for {cell_line} {protein} {resolution}bp...")

    # Maps metrics file -> legend label
    graphchiar_metrics = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/metrics.npy'
    caesar_metrics = f'{CAESAR_BASE}/{config["caesar_dir"]}/{cell_line}/metrics.npy'
    file_paths = {
        graphchiar_metrics: 'GraphChIAr',
        caesar_metrics: 'CAESAR',
    }

    output_file = f'{output_dir}/scc.svg'

    try:
        plot_scc(
            file_paths=file_paths,
            resolution=resolution,
            output_path=output_file
        )
    except Exception as e:
        print(f"‚ùå Failed to generate SCC plot: {e}")
    else:
        print(f"‚úÖ Generated SCC plot: {output_file}")

def run_micro_c_experiments():
    """Run every experiment in MICRO_C_EXPERIMENTS that passes the TARGET_* filters.

    A (cell_line, protein, resolution) key is kept when each of
    TARGET_CELL_LINES, TARGET_PROTEINS and TARGET_RESOLUTIONS is either falsy
    (filter disabled) or contains the matching element of the key. For each
    kept key the generators enabled through TARGET_PLOT_TYPES are called in
    the order visualization, loss, SCC.

    Returns:
        None. Progress is reported with print().
    """
    def _passes_filters(key):
        # A falsy TARGET_* list means "do not filter on this field".
        cell_line, protein, resolution = key
        if TARGET_CELL_LINES and cell_line not in TARGET_CELL_LINES:
            return False
        if TARGET_PROTEINS and protein not in TARGET_PROTEINS:
            return False
        if TARGET_RESOLUTIONS and resolution not in TARGET_RESOLUTIONS:
            return False
        return True

    def _plot_enabled(plot_type):
        # 'all' switches on every plot type.
        return 'all' in TARGET_PLOT_TYPES or plot_type in TARGET_PLOT_TYPES

    print("üöÄ Starting Micro-C experiment execution...")
    print("=" * 60)

    # Select the experiments to run, preserving configuration order.
    experiments_to_run = [
        key for key in MICRO_C_EXPERIMENTS.keys() if _passes_filters(key)
    ]

    if len(experiments_to_run) == 0:
        print("‚ùå No experiments match the target criteria!")
        return

    print(f"üìã Will run {len(experiments_to_run)} Micro-C experiments:")
    for cell_line, protein, resolution in experiments_to_run:
        print(f"   - {cell_line} {protein} {resolution}bp")
    print()

    # Produce the requested panels for each selected experiment.
    for cell_line, protein, resolution in experiments_to_run:
        print(f"üî¨ Processing {cell_line} {protein} {resolution}bp")
        print("-" * 40)

        if _plot_enabled('visualization'):
            generate_visualization(cell_line, protein, resolution)

        if _plot_enabled('loss'):
            generate_loss_plot(cell_line, protein, resolution)

        if _plot_enabled('scc'):
            generate_scc_plot(cell_line, protein, resolution)

        print()

    print("üéâ All Micro-C experiments completed!")
    print("=" * 60)

   

In [None]:
# Generate the panels enabled in TARGET_PLOT_TYPES for every Micro-C experiment
# that passes the TARGET_* filters of the configuration cell above.
run_micro_c_experiments()

# 200bp

In [None]:
OUTPUT_DIR = "../../gchia/Figure"

# Genomic region
CHROMOSOME = "chr10"
START = 21000000
END = 21250000

# Base paths
EXPERIMENT_BASE = "../../results"
# generate_scc_plot in this section builds the CAESAR metrics path from
# CAESAR_BASE. Define it here (same value as in the other configuration cells)
# so the 200bp section does not rely on an earlier cell having been executed.
CAESAR_BASE = "../../results"
DATA_BASE = "../../data"
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Keys are (cell_line, protein, resolution); 'experiment_dir' and 'caesar_dir'
# are sub-directories of EXPERIMENT_BASE and CAESAR_BASE respectively.
CURRENT_EXPERIMENTS = {
    ('GM12878', 'CTCF', 200): {
        'experiment_dir': '250722_GM12878_200_NONE_ChIAPETMatrixPredictor_efeaturesq_super_CTCF_ChIA-PET_HIGH_log1p_true_hictype_hic_chiatype_hic_hicres_1000',
        'experiment_version': 0,
        'caesar_dir': '251125_GM12878_200_Caesar_pytorch_offset0_hicres_1000',
        'hic_file': '200bp.hic',
        'bigwig_files': {
            'CTCF': 'GM12878_ctcf.bw'
        }
    },
    ('GM12878', 'RAD21', 200): {
        'experiment_dir': '250721_GM12878_200_NONE_ChIAPETMatrixPredictor_efeaturesq_super_RAD21_ChIA-PET_HIGH_log1p_true_hictype_hic_chiatype_hic_hicres_1000',
        'experiment_version': 0,
        'caesar_dir': '251127_GM12878_200_Caesar_pytorch_offset0_hicres_1000',
        'hic_file': '200bp.hic',
        'bigwig_files': {
            'RAD21': 'GM12878_RAD21.bw'
        }
    },
    ('GM12878', 'POLR2A', 200): {
        'experiment_dir': '250903_GM12878_200_NONE_ChIAPETMatrixPredictor_efeaturesq_super_POLR2A_ChIA-PET_HIGH_log1p_true_hictype_hic_chiatype_hic_hicres_1000',
        'experiment_version': 0,
        'caesar_dir': '251128_GM12878_200_Caesar_pytorch_offset0_hicres_1000',
        'hic_file': '200bp.hic',
        'bigwig_files': {
            'POLR2A': 'GM12878_POLR2A.bw'
        }
    }
}

# Which experiments to run (set to None to run all in CURRENT_EXPERIMENTS)
# POLR2A is configured above but not selected here.
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['CTCF','RAD21']       # or None for all  
TARGET_RESOLUTIONS = [200]  # or None for all
TARGET_PLOT_TYPES = ['all']  

In [None]:
import os
from pathlib import Path

def check_file_exists(file_path, description=""):
    """Report whether `file_path` exists and is readable by the current process.

    Args:
        file_path: path to test.
        description: label used as a prefix in the printed failure message.

    Returns:
        bool: True when the path exists and is readable; otherwise False,
        after printing which of the two checks failed.
    """
    if os.path.exists(file_path):
        if os.access(file_path, os.R_OK):
            return True
        print(f"‚ùå {description} file not readable: {file_path}")
    else:
        print(f"‚ùå {description} file not found: {file_path}")
    return False

def setup_output_dir(cell_line, protein, resolution):
    """Ensure the figure directory for one experiment exists and return its path.

    The directory is OUTPUT_DIR/<cell_line>/<protein>/<resolution>bp.

    Returns:
        str | None: the directory path, or None (after printing the error)
        when the directory could not be created.
    """
    output_path = Path(OUTPUT_DIR).joinpath(cell_line, protein, f"{resolution}bp")
    try:
        output_path.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        print(f"‚ùå Failed to create output directory {output_path}: {e}")
        return None
    return str(output_path)

def validate_experiment_files(cell_line, protein, resolution, config):
    """Check the input files of one experiment and collect the missing paths.

    Checks the ChIA-PET .hic file and every configured bigWig file with
    check_file_exists. The predictions directory is not required to exist:
    it is created when absent, and only reported as missing if that
    creation fails.

    Args:
        cell_line, protein: used to build the data and prediction paths.
        resolution: accepted for signature symmetry; not used here.
        config: the experiment's entry from CURRENT_EXPERIMENTS.

    Returns:
        list[str]: paths that are missing or unreadable (empty when all is fine).
    """
    # Candidate input files paired with the label used in failure messages.
    hic_path = f'{DATA_BASE}/{cell_line}/{protein}_ChIA-PET_HIGH/{config["hic_file"]}'
    missing_files = []
    if not check_file_exists(hic_path, "HiC"):
        missing_files.append(hic_path)

    for track_name, filename in config["bigwig_files"].items():
        bigwig_path = f'{DATA_BASE}/{cell_line}/bigWig_files/{filename}'
        readable = check_file_exists(bigwig_path, f"BigWig ({track_name})")
        if not readable:
            missing_files.append(bigwig_path)

    # The prediction file itself is not checked here; only its directory is ensured.
    pred_dir = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/predictions'
    if os.path.exists(pred_dir):
        return missing_files

    print(f"‚ö†Ô∏è  Prediction directory doesn't exist: {pred_dir}")
    try:
        os.makedirs(pred_dir, exist_ok=True)
        print(f"‚úÖ Created prediction directory: {pred_dir}")
    except Exception as e:
        print(f"‚ùå Failed to create prediction directory: {e}")
        missing_files.append(pred_dir)

    return missing_files

def generate_visualization(cell_line, protein, resolution):
    """Generate the ChIA-PET / prediction / bigWig panel for one experiment.

    Validates the input files first, then plots the CHROMOSOME:START-END
    window with load_and_visualize_region and finally checks that the image
    <output dir>/<START>_<END>visualization.png was actually written.

    Args:
        cell_line, protein, resolution: key of the experiment in
            CURRENT_EXPERIMENTS; `resolution` is in base pairs.

    Returns:
        bool: True when the output image exists and is readable after
        plotting. False when the configuration is missing, input files are
        missing, the output directory cannot be created, or plotting raised.
    """
    # Resolutions in this section are below 1 kb, so report them in bp:
    # `resolution // 1000` would print "0kb" for a 200 bp experiment.
    experiment_label = f"{cell_line} {protein} {resolution}bp"

    if (cell_line, protein, resolution) not in CURRENT_EXPERIMENTS:
        print(f"‚ùå No configuration found for {experiment_label}")
        return False

    config = CURRENT_EXPERIMENTS[(cell_line, protein, resolution)]

    # Validate all files first
    missing_files = validate_experiment_files(cell_line, protein, resolution, config)
    if missing_files:
        print(f"‚ùå Cannot proceed with {experiment_label} - missing files:")
        for file_path in missing_files:
            print(f"   - {file_path}")
        return False

    output_dir = setup_output_dir(cell_line, protein, resolution)
    if not output_dir:
        # setup_output_dir has already reported why the directory is unusable.
        return False

    print(f"üé® Generating visualization for {experiment_label}...")

    # HiC configuration
    hic_config = {
        f'{protein} ChIA-PET {resolution}bp': {
            'path': f'{DATA_BASE}/{cell_line}/{protein}_ChIA-PET_HIGH/{config["hic_file"]}',
            'resolution': resolution
        }
    }

    # BigWig configuration
    bigwig_config = {
        track_name: f'{DATA_BASE}/{cell_line}/bigWig_files/{filename}'
        for track_name, filename in config["bigwig_files"].items()
    }

    # Prediction configuration
    pred_file_path = f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/predictions/pred_matrix_{CHROMOSOME}_{START}_{END}.npy'
    pred_config = {
        'label': f'Predicted {resolution}bp',
        'path': pred_file_path,
        'log1p': True,
        'insert_at_start': True
    }

    output_file = f'{output_dir}/{START}_{END}visualization.png'

    try:
        # A missing prediction file is only warned about; the plotting call
        # is still attempted and any resulting error is handled below.
        if not os.path.exists(pred_file_path):
            print(f"‚ö†Ô∏è  Prediction file doesn't exist yet: {pred_file_path}")

        load_and_visualize_region(
            chrom=CHROMOSOME,
            start=START,
            end=END,
            hic_files_config=hic_config,
            bigwig_files_config=bigwig_config,
            pred_file_config=pred_config,
            output=output_file,
            fig_width=4.5,
            colorbar=False,
            interval_between_hic=0,
            interval_after_hic_block=0.05,
            interval_between_epi=0.05,
            maxperc=99
        )

        # Verify output file was created
        if check_file_exists(output_file, "Output visualization"):
            print(f"‚úÖ Generated visualization: {output_file}")
            return True
        else:
            print(f"‚ùå Visualization file was not created: {output_file}")
            return False

    except FileNotFoundError as e:
        print(f"‚ùå File not found during visualization: {e}")
        return False
    except PermissionError as e:
        print(f"‚ùå Permission denied: {e}")
        return False
    except Exception as e:
        print(f"‚ùå Failed to generate visualization: {e}")
        return False
def generate_scc_plot(cell_line, protein, resolution):
    """Generate the SCC comparison panel for one experiment.

    The GraphChIAr metrics.npy is always included; a CAESAR metrics.npy is
    added when the experiment configuration has a non-empty 'caesar_dir'.
    The figure is written to <output dir>/scc.svg.

    Args:
        cell_line, protein, resolution: key of the experiment in
            CURRENT_EXPERIMENTS; `resolution` is in base pairs.

    Returns:
        bool: True when plot_scc completed without raising. False when the
        configuration is missing, the output directory could not be created,
        or plotting raised (the error is printed, not propagated).
    """
    if (cell_line, protein, resolution) not in CURRENT_EXPERIMENTS:
        print(f"‚ùå No configuration found for {cell_line} {protein} {resolution}bp")
        return False

    config = CURRENT_EXPERIMENTS[(cell_line, protein, resolution)]
    output_dir = setup_output_dir(cell_line, protein, resolution)
    if not output_dir:
        # setup_output_dir in this section returns None (after printing the
        # error) when the directory cannot be created; without this guard the
        # plot would be written to the literal path "None/scc.svg".
        return False

    print(f"üìä Generating SCC plot for {cell_line} {protein} {resolution}bp...")

    # Maps metrics file -> legend label
    file_paths = {
        f'{EXPERIMENT_BASE}/{config["experiment_dir"]}/{cell_line}/metrics.npy': 'GraphChIAr'
    }
    # Optional CAESAR baseline
    baseline_dir = config.get("caesar_dir")
    if baseline_dir:
        file_paths[f'{CAESAR_BASE}/{baseline_dir}/{cell_line}/metrics.npy'] = 'CAESAR'

    output_file = f'{output_dir}/scc.svg'

    try:
        plot_scc(
            file_paths=file_paths,
            resolution=resolution,
            output_path=output_file,
            min_y=0
        )
    except Exception as e:
        print(f"‚ùå Failed to generate SCC plot: {e}")
        return False

    print(f"‚úÖ Generated SCC plot: {output_file}")
    return True
    
def run_experiments():
    """Run the selected 200bp-style experiments and print a success/failure summary.

    Aborts early when DATA_BASE or EXPERIMENT_BASE does not exist. Otherwise
    every (cell_line, protein, resolution) key of CURRENT_EXPERIMENTS that
    passes the TARGET_* filters is processed (a falsy filter list disables
    that filter). For each experiment the visualization and SCC panels
    enabled through TARGET_PLOT_TYPES are generated; the experiment counts as
    failed when a generator returns False or raises.

    Returns:
        None. Progress and the final summary are reported with print().
    """
    print("üöÄ Starting experiment execution...")
    print("=" * 60)

    # Check if required base directories exist
    if not os.path.exists(DATA_BASE):
        print(f"‚ùå Data base directory not found: {DATA_BASE}")
        return

    if not os.path.exists(EXPERIMENT_BASE):
        print(f"‚ùå Experiment base directory not found: {EXPERIMENT_BASE}")
        return

    # Determine which experiments to run
    experiments_to_run = []

    for (cell_line, protein, resolution) in CURRENT_EXPERIMENTS.keys():
        # Filter by targets
        if TARGET_CELL_LINES and cell_line not in TARGET_CELL_LINES:
            continue
        if TARGET_PROTEINS and protein not in TARGET_PROTEINS:
            continue
        if TARGET_RESOLUTIONS and resolution not in TARGET_RESOLUTIONS:
            continue

        experiments_to_run.append((cell_line, protein, resolution))

    if not experiments_to_run:
        print("‚ùå No experiments match the target criteria!")
        return

    # Resolutions are reported in bp: `resolution // 1000` would print "0kb"
    # for the sub-kilobase experiments handled in this section.
    print(f"üìã Will run {len(experiments_to_run)} experiments:")
    for cell_line, protein, resolution in experiments_to_run:
        print(f"   - {cell_line} {protein} {resolution}bp")
    print()

    # Run experiments with success tracking
    successful_experiments = 0
    failed_experiments = 0

    for cell_line, protein, resolution in experiments_to_run:
        print(f"üî¨ Processing {cell_line} {protein} {resolution}bp")
        print("-" * 40)

        success = True
        if 'all' in TARGET_PLOT_TYPES or 'visualization' in TARGET_PLOT_TYPES:
            if not generate_visualization(cell_line, protein, resolution):
                success = False
        if 'all' in TARGET_PLOT_TYPES or 'scc' in TARGET_PLOT_TYPES:
            try:
                # generate_scc_plot handles its own plotting errors, so an
                # explicit False result is what signals failure. `is False`
                # keeps a None return (no status reported) counted as success.
                if generate_scc_plot(cell_line, protein, resolution) is False:
                    success = False
            except Exception as e:
                print(f"‚ùå SCC plot generation failed: {e}")
                success = False
        if success:
            successful_experiments += 1
            print(f"‚úÖ Completed {cell_line} {protein} {resolution}bp")
        else:
            failed_experiments += 1
            print(f"‚ùå Failed {cell_line} {protein} {resolution}bp")

        print()

    print("üéâ Experiment execution summary:")
    print(f"   ‚úÖ Successful: {successful_experiments}")
    print(f"   ‚ùå Failed: {failed_experiments}")
    print("=" * 60)

In [None]:
# Generate the visualization and SCC panels for the experiments selected in the
# 200bp configuration cell, then print the success/failure summary.
run_experiments()

In [None]:
OUTPUT_DIR = "../../gchia/Figure"

# Genomic region
CHROMOSOME = "chr10"
START = 22500000
END = 22750000

# Base paths
EXPERIMENT_BASE = "../../results"
# generate_scc_plot builds the CAESAR metrics path from CAESAR_BASE. Define it
# here (same value as in the other configuration cells) so this cell is
# self-contained like the rest of its settings.
CAESAR_BASE = "../../results"
DATA_BASE = "../../data"
REFERENCE_GENOME = "../../ReferenceGenome/hg38/hg38.chrom.sizes"

# Current experiment configuration - MODIFY THIS FOR EACH RUN
# Only the POLR2A 200bp experiment is configured for this genomic window.
CURRENT_EXPERIMENTS = {
    ('GM12878', 'POLR2A', 200): {
        'experiment_dir': '250903_GM12878_200_NONE_ChIAPETMatrixPredictor_efeaturesq_super_POLR2A_ChIA-PET_HIGH_log1p_true_hictype_hic_chiatype_hic_hicres_1000',
        'experiment_version': 0,
        'caesar_dir': '251128_GM12878_200_Caesar_pytorch_offset0_hicres_1000',
        'hic_file': '200bp.hic',
        'bigwig_files': {
            'POLR2A': 'GM12878_POLR2A.bw'
        }
    }
}

# Which experiments to run (set to None to run all in CURRENT_EXPERIMENTS)
TARGET_CELL_LINES = ['GM12878']  # or None for all
TARGET_PROTEINS = ['POLR2A']       # or None for all  
TARGET_RESOLUTIONS = [200]  # or None for all
TARGET_PLOT_TYPES = ['all']  

In [None]:
# Re-run with the POLR2A configuration from the cell above. This calls whichever
# run_experiments was defined most recently in the kernel (the 200bp version
# when the notebook is executed top to bottom).
run_experiments()