<a href="https://colab.research.google.com/github/hsadegh78/H-Sadeghi/blob/main/Whole_Genome_Deep_Learning_Predicts_Chemotherapy_Response_in_Colorectal_Cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Rectangle, Circle, Polygon, Arrow
from matplotlib.gridspec import GridSpec

# Notebook-wide plotting defaults: seaborn "white" theme first, then the
# base font settings applied on top of it.
sns.set_style("white")
plt.rcParams['font.size'] = 10
plt.rcParams['font.family'] = 'sans-serif'

# Figure 1: Preprocessing Workflow
def create_preprocessing_figure():
    """Draw the four-panel preprocessing schematic and save it as a PDF.

    The figure is a 2x2 grid of hand-placed shapes and labels; no data is
    read. Every panel uses a 0-1 coordinate box with its axes hidden:

    A) a framed text summary of the dataset,
    B) the variant-filtering steps as a top-to-bottom flow chart,
    C) a chromosome bar overlaid with eight labelled bins,
    D) four coloured feature boxes above a storage box.

    Side effects: writes ``preprocessing_workflow.pdf`` to the current
    working directory, then displays the figure with ``plt.show()``.
    """
    fig = plt.figure(figsize=(12, 8), dpi=300)
    gs = GridSpec(2, 2, figure=fig, width_ratios=[1, 1], height_ratios=[1, 1])

    # A: TCGA Data Acquisition
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.set_title("A) TCGA Data Acquisition", fontweight='bold', loc='left')
    ax1.text(0.5, 0.7, "TCGA-COAD Dataset", ha='center', fontsize=14, fontweight='bold')
    ax1.text(0.5, 0.6, "2,546 CRC Patients", ha='center', fontsize=12)
    ax1.text(0.5, 0.5, "Whole Genome Sequencing", ha='center', fontsize=12)
    ax1.text(0.5, 0.4, "Clinical Records", ha='center', fontsize=12)
    ax1.add_patch(Rectangle((0.2, 0.2), 0.6, 0.6, fill=None, edgecolor='blue', lw=2))
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.axis('off')

    # B: Variant Filtering Pipeline
    ax2 = fig.add_subplot(gs[0, 1])
    ax2.set_title("B) Variant Filtering Pipeline", fontweight='bold', loc='left')
    steps = ["Raw VCF", "VAF > 5%", "Depth ≥30×", "Quality Filters", "GC Correction"]
    row_pitch = 0.15    # vertical distance between consecutive step boxes
    box_height = 0.08
    box_drop = 0.05     # each box starts this far below its label's y position
    gap = row_pitch - box_height  # free space between one box and the next
    for i, step in enumerate(steps):
        y_pos = 0.8 - i * row_pitch
        box_bottom = y_pos - box_drop
        ax2.text(0.3, y_pos, step, fontsize=11)
        ax2.add_patch(Rectangle((0.25, box_bottom), 0.5, box_height,
                                fill=True, color='lightblue'))
        if i < len(steps) - 1:
            # The connector covers exactly the gap between this box and the
            # next one, head included. A longer arrow would run underneath the
            # next box (drawn later at the same z-order) and its head would
            # surface one gap too low.
            ax2.arrow(0.5, box_bottom, 0, -gap, head_width=0.03, head_length=0.02,
                      fc='k', length_includes_head=True)
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    # C: Genomic Binning Strategy
    ax3 = fig.add_subplot(gs[1, 0])
    ax3.set_title("C) Genomic Binning Strategy", fontweight='bold', loc='left')

    # Draw chromosome
    ax3.add_patch(Rectangle((0.1, 0.5), 0.8, 0.05, color='grey'))

    # Draw bins: eight outlines of width 0.1 tiling the chromosome bar
    for i in range(8):
        start = 0.1 + i*0.1
        ax3.add_patch(Rectangle((start, 0.4), 0.1, 0.2, fill=None, edgecolor='green', lw=1))
        ax3.text(start+0.05, 0.3, f"Bin {i+1}", ha='center', fontsize=9)
        ax3.text(start+0.05, 0.55, "10kb", ha='center', fontsize=8)

    ax3.text(0.5, 0.65, "GRCh38 Chromosome", ha='center', fontsize=10)
    ax3.set_xlim(0, 1)
    ax3.set_ylim(0, 1)
    ax3.axis('off')

    # D: Feature Extraction Process
    ax4 = fig.add_subplot(gs[1, 1])
    ax4.set_title("D) Feature Extraction Process", fontweight='bold', loc='left')
    features = ["Mutation Count", "Conservation", "Accessibility", "TAD Disruption"]
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

    for i, (feat, col) in enumerate(zip(features, colors)):
        y_pos = 0.8 - i*0.15
        ax4.add_patch(Rectangle((0.3, y_pos-0.05), 0.4, 0.08, color=col))
        ax4.text(0.5, y_pos, feat, ha='center', color='white', fontweight='bold')

    ax4.text(0.5, 0.2, "HDF5 Storage", ha='center', fontsize=12)
    ax4.add_patch(Rectangle((0.4, 0.1), 0.2, 0.08, fill=True, color='purple'))
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    plt.tight_layout()
    plt.savefig('preprocessing_workflow.pdf', bbox_inches='tight')
    plt.show()

# Figure 2: Architecture Detail
def create_architecture_figure():
    """Draw the five-panel model-architecture schematic and save it as a PDF.

    The panels sit in one row with unequal widths and are built purely from
    hand-placed shapes and labels in a 0-1 coordinate box with hidden axes:

    A) a 5x4 grid of coloured cells with the four feature column tags,
    B) three stacked convolution blocks,
    C) four rows of paired forward/backward LSTM cells,
    D) four attention-head circles joined to a central circle,
    E) a four-box prediction stack.

    Side effects: writes ``architecture_detail.pdf`` to the current working
    directory, then displays the figure with ``plt.show()``.
    """
    fig = plt.figure(figsize=(14, 8), dpi=300)
    gs = GridSpec(1, 5, figure=fig, width_ratios=[1.2, 1.5, 1.5, 1.8, 1])

    # A: Input Encoding
    ax1 = fig.add_subplot(gs[0])
    ax1.set_title("A) Input Encoding", fontweight='bold', loc='left')
    ax1.text(0.5, 0.85, "Genomic Bins", ha='center', fontsize=12, fontweight='bold')

    # Draw input matrix: 5 rows x 4 columns, indices 0-19 into the tab20 colormap
    feature_tags = ["Mut", "Cons", "Acc", "TAD"]
    for i in range(5):
        for j in range(4):
            ax1.add_patch(Rectangle((0.2+j*0.15, 0.5-i*0.1), 0.13, 0.08,
                                  fill=True, color=plt.cm.tab20(i*4+j)))
            if i == 0:
                # Column headers are written once, above the top row
                ax1.text(0.26+j*0.15, 0.62, feature_tags[j],
                        fontsize=8, ha='center')

    # NOTE(review): this caption is centred on x=0.1, so part of it falls left
    # of the panel's 0-1 box — confirm the placement is intended.
    ax1.text(0.1, 0.2, "2,546 Patients\n303,104 Bins\n4 Features",
            ha='center', fontsize=10)
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.axis('off')

    # B: Convolutional Blocks
    ax2 = fig.add_subplot(gs[1])
    ax2.set_title("B) Convolutional Blocks", fontweight='bold', loc='left')

    # Draw conv blocks
    conv_pitch, conv_height = 0.2, 0.15
    conv_gap = conv_pitch - conv_height  # free space between stacked blocks
    for i in range(3):
        y_pos = 0.7 - i * conv_pitch
        ax2.add_patch(Rectangle((0.2, y_pos), 0.6, conv_height, fill=True, color='#ff7f0e'))
        ax2.text(0.5, y_pos+0.075, f"Conv Block {i+1}", ha='center', color='white')

        if i < 2:
            # Arrow fills exactly the gap (head included); anything longer
            # is painted over by the next block, hiding the arrowhead.
            ax2.arrow(0.5, y_pos, 0, -conv_gap, head_width=0.03, head_length=0.02,
                      fc='k', length_includes_head=True)

    ax2.text(0.5, 0.9, "Input", ha='center')
    ax2.text(0.5, 0.1, "Output Features", ha='center')
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    # C: Bidirectional LSTM
    ax3 = fig.add_subplot(gs[2])
    ax3.set_title("C) Bidirectional LSTM", fontweight='bold', loc='left')

    # Draw LSTM cells
    lstm_pitch, lstm_height = 0.15, 0.1
    lstm_gap = lstm_pitch - lstm_height
    for i in range(4):
        y_pos = 0.7 - i * lstm_pitch
        # Forward LSTM
        ax3.add_patch(Rectangle((0.2, y_pos), 0.3, lstm_height, fill=True, color='#2ca02c'))
        # Backward LSTM
        ax3.add_patch(Rectangle((0.5, y_pos), 0.3, lstm_height, fill=True, color='#d62728'))

        if i < 3:
            # One connector per column, confined to the gap between rows so
            # the head stays visible above the next row of cells.
            for x_mid in (0.35, 0.65):
                ax3.arrow(x_mid, y_pos, 0, -lstm_gap, head_width=0.02, head_length=0.01,
                          fc='k', length_includes_head=True)

    ax3.text(0.35, 0.85, "Forward", ha='center', fontsize=9)
    ax3.text(0.65, 0.85, "Backward", ha='center', fontsize=9)
    ax3.text(0.5, 0.1, "Concatenated Output", ha='center')
    ax3.set_xlim(0, 1)
    ax3.set_ylim(0, 1)
    ax3.axis('off')

    # D: Multi-head Attention
    ax4 = fig.add_subplot(gs[3])
    ax4.set_title("D) Multi-head Attention", fontweight='bold', loc='left')

    # Draw attention heads on a circle of radius 0.3 around the panel centre
    angles = [30, 150, 210, 330]
    colors = ['#9467bd', '#8c564b', '#e377c2', '#7f7f7f']
    for i, (angle, color) in enumerate(zip(angles, colors)):
        rad = np.deg2rad(angle)
        x = 0.5 + 0.3 * np.cos(rad)
        y = 0.5 + 0.3 * np.sin(rad)
        ax4.add_patch(Circle((x, y), 0.08, color=color))
        ax4.text(x, y, f"Head {i+1}", ha='center', va='center', color='white', fontsize=9)
        ax4.plot([0.5, x], [0.5, y], 'k-', lw=1)

    ax4.add_patch(Circle((0.5, 0.5), 0.1, color='#17becf'))
    ax4.text(0.5, 0.5, "Attention\nMechanism", ha='center', va='center', fontsize=9)
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    # E: Prediction Network
    ax5 = fig.add_subplot(gs[4])
    ax5.set_title("E) Prediction Network", fontweight='bold', loc='left')

    # Draw MLP: (label, y of box bottom); boxes are 0.08 tall and 0.2 apart
    layers = [("Global\nAvg Pool", 0.7), ("Dense\n512", 0.5), ("Dense\n256", 0.3), ("Output", 0.1)]
    for i, (label, y_pos) in enumerate(layers):
        ax5.add_patch(Rectangle((0.3, y_pos), 0.4, 0.08, fill=True, color='#e6550d'))
        ax5.text(0.5, y_pos+0.04, label, ha='center', color='white', fontsize=9)
        if i < len(layers)-1:
            # 0.12 is the full gap to the next box; counting the head inside
            # that length keeps it from being hidden behind the next box.
            ax5.arrow(0.5, y_pos, 0, -0.12, head_width=0.03, head_length=0.02,
                      fc='k', length_includes_head=True)

    ax5.set_xlim(0, 1)
    ax5.set_ylim(0, 1)
    ax5.axis('off')

    plt.tight_layout()
    plt.savefig('architecture_detail.pdf', bbox_inches='tight')
    plt.show()

# Figure 3: Training Curves
def create_training_figure(seed=None):
    """Plot a 2x2 "Training Dynamics" figure from simulated curves; save as PDF.

    Nothing is read from a training log. Every series is generated here from a
    closed-form curve over 100 epochs plus Gaussian noise:

    A) learning rate: cosine curve with its maximum (0.001) at epoch 5,
    B) training / validation loss: exponential decays,
    C) histogram of gradient norms with a vertical line at 5,
    D) precision and recall: saturating exponentials, y-axis limited to 0.5-0.9.

    Args:
        seed: Optional integer. When given, noise comes from a private
            ``np.random.RandomState(seed)`` so the figure is reproducible.
            When ``None`` (default) the global ``np.random`` state is used,
            exactly as before.

    Side effects: writes ``training_curves.pdf`` to the current working
    directory, then displays the figure with ``plt.show()``.
    """
    # Simulate training data
    rng = np.random if seed is None else np.random.RandomState(seed)
    epochs = np.arange(0, 100)
    n_epochs = epochs.size  # keep noise vectors the same length as `epochs`

    def noise(scale):
        # Gaussian noise, one sample per epoch
        return scale * rng.randn(n_epochs)

    lr = 0.001 * (0.5 * (1 + np.cos((epochs - 5) * np.pi / 95)))
    train_loss = 0.7 * np.exp(-epochs/20) + noise(0.05)
    val_loss = 0.75 * np.exp(-epochs/25) + noise(0.05)
    # A norm cannot be negative. The additive noise would push many late-epoch
    # samples below zero, so fold them back onto the non-negative axis.
    grad_norm = np.abs(5 * np.exp(-epochs/15) + noise(0.5))
    precision = 0.8 - 0.7 * np.exp(-epochs/10) + noise(0.02)
    recall = 0.75 - 0.7 * np.exp(-epochs/12) + noise(0.02)

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=300)
    fig.suptitle("Training Dynamics", fontsize=16, fontweight='bold')

    # A: Learning Rate Schedule
    axs[0, 0].plot(epochs, lr, 'b-', linewidth=2)
    axs[0, 0].set_title("A) Learning Rate Schedule", fontweight='bold', loc='left')
    axs[0, 0].set_xlabel("Epochs")
    axs[0, 0].set_ylabel("Learning Rate")
    axs[0, 0].grid(True, linestyle='--', alpha=0.7)
    axs[0, 0].set_xlim(0, 100)

    # B: Loss Curves
    axs[0, 1].plot(epochs, train_loss, 'b-', label='Training Loss')
    axs[0, 1].plot(epochs, val_loss, 'r-', label='Validation Loss')
    axs[0, 1].set_title("B) Training and Validation Loss", fontweight='bold', loc='left')
    axs[0, 1].set_xlabel("Epochs")
    axs[0, 1].set_ylabel("Loss")
    axs[0, 1].legend()
    axs[0, 1].grid(True, linestyle='--', alpha=0.7)
    axs[0, 1].set_xlim(0, 100)

    # C: Gradient Norm Distribution (samples of 10 or more are left out)
    axs[1, 0].hist(grad_norm[grad_norm < 10], bins=20, color='g', alpha=0.7)
    axs[1, 0].axvline(5, color='r', linestyle='--', label='Clipping Threshold')
    axs[1, 0].set_title("C) Gradient Norm Distribution", fontweight='bold', loc='left')
    axs[1, 0].set_xlabel("Gradient Norm")
    axs[1, 0].set_ylabel("Frequency")
    axs[1, 0].legend()
    axs[1, 0].grid(True, linestyle='--', alpha=0.7)

    # D: Precision-Recall Progression
    axs[1, 1].plot(epochs, precision, 'b-', label='Precision')
    axs[1, 1].plot(epochs, recall, 'g-', label='Recall')
    axs[1, 1].set_title("D) Precision-Recall Progression", fontweight='bold', loc='left')
    axs[1, 1].set_xlabel("Epochs")
    axs[1, 1].set_ylabel("Score")
    axs[1, 1].legend()
    axs[1, 1].grid(True, linestyle='--', alpha=0.7)
    axs[1, 1].set_xlim(0, 100)
    axs[1, 1].set_ylim(0.5, 0.9)

    plt.tight_layout()
    # Re-open head-room for the suptitle after tight_layout packed the axes
    plt.subplots_adjust(top=0.92)
    plt.savefig('training_curves.pdf', bbox_inches='tight')
    plt.show()



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Rectangle, Circle, Polygon, Arrow
from matplotlib.gridspec import GridSpec

# Notebook-wide plotting defaults: seaborn "white" theme first, then the
# base font settings applied on top of it.
sns.set_style("white")
plt.rcParams['font.size'] = 10
plt.rcParams['font.family'] = 'sans-serif'

# Figure 1: Preprocessing Workflow
def create_preprocessing_figure():
    """Draw the four-panel preprocessing schematic and save it as a PNG.

    The figure is a 2x2 grid of hand-placed shapes and labels; no data is
    read. Every panel uses a 0-1 coordinate box with its axes hidden:

    A) a framed text summary of the dataset,
    B) the variant-filtering steps as a top-to-bottom flow chart,
    C) a chromosome bar overlaid with eight labelled bins,
    D) four coloured feature boxes above a storage box.

    Side effects: writes ``preprocessing_workflow.png`` to the current
    working directory and closes the figure without displaying it.
    """
    fig = plt.figure(figsize=(12, 8), dpi=300)
    gs = GridSpec(2, 2, figure=fig, width_ratios=[1, 1], height_ratios=[1, 1])

    # A: TCGA Data Acquisition
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.set_title("A) TCGA Data Acquisition", fontweight='bold', loc='left')
    ax1.text(0.5, 0.7, "TCGA-COAD Dataset", ha='center', fontsize=14, fontweight='bold')
    ax1.text(0.5, 0.6, "2,546 CRC Patients", ha='center', fontsize=12)
    ax1.text(0.5, 0.5, "Whole Genome Sequencing", ha='center', fontsize=12)
    ax1.text(0.5, 0.4, "Clinical Records", ha='center', fontsize=12)
    ax1.add_patch(Rectangle((0.2, 0.2), 0.6, 0.6, fill=None, edgecolor='blue', lw=2))
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.axis('off')

    # B: Variant Filtering Pipeline
    ax2 = fig.add_subplot(gs[0, 1])
    ax2.set_title("B) Variant Filtering Pipeline", fontweight='bold', loc='left')
    steps = ["Raw VCF", "VAF > 5%", "Depth ≥30×", "Quality Filters", "GC Correction"]
    row_pitch = 0.15    # vertical distance between consecutive step boxes
    box_height = 0.08
    box_drop = 0.05     # each box starts this far below its label's y position
    gap = row_pitch - box_height  # free space between one box and the next
    for i, step in enumerate(steps):
        y_pos = 0.8 - i * row_pitch
        box_bottom = y_pos - box_drop
        ax2.text(0.3, y_pos, step, fontsize=11)
        ax2.add_patch(Rectangle((0.25, box_bottom), 0.5, box_height,
                                fill=True, color='lightblue'))
        if i < len(steps) - 1:
            # The connector covers exactly the gap between this box and the
            # next one, head included. A longer arrow would run underneath the
            # next box (drawn later at the same z-order) and its head would
            # surface one gap too low.
            ax2.arrow(0.5, box_bottom, 0, -gap, head_width=0.03, head_length=0.02,
                      fc='k', length_includes_head=True)
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    # C: Genomic Binning Strategy
    ax3 = fig.add_subplot(gs[1, 0])
    ax3.set_title("C) Genomic Binning Strategy", fontweight='bold', loc='left')

    # Draw chromosome
    ax3.add_patch(Rectangle((0.1, 0.5), 0.8, 0.05, color='grey'))

    # Draw bins: eight outlines of width 0.1 tiling the chromosome bar
    for i in range(8):
        start = 0.1 + i*0.1
        ax3.add_patch(Rectangle((start, 0.4), 0.1, 0.2, fill=None, edgecolor='green', lw=1))
        ax3.text(start+0.05, 0.3, f"Bin {i+1}", ha='center', fontsize=9)
        ax3.text(start+0.05, 0.55, "10kb", ha='center', fontsize=8)

    ax3.text(0.5, 0.65, "GRCh38 Chromosome", ha='center', fontsize=10)
    ax3.set_xlim(0, 1)
    ax3.set_ylim(0, 1)
    ax3.axis('off')

    # D: Feature Extraction Process
    ax4 = fig.add_subplot(gs[1, 1])
    ax4.set_title("D) Feature Extraction Process", fontweight='bold', loc='left')
    features = ["Mutation Count", "Conservation", "Accessibility", "TAD Disruption"]
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

    for i, (feat, col) in enumerate(zip(features, colors)):
        y_pos = 0.8 - i*0.15
        ax4.add_patch(Rectangle((0.3, y_pos-0.05), 0.4, 0.08, color=col))
        ax4.text(0.5, y_pos, feat, ha='center', color='white', fontweight='bold')

    ax4.text(0.5, 0.2, "HDF5 Storage", ha='center', fontsize=12)
    ax4.add_patch(Rectangle((0.4, 0.1), 0.2, 0.08, fill=True, color='purple'))
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    plt.tight_layout()
    plt.savefig('preprocessing_workflow.png', bbox_inches='tight')
    plt.close()

# Figure 2: Architecture Detail
def create_architecture_figure():
    """Draw the five-panel model-architecture schematic and save it as a PNG.

    The panels sit in one row with unequal widths and are built purely from
    hand-placed shapes and labels in a 0-1 coordinate box with hidden axes:

    A) a 5x4 grid of coloured cells with the four feature column tags,
    B) three stacked convolution blocks,
    C) four rows of paired forward/backward LSTM cells,
    D) four attention-head circles joined to a central circle,
    E) a four-box prediction stack.

    Side effects: writes ``architecture_detail.png`` to the current working
    directory and closes the figure without displaying it.
    """
    fig = plt.figure(figsize=(14, 8), dpi=300)
    gs = GridSpec(1, 5, figure=fig, width_ratios=[1.2, 1.5, 1.5, 1.8, 1])

    # A: Input Encoding
    ax1 = fig.add_subplot(gs[0])
    ax1.set_title("A) Input Encoding", fontweight='bold', loc='left')
    ax1.text(0.5, 0.85, "Genomic Bins", ha='center', fontsize=12, fontweight='bold')

    # Draw input matrix: 5 rows x 4 columns, indices 0-19 into the tab20 colormap
    feature_tags = ["Mut", "Cons", "Acc", "TAD"]
    for i in range(5):
        for j in range(4):
            ax1.add_patch(Rectangle((0.2+j*0.15, 0.5-i*0.1), 0.13, 0.08,
                                  fill=True, color=plt.cm.tab20(i*4+j)))
            if i == 0:
                # Column headers are written once, above the top row
                ax1.text(0.26+j*0.15, 0.62, feature_tags[j],
                        fontsize=8, ha='center')

    # NOTE(review): this caption is centred on x=0.1, so part of it falls left
    # of the panel's 0-1 box — confirm the placement is intended.
    ax1.text(0.1, 0.2, "2,546 Patients\n303,104 Bins\n4 Features",
            ha='center', fontsize=10)
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    ax1.axis('off')

    # B: Convolutional Blocks
    ax2 = fig.add_subplot(gs[1])
    ax2.set_title("B) Convolutional Blocks", fontweight='bold', loc='left')

    # Draw conv blocks
    conv_pitch, conv_height = 0.2, 0.15
    conv_gap = conv_pitch - conv_height  # free space between stacked blocks
    for i in range(3):
        y_pos = 0.7 - i * conv_pitch
        ax2.add_patch(Rectangle((0.2, y_pos), 0.6, conv_height, fill=True, color='#ff7f0e'))
        ax2.text(0.5, y_pos+0.075, f"Conv Block {i+1}", ha='center', color='white')

        if i < 2:
            # Arrow fills exactly the gap (head included); anything longer
            # is painted over by the next block, hiding the arrowhead.
            ax2.arrow(0.5, y_pos, 0, -conv_gap, head_width=0.03, head_length=0.02,
                      fc='k', length_includes_head=True)

    ax2.text(0.5, 0.9, "Input", ha='center')
    ax2.text(0.5, 0.1, "Output Features", ha='center')
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')

    # C: Bidirectional LSTM
    ax3 = fig.add_subplot(gs[2])
    ax3.set_title("C) Bidirectional LSTM", fontweight='bold', loc='left')

    # Draw LSTM cells
    lstm_pitch, lstm_height = 0.15, 0.1
    lstm_gap = lstm_pitch - lstm_height
    for i in range(4):
        y_pos = 0.7 - i * lstm_pitch
        # Forward LSTM
        ax3.add_patch(Rectangle((0.2, y_pos), 0.3, lstm_height, fill=True, color='#2ca02c'))
        # Backward LSTM
        ax3.add_patch(Rectangle((0.5, y_pos), 0.3, lstm_height, fill=True, color='#d62728'))

        if i < 3:
            # One connector per column, confined to the gap between rows so
            # the head stays visible above the next row of cells.
            for x_mid in (0.35, 0.65):
                ax3.arrow(x_mid, y_pos, 0, -lstm_gap, head_width=0.02, head_length=0.01,
                          fc='k', length_includes_head=True)

    ax3.text(0.35, 0.85, "Forward", ha='center', fontsize=9)
    ax3.text(0.65, 0.85, "Backward", ha='center', fontsize=9)
    ax3.text(0.5, 0.1, "Concatenated Output", ha='center')
    ax3.set_xlim(0, 1)
    ax3.set_ylim(0, 1)
    ax3.axis('off')

    # D: Multi-head Attention
    ax4 = fig.add_subplot(gs[3])
    ax4.set_title("D) Multi-head Attention", fontweight='bold', loc='left')

    # Draw attention heads on a circle of radius 0.3 around the panel centre
    angles = [30, 150, 210, 330]
    colors = ['#9467bd', '#8c564b', '#e377c2', '#7f7f7f']
    for i, (angle, color) in enumerate(zip(angles, colors)):
        rad = np.deg2rad(angle)
        x = 0.5 + 0.3 * np.cos(rad)
        y = 0.5 + 0.3 * np.sin(rad)
        ax4.add_patch(Circle((x, y), 0.08, color=color))
        ax4.text(x, y, f"Head {i+1}", ha='center', va='center', color='white', fontsize=9)
        ax4.plot([0.5, x], [0.5, y], 'k-', lw=1)

    ax4.add_patch(Circle((0.5, 0.5), 0.1, color='#17becf'))
    ax4.text(0.5, 0.5, "Attention\nMechanism", ha='center', va='center', fontsize=9)
    ax4.set_xlim(0, 1)
    ax4.set_ylim(0, 1)
    ax4.axis('off')

    # E: Prediction Network
    ax5 = fig.add_subplot(gs[4])
    ax5.set_title("E) Prediction Network", fontweight='bold', loc='left')

    # Draw MLP: (label, y of box bottom); boxes are 0.08 tall and 0.2 apart
    layers = [("Global\nAvg Pool", 0.7), ("Dense\n512", 0.5), ("Dense\n256", 0.3), ("Output", 0.1)]
    for i, (label, y_pos) in enumerate(layers):
        ax5.add_patch(Rectangle((0.3, y_pos), 0.4, 0.08, fill=True, color='#e6550d'))
        ax5.text(0.5, y_pos+0.04, label, ha='center', color='white', fontsize=9)
        if i < len(layers)-1:
            # 0.12 is the full gap to the next box; counting the head inside
            # that length keeps it from being hidden behind the next box.
            ax5.arrow(0.5, y_pos, 0, -0.12, head_width=0.03, head_length=0.02,
                      fc='k', length_includes_head=True)

    ax5.set_xlim(0, 1)
    ax5.set_ylim(0, 1)
    ax5.axis('off')

    plt.tight_layout()
    plt.savefig('architecture_detail.png', bbox_inches='tight')
    plt.close()

# Figure 3: Training Curves
def create_training_figure(seed=None):
    """Plot a 2x2 "Training Dynamics" figure from simulated curves; save as PNG.

    Nothing is read from a training log. Every series is generated here from a
    closed-form curve over 100 epochs plus Gaussian noise:

    A) learning rate: cosine curve with its maximum (0.001) at epoch 5,
    B) training / validation loss: exponential decays,
    C) histogram of gradient norms with a vertical line at 5,
    D) precision and recall: saturating exponentials, y-axis limited to 0.5-0.9.

    Args:
        seed: Optional integer. When given, noise comes from a private
            ``np.random.RandomState(seed)`` so the figure is reproducible.
            When ``None`` (default) the global ``np.random`` state is used,
            exactly as before.

    Side effects: writes ``training_curves.png`` to the current working
    directory and closes the figure without displaying it.
    """
    # Simulate training data
    rng = np.random if seed is None else np.random.RandomState(seed)
    epochs = np.arange(0, 100)
    n_epochs = epochs.size  # keep noise vectors the same length as `epochs`

    def noise(scale):
        # Gaussian noise, one sample per epoch
        return scale * rng.randn(n_epochs)

    lr = 0.001 * (0.5 * (1 + np.cos((epochs - 5) * np.pi / 95)))
    train_loss = 0.7 * np.exp(-epochs/20) + noise(0.05)
    val_loss = 0.75 * np.exp(-epochs/25) + noise(0.05)
    # A norm cannot be negative. The additive noise would push many late-epoch
    # samples below zero, so fold them back onto the non-negative axis.
    grad_norm = np.abs(5 * np.exp(-epochs/15) + noise(0.5))
    precision = 0.8 - 0.7 * np.exp(-epochs/10) + noise(0.02)
    recall = 0.75 - 0.7 * np.exp(-epochs/12) + noise(0.02)

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=300)
    fig.suptitle("Training Dynamics", fontsize=16, fontweight='bold')

    # A: Learning Rate Schedule
    axs[0, 0].plot(epochs, lr, 'b-', linewidth=2)
    axs[0, 0].set_title("A) Learning Rate Schedule", fontweight='bold', loc='left')
    axs[0, 0].set_xlabel("Epochs")
    axs[0, 0].set_ylabel("Learning Rate")
    axs[0, 0].grid(True, linestyle='--', alpha=0.7)
    axs[0, 0].set_xlim(0, 100)

    # B: Loss Curves
    axs[0, 1].plot(epochs, train_loss, 'b-', label='Training Loss')
    axs[0, 1].plot(epochs, val_loss, 'r-', label='Validation Loss')
    axs[0, 1].set_title("B) Training and Validation Loss", fontweight='bold', loc='left')
    axs[0, 1].set_xlabel("Epochs")
    axs[0, 1].set_ylabel("Loss")
    axs[0, 1].legend()
    axs[0, 1].grid(True, linestyle='--', alpha=0.7)
    axs[0, 1].set_xlim(0, 100)

    # C: Gradient Norm Distribution (samples of 10 or more are left out)
    axs[1, 0].hist(grad_norm[grad_norm < 10], bins=20, color='g', alpha=0.7)
    axs[1, 0].axvline(5, color='r', linestyle='--', label='Clipping Threshold')
    axs[1, 0].set_title("C) Gradient Norm Distribution", fontweight='bold', loc='left')
    axs[1, 0].set_xlabel("Gradient Norm")
    axs[1, 0].set_ylabel("Frequency")
    axs[1, 0].legend()
    axs[1, 0].grid(True, linestyle='--', alpha=0.7)

    # D: Precision-Recall Progression
    axs[1, 1].plot(epochs, precision, 'b-', label='Precision')
    axs[1, 1].plot(epochs, recall, 'g-', label='Recall')
    axs[1, 1].set_title("D) Precision-Recall Progression", fontweight='bold', loc='left')
    axs[1, 1].set_xlabel("Epochs")
    axs[1, 1].set_ylabel("Score")
    axs[1, 1].legend()
    axs[1, 1].grid(True, linestyle='--', alpha=0.7)
    axs[1, 1].set_xlim(0, 100)
    axs[1, 1].set_ylim(0.5, 0.9)

    plt.tight_layout()
    # Re-open head-room for the suptitle after tight_layout packed the axes
    plt.subplots_adjust(top=0.92)
    plt.savefig('training_curves.png', bbox_inches='tight')
    plt.close()

# Build the three figures in order; each builder writes its own PNG.
for build_figure in (
    create_preprocessing_figure,
    create_architecture_figure,
    create_training_figure,
):
    build_figure()
# The builders defined in this cell close their figures after saving, so this
# call has no figure from them left to display.
plt.show()

In [None]:
# Notebook shell escape: install the lifelines package with pip.
# NOTE(review): no cell visible in this section imports lifelines — confirm it is still needed.
!pip install lifelines




In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Arc

# Hi-C validation schematic: three cartoon panels in one row, each drawn in a
# 0-1 coordinate box with hidden axes, saved to 'hic_validation.png'.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15, 5), dpi=300)

gene_label = dict(ha='center', fontsize=14)       # shared styling for gene names
tad_corner, tad_side = (0.3, 0.3), 0.4            # TAD square used in (a) and (b)
boundary_x, boundary_y = [0.5, 0.5], [0.3, 0.7]   # vertical line through the TAD centre

# (a) Intact TAD: solid blue outline, solid boundary line, green half-circle loop
ax = axs[0]
ax.set_title("(a) Normal TAD Structure", fontsize=12)
for label_y, gene in ((0.9, "TP53"), (0.1, "MYC")):
    ax.text(0.5, label_y, gene, **gene_label)
ax.add_patch(Rectangle(tad_corner, tad_side, tad_side,
                       fill=False, lw=2, edgecolor='blue'))
ax.plot(boundary_x, boundary_y, 'k-', lw=1.5)
ax.text(0.5, 0.75, "CTCF", ha='center', fontsize=10, color='blue')
arc = Arc((0.5, 0.5), 0.3, 0.3, theta1=0, theta2=180,
          edgecolor='green', lw=2, fill=False)
ax.add_patch(arc)

# (b) Eroded boundary: same geometry in dashed red plus a diagonal red segment
ax = axs[1]
ax.set_title("(b) Boundary Erosion (Non-responders)", fontsize=12)
for label_y, gene in ((0.9, "TP53"), (0.1, "MYC")):
    ax.text(0.5, label_y, gene, **gene_label)
ax.add_patch(Rectangle(tad_corner, tad_side, tad_side,
                       fill=False, lw=2, edgecolor='red', linestyle='--'))
ax.plot(boundary_x, boundary_y, 'r--', lw=1.5)
ax.text(0.5, 0.75, "CTCF*", ha='center', fontsize=10, color='red')
ax.plot([0.4, 0.6], [0.4, 0.6], 'r-', lw=2)  # aberrant loop

# (c) Novel loops: both genes connected to one enhancer marker
ax = axs[2]
ax.set_title("(c) Novel Chromatin Loops", fontsize=12)
for label_x, gene in ((0.3, "TP53"), (0.7, "MYC")):
    ax.text(label_x, 0.9, gene, **gene_label)
ax.text(0.5, 0.1, "Enhancer", ha='center', fontsize=12)
enhancer_x, enhancer_y = 0.5, 0.2
for gene_x in (0.3, 0.7):
    ax.plot([gene_x, enhancer_x], [0.8, enhancer_y], 'purple', lw=2, linestyle='-')
ax.scatter(enhancer_x, enhancer_y, s=100, c='orange', marker='o', edgecolor='k')

# Same frame for every panel: unit square, no ticks or spines
for panel in axs:
    panel.set_xlim(0, 1)
    panel.set_ylim(0, 1)
    panel.axis('off')

plt.tight_layout()
plt.savefig('hic_validation.png', bbox_inches='tight', dpi=300)
plt.close()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Arc

# Hi-C validation schematic: three cartoon panels in one row, each drawn in a
# 0-1 coordinate box with hidden axes, saved to 'hic_validation.png'.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15, 5), dpi=300)

gene_label = dict(ha='center', fontsize=14)       # shared styling for gene names
tad_corner, tad_side = (0.3, 0.3), 0.4            # TAD square used in (a) and (b)
boundary_x, boundary_y = [0.5, 0.5], [0.3, 0.7]   # vertical line through the TAD centre

# (a) Intact TAD: solid blue outline, solid boundary line, green half-circle loop
ax = axs[0]
ax.set_title("(a) Normal TAD Structure", fontsize=12)
for label_y, gene in ((0.9, "TP53"), (0.1, "MYC")):
    ax.text(0.5, label_y, gene, **gene_label)
ax.add_patch(Rectangle(tad_corner, tad_side, tad_side,
                       fill=False, lw=2, edgecolor='blue'))
ax.plot(boundary_x, boundary_y, 'k-', lw=1.5)
ax.text(0.5, 0.75, "CTCF", ha='center', fontsize=10, color='blue')
arc = Arc((0.5, 0.5), 0.3, 0.3, theta1=0, theta2=180,
          edgecolor='green', lw=2, fill=False)
ax.add_patch(arc)

# (b) Eroded boundary: same geometry in dashed red plus a diagonal red segment
ax = axs[1]
ax.set_title("(b) Boundary Erosion (Non-responders)", fontsize=12)
for label_y, gene in ((0.9, "TP53"), (0.1, "MYC")):
    ax.text(0.5, label_y, gene, **gene_label)
ax.add_patch(Rectangle(tad_corner, tad_side, tad_side,
                       fill=False, lw=2, edgecolor='red', linestyle='--'))
ax.plot(boundary_x, boundary_y, 'r--', lw=1.5)
ax.text(0.5, 0.75, "CTCF*", ha='center', fontsize=10, color='red')
ax.plot([0.4, 0.6], [0.4, 0.6], 'r-', lw=2)  # aberrant loop

# (c) Novel loops: both genes connected to one enhancer marker
ax = axs[2]
ax.set_title("(c) Novel Chromatin Loops", fontsize=12)
for label_x, gene in ((0.3, "TP53"), (0.7, "MYC")):
    ax.text(label_x, 0.9, gene, **gene_label)
ax.text(0.5, 0.1, "Enhancer", ha='center', fontsize=12)
enhancer_x, enhancer_y = 0.5, 0.2
for gene_x in (0.3, 0.7):
    ax.plot([gene_x, enhancer_x], [0.8, enhancer_y], 'purple', lw=2, linestyle='-')
ax.scatter(enhancer_x, enhancer_y, s=100, c='orange', marker='o', edgecolor='k')

# Same frame for every panel: unit square, no ticks or spines
for panel in axs:
    panel.set_xlim(0, 1)
    panel.set_ylim(0, 1)
    panel.axis('off')

plt.tight_layout()
plt.savefig('hic_validation.png', bbox_inches='tight', dpi=300)
plt.close()