## Visualize accessibility

In [1]:
import glob 
import os
import scanpy as sc
import numpy as np

def print_ccre_info(adata, ccre_key):
    """Print the mean accessibility and open-cell percentage for one cCRE.

    Args:
        adata: AnnData-like object. Only ``adata.X`` (rows are counted as
            cells) and ``adata.var_names`` (an index providing ``get_loc``)
            are used.
        ccre_key: Name of the cCRE column, looked up in ``adata.var_names``.

    Raises:
        KeyError: Propagated from ``get_loc`` when ``ccre_key`` is absent.
    """
    # Slice the column once and reuse it for every statistic.
    acc_values = adata.X[:, adata.var_names.get_loc(ccre_key)]
    cell_total = acc_values.shape[0]

    print(f"cCRE: {ccre_key}")

    if cell_total == 0:
        # No cells survived the upstream filter; a mean over zero rows is
        # undefined, so report NaN without running the reduction.
        print(f"Mean accessibility: {np.nan}")
        print("Open Percentage: 0.00%")
        return

    print(f"Mean accessibility: {np.mean(acc_values)}")

    # Number of cells with a non-zero value for this cCRE.
    open_cells = (acc_values != 0).sum()
    print(f"Open Percentage: {open_cells / cell_total * 100:.2f}%")


# cCREs to report, written as "chrom:start-end" keys looked up in var_names.
CCRE_KEYS = [
    "chr16:15947721-15948121",
    "chr16:15948723-15949123",
    "chr16:48155636-48156136",
    "chr16:48156659-48157158",
    "chr16:48157334-48157734",
    "chr16:48247311-48247711",
    "chr21:14271631-14272131",
    "chr21:14272601-14273101",
    "chr21:14273789-14274288",
    "chr6:43427045-43428279",
    "chr6:43768813-43769358",
    "chr6:43769442-43769941",
    "chr6:43769973-43770373",
    "chr12:15972817-15973217",
    "chr12:15976222-15976721",
    "chr12:15977368-15977768",
    "chr14:34035125-34035525",
    "chr14:34035627-34036191",
    "chr14:34036397-34036896",
    "chr14:34038216-34038616",
    "chr14:34038653-34039053",
    "chr14:34462655-34463154",
    "chr14:34465274-34465674",
    "chr14:34466526-34467026",
]

data_dir = '../data/insilico/raw_h5ad/'

# One pass per sample type: (value matched against obs['Sample_type'],
# filename glob pattern). Replaces two copy-pasted Tumor/Normal sections.
for sample_type, pattern in (
    ('Tumor', '*-T*_ccRCC.h5ad'),
    ('Normal', '*-N*_ccRCC.h5ad'),
):
    print()
    print("-" * 40)
    print(f"{sample_type} Cells")
    print("-" * 40)
    print()

    # glob returns paths in arbitrary order; sort so reruns print identically.
    h5ad_files = sorted(glob.glob(os.path.join(data_dir, pattern)))
    if not h5ad_files:
        # Make an empty match visible instead of printing only the header.
        print(f"No files matching {pattern!r} found in {data_dir!r}")
        print()

    for h5ad_file in h5ad_files:
        adata = sc.read_h5ad(h5ad_file)
        # Keep only the cells annotated with the current sample type.
        adata = adata[adata.obs['Sample_type'] == sample_type]

        print(f"File: {os.path.basename(h5ad_file)}")
        print(f"Total cells: {adata.shape[0]}")
        print()

        for ccre_key in CCRE_KEYS:
            print_ccre_info(adata, ccre_key)

        print()
        print("-" * 40)
        print()



----------------------------------------
Tumor Cells
----------------------------------------


----------------------------------------
Normal Cells
----------------------------------------



In [None]:
import glob
import os
import scanpy as sc
import numpy as np
import pandas as pd

def extract_ccre_info(adata, ccre_key):
    """Return ``(mean accessibility, open-cell percentage)`` for one cCRE.

    Args:
        adata: AnnData-like object. Only ``adata.X`` (rows are counted as
            cells) and ``adata.var_names`` (an index providing ``get_loc``)
            are used.
        ccre_key: Name of the cCRE column, looked up in ``adata.var_names``.

    Returns:
        Tuple ``(mean_acc, open_pct)``: the mean of the column and the
        percentage (0-100) of rows whose value is non-zero. With zero rows
        the result is ``(nan, 0.0)``.

    Raises:
        KeyError: Propagated from ``get_loc`` when ``ccre_key`` is absent.
    """
    acc_values = adata.X[:, adata.var_names.get_loc(ccre_key)]
    n_cells = acc_values.shape[0]

    if n_cells == 0:
        # Guard both statistics: the mean over zero rows is undefined, so it
        # is reported as NaN without running the reduction.
        return np.nan, 0.0

    mean_acc = np.mean(acc_values)
    open_pct = (acc_values != 0).sum() / n_cells * 100
    return mean_acc, open_pct

def process_files(label, pattern, output_csv,
                  data_dir='../data/insilico/raw_h5ad/', ccre_keys=None):
    """Collect per-cCRE statistics for every matching h5ad file and save a CSV.

    Args:
        label: Value matched against ``adata.obs['Sample_type']``; also used
            in the printed section header.
        pattern: Filename glob pattern, joined onto ``data_dir``.
        output_csv: Path of the CSV file to write.
        data_dir: Directory searched for h5ad files.
        ccre_keys: cCRE names to summarise. Defaults to the module-level
            ``CCRE_KEYS``.

    The CSV has one row per (file, cCRE) with the columns File, cCRE,
    CellCount, MeanAccessibility and OpenPercentage. The header row is written
    even when no file matches, so the output is always a well-formed table.
    """
    if ccre_keys is None:
        # Resolved at call time, so the global may be defined after this function.
        ccre_keys = CCRE_KEYS

    columns = ["File", "cCRE", "CellCount", "MeanAccessibility", "OpenPercentage"]

    # glob returns paths in arbitrary order; sort for a reproducible row order.
    h5ad_files = sorted(glob.glob(os.path.join(data_dir, pattern)))

    results = []

    print()
    print("-" * 40)
    print(f"{label} Cells")
    print("-" * 40)
    print()

    if not h5ad_files:
        # Surface an empty match instead of silently writing an empty file.
        print(f"No files matching {pattern!r} found in {data_dir!r}")
        print()

    for h5ad_file in h5ad_files:
        adata = sc.read_h5ad(h5ad_file)
        # Keep only the cells annotated with the requested sample type.
        adata = adata[adata.obs['Sample_type'] == label]

        sample_name = os.path.basename(h5ad_file)
        cell_count = adata.shape[0]
        print(f"File: {sample_name}")
        print(f"Total cells: {cell_count}")
        print()

        for ccre_key in ccre_keys:
            mean_acc, open_pct = extract_ccre_info(adata, ccre_key)
            results.append({
                "File": sample_name,
                "cCRE": ccre_key,
                "CellCount": cell_count,
                "MeanAccessibility": mean_acc,
                "OpenPercentage": open_pct
            })

    # Save to CSV; explicit columns keep the header when `results` is empty.
    df = pd.DataFrame(results, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"✅ Saved: {output_csv}")
    print()

# cCREs of interest, one "chrom:start-end" key per line
CCRE_KEYS = [
    "chr16:15947721-15948121",
    "chr16:15948723-15949123",
    "chr16:48155636-48156136",
    "chr16:48156659-48157158",
    "chr16:48157334-48157734",
    "chr16:48247311-48247711",
    "chr21:14271631-14272131",
    "chr21:14272601-14273101",
    "chr21:14273789-14274288",
    "chr6:43427045-43428279",
    "chr6:43768813-43769358",
    "chr6:43769442-43769941",
    "chr6:43769973-43770373",
    "chr12:15972817-15973217",
    "chr12:15976222-15976721",
    "chr12:15977368-15977768",
    "chr14:34035125-34035525",
    "chr14:34035627-34036191",
    "chr14:34036397-34036896",
    "chr14:34038216-34038616",
    "chr14:34038653-34039053",
    "chr14:34462655-34463154",
    "chr14:34465274-34465674",
    "chr14:34466526-34467026",
]

# Export the statistics: tumor samples first, then normal samples
process_files("Tumor", "*-T*_ccRCC.h5ad", "ccre_tumor_stats.csv")
process_files("Normal", "*-N*_ccRCC.h5ad", "ccre_normal_stats.csv")


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.gridspec as gridspec

# --- Data setup ---
gene = 'ABCC1'
regions = ['chr16:15947721-15948121', 'chr6:43427045-43428279']
cell_types = ['Cancer cell', 'Normal cell']
# (region, cell type) -> (mean accessibility, percentage of cells);
# the first value drives the bubble colour, the second the bubble size.
data = {
    ('chr16:15947721-15948121', 'Normal cell'): (0.02, 5),
    ('chr16:15947721-15948121', 'Cancer cell'): (0.03, 8),
    ('chr6:43427045-43428279', 'Normal cell'): (0.1, 12),
    ('chr6:43427045-43428279', 'Cancer cell'): (0.3, 18),
}
accessibility_values = [acc for acc, _ in data.values()]
vmin, vmax = 0.0, max(accessibility_values)
cmap = cm.Reds
norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

# Marker area per percentage point, shared by the chart and its size legend
# so the legend cannot drift out of sync with the bubbles.
size_scale = 10

# --- Figure + GridSpec setup ---
# Clamp to 1: with a single region or cell type, `len(...) - 1` would be 0
# and produce a zero-sized figure.
width = max(len(regions) - 1, 1)
height = max(len(cell_types) - 1, 1)
fig = plt.figure(figsize=(3*width, 2*height))
gs = gridspec.GridSpec(nrows=2, ncols=2, 
                       height_ratios=[2, 1], width_ratios=[2, 2], figure=fig, 
                       hspace=0.4, wspace=0.2
                       )

# (1) Main bubble chart
main_ax = fig.add_subplot(gs[0, 0])
for i, region in enumerate(regions):
    for j, cell_type in enumerate(cell_types):
        # Missing combinations fall back to (0, 0), i.e. an invisible marker.
        acc, pct = data.get((region, cell_type), (0, 0))
        color = cmap(norm(acc))  
        main_ax.scatter(i, j, s=pct * size_scale, c=[color], edgecolors='black')

main_ax.set_xticks(list(range(len(regions))))
main_ax.set_xticklabels(regions, rotation=45, ha='right')
main_ax.set_yticks(list(range(len(cell_types))))
main_ax.set_yticklabels(cell_types)
# One unit of padding on each side so edge bubbles are not clipped.
main_ax.set_xlim(-1, len(regions))
main_ax.set_ylim(-1, len(cell_types))
main_ax.set_title(f'{gene}')

# (2) Size legend subplot
size_ax = fig.add_subplot(gs[0, 1])  
sizes = [5, 10, 15, 20]
x = range(len(sizes))
y = [0.1] * len(sizes)
for i, s in enumerate(sizes):
    size_ax.scatter(x[i], y[i], s=s * size_scale, color='gray')
    # Tick mark and numeric label under each reference bubble.
    size_ax.vlines(x[i], -0.35, -0.25, color='black', linewidth=1)
    size_ax.text(x[i], -0.45, f"{s}", ha='center', va='top', fontsize=9)
size_ax.text(
    0.5 * (x[0] + x[-1]), 0.45,
    "Fraction of cells\nin cell type (%)",
    ha='center', va='bottom', fontsize=10
)
size_ax.set_xlim(-0.5, len(sizes) - 0.5)
size_ax.set_ylim(-0.8, 1.0)
size_ax.axis('off')

# (3) Colorbar subplot
cbar_ax = fig.add_subplot(gs[1, 1])
# Shrink the colorbar axes to 60% of its grid-cell height.
pos = cbar_ax.get_position()
new_height = pos.height * 0.6
new_pos = [pos.x0, pos.y0, pos.width, new_height]
cbar_ax.set_position(new_pos)
sm = cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, cax=cbar_ax, orientation='horizontal')
cbar.ax.text(
    0.5, 1.2,
    'Mean accessibility\nin cell type',
    ha='center', va='bottom',
    fontsize=10,
    transform=cbar.ax.transAxes
)

# Render the figure
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.gridspec as gridspec

# --- Data setup ---
gene = 'ABCC1'

# Regions in plotting order (left to right on the x axis)
regions = [
    "chr16:15947721-15948121",
    "chr16:15948723-15949123",
    "chr16:48155636-48156136",
    "chr16:48156659-48157158",
    "chr16:48157334-48157734",
    "chr16:48247311-48247711",
    "chr21:14271631-14272131",
    "chr21:14272601-14273101",
    "chr21:14273789-14274288",
    "chr6:43427045-43428279",
    "chr6:43768813-43769358",
    "chr6:43769442-43769941",
    "chr6:43769973-43770373",
    "chr12:15972817-15973217",
    "chr12:15976222-15976721",
    "chr12:15977368-15977768",
    "chr14:34035125-34035525",
    "chr14:34035627-34036191",
    "chr14:34036397-34036896",
    "chr14:34038216-34038616",
    "chr14:34038653-34039053",
    "chr14:34462655-34463154",
    "chr14:34465274-34465674",
    "chr14:34466526-34467026",
]

cell_types = ['Cancer cell', 'Normal cell']

# (region, cell type) -> (accessibility value, percentage value)
data = {
    # Cancer cell
    ("chr16:15947721-15948121", 'Cancer cell'): (0.002363281, 0.16),
    ("chr16:15948723-15949123", 'Cancer cell'): (0.012926, 0.73),
    ("chr16:48155636-48156136", 'Cancer cell'): (0.003674774, 0.25),
    ("chr16:48156659-48157158", 'Cancer cell'): (0.018604245, 1.11),
    ("chr16:48157334-48157734", 'Cancer cell'): (0.002694609, 0.20),
    ("chr16:48247311-48247711", 'Cancer cell'): (0.006167485, 0.37),
    ("chr21:14271631-14272131", 'Cancer cell'): (0.0011674, 0.09),
    ("chr21:14272601-14273101", 'Cancer cell'): (0.001556534, 0.12),
    ("chr21:14273789-14274288", 'Cancer cell'): (0.021068477, 1.24),
    ("chr6:43427045-43428279", 'Cancer cell'): (0.60017637, 27.75),
    ("chr6:43768813-43769358", 'Cancer cell'): (0.37205393, 17.72),
    ("chr6:43769442-43769941", 'Cancer cell'): (0.405887107, 21.33),
    ("chr6:43769973-43770373", 'Cancer cell'): (0.42217799, 21.78),
    ("chr12:15972817-15973217", 'Cancer cell'): (0.006139004, 0.42),
    ("chr12:15976222-15976721", 'Cancer cell'): (0.001743769, 0.16),
    ("chr12:15977368-15977768", 'Cancer cell'): (0.005403881, 0.40),
    ("chr14:34035125-34035525", 'Cancer cell'): (0.104215596, 6.09),
    ("chr14:34035627-34036191", 'Cancer cell'): (0.53657506, 24.46),
    ("chr14:34036397-34036896", 'Cancer cell'): (0.013444564, 0.87),
    ("chr14:34038216-34038616", 'Cancer cell'): (0.018243594, 1.30),
    ("chr14:34038653-34039053", 'Cancer cell'): (0.077095245, 4.49),
    ("chr14:34462655-34463154", 'Cancer cell'): (0.04131554, 2.75),
    ("chr14:34465274-34465674", 'Cancer cell'): (0.00916494, 0.62),
    ("chr14:34466526-34467026", 'Cancer cell'): (0.003991439, 0.26),
    # Normal cell
    ("chr16:15947721-15948121", 'Normal cell'): (0.000106022, 0.77),
    ("chr16:15948723-15949123", 'Normal cell'): (0.00360475, 2.78),
    ("chr16:48155636-48156136", 'Normal cell'): (0.003816794, 0.95),
    ("chr16:48156659-48157158", 'Normal cell'): (0.01749364, 5.57),
    ("chr16:48157334-48157734", 'Normal cell'): (0.000212044, 1.07),
    ("chr16:48247311-48247711", 'Normal cell'): (0.003816794, 1.98),
    ("chr21:14271631-14272131", 'Normal cell'): (0.000636132, 0.21),
    ("chr21:14272601-14273101", 'Normal cell'): (0.002438507, 0.53),
    ("chr21:14273789-14274288", 'Normal cell'): (0.013252756, 3.49),
    ("chr6:43427045-43428279", 'Normal cell'): (0.17917727, 36.77),
    ("chr6:43768813-43769358", 'Normal cell'): (0.00879983, 4.00),
    ("chr6:43769442-43769941", 'Normal cell'): (0.067324, 19.95),
    ("chr6:43769973-43770373", 'Normal cell'): (0.10782443, 23.03),
    ("chr12:15972817-15973217", 'Normal cell'): (0.002756573, 1.15),
    ("chr12:15976222-15976721", 'Normal cell'): (0.001484309, 0.53),
    ("chr12:15977368-15977768", 'Normal cell'): (0.001696353, 1.27),
    ("chr14:34035125-34035525", 'Normal cell'): (0.000848176, 0.53),
    ("chr14:34035627-34036191", 'Normal cell'): (0.002014419, 0.80),
    ("chr14:34036397-34036896", 'Normal cell'): (0.000636132, 0.41),
    ("chr14:34038216-34038616", 'Normal cell'): (0.002014419, 0.98),
    ("chr14:34038653-34039053", 'Normal cell'): (0.01759966, 3.32),
    ("chr14:34462655-34463154", 'Normal cell'): (0.015797287, 9.18),
    ("chr14:34465274-34465674", 'Normal cell'): (0.002968617, 2.13),
    ("chr14:34466526-34467026", 'Normal cell'): (0.001484309, 0.98),
}
# Colour scale: runs from 0 up to the largest first-element value in `data`
accessibility_values = [stats[0] for stats in data.values()]
vmin = 0.0
vmax = max(accessibility_values)
cmap = cm.Reds
norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# Shared settings
gene = 'ABCC1'
cell_types = ['Cancer cell', 'Normal cell']
# `regions` and `data` are deliberately not reassigned here; this cell reuses
# the full tables defined in the preceding data-setup cell. The former
# placeholders (`[ ... ]` and `{ ... }`) evaluated to `[Ellipsis]` and a set,
# so `data.values()` raised AttributeError.

accessibility_values = [acc for acc, _ in data.values()]
vmin, vmax = 0.0, max(accessibility_values)
cmap = cm.Reds
norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

# Marker area per percentage point, shared by the chart and its size legend
# so the legend cannot drift out of sync with the bubbles.
size_scale = 10

# ----------- 1. Main bubble chart -----------
# Figure width grows with the number of regions so tick labels stay readable.
fig1, ax1 = plt.subplots(figsize=(0.6*len(regions), 2.5))
for i, region in enumerate(regions):
    for j, cell_type in enumerate(cell_types):
        # Missing combinations fall back to (0, 0), i.e. an invisible marker.
        acc, pct = data.get((region, cell_type), (0, 0))
        color = cmap(norm(acc))
        ax1.scatter(i, j, s=pct * size_scale, c=[color], edgecolors='black')

ax1.set_xticks(list(range(len(regions))))
ax1.set_xticklabels(regions, rotation=45, ha='right')
ax1.set_yticks(list(range(len(cell_types))))
ax1.set_yticklabels(cell_types)
# One unit of padding on each side so edge bubbles are not clipped.
ax1.set_xlim(-1, len(regions))
ax1.set_ylim(-1, len(cell_types))
ax1.set_title(f'{gene}: Bubble chart')
plt.tight_layout()
plt.show()


# ----------- 2. Size legend chart -----------
fig2, ax2 = plt.subplots(figsize=(4, 2))
sizes = [5, 10, 15, 20]
x = range(len(sizes))
y = [0.1] * len(sizes)

for i, s in enumerate(sizes):
    ax2.scatter(x[i], y[i], s=s * size_scale, color='gray')
    # Tick mark and numeric label under each reference bubble.
    ax2.vlines(x[i], -0.35, -0.25, color='black', linewidth=1)
    ax2.text(x[i], -0.45, f"{s}", ha='center', va='top', fontsize=9)

ax2.text(
    0.5 * (x[0] + x[-1]), 0.45,
    "Fraction of cells\nin cell type (%)",
    ha='center', va='bottom', fontsize=10
)
ax2.set_xlim(-0.5, len(sizes) - 0.5)
ax2.set_ylim(-0.8, 1.0)
ax2.axis('off')
plt.tight_layout()
plt.show()


# ----------- 3. Colorbar chart -----------
fig3, ax3 = plt.subplots(figsize=(4, 1.2))
sm = cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, cax=ax3, orientation='horizontal')
cbar.ax.text(
    0.5, 1.2,
    'Mean accessibility\nin cell type',
    ha='center', va='bottom',
    fontsize=10,
    transform=cbar.ax.transAxes
)
plt.tight_layout()
plt.show()
