In [1]:
import pandas as pd

# Load the hypomethylated regions from a headerless, tab-separated BED file.
# Only the first three BED fields are read and they are labelled at load time,
# so a file that carries additional BED columns (name, score, strand, ...) does
# not break the three-label column assignment performed in the next cell.
hypo = pd.read_csv(
    "hypomethylated_genes.bed",
    sep="\t",
    header=None,
    usecols=[0, 1, 2],
    names=["Chr", "Start", "End"],
)

In [2]:
# Label the three positional columns of the region table.
hypo = hypo.set_axis(["Chr", "Start", "End"], axis=1)

In [3]:
# Preview the first five regions to check the column labels.
hypo.head(n=5)

Unnamed: 0,Chr,Start,End
0,chr1,826206,827422
1,chr1,998962,1000072
2,chr1,1373730,1375338
3,chr1,2528745,2530145
4,chr1,2554724,2556911


In [4]:
# Read the combined methylation matrix and, in the same expression, rename its
# 'Chromosome' column to 'Chr' so it uses the same label as the region table.
cpg_matrix = pd.read_csv(
    "/data/cephfs-1/work/groups/kühnen/users/cama15_c/meth_matrix_files/meth_matrix_10_samples.csv",
    sep=",",
).rename(columns={"Chromosome": "Chr"})

In [5]:
# Coverage columns are recognised by their '_cov' suffix.
cov_cols = [name for name in cpg_matrix.columns if name.endswith("_cov")]

# Boolean row mask: True where at least 3 coverage columns hold a value >= 10.
# Missing coverage values compare as False and therefore do not count.
cpg_filtered = cpg_matrix[cov_cols].ge(10).sum(axis=1).ge(3)

# Independent copy of the rows that pass the mask.
cpg_matrix_filtered = cpg_matrix.loc[cpg_filtered].copy()

In [6]:
import pyranges as pr

# Wrap both tables as PyRanges objects. The 'Chr' column is renamed to
# 'Chromosome' first, giving the Chromosome / Start / End column names that
# PyRanges expects.
hypo_pr, cpg_pr = (
    pr.PyRanges(frame.rename(columns={"Chr": "Chromosome"}))
    for frame in (hypo, cpg_matrix_filtered)
)

# Join the CpG rows against the hypomethylated regions.
# NOTE(review): a join is expected to emit one row per (CpG, region) overlap, so
# a CpG would be repeated if regions overlap each other - confirm the regions
# are disjoint if unique CpGs are required downstream.
overlaps = cpg_pr.join(hypo_pr)

# Back to a plain DataFrame, then write it out as CSV without the index.
matched_cpgs = overlaps.df
matched_cpgs.to_csv("cpgs_within_hypo_regions.csv", sep=",", index=False)

In [7]:
# Preview the first ten matched CpGs. The frame is left as the cell's last
# expression (rather than wrapped in print()) so Jupyter renders it as a table
# instead of line-wrapped, column-truncated plain text; this also matches how
# the region table is previewed earlier in the notebook.
matched_cpgs.head(10)

  Chromosome   Start     End  Naive_1_frac  Naive_1_cov  Formative_1_frac  \
0       chr1  826224  826226         0.125         16.0             0.633   
1       chr1  826732  826734         0.000         11.0             0.143   
2       chr1  826794  826796         0.053         19.0             0.000   
3       chr1  826798  826800         0.000         19.0             0.000   
4       chr1  826851  826853         0.000         17.0             0.000   
5       chr1  826863  826865         0.000         16.0             0.000   
6       chr1  827229  827231         0.000         12.0             0.000   
7       chr1  827233  827235         0.000         10.0             0.000   
8       chr1  827347  827349         0.091         11.0             0.000   
9       chr1  827357  827359         0.000         12.0             0.000   

   Formative_1_cov  Primed_1_frac  Primed_1_cov  Formative_Neuron_1_frac  ...  \
0             30.0          0.824          17.0                      0.