- Prepare input for FitHiC2
- Run FitHiC2
- Filter results

In [1]:
import pandas as pd
import numpy as np
import cooler

# Create input for fithic

In [7]:
# Load hi-c matrices
res = 100_000

# clr_path = {'NeuN+': "NeuNplus.our_and_Hu2021_merged.mcool",
#             'NeuN-': "NeuNminus.our_and_Hu2021_merged.mcool"}
# clr_path = {'NeuN+': "NeuNplus.our.mcool",
#             'NeuN-': "NeuNminus.our.mcool"}
# clr_path = {'NeuN+': "NeuNpos.Hu2021.mcool",
#             'NeuN-': "NeuNneg.Hu2021.mcool"}
# clr_path = {'EN': "MTG.EN.2786_cells.10kb.mcool",
#             'IN': "MTG.IN.2786_cells.mcool"}
clr_path = {'ASC': "MTG_LEC_A46_FI_M1C.ASC.2719_cells.10kb.mcool",
            'ODC': "MTG_LEC_A46_FI_M1C.ODC.2719_cells.10kb.mcool"}

# grp = 'NeuN+'
ct = 'ODC'

clr = cooler.Cooler(clr_path[ct] + f'::resolutions/{res}')

In [8]:
# Get bias values
bins = clr.bins()[:]
bins = bins.loc[bins['chrom'] != 'chrM']
bins['midpoint'] = ( (bins['start'] + bins['end']) / 2 ).astype(int)

# From fithic paper: "As long as the bias values are scaled to have an average of 1 
# and high values represent loci with higher overall raw counts, 
# FitHiC2 will be able to use them in significance assignment."
bins['bias'] = (1 / bins['weight']) / (1 / bins['weight']).mean()
bins.head()

Unnamed: 0,chrom,start,end,weight,midpoint,bias
0,chr1,0,100000,,50000,
1,chr1,100000,200000,,150000,
2,chr1,200000,300000,,250000,
3,chr1,300000,400000,,350000,
4,chr1,400000,500000,,450000,


In [9]:
# Save bias values to file
# out_name = f'fithic.bias.HC_{ct}.ourAndHu2021_merged.sampled.{res // 1000}kb.txt.gz'
# out_name = f'fithic.bias.HC_{ct}.our_merged.sampled.{res // 1000}kb.txt.gz'
out_name = f'fithic.bias.{ct}.MTG.Tian2023.sampled.{res // 1000}kb.txt.gz'

bins[['chrom', 'midpoint', 'bias']]\
    .to_csv(out_name, sep='\t', header=False, index=False, na_rep=-1)

In [10]:
# Get fragments values
# fr_out_name = f'fithic.fragments.HC_{ct}.ourAndHu2021_merged.sampled.{res // 1000}kb.txt.gz'
# fr_out_name = f'fithic.fragments.HC_{ct}.our_merged.sampled.{res // 1000}kb.txt.gz'
# fr_out_name = f'fithic.fragments.HC_{ct}.Hu2021.sampled.{res // 1000}kb.txt.gz'
fr_out_name = f'fithic.fragments.{ct}.MTG.Tian2023.sampled.{res // 1000}kb.txt.gz'


mtx_arr = np.zeros(bins.shape[0])

# Get column sums of cis contacts
k=0
for i, chrom1 in enumerate(clr.chromnames):
    for j, chrom2 in enumerate(clr.chromnames):
        if i <= j:
            mtx = clr.matrix(balance=False).fetch(chrom1, chrom2)
            if i == j:
                mtx[np.arange(mtx.shape[0]), np.arange(mtx.shape[0])] = 0 # del 1st diag
            mtx_colsum = np.sum(mtx, axis=1)
            mtx_arr[k:k+len(mtx_colsum)] += mtx_colsum
            
    k+=len(mtx_colsum)
    
bins['colsum'] = mtx_arr.astype(int)

# Save fragments values to a file
bins[['chrom', 'start', 'midpoint', 'colsum', 'end']]\
    .to_csv(fr_out_name, sep='\t', header=False, index=False)

In [11]:
# Get and save interaction values
# in_out_name = f'fithic.interactions.HC_{ct}.ourAndHu2021_merged.sampled.{res // 1000}kb.txt.gz'
# in_out_name = f'fithic.interactions.HC_{ct}.our_merged.sampled.{res // 1000}kb.txt.gz'
in_out_name = f'fithic.interactions.{ct}.MTG.Tian2023.sampled.{res // 1000}kb.txt.gz'

for i, chrom in enumerate(clr.chromnames):
    pix = clr.pixels(join=True).fetch(chrom)
    pix = pix.loc[pix['count'] > 0]
    pix.loc[:, 'count'] = pix['count'].astype(int)
    pix['midpoint1'] = ( (pix['start1'] + pix['end1']) / 2 ).astype(int)
    pix['midpoint2'] = ( (pix['start2'] + pix['end2']) / 2 ).astype(int)
    if i == 0:
        pix[['chrom1', 'midpoint1', 'chrom2', 'midpoint2', 'count']]\
            .to_csv(in_out_name, sep='\t', header=False, index=False)
    else: # append
        pix[['chrom1', 'midpoint1', 'chrom2', 'midpoint2', 'count']]\
            .to_csv(in_out_name, mode='a', sep='\t', header=False, index=False)

pd.read_table(in_out_name, nrows=10).head()

Unnamed: 0,chr1,50000,chr1.1,50000.1,41
0,chr1,50000,chr1,150000,306
1,chr1,50000,chr1,250000,28
2,chr1,50000,chr1,350000,1
3,chr1,50000,chr1,650000,9
4,chr1,50000,chr1,850000,1


# Run fithic from console

# Filter fithic results to keep significant interactions

In [2]:
# Load fithic table and drop non-significant interactions
res = 100_000

# fithic_path = {'NeuN+': 'fithic.100kb.NeuN+.all/FitHiC.spline_pass1.res100000.significances.txt.gz',
#                'NeuN-': 'fithic.100kb.NeuN-.all/FitHiC.spline_pass1.res100000.significances.txt.gz'}
# fithic_path = {
#     'NeuN+': 'fithic.HC_NeuN+.our_merged.sampled.100kb.all/FitHiC.spline_pass1.res100000.significances.txt.gz',
#     'NeuN-': 'fithic.HC_NeuN-.our_merged.sampled.100kb.all/FitHiC.spline_pass1.res100000.significances.txt.gz'
# }
# fithic_path = {
#     'NeuN+': 'fithic.HC_NeuN+.Hu2021.sampled.100kb.all/FitHiC.spline_pass1.res100000.significances.txt.gz',
#     'NeuN-': 'fithic.HC_NeuN-.Hu2021.sampled.100kb.all/FitHiC.spline_pass1.res100000.significances.txt.gz'
# }
fithic_path = {
    'EN': 'fithic.EN.MTG.Tian2023.sampled.100kb.all/FitHiC.spline_pass1.res100000.significances.txt.gz',
    'IN': 'fithic.IN.MTG.Tian2023.sampled.100kb.all/FitHiC.spline_pass1.res100000.significances.txt.gz'
}
cts = list(fithic_path.keys())

fithic = {
    ct: pd.read_table( 
            path, 
            usecols=['chr1', 'fragmentMid1', 'chr2', 'fragmentMid2', 'q-value'], 
            dtype={'chr1': 'category', 'fragmentMid1': int, 'chr2': 'category', 'fragmentMid2': int},
        )
    for ct, path in fithic_path.items()
}

for ct in cts:
    print(ct)
    fithic[ct] = fithic[ct].loc[fithic[ct]['q-value'] < 0.05].reset_index(drop=True)\
                           .rename(columns={'chr1': 'chrom1', 'chr2': 'chrom2'})
    fithic[ct]['start1'] = ( fithic[ct]['fragmentMid1'] - res // 2 ).astype(int)
    fithic[ct]['end1'] = ( fithic[ct]['fragmentMid1'] + res // 2 ).astype(int)
    fithic[ct]['start2'] = ( fithic[ct]['fragmentMid2'] - res // 2 ).astype(int)
    fithic[ct]['end2'] = ( fithic[ct]['fragmentMid2'] + res // 2 ).astype(int)

fithic[ct].head()

EN
IN


Unnamed: 0,chrom1,fragmentMid1,chrom2,fragmentMid2,q-value,start1,end1,start2,end2
0,chr1,50000,chr2,113550000,6.752831e-14,0,100000,113500000,113600000
1,chr1,50000,chr2,113650000,0.002436446,0,100000,113600000,113700000
2,chr1,50000,chr2,242146764,3.434134e-07,0,100000,242096764,242196764
3,chr1,50000,chr4,50000,2.7290820000000003e-31,0,100000,0,100000
4,chr1,50000,chr4,190150000,2.004531e-36,0,100000,190100000,190200000


In [3]:
# Save significant fithic interactions
# for ct in ["NeuN+", 'NeuN-']:
#     fithic[ct][['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2', 'q-value']]\
#         .to_pickle(f"fithic.100kb.{ct}.our_merged.sampled.all.qvalue_0.05.pkl")
# for ct in ["NeuN+", 'NeuN-']:
#     fithic[ct][['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2', 'q-value']]\
#         .to_pickle(f"fithic.100kb.{ct}.Hu2021.sampled.all.qvalue_0.05.pkl")
for ct in cts:
    fithic[ct][['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2', 'q-value']]\
        .to_pickle(f"fithic.100kb.{ct}.MTG.Tian2023.sampled.all.qvalue_0.05.pkl")