In [44]:
import numpy as np
import os
import pandas as pd
from matplotlib.colors import LogNorm
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import PIL.Image
from read_HiC import name_chromosomes

import sys
path='/Users/jialechen/Desktop/PhD/CT/Pang_2022_GenomeBiol_3D/functions/'
sys.path.append(path)

from plotting_tools import set_layout
from genome_topology import normalize_psc
from genome_topology import get_matrix
from genome_topology import fractal_dimension
from genome_topology import make_graph

In [45]:
import os

# Report the directory the kernel is currently running in, so the relative
# paths used later in the notebook can be checked against it.
current_path = os.getcwd()
print("Current working directory:", current_path)

Current working directory: /Users/jialechen/Desktop/PhD/CT/Pang_2022_GenomeBiol_3D


In [46]:
import os

# Project root against which the notebook's relative paths ('data',
# 'results/...') are resolved. Set the CT_PROJECT_DIR environment variable to
# run the notebook on another machine without editing this cell; when it is
# unset the original hard-coded location is used, so existing behavior is
# unchanged.
new_path = os.environ.get(
    'CT_PROJECT_DIR',
    '/Users/jialechen/Desktop/PhD/CT/Pang_2022_GenomeBiol_3D',
)
os.chdir(new_path)

# Print the directory actually in effect rather than echoing the request.
print("Changed working directory to:", os.getcwd())

Changed working directory to: /Users/jialechen/Desktop/PhD/CT/Pang_2022_GenomeBiol_3D


## CIRCUIT TOPOLOGY ANALYSIS OF CHROMOSOMES FROM SINGLE-CELL Hi-C CONTACT LISTS

This notebook processes single-cell Hi-C contact lists (in txt format) to extract topological parameters such as:
- CT parameters (percentage of series, parallel and cross, number of contacts) 
- Network clustering coefficient
- Fractal dimension (calculated over the CT topology matrix)

Load contact pair data file. Each file contains contacts from one cell.

In [48]:
# --- Output switches -------------------------------------------------------
save_data = True     # write the topology-parameter tables to CSV
save_matrix = True   # save each chromosome's matrix as a TIFF image
plot_matrix = True   # display each chromosome's matrix with imshow

# --- Genome ----------------------------------------------------------------
n_all_chr = 22  # human
# Chromosome identifiers; the analysis loop matches them against the
# 'chr_A' / 'chr_B' columns of the contact table.
chr_vec = name_chromosomes(n_all_chr)

Set the paths for saving results

Calculate topological parameters looping over all chromosomes in a cell

In [49]:
# One zero-filled slot per chromosome for each topology parameter.
# Seven independent arrays are created (one per name), not seven references
# to a single array.
# Note: the per-cell loop further down allocates its own arrays under the
# same names for every cell it processes.
(P, S, X,
 Dim_fractal, clustering, r2_fractalfit,
 N_contacts) = (np.zeros(n_all_chr) for _ in range(7))

In [None]:
# Circuit-topology analysis of every cell found in the data directory.
#
# For each input file (one cell per file) and each chromosome in `chr_vec`
# this cell computes the P/S/X values, the number of contacts, the fractal
# dimension (with the r-squared of its fit) and the average clustering
# coefficient, optionally plots/saves the matrix, and finally gathers
# everything into `all_results_df`.

# Input files are expected in the 'data' directory, relative to the current
# working directory.
data_dir = 'data'

# Keep every file whose name starts with 'GSM' and ends with '.txt'.
# os.listdir returns entries in arbitrary order, so sort them to process the
# cells (and order the rows of the combined table) reproducibly.
file_names = sorted(f for f in os.listdir(data_dir)
                    if f.startswith('GSM') and f.endswith('.txt'))

# Destination of the CSV tables. Defined before the loop so it exists even
# when no input file is found, and created up front because DataFrame.to_csv
# does not create missing directories.
path_savedata = 'results/CT parameters'
if save_data:
    os.makedirs(path_savedata, exist_ok=True)

# One entry per cell: {'cell': <id>, 'topology_parameters': <DataFrame>}
all_results = []

# Loop through file names
for file in file_names:
    # Full path of the contact list and its content (tab-separated table)
    file_path = os.path.join(data_dir, file)
    contacts = pd.read_csv(file_path, sep='\t')

    # The cell ID is a fixed slice (characters 11-19) of the file name.
    # NOTE(review): this assumes a fixed-width file-name prefix - confirm it
    # holds for every input file.
    cell = file[11:20]
    path_savematrix = 'results/matrices/{}'.format(cell)
    if save_matrix:
        os.makedirs(path_savematrix, exist_ok=True)

    # Per-chromosome result arrays for this cell
    N_contacts = np.zeros(len(chr_vec))
    P = np.zeros(len(chr_vec))
    S = np.zeros(len(chr_vec))
    X = np.zeros(len(chr_vec))
    Dim_fractal = np.zeros(len(chr_vec))
    clustering = np.zeros(len(chr_vec))
    r2_fractalfit = np.zeros(len(chr_vec))

    # Loop through chromosomes
    for t, chrom in enumerate(chr_vec):
        # Keep only intra-chromosomal contacts (both ends on `chrom`)
        contacts_chr = contacts[(contacts['chr_A'] == chrom) & (contacts['chr_B'] == chrom)]

        # One row per contact: [pos_A, pos_B]
        index = np.transpose(np.array([contacts_chr['pos_A'], contacts_chr['pos_B']]))
        N_contacts[t] = len(index)

        # NOTE(review): get_matrix / normalize_psc / fractal_dimension /
        # make_graph are project helpers; their handling of a chromosome
        # with zero contacts is not visible here - verify.
        mat, psc = get_matrix(index, chrom)
        P[t], S[t], X[t] = normalize_psc(psc, N_contacts[t])
        Dim_fractal[t], r2_fractalfit[t] = fractal_dimension(mat, plot_fig=0)
        G = make_graph(index)
        clustering[t] = nx.average_clustering(G)

        if plot_matrix:
            fig = plt.figure(figsize=(5, 5))
            plt.imshow(mat)
            plt.title('{}'.format(chrom))
            # Render and release the figure right away; otherwise one figure
            # per chromosome per cell stays open and matplotlib warns about
            # too many open figures while memory grows.
            plt.show()
            plt.close(fig)

        if save_matrix:
            PIL.Image.fromarray(mat.astype(float)).save(
                "{}/top_matrix_{}_{}.tif".format(path_savematrix, cell, chrom))

    # Table for this cell, tagged with the sample name
    topology_parameters = {
        'Sample': [cell] * len(chr_vec),
        'Parallel (%)': P,
        'Series (%)': S,
        'Cross (%)': X,
        'N contacts': N_contacts,
        'Fractal dimension': Dim_fractal,
        'r squared': r2_fractalfit,
        'Clustering': clustering
    }
    cell_table = pd.DataFrame(topology_parameters)

    # Save the per-cell results once, after all chromosomes are processed
    # (previously the file was rewritten on every chromosome). The per-cell
    # file keeps its original layout: no 'Sample' column, row index included.
    if save_data:
        cell_table.drop(columns='Sample').to_csv(
            '{}/Top_parameters_{}.csv'.format(path_savedata, cell))

    all_results.append({
        'cell': cell,
        'topology_parameters': cell_table
    })

# Merge all per-cell tables into one DataFrame
if all_results:
    all_results_df = pd.concat([result['topology_parameters'] for result in all_results],
                               ignore_index=True)
else:
    # pd.concat raises on an empty list; expose an empty table instead and
    # tell the user why nothing was produced.
    print("No input files matching 'GSM*.txt' found in '{}'".format(data_dir))
    all_results_df = pd.DataFrame(columns=[
        'Sample', 'Parallel (%)', 'Series (%)', 'Cross (%)', 'N contacts',
        'Fractal dimension', 'r squared', 'Clustering'])

# Save the combined table (skipped when there is nothing to save, so an
# existing summary file is not overwritten by an empty one)
if save_data and all_results:
    all_results_df.to_csv('{}/Top_parameters_all_samples.csv'.format(path_savedata), index=False)


  plt.figure(figsize=(5, 5))
