In [19]:
import numpy as np
import matplotlib.pyplot as plt
import PIL.Image
import networkx as nx
import pandas as pd
import string

from functions.genome_topology import open_pdb
from functions.genome_topology import select_chrom
from functions.genome_topology import geom_distance
from functions.genome_topology import make_graph
from functions.genome_topology import fractal_dimension
from functions.genome_topology import get_matrix
from functions.genome_topology import normalize_psc

In [20]:
def load_pdb(file_path):
    """
    Read atom positions from a PDB file.

    Only lines that begin with 'ATOM' or 'HETATM' are used. The x, y and z
    values are taken from the fixed character ranges [30:38], [38:46] and
    [46:54] of each such line and converted to float.

    Parameters:
    file_path (str): Location of the PDB file to read.

    Returns:
    int: How many atom records were found.
    list of tuples: One (x, y, z) tuple of floats per atom, in file order.
    """
    def _xyz(record):
        # Each coordinate occupies an 8-character field; the three fields
        # start at offsets 30, 38 and 46 of the record.
        return tuple(float(record[start:start + 8].strip()) for start in (30, 38, 46))

    with open(file_path, 'r') as handle:
        atoms = [_xyz(record) for record in handle
                 if record.startswith(('ATOM', 'HETATM'))]

    return len(atoms), atoms


def main(parameters, path_data, path_results):
    """
    Cumulative topology analysis of the structure stored in a PDB file.

    The coordinates in ``path_data`` are analysed on growing prefixes: the
    first ``resolution`` atoms, then the first ``2 * resolution`` atoms, and
    so on. For every prefix the contact count, the normalized
    parallel/series/cross statistics, the fractal dimension (with the r
    squared of its fit) and the average clustering of the contact graph are
    stored. One CSV table per chromosome label is written to ``path_results``.

    Parameters:
    parameters (dict): Analysis settings. Keys read here:
        'N chromosomes': number of chromosome labels ('chr a', 'chr b', ...)
            to loop over (at most 26, one per lowercase letter).
        'resolution': number of atoms added at every cumulative step.
        'init': atom count to start from; steps below init/resolution are
            skipped and stay at zero in the output.
        'cutoff', 'neighbors': forwarded unchanged to geom_distance.
    path_data (str): Path to the PDB file. Its last five characters are used
        as the tag in the output file names.
    path_results (str): Directory that receives the CSV files (must exist).

    Returns:
    None. Results are written to
    '<path_results>/Top_parameters_<cell>_<chrom>.csv'.
    """
    # Read every setting from the dict that was passed in rather than from
    # notebook-level globals, so the function does not depend on kernel state.
    resolution = parameters['resolution']
    n_chromosomes = parameters['N chromosomes']

    chr_vec = ['chr {}'.format(letter)
               for letter in string.ascii_lowercase[:n_chromosomes]]
    # NOTE(review): for a path such as '.../chr_a.pdb' this tag is 'a.pdb',
    # not a cell identifier. Kept as-is because it determines the output
    # file names -- confirm the intended tag.
    cell = path_data[-5:]

    # The input file does not depend on the chromosome label, so load and
    # convert it once instead of once per loop iteration.
    n, coord_list = load_pdb(path_data)
    coord = np.array(coord_list)  # Convert list of tuples into NumPy array
    iterations = int(n / resolution)
    start_iteration = int(parameters['init'] / resolution)

    # NOTE(review): every chromosome label analyses the same coordinates,
    # because per-chromosome selection (select_chrom) is not used here.
    for chrom in chr_vec:
        print('analyzing the chrom', chrom)

        Parallel = np.zeros(iterations)
        Series = np.zeros(iterations)
        Cross = np.zeros(iterations)
        Dim_fractal = np.zeros(iterations)
        r2_fractalfit = np.zeros(iterations)
        N_contacts = np.zeros(iterations)
        clustering = np.zeros(iterations)

        for t in range(start_iteration, iterations):

            n_atoms = int(resolution * (t + 1))
            print('analyzing the n_tom', n_atoms)
            coord_cut = coord[0:n_atoms]
            _, N_contacts[t], index = geom_distance(
                coord_cut, parameters['cutoff'], parameters['neighbors'])

            try:
                mat, stats = get_matrix(index, chrom)
                Parallel[t], Series[t], Cross[t] = normalize_psc(stats, N_contacts[t])
                Dim_fractal[t], r2_fractalfit[t] = fractal_dimension(mat, plot_fig=0)
                G = make_graph(index)
                clustering[t] = nx.average_clustering(G)
            except Exception as exc:
                # Best-effort: a failing step leaves the remaining entries for
                # this t at zero. Catch Exception (not a bare except) so that
                # Ctrl-C still interrupts the run, and report the actual
                # cause next to the original warning.
                print('WARNING: NOT ENOUGH CONTACTS FOR ANALYSIS ({}: {})'.format(
                    type(exc).__name__, exc))

        topology_parameters = pd.DataFrame({
            'Parallel (%)': Parallel,
            'Series (%)': Series,
            'Cross (%)': Cross,
            'N contacts': N_contacts,
            'Fractal dimension': Dim_fractal,
            'r squared': r2_fractalfit,
            'Clustering': clustering,
        })
        topology_parameters.to_csv('{}/Top_parameters_{}_{}.csv'.format(
            path_results, cell, chrom))

     
        
        
# Settings for the cumulative topology analysis.
r_cutoff = 1.0    # 'cutoff' value handed to geom_distance
neighbours = 1    # 'neighbors' value handed to geom_distance
n_all_chr = 20    # number of chromosome labels ('chr a', 'chr b', ...) processed by main
resolution = 5    # atoms added per cumulative step
start_from = 0    # atom count at which the cumulative analysis starts

# Bundle the settings into the dict that main() receives.
parameters = {
    'cutoff': r_cutoff,
    'neighbors': neighbours,
    'N chromosomes': n_all_chr,
    'resolution': resolution,
    'init': start_from,
}

In [None]:
import os

# Run the cumulative analysis for every 'Cell*' entry found under ./pdb/.
folder = './pdb/'
# os.listdir returns entries in arbitrary order; sort them so that repeated
# runs process (and log) the cells in the same order.
cell_ids = sorted(f for f in os.listdir(folder) if f.startswith('Cell'))

for cell_id in cell_ids:
    path_data = os.path.join(folder, cell_id, 'chr_a.pdb')
    # A stray file or an incomplete cell folder would otherwise abort the
    # whole batch inside main(); skip it and keep going.
    if not os.path.isfile(path_data):
        print('WARNING: skipping {}: {} not found'.format(cell_id, path_data))
        continue

    path_results = os.path.join('results', 'cumulative analysis', cell_id)
    # exist_ok avoids the separate existence check.
    os.makedirs(path_results, exist_ok=True)

    main(parameters, path_data, path_results)


analyzing the chrom chr a
analyzing the n_tom 5
analyzing the n_tom 10
analyzing the n_tom 15
analyzing the n_tom 20
analyzing the n_tom 25
analyzing the n_tom 30
analyzing the n_tom 35
analyzing the n_tom 40
analyzing the n_tom 45
analyzing the n_tom 50
analyzing the n_tom 55
analyzing the n_tom 60
analyzing the n_tom 65
analyzing the n_tom 70
analyzing the n_tom 75
analyzing the n_tom 80
analyzing the n_tom 85
analyzing the n_tom 90
analyzing the n_tom 95
analyzing the n_tom 100
analyzing the n_tom 105
analyzing the n_tom 110
analyzing the n_tom 115
analyzing the n_tom 120
analyzing the n_tom 125
analyzing the n_tom 130
analyzing the n_tom 135
analyzing the n_tom 140
analyzing the n_tom 145
analyzing the n_tom 150
analyzing the n_tom 155
analyzing the n_tom 160
analyzing the n_tom 165
analyzing the n_tom 170
analyzing the n_tom 175
analyzing the n_tom 180
analyzing the n_tom 185
analyzing the n_tom 190
analyzing the n_tom 195
analyzing the n_tom 200
analyzing the n_tom 205
analyzing 

analyzing the n_tom 1675
analyzing the n_tom 1680
analyzing the n_tom 1685
analyzing the n_tom 1690
analyzing the n_tom 1695
analyzing the n_tom 1700
analyzing the n_tom 1705
analyzing the n_tom 1710
analyzing the n_tom 1715
analyzing the n_tom 1720
analyzing the n_tom 1725
analyzing the n_tom 1730
analyzing the n_tom 1735
analyzing the n_tom 1740
analyzing the n_tom 1745
analyzing the n_tom 1750
analyzing the n_tom 1755
analyzing the n_tom 1760
analyzing the n_tom 1765
analyzing the n_tom 1770
analyzing the n_tom 1775
analyzing the n_tom 1780
analyzing the n_tom 1785
analyzing the n_tom 1790
analyzing the n_tom 1795
analyzing the n_tom 1800
analyzing the n_tom 1805
analyzing the n_tom 1810
analyzing the n_tom 1815
analyzing the n_tom 1820
analyzing the n_tom 1825
analyzing the n_tom 1830
analyzing the n_tom 1835
analyzing the n_tom 1840
analyzing the n_tom 1845
analyzing the n_tom 1850
analyzing the n_tom 1855
analyzing the n_tom 1860
analyzing the n_tom 1865
analyzing the n_tom 1870


analyzing the n_tom 3315
analyzing the n_tom 3320
analyzing the n_tom 3325
analyzing the n_tom 3330
analyzing the n_tom 3335
analyzing the n_tom 3340
analyzing the n_tom 3345
analyzing the n_tom 3350
analyzing the n_tom 3355
analyzing the n_tom 3360
analyzing the n_tom 3365
analyzing the n_tom 3370
analyzing the n_tom 3375
analyzing the n_tom 3380
analyzing the n_tom 3385
analyzing the n_tom 3390
analyzing the n_tom 3395
analyzing the n_tom 3400
analyzing the n_tom 3405
analyzing the n_tom 3410
analyzing the n_tom 3415
analyzing the n_tom 3420
analyzing the n_tom 3425
analyzing the n_tom 3430
analyzing the n_tom 3435
analyzing the n_tom 3440
analyzing the n_tom 3445
analyzing the n_tom 3450
analyzing the n_tom 3455
analyzing the n_tom 3460
analyzing the n_tom 3465
analyzing the n_tom 3470
analyzing the n_tom 3475
analyzing the n_tom 3480
analyzing the n_tom 3485
analyzing the n_tom 3490
analyzing the n_tom 3495
analyzing the n_tom 3500
analyzing the n_tom 3505
analyzing the n_tom 3510


analyzing the n_tom 4955
analyzing the n_tom 4960
analyzing the n_tom 4965
analyzing the n_tom 4970
analyzing the n_tom 4975
analyzing the n_tom 4980
analyzing the n_tom 4985
analyzing the n_tom 4990
analyzing the n_tom 4995
analyzing the n_tom 5000
analyzing the n_tom 5005
analyzing the n_tom 5010
analyzing the n_tom 5015
analyzing the n_tom 5020
analyzing the n_tom 5025
analyzing the n_tom 5030
analyzing the n_tom 5035
analyzing the n_tom 5040
analyzing the n_tom 5045
analyzing the n_tom 5050
analyzing the n_tom 5055
analyzing the n_tom 5060
analyzing the n_tom 5065
analyzing the n_tom 5070
analyzing the n_tom 5075
analyzing the n_tom 5080
analyzing the n_tom 5085
analyzing the n_tom 5090
analyzing the n_tom 5095
analyzing the n_tom 5100
analyzing the n_tom 5105
analyzing the n_tom 5110
analyzing the n_tom 5115
analyzing the n_tom 5120
analyzing the n_tom 5125
analyzing the n_tom 5130
analyzing the n_tom 5135
analyzing the n_tom 5140
analyzing the n_tom 5145
analyzing the n_tom 5150
