### Usando un archivo PDB
Se utilizó el archivo de estructura de una proteína cualquiera, descargado del Protein Data Bank (PDB).

In [2]:
!pip install Bio

Collecting Bio
  Downloading bio-1.7.1-py3-none-any.whl.metadata (5.7 kB)
Collecting biopython>=1.80 (from Bio)
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting gprofiler-official (from Bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting mygene (from Bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)
Collecting biothings-client>=0.2.6 (from mygene->Bio)
  Downloading biothings_client-0.4.1-py3-none-any.whl.metadata (10 kB)
Downloading bio-1.7.1-py3-none-any.whl (280 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.0/281.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gprofiler_official-1.0.0-py3-none-any.whl (9.3

In [3]:
# Contact Map Logic

from Bio.PDB import PDBParser
import numpy as np
from sklearn.decomposition import PCA

def generate_contact_map(pdb_file, threshold=8.0, n_components=0.99):
    """Build a flattened, PCA-reduced C-alpha contact map for a PDB structure.

    The first model of the structure is scanned for C-alpha ("CA") atoms, an
    all-against-all distance matrix is computed, and every pair whose distance
    is at or below ``threshold`` is marked as a contact (the diagonal is
    therefore always 1). The binary matrix is then projected with PCA, using
    each residue row as one sample, and the projection is flattened row by row.

    Note that the returned vector is the flattened PCA projection, not the
    contact map itself.

    Parameters
    ----------
    pdb_file : str or file-like
        Passed unchanged to ``PDBParser.get_structure``.
    threshold : float, default 8.0
        Largest C-alpha/C-alpha distance, in the coordinate units of the file,
        at which two residues are still considered to be in contact.
    n_components : float or int, default 0.99
        Forwarded to ``sklearn.decomposition.PCA(n_components=...)``.

    Returns
    -------
    numpy.ndarray
        1-D array with ``n_residues * n_kept_components`` values. Its length
        depends on the structure, so results for different proteins generally
        have different sizes.

    Raises
    ------
    ValueError
        If the first model contains no C-alpha atoms.
    """
    # Parse the PDB file; only the first model is used.
    parser = PDBParser()
    structure = parser.get_structure("protein", pdb_file)
    model = structure[0]

    # Collect the coordinates of the C-alpha atoms.
    ca_coords = []
    for chain in model:
        for residue in chain:
            # A calcium ion is stored as a residue named "CA" whose atom is
            # also keyed "CA"; without this guard it would be mistaken for a
            # C-alpha atom and add a spurious row/column to the map.
            if residue.get_resname().strip().upper() == "CA":
                continue
            try:
                ca_coords.append(residue["CA"].get_coord())
            except KeyError:
                # Waters, ligands and incomplete residues have no C-alpha.
                continue

    if not ca_coords:
        raise ValueError(
            f"No C-alpha atoms found in the first model of {pdb_file!r}"
        )

    # Pairwise distances, one vectorised row at a time: this avoids both the
    # O(n^2) Python-level loop and an (n, n, 3) temporary array.
    coords = np.asarray(ca_coords, dtype=float)
    n_atoms = len(coords)
    distances = np.empty((n_atoms, n_atoms))
    for i in range(n_atoms):
        distances[i] = np.linalg.norm(coords - coords[i], axis=1)

    # Binary contact map: 1 where the distance is within the threshold.
    contact_map = np.where(distances <= threshold, 1, 0)

    # Reduce the dimensionality of the rows with PCA, then flatten the
    # (n_residues, n_kept_components) projection into a single vector.
    pca = PCA(n_components=n_components)
    projection = pca.fit_transform(contact_map)

    return projection.flatten()

In [7]:
# Location of the first PDB file to analyse.
archivo = "/content/prueba.pdb"

In [9]:
# Compute the flattened, PCA-reduced contact-map vector for the file in `archivo`.
contactmap = generate_contact_map(archivo)



In [15]:
# Show the vector returned by generate_contact_map (NumPy abbreviates long arrays).
contactmap

array([-0.03996882,  0.05989579, -0.01521856, ...,  0.0211225 ,
       -0.02227754,  0.07112611])

In [13]:
# Shape of the result; a single axis is expected because the function flattens its output.
contactmap.shape

(317998,)

In [14]:
# Number of array dimensions (1 for a flattened vector).
contactmap.ndim

1

In [17]:
# Total number of elements; for a 1-D array this equals shape[0].
contactmap.size

317998

### Utilizando un archivo PDB descargado de AlphaFold

In [18]:
# Location of the second PDB file (the AlphaFold download) to analyse.
archivo_alfa = "/content/pruebaalfa.pdb"

In [19]:
# Compute the flattened, PCA-reduced contact-map vector for the file in `archivo_alfa`.
contactmap_alfa = generate_contact_map(archivo_alfa)

In [20]:
# Show the vector returned by generate_contact_map (NumPy abbreviates long arrays).
contactmap_alfa

array([-0.04434061, -0.10327664,  0.12603272, ..., -0.00455159,
        0.02786685,  0.08509771])

In [21]:
# Total number of elements in the result for the second file; compare with contactmap.size.
contactmap_alfa.size

252630