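This notebook fixes the residue numbering of my trimmed AF3 PDB files: the ATOM residue numbers of each file are shifted so that they start at the first residue of the protein's annotated regions instead of at 1.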

In [37]:
from biopandas.pdb import PandasPdb
import project_pipeline.scripts.utils as utils
import numpy as np
import pandas as pd
import os

In [38]:
def fix_trimmed(df, path):
    """Renumber ATOM residues of trimmed PDB files so they start at the region start.

    For every row of ``df`` the lowest residue number covered by ``region_1``
    and ``region_2`` is determined. If that number is 1 nothing needs to be
    done. Otherwise every file in ``<path>/<UniProt>/`` whose name contains
    ``.pdb`` is read, and unless its first ATOM record already carries that
    lowest number, ``lowest - 1`` is added to all ATOM residue numbers and the
    file is overwritten in place.

    Only the ``ATOM`` table is modified; other record types (e.g. HETATM) are
    written back unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``UniProt``, ``region_1`` and ``region_2``. The two
        region values are parsed with ``utils.string2range``.
        NOTE(review): the results are concatenated with ``+``, so
        ``string2range`` is assumed to return lists -- confirm.
    path : str
        Directory that contains one sub-directory per UniProt ID.

    Returns
    -------
    None
        The function works purely through side effects (printing progress and
        rewriting files on disk).
    """
    for _, row in df.iterrows():
        uniprot = row['UniProt']
        region_1_range = utils.string2range(row['region_1'])
        region_2_range = utils.string2range(row['region_2'])

        # Lowest residue number covered by either region.
        min_range = min(region_1_range + region_2_range)

        # Numbering that already starts at 1 needs no shift. (The original
        # compared the builtin ``min`` to 1, which is never true.)
        if min_range == 1:
            continue

        print(f'Fixing {uniprot}...')
        offset = min_range - 1
        uniprot_dir = os.path.join(path, uniprot)

        for f in os.listdir(uniprot_dir):
            if '.pdb' not in f:
                continue

            pdb_path = os.path.join(uniprot_dir, f)
            ppdb = PandasPdb().read_pdb(pdb_path)
            protein = ppdb.df['ATOM']

            # A file without ATOM records has nothing to renumber.
            if protein.empty:
                print(f'Skipping {f}: no ATOM records')
                continue

            # Already renumbered (e.g. by a previous run): leave untouched so
            # the offset is not applied twice.
            if protein['residue_number'].iloc[0] == min_range:
                continue

            protein['residue_number'] = protein['residue_number'] + offset
            ppdb.df['ATOM'] = protein
            ppdb.to_pdb(path=pdb_path)

In [39]:
# Load the semicolon-separated table of trimmed proteins and discard repeated rows.
df = pd.read_csv(
    './project_pipeline/data/trimmed_proteins.csv',
    sep=';',
).drop_duplicates()

# Root directory handed to fix_trimmed, which looks for a sub-directory per UniProt ID.
path = './project_pipeline/data/input/Colabfold_cif/trimmed/'

# Renumber the PDB files on disk. fix_trimmed has no return statement,
# so df2 ends up bound to None.
df2 = fix_trimmed(df, path)

Fixing P07038...
Fixing Q8NQJ3...
Fixing P60240...
Fixing P28482...
Fixing P62826...
Fixing P12931...
Fixing P22681...
Fixing P21333...
Fixing Q9Y6K1...
Fixing P26358...
Fixing P29350...
Fixing P35520...
Fixing P27577...
Fixing O08967...
Fixing P53042...
Fixing P00579...
