In [None]:
import prody
import numpy as np

# PDB identifiers to process; the empty strings are placeholders that will be
# reported as errors by the loop below.
Hsp90_Inhibitors = ['5j20', '', '']

# Semicolon-separated output file: one "PDB ID;distance" row per processed entry.
file_path = '/users/file.txt'

# PDB IDs for which any step (parsing, atom selection, distance) raised an error.
err_entries = []

# Explicit UTF-8: the header contains the non-ASCII character "Å", so the
# platform default encoding must not decide how the file is written.
with open(file_path, "w", encoding="utf-8") as file:
    # Write the column header exactly once, before any data rows.
    file.write("PDB ID;Distance in Å\n")

    for pdbid in Hsp90_Inhibitors:
        try:
            # Parse the structure for this identifier.
            # NOTE(review): given a bare ID, parsePDB presumably fetches the
            # entry from the RCSB PDB - confirm against the ProDy docs.
            pdb = prody.parsePDB(pdbid)
            print(f"Downloaded {pdbid}")

            # Fail early with a clear message when the structure has no chain A.
            if pdb.select('chain A') is None:
                raise ValueError(f"Chain A not found in PDB {pdbid}")

            # Select the two atoms whose separation is measured:
            # backbone O of residue 106 and backbone N of residue 110 in chain A.
            atoms1 = pdb.select('chain A and resnum 106 and name O')
            atoms2 = pdb.select('chain A and resnum 110 and name N')

            if atoms1 is None or atoms2 is None:
                raise ValueError(f"Atoms not found in PDB {pdbid}")

            # Each selection must resolve to exactly one atom, otherwise the
            # distance below would be ambiguous.
            if len(atoms1) != 1 or len(atoms2) != 1:
                raise ValueError(f"Selection returned more than one atom in PDB {pdbid}")

            # Coordinates of the single selected atom in each selection
            coords1 = atoms1.getCoords()[0]
            coords2 = atoms2.getCoords()[0]

            # Distance between the two atoms
            distance = prody.calcDistance(coords1, coords2)

            # Save the outcome as one data row
            file.write(f"{pdbid};{distance}\n")
            print(f"Distance between residues 106 and 110 in {pdbid}: {distance} in Å")

        except Exception as e:
            # Best-effort batch run: record the failing ID, report why, and
            # continue with the next entry.
            err_entries.append(pdbid)
            print(f"Error processing {pdbid}: {e}")

In [None]:
import pickle
import pandas as pd

# Load data into DataFrame.
# The input file carries its "PDB ID;Distance in Å" header as ordinary lines
# (possibly repeated before every data row), so every line is read as data
# with explicit column names and the header lines are filtered out afterwards.
# The ID column is forced to text so that the comparison below is always
# string-to-string and IDs such as "1e10" are never parsed as numbers.
df = pd.read_csv(
    '/users/file.txt',
    sep=';',
    header=None,
    names=['PDB ID', 'Distance in Å'],
    dtype={'PDB ID': str},
)

# Drop header lines that were read as data; renumber the remaining rows.
df = df[df['PDB ID'] != 'PDB ID'].reset_index(drop=True)

# Convert the distance column to numeric format; values that cannot be parsed
# become NaN instead of raising.
df['Distance in Å'] = pd.to_numeric(df['Distance in Å'], errors='coerce')

# Apply classification based on thresholds
def classify(value):
    """Return the binder class for a distance value.

    Args:
        value: Distance as a number (NaN is accepted).

    Returns:
        'Helix Binder' when value <= 4, 'Loop Binder' when 4 < value <= 8.0,
        and None for anything else (values above 8.0 and NaN).
    """
    if value <= 4:
        return 'Helix Binder'
    # Contiguous with the branch above: anything over 4 and up to 8.0 is a
    # loop binder, so values between 4 and 4.1 are no longer left unlabelled.
    if value <= 8.0:
        return 'Loop Binder'
    # Outside both ranges (or NaN, for which every comparison is False).
    return None

# Label every row: run each distance through classify() and store the result
# in a new 'Classification' column.
df['Classification'] = df['Distance in Å'].apply(classify)

# Persist the labelled table as semicolon-separated text, without the row index.
df.to_csv(path_or_buf='/users/file_classification.txt', sep=';', index=False)

# Show the labelled table.
print(df)

# Tally how many rows received each label.
classification_counts = df['Classification'].value_counts()

# Report the tally beneath a heading (blank line, heading, then the counts).
print("\nClassification counts:", classification_counts, sep="\n")