<a href="https://colab.research.google.com/github/deepsharma26/SIRT-isoform_selective-/blob/main/Key_residues_identification_docked_complex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Key residues idenfication in docked complexes


An in-house Python script by which we were able to identify the occurrence count of the key residues in at least 50 % of the compounds against one SIRT isoform.
The outcome of the occurrence count of the key residues has been provided in each case with annotation as Residue number (occurrence in number of compounds). For example, His363 (13) means that His363 interaction was observed in 13 compounds out of 15 compounds. This scrit is for SIRT7 docked compex data.

In [5]:
import pandas as pd
import re
from collections import defaultdict

# Load the Excel file into a pandas DataFrame
df = pd.read_excel('Trail.xlsx')

# Clean up column names by stripping any leading/trailing spaces
df.columns = df.columns.str.strip()

# Define the function to extract residues from the 'Interaction' column
def extract_residues(interaction_str):
    residues = re.findall(r'[A-Za-z]{3}\s?\d+', interaction_str)
    cleaned_residues = {res.replace(' ', '') for res in residues}
    return cleaned_residues

# Apply the extraction function to the 'Interaction' column
df['Residues'] = df['Interaction'].apply(extract_residues)

# Initialize a dictionary to count occurrences and store which compounds contain each residue
residue_info = defaultdict(lambda: {"count": 0, "compounds": set()})

# Iterate over the DataFrame to populate residue_info with occurrences and compound names
for idx, row in df.iterrows():
    compound_name = row['Compound Name']
    residues = row['Residues']

    # For each residue, update the count and track which compounds contain it
    for residue in residues:
        residue_info[residue]['count'] += 1
        residue_info[residue]['compounds'].add(compound_name)

# Convert the residue_info to a DataFrame for easier viewing
residue_summary = pd.DataFrame(
    [(residue, info['count'], ', '.join(info['compounds'])) for residue, info in residue_info.items()],
    columns=['Residue', 'Occurrence Count', 'Compounds']
)

# Display the result
print(residue_summary)

# Optional: Save the result to an Excel file
residue_summary.to_excel('residue_summary.xlsx', index=False)


   Residue  Occurrence Count  \
0   Asn221                12   
1    Arg27                 8   
2   Lys356                 8   
3   Arg305                11   
4   Tyr161                13   
5   Lys329                 4   
6   Trp309                13   
7   Glu260                 9   
8   Tyr216                15   
9   Asp330                 3   
10  Asn359                10   
11  Phe215                12   
12  Asp357                13   
13  Glu218                14   
14  Glu327                 7   
15  Trp141                12   
16  Gly111                 2   
17  Leu109                 1   
18  Ala308                11   
19   Met41                 9   
20  Gly358                 4   
21   Trp31                 7   
22  Asp306                 9   
23  Asn137                 1   
24  Ser160                 4   
25  Glu157                 1   
26  Pro217                 3   
27   Trp61                 1   
28  Asp162                 3   
29  Gln219                 2   
30  Leu3