In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, interactive, fixed, interact_manual



In [3]:
# load data from csv
df = pd.read_csv('pocket_active_sites_gass_prank.csv', sep=',', header=0)
# create new column with first two letters of EC number
df['EC_NUMBER_2'] = df['EC_NUMBER'].str[:3]

# create new column with first letter of EC number
df['EC_NUMBER_1'] = df['EC_NUMBER'].str[:1]

#strip whitespace from the values in the 'POCKET' column
df['POCKET'] = df['POCKET'].str.strip()

# filter according to POCKET (filter out pocket1 and pocket2)
df_filtered = df[df['POCKET'] != 'pocket1']
df_filtered = df_filtered[df_filtered['POCKET'] != 'pocket2']

# create a new column with only residue number (strip the chain)
df_filtered['RESIDUE_NUMBER'] = df_filtered['RESIDUE'].str[2:]

# create a new column with only chain 
df_filtered['CHAIN'] = df_filtered['RESIDUE'].str[0]

# filter data according to fitness score < 5
df_filtered = df_filtered[df_filtered['FITNESS'] < 5]


# form a column which calculates average FITNESS score for each POCKET in each PDB_ID
df_filtered['avg_fitness'] = df_filtered.groupby(['PDB_ID', 'POCKET'])['FITNESS'].transform('mean')

df_filtered.head()

Unnamed: 0,POCKET,RESIDUE,EC_NUMBER,FITNESS,PDB_ID,TEMPLATE_RESIDUE_NUMBER,EC_NUMBER_2,EC_NUMBER_1,RESIDUE_NUMBER,CHAIN,avg_fitness
10,pocket3,B_43,1.6.6.-,0.0101,2n0a,2,1.6,1,43,B,0.5624
11,pocket3,E_46,1.9.3.1,0.0403,2n0a,2,1.9,1,46,E,0.5624
12,pocket3,E_80,4.2.1.22,0.0192,2n0a,2,4.2,4,80,E,0.5624
13,pocket4,A_80,2.5.1.17,0.0255,2n0a,2,2.5,2,80,A,1.753629
14,pocket4,C_80,4.2.1.22,0.0365,2n0a,2,4.2,4,80,C,1.753629


In [4]:
def heatmap_pocket_pdbID_by_fitness_score(df_filtered_ec):
    heatmap_data = df_filtered_ec.pivot_table(index='POCKET', columns='PDB_ID', values='avg_fitness', aggfunc='mean')
    # Creating the heatmap
    plt.figure(figsize=(14, 10))
    sns.heatmap(heatmap_data, annot=False, cmap="viridis")
    plt.title('Heatmap of Average Fitness Scores by Pocket and PDB ID')
    plt.xlabel('PDB ID')
    plt.ylabel('Residue')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [5]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number = df_filtered['EC_NUMBER'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER'] == ec_number]
    heatmap_pocket_pdbID_by_fitness_score(df_filtered_ec)
   

interactive(children=(Dropdown(description='ec_number', options=('1.6.6.-', '1.9.3.1', '4.2.1.22', '2.5.1.17',…

In [6]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number= df_filtered['EC_NUMBER_2'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER_2'] == ec_number]
    heatmap_pocket_pdbID_by_fitness_score(df_filtered_ec)

interactive(children=(Dropdown(description='ec_number', options=('1.6', '1.9', '4.2', '2.5', '4.3', '4.1', '1.…

In [7]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number= df_filtered['EC_NUMBER_1'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER_1'] == ec_number]
    heatmap_pocket_pdbID_by_fitness_score(df_filtered_ec)

interactive(children=(Dropdown(description='ec_number', options=('1', '4', '2', '6', '5', nan, '3'), value='1'…

In [9]:
def heatmap_residue_pdbID_by_fitness_score(df_filtered_ec):
    # Pivot the data for the heatmap
    heatmap_data = df_filtered_ec.pivot_table(index='RESIDUE_NUMBER', columns='PDB_ID', values='FITNESS', aggfunc='mean')

    # Creating the heatmap
    plt.figure(figsize=(14, 10))
    sns.heatmap(heatmap_data, annot=False, cmap="viridis")
    plt.title('Heatmap of FITNESS Scores by Residue number and PDB ID')
    plt.xlabel('PDB ID')
    plt.ylabel('Residue')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [10]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number = df_filtered['EC_NUMBER'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER'] == ec_number]
    heatmap_residue_pdbID_by_fitness_score(df_filtered_ec)

interactive(children=(Dropdown(description='ec_number', options=('1.6.6.-', '1.9.3.1', '4.2.1.22', '2.5.1.17',…

In [11]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number = df_filtered['EC_NUMBER_2'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER_2'] == ec_number]
    heatmap_residue_pdbID_by_fitness_score(df_filtered_ec)

interactive(children=(Dropdown(description='ec_number', options=('1.6', '1.9', '4.2', '2.5', '4.3', '4.1', '1.…

In [12]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number = df_filtered['EC_NUMBER_1'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER_1'] == ec_number]
    heatmap_residue_pdbID_by_fitness_score(df_filtered_ec)

interactive(children=(Dropdown(description='ec_number', options=('1', '4', '2', '6', '5', nan, '3'), value='1'…

In [13]:
@interact
def visualize_heatmap_pocket_pdbid_ecnumber(ec_number = df_filtered['EC_NUMBER_2'].unique(), chain = df_filtered['CHAIN'].unique()):
    # Filter data according to the selected EC number
    df_filtered_ec = df_filtered[df_filtered['EC_NUMBER_2'] == ec_number]
    df_filtered_ec = df_filtered_ec[df_filtered_ec['CHAIN'] == chain]
    heatmap_residue_pdbID_by_fitness_score(df_filtered_ec)

interactive(children=(Dropdown(description='ec_number', options=('1.6', '1.9', '4.2', '2.5', '4.3', '4.1', '1.…