In [9]:
import os
from io import StringIO

import boto3
import numpy as np
import pandas as pd
from biopandas.pdb import PandasPdb



In [2]:
# Location of the prediction CSV inside the S3 bucket
bucket = 'bbio-data-platform'
key = 'dev/ab-discovery/parapred/NI006_Fv_paragraph_pred.csv'

# S3 client; boto3 resolves credentials from the ambient AWS configuration
s3 = boto3.client('s3')

# Download the object, decode its payload as UTF-8 text, and parse that text
# as CSV through an in-memory buffer
response = s3.get_object(Key=key, Bucket=bucket)
csv_content = response['Body'].read().decode(encoding='utf-8')
df = pd.read_csv(StringIO(csv_content))


In [3]:
# Inspect the dtype of the `pred` column of the loaded CSV
df['pred'].dtype

dtype('float64')

In [4]:
# Shade the `pred` column on a fixed 0–1 scale (vmin/vmax pinned) so the colours
# do not depend on the range of values present in this particular table.
# With the RdYlGn colormap, values near 0 render red and values near 1 green.
styled = df.style.background_gradient(
    subset=['pred'],
    cmap='RdYlGn',
    vmin=0,
    vmax=1,
)

# A Styler as the last expression of the cell renders as a colour-formatted table
styled


Unnamed: 0,pdb,chain_type,chain_id,IMGT,AA,atom_num,x,y,z,pred
0,NI006_Fv_structure_imgt,H,H,25,VAL,340,-17.205,2.004,3.31,0.014362
1,NI006_Fv_structure_imgt,H,H,26,SER,356,-18.378,5.323,4.97,0.008897
2,NI006_Fv_structure_imgt,H,H,27,GLY,367,-18.44,9.011,3.529,0.105539
3,NI006_Fv_structure_imgt,H,H,28,GLY,374,-18.8,8.041,0.11,0.199427
4,NI006_Fv_structure_imgt,H,H,29,SER,381,-20.481,5.347,-1.939,0.251171
5,NI006_Fv_structure_imgt,H,H,30,ILE,392,-18.754,2.07,-2.887,0.180706
6,NI006_Fv_structure_imgt,H,H,31,ILE,411,-20.274,2.36,-6.166,0.696692
7,NI006_Fv_structure_imgt,H,H,34,SER,430,-18.373,4.948,-7.467,0.779182
8,NI006_Fv_structure_imgt,H,H,35,ARG,441,-17.131,4.446,-11.146,0.953349
9,NI006_Fv_structure_imgt,H,H,36,SER,465,-13.556,5.834,-10.285,0.942694


In [5]:
# List the column names of the DataFrame parsed from the prediction CSV
df.columns

Index(['pdb', 'chain_type', 'chain_id', 'IMGT', 'AA', 'atom_num', 'x', 'y',
       'z', 'pred'],
      dtype='object')

In [10]:
# ParaSurf predictions: fetch the predicted PDB from S3 and parse it
# (only loading happens in this cell; no join is performed here)
bucket = 'bbio-data-platform'
key = 'dev/ab-discovery/parasurf/NI006_Fv_structure_imgt_parasurf_pred/NI006_pred.pdb'

s3 = boto3.client('s3')

# Download the object and decode its payload to a UTF-8 string
response = s3.get_object(Key=key, Bucket=bucket)
pdb_text = response['Body'].read().decode(encoding='utf-8')

# read_pdb_from_list expects a list of lines; iterating a StringIO over the
# in-memory text yields each line with its line ending preserved
ppdb = PandasPdb()
ppdb.read_pdb_from_list(list(StringIO(pdb_text)))

# ATOM records of the parsed structure as a DataFrame
df_atoms = ppdb.df['ATOM']

In [12]:
# List the columns available on the ATOM-record DataFrame
df_atoms.columns

Index(['record_name', 'atom_number', 'blank_1', 'atom_name', 'alt_loc',
       'residue_name', 'blank_2', 'chain_id', 'residue_number', 'insertion',
       'blank_3', 'x_coord', 'y_coord', 'z_coord', 'occupancy', 'b_factor',
       'blank_4', 'segment_id', 'element_symbol', 'charge', 'line_idx'],
      dtype='object')

In [18]:
# Keep one row per distinct (chain_id, residue_number, b_factor) combination
# found in the ATOM records; presumably b_factor carries the ParaSurf score —
# TODO confirm against the ParaSurf output format.
# NOTE(review): `insertion` is not part of the key, so rows that share
# chain_id, residue_number and b_factor but differ only in insertion code
# collapse into a single row — confirm this is intended for this structure.
df_parasurf = df_atoms.loc[:, ['chain_id', 'residue_number', 'b_factor']].drop_duplicates()

In [19]:
# (rows, columns) of the de-duplicated chain / residue / b_factor table
df_parasurf.shape

(227, 3)

In [20]:
# Display the de-duplicated table; the index keeps the row labels of df_atoms
df_parasurf

Unnamed: 0,chain_id,residue_number,b_factor
0,H,1,0.710
9,H,2,0.544
16,H,3,0.005
25,H,4,0.001
33,H,5,0.030
...,...,...,...
1671,L,123,0.000
1680,L,124,0.000
1687,L,125,0.000
1696,L,126,0.000


In [21]:
# Shade the `b_factor` column with the RdYlGn colormap on a fixed 0–1 scale
styled_parasurf = df_parasurf.style.background_gradient(
    vmin=0,
    vmax=1,
    cmap='RdYlGn',
    subset=['b_factor'],
)

# Last expression of the cell, so the Styler renders as a coloured table
styled_parasurf

Unnamed: 0,chain_id,residue_number,b_factor
0,H,1,0.71
9,H,2,0.544
16,H,3,0.005
25,H,4,0.001
33,H,5,0.03
42,H,6,0.005
51,H,7,0.004
57,H,8,0.009
61,H,9,0.004
68,H,11,0.002
