In [1]:
from vertexai import generative_models
from vertexai.generative_models import GenerativeModel
import json

In [2]:
# Instantiate the Gemini model object that the later cells send prompts to.
MODEL_NAME = "gemini-1.0-pro-vision"
model = GenerativeModel(model_name=MODEL_NAME)

In [3]:
# Have the model word the request for a PDB ID, then show what it wrote.
ask_prompt = 'Please ask the user to provide a pdb ID for the protein of interest.'
response = model.generate_content(ask_prompt)
print(response.text)

To initiate the analysis, please provide the Protein Data Bank (PDB) ID for the protein of interest. This ID will allow us to access the structural data of the protein and proceed with the analysis. Please enter the PDB ID below:


In [4]:
# PDB entry to analyse, and the sentence that tells the model about it.
pdb_ID = '2L0J'
prompt_pdb = f'The pdb ID is {pdb_ID}'
prompt_pdb  # bare last expression -> echoed as the cell output

'The pdb ID is 2L0J'

In [5]:
# Send the sentence containing the PDB ID to the model and keep its reply.
# NOTE(review): this is a standalone generate_content call, not a chat session;
# presumably the model keeps no memory of it -- confirm. The following cell
# works from the text of this reply (feed_pdb.text).
feed_pdb = model.generate_content(prompt_pdb)

In [6]:
# Ask the model to read the PDB ID back out of its previous reply.
# The answer is used verbatim to build a URL and a file name in later cells, so
# the prompt (a) separates the question from the quoted reply instead of gluing
# the two together and (b) asks for the bare ID with no surrounding prose.
pdb_check = model.generate_content(
    'Which pdb ID has been provided? '
    'Answer with only the 4-character ID and nothing else.\n\n'
    f'{feed_pdb.text}'
)

In [7]:
# Show the model's answer so it can be checked by eye; later cells use this
# text to build the download URL and the output file name.
print(pdb_check.text)

2L0J


In [8]:
# Next step: download the PDB file for this ID and read atom coordinates from it

In [9]:
#helper function to download pdb
import requests

def download_pdb(pdb_id, save_path, timeout=30):
    """Download a PDB entry from RCSB and write it to ``save_path``.

    Args:
        pdb_id: PDB identifier such as ``'2L0J'``. Surrounding whitespace is
            stripped before the URL is built, because the ID may come
            straight from free-text model output.
        save_path: Path of the file to create; overwritten if it exists.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the notebook
            kernel indefinitely.

    Returns:
        True if the file was downloaded and saved, False if the server
        answered with a non-200 status (nothing is written in that case).

    Raises:
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    pdb_id = pdb_id.strip()

    # Construct the URL for the PDB file
    pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    # Send a GET request to download the file; bounded by `timeout`
    response = requests.get(pdb_url, timeout=timeout)

    # Anything other than 200 means no usable file body was returned
    if response.status_code != 200:
        print(f"Failed to download {pdb_id}.pdb")
        return False

    # Save the raw bytes exactly as served
    with open(save_path, 'wb') as f:
        f.write(response.content)
    print(f"Successfully downloaded {pdb_id}.pdb")
    return True

In [10]:
# Download the structure for the ID the model reported.
# The model reply is free text, so strip stray whitespace/newlines and refuse
# anything that is not a plain alphanumeric token before it is placed in a URL
# and a file name.
pdb_id = pdb_check.text.strip()
if not pdb_id.isalnum():
    raise ValueError(f"Model did not return a bare PDB ID: {pdb_id!r}")
save_path = pdb_id + '.pdb'  # Path to save the downloaded PDB file (current directory)
download_pdb(pdb_id, save_path)


Successfully downloaded 2L0J.pdb


In [11]:
# Ask the model to generate code that opens the PDB file and reads the
# positions of the CA atoms.
# Only the file *name* is sent as a text part (not the file contents), so the
# model can refer to that path in the code it writes.
pdb_file = pdb_check.text.strip() + '.pdb'  # strip: model replies can carry stray whitespace
code_prompt = 'Please write code to open the pdb and print the coordinates of the CA atoms to an array.'
open_pdb = model.generate_content([pdb_file, code_prompt])
print(open_pdb.text)

```python
import Bio.PDB
import numpy as np

# Open the PDB file
parser = Bio.PDB.PDBParser()
structure = parser.get_structure("2L0J", "2L0J.pdb")

# Get the coordinates of the CA atoms
ca_coords = []
for model in structure:
    for chain in model:
        for residue in chain:
            if residue.has_id("CA"):
                ca_coords.append(residue["CA"].get_coord())

# Convert the list of coordinates to a numpy array
ca_coords = np.array(ca_coords)

# Print the coordinates to the console
print(ca_coords)
```


In [38]:
#function to remove markdown from generated code
def remove_markdown_code_blocks(text):
    """Strip Markdown fence lines from ``text`` and return what is left.

    A line is treated as a fence when, ignoring leading whitespace, it begins
    with three backticks (an opening fence with a language tag or a bare
    closing fence). Every other line is kept unchanged and in order, and the
    kept lines are re-joined with newline characters.
    """
    kept = []
    for row in text.split('\n'):
        if row.strip().startswith('```'):
            continue  # fence marker, not content
        kept.append(row)
    return '\n'.join(kept)



In [35]:
# Drop the Markdown fence lines from the model reply; the resulting text is
# what gets executed in a later cell.
cleaned_text = remove_markdown_code_blocks(open_pdb.text)

'import Bio.PDB\nimport numpy as np\n\n# Open the PDB file\nparser = Bio.PDB.PDBParser()\nstructure = parser.get_structure("2L0J", "2L0J.pdb")\n\n# Get the coordinates of the CA atoms\nca_coords = []\nfor model in structure:\n    for chain in model:\n        for residue in chain:\n            if residue.has_id("CA"):\n                ca_coords.append(residue["CA"].get_coord())\n\n# Convert the list of coordinates to a numpy array\nca_coords = np.array(ca_coords)\n\n# Print the coordinates to the console\nprint(ca_coords)'

In [37]:
import numpy as np

# SECURITY: this executes code written by the model. Read the generated code
# printed earlier in the notebook before running this cell.
# Run it in its own namespace rather than the notebook globals: the generated
# snippet contains `for model in structure:`, which would otherwise overwrite
# the notebook's `model` (the GenerativeModel) and break any later
# generate_content call.
generated_ns = {}
exec(cleaned_text, generated_ns)

# Bring back only the result array. The name `ca_coords` is the one used by the
# generated snippet shown in this notebook; a different generation may use
# another name, in which case this is None.
ca_coords = generated_ns.get('ca_coords')


[[  4.812  -8.317  19.129]
 [  6.511  -7.824  15.715]
 [  8.351  -4.579  15.207]
 ...
 [ 16.007  14.386 -22.258]
 [ 18.229  14.845 -19.134]
 [ 19.461  11.244 -18.624]]
