In [1]:
from vertexai import generative_models
from vertexai.generative_models import GenerativeModel
import json

In [17]:
# Gemini model handle shared by the generate_content calls below.
GEMINI_MODEL_NAME = "gemini-1.0-pro-vision"
model = GenerativeModel(model_name=GEMINI_MODEL_NAME)

In [18]:
# Let the model phrase the request for a PDB ID, then show that request.
ask_prompt = "Please ask the user to provide a pdb ID for the protein of interest."
response = model.generate_content(ask_prompt)
print(response.text)

Please provide the PDB ID for the protein of interest. 

The PDB ID is a unique identifier for protein structures deposited in the Protein Data Bank, and it typically consists of four characters.


In [4]:
# PDB entry to analyse, and the statement that hands it to the model.
pdb_ID = "2L0J"
prompt_pdb = f"The pdb ID is {pdb_ID}"
prompt_pdb

'The pdb ID is 2L0J'

In [5]:
# Send the "The pdb ID is ..." statement to the model and keep the response object.
feed_pdb = model.generate_content(prompt_pdb)

In [6]:
# Ask the model which PDB ID it was given, appending its previous reply as context.
# NOTE: the question and the appended reply are concatenated without a separator.
pdb_check_prompt = f"Which pdb ID has been provided?{feed_pdb.text}"
pdb_check = model.generate_content(pdb_check_prompt)

In [7]:
# Display the model's answer to the PDB-ID check.
print(pdb_check.text)

2L0J


In [8]:
#What to do with the pdb file

In [9]:
#helper function to download pdb
import requests

def download_pdb(pdb_id, save_path, timeout=30):
    """Download a PDB entry from RCSB and write it to ``save_path``.

    A success or failure message is printed; nothing is returned.

    Args:
        pdb_id: PDB identifier (e.g. ``"2L0J"``). Surrounding whitespace is
            stripped, because the ID may come straight from a model reply.
        save_path: Destination path for the downloaded file.
        timeout: Seconds to wait for the server before ``requests`` gives up.
    """
    # A stray space or newline would otherwise end up inside the URL.
    pdb_id = str(pdb_id).strip()
    pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    # Without a timeout, requests may block indefinitely on a stalled connection.
    response = requests.get(pdb_url, timeout=timeout)

    # Only a 200 response carries the file; anything else is reported and skipped.
    if response.status_code != 200:
        print(f"Failed to download {pdb_id}.pdb")
        return

    with open(save_path, 'wb') as f:
        f.write(response.content)
    print(f"Successfully downloaded {pdb_id}.pdb")

In [10]:
# Download the structure named by the model.
# The reply is raw model text, so strip surrounding whitespace/newlines before
# using it as a URL component and as a file name.
pdb_id = pdb_check.text.strip()
save_path = f"{pdb_id}.pdb"  # Path to save the downloaded PDB file
download_pdb(pdb_id, save_path)


Successfully downloaded 2L0J.pdb


In [11]:
# Generate code that opens the PDB file and reads the positions of the CA atoms.
# Only the file name (not the file contents) is passed alongside the instruction.
code_prompt = "Please write code to open the pdb and print the coordinates of the CA atoms to an array."
pdb_file = pdb_check.text + ".pdb"
open_pdb = model.generate_content([pdb_file, code_prompt])
print(open_pdb.text)

```python
import Bio.PDB
import numpy as np

# Open the PDB file
parser = Bio.PDB.PDBParser()
structure = parser.get_structure('2L0J', '2L0J.pdb')

# Get the coordinates of the CA atoms
ca_coords = []
for model in structure:
    for chain in model:
        for residue in chain:
            if residue.has_id('CA'):
                ca_coords.append(residue['CA'].get_coord())

# Print the coordinates to an array
print(np.array(ca_coords))
```


In [12]:
#function to remove markdown from generated code
def remove_markdown_code_blocks(text):
    """Extract plain source code from a Markdown-formatted model reply.

    Lines whose stripped form starts with ``` are treated as fence markers.
    When the reply contains fences, only the lines inside fenced blocks are
    kept, so explanatory prose around the code does not end up in the result.
    An opening fence without a closing one (e.g. a truncated reply) keeps
    everything after it.

    Args:
        text (str): Raw reply text, with or without Markdown code fences.

    Returns:
        str: The code lines joined with newlines. If there are no fences the
        text is returned unchanged; if the fenced regions contain no code,
        the fence lines are simply dropped and all other lines kept.
    """
    lines = text.split('\n')

    def is_fence(line):
        return line.strip().startswith('```')

    # No fences at all: the reply is assumed to be plain code already.
    if not any(is_fence(line) for line in lines):
        return text

    inside_fence = False
    code_lines = []
    for line in lines:
        if is_fence(line):
            inside_fence = not inside_fence
            continue
        if inside_fence:
            code_lines.append(line)

    # Fallback for unbalanced replies (e.g. only a closing fence): keep every
    # non-fence line rather than returning nothing.
    if not ''.join(code_lines).strip():
        code_lines = [line for line in lines if not is_fence(line)]

    return '\n'.join(code_lines)

In [13]:
# Turn the model's Markdown-formatted reply (the CA-coordinate code) into plain source text.
cleaned_text = remove_markdown_code_blocks(open_pdb.text)

In [14]:
import numpy as np

# SECURITY: this runs model-generated code unreviewed; inspect `cleaned_text` first.
# Execute it in a dedicated namespace instead of the notebook globals: the generated
# snippet can bind any name (its `for model in structure:` loop would overwrite the
# notebook's Gemini `model` and break every later generate_content call).
generated_namespace = {"np": np}
exec(cleaned_text, generated_namespace)

[[  4.812  -8.317  19.129]
 [  6.511  -7.824  15.715]
 [  8.351  -4.579  15.207]
 ...
 [ 16.007  14.386 -22.258]
 [ 18.229  14.845 -19.134]
 [ 19.461  11.244 -18.624]]


Compute Radius of gyration

In [19]:
# Let the model phrase the question about which property to compute, then show it.
request_prompt = "Please ask the user which property they want to compute and do not provide any options."
analysis_request = model.generate_content(request_prompt)
print(analysis_request.text)

Please specify the property you would like to compute:


In [20]:
# Property chosen by the user, and the statement that hands it to the model.
name_property = "Radius of gyration"
property_feed = f"The property to analyze is {name_property}"
property_feed

'The property to analyze is Radius of gyration'

In [21]:
# Send the property statement to the model and keep the response object.
feed_property = model.generate_content(property_feed)

In [22]:
# Specify the analysis: let the model ask which part of the system to analyse.
specify_prompt = "Please ask the user for which part of the system they want the property computed for. Do not provide any options"
specify_request = model.generate_content(specify_prompt)
print(specify_request.text)

Which part of the system do you want the property computed for?


In [38]:
# `name_part` is defined but currently not appended to the statement below.
name_part = "Chain A"
specify_feed = "The relevant part is the entire chain"
specify_feed

'The relevant part is the entire chain'

In [39]:
# Send the system-part statement to the model and keep the response object.
feed_specify=model.generate_content(specify_feed)

In [40]:
#check if pdb, property and specification are correctly stored 

In [41]:
# Ask the model to restate the PDB ID, the property and the system part,
# giving it its three earlier replies as context.
analysis_check_prompt = "Which pdb has been provided? Which property should be computed? For which part of the system?"
analysis_check_inputs = [
    analysis_check_prompt,
    feed_pdb.text,
    feed_property.text,
    feed_specify.text,
]
analysis_check = model.generate_content(analysis_check_inputs)
print(analysis_check.text)

**pdb:** 2L0J

**Property:** Radius of gyration

**System:** Human Interleukin-17A (IL-17A) in complex with its inhibitor, secukinumab


In [42]:
# Request analysis code for the PDB / property / system summary produced by the check step.
analysis_prompt = "Please write code to perform the computation for the given PDB, Property and system part."
analysis_inputs = [analysis_prompt, analysis_check.text]
pdb_analysis = model.generate_content(analysis_inputs)
print(pdb_analysis.text)

```
import numpy as np
import MDAnalysis as mda

# Load the PDB file
u = mda.Universe('2L0J.pdb')

# Select the protein atoms
protein = u.select_atoms('protein')

# Compute the radius of gyration
Rg = protein.radius_of_gyration()

# Print the radius of gyration
print(Rg)
```


In [43]:
analysis_code = remove_markdown_code_blocks(pdb_analysis.text)

# SECURITY: this runs model-generated code unreviewed; inspect `analysis_code` first.
# Execute it in its own namespace so that whatever names the generated snippet binds
# cannot overwrite notebook variables (such as the Gemini `model`).
analysis_namespace = {}
exec(analysis_code, analysis_namespace)

17.887809073016886


In [45]:
# Second request: ask for a reusable MDAnalysis function instead of a script.
# This rebinds `analysis_prompt` and `pdb_analysis` from the previous request.
analysis_prompt = "Please write a function using MDAnalysis to perform the computation for the given PDB and Property. Write only the function"
function_inputs = [analysis_prompt, analysis_check.text]
pdb_analysis = model.generate_content(function_inputs)
print(pdb_analysis.text)

```
import MDAnalysis as mda
import numpy as np

def compute_radius_of_gyration(pdb, property):
  """
  Compute the radius of gyration for a given PDB file and property.

  Args:
    pdb (str): Path to the PDB file.
    property (str): The property to compute the radius of gyration for.

  Returns:
    float: The radius of gyration.
  """

  # Load the PDB file
  universe = mda.Universe(pdb)

  # Select the atoms to compute the radius of gyration for
  if property == "backbone":
    selection = "backbone"
  elif property == "heavy":
    selection = "heavy"
  elif property == "all":
    selection = "all"
  else:
    raise ValueError("Invalid property: {}".format(property))

  # Compute the radius of gyration
  radius_of_gyration = universe.select_atoms(selection).radius_of_gyration()

  # Return the radius of gyration
  return radius_of_gyration

# Compute the radius of gyration for the given PDB file and property
radius_of_gyration = compute_radius_of_gyration("2L0J.pdb", "heavy")

# P

In [46]:
# Display the generated function source after Markdown clean-up (shown as a string repr).
remove_markdown_code_blocks(pdb_analysis.text)

'import MDAnalysis as mda\nimport numpy as np\n\ndef compute_radius_of_gyration(pdb, property):\n  """\n  Compute the radius of gyration for a given PDB file and property.\n\n  Args:\n    pdb (str): Path to the PDB file.\n    property (str): The property to compute the radius of gyration for.\n\n  Returns:\n    float: The radius of gyration.\n  """\n\n  # Load the PDB file\n  universe = mda.Universe(pdb)\n\n  # Select the atoms to compute the radius of gyration for\n  if property == "backbone":\n    selection = "backbone"\n  elif property == "heavy":\n    selection = "heavy"\n  elif property == "all":\n    selection = "all"\n  else:\n    raise ValueError("Invalid property: {}".format(property))\n\n  # Compute the radius of gyration\n  radius_of_gyration = universe.select_atoms(selection).radius_of_gyration()\n\n  # Return the radius of gyration\n  return radius_of_gyration\n\n# Compute the radius of gyration for the given PDB file and property\nradius_of_gyration = compute_radius_of_gy

In [47]:
# SECURITY: executes model-generated code unreviewed, in the notebook's global namespace
# (which is what makes compute_radius_of_gyration available to later cells).
# NOTE(review): in the recorded run this raised
# "SelectionError: Unknown selection token: 'heavy'" from the call embedded in the generated code.
exec(remove_markdown_code_blocks(pdb_analysis.text))

SelectionError: Unknown selection token: 'heavy'

In [36]:
# NOTE(review): this call passes three arguments, whereas the generated function printed
# earlier in this notebook takes two (pdb, property). Its execution count (36) also
# precedes the cells that generated that function, so it presumably relies on an older
# definition that is no longer in the notebook — TODO confirm and regenerate.
compute_radius_of_gyration("2L0J.pdb", "radius_og_gyration", "Chain A")

15.132645440412087