In [1]:
from vertexai import generative_models
from vertexai.generative_models import GenerativeModel
import json

In [2]:
model = GenerativeModel(model_name="gemini-1.0-pro-vision")

In [3]:
## Provide pdb ID
ask_prompt='Please ask the user to provide a pdb ID for the protein of interest.'
response = model.generate_content(ask_prompt)
print(response.text)

Please provide the PDB ID of the protein you are interested in. The PDB ID is a unique identifier for each protein structure deposited in the Protein Data Bank. It can be found in the header of the PDB file or on the PDB website.


In [4]:
pdb_ID='2L0J'
prompt_pdb='The pdb ID is '+pdb_ID
prompt_pdb

'The pdb ID is 2L0J'

In [25]:
#Feed pdb
feed_pdb = model.generate_content(prompt_pdb)

In [6]:
#Ask which PDB ID has been provided
pdb_check = model.generate_content(f'Which pdb ID has been provided?{feed_pdb.text}')

In [7]:
print(pdb_check.text)

The provided PDB ID is 2L0J.


In [8]:
#What to do with the pdb file

In [9]:
#helper function to download pdb
import requests

def download_pdb(pdb_id, save_path):
    # Construct the URL for the PDB file
    pdb_url = f"https://files.rcsb.org/download/{pdb_id}.pdb"

    # Send a GET request to download the file
    response = requests.get(pdb_url)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Save the content to a file
        with open(save_path, 'wb') as f:
            f.write(response.content)
        print(f"Successfully downloaded {pdb_id}.pdb")
    else:
        print(f"Failed to download {pdb_id}.pdb")

In [10]:
# Example usage:
pdb_id = pdb_check.text  # Example PDB ID
save_path = pdb_check.text+'.pdb'  # Path to save the downloaded PDB file
download_pdb(pdb_id, save_path)


Failed to download The provided PDB ID is 2L0J..pdb


In [11]:
#generate code to open pdb and read positions of the CA atons
pdb_file=pdb_check.text+'.pdb'
code_prompt='Please write code to open the pdb and print the coordinates of the CA atoms to an array.'
open_pdb=model.generate_content([pdb_file,code_prompt])
print(open_pdb.text)

```
from Bio.PDB import PDBParser
import numpy as np

# Create a PDBParser object
parser = PDBParser()

# Parse the PDB file
structure = parser.get_structure("2L0J", "2L0J.pdb")

# Get the CA atoms
ca_atoms = [atom for atom in structure.get_atoms() if atom.name == "CA"]

# Print the coordinates of the CA atoms to an array
ca_coords = np.array([[atom.coord[0], atom.coord[1], atom.coord[2]] for atom in ca_atoms])

print(ca_coords)
```


In [12]:
#function to remove markdown from generated code
def remove_markdown_code_blocks(text):
    lines = text.split('\n')
    cleaned_lines = [line for line in lines if not line.strip().startswith('```')]
    cleaned_text = '\n'.join(cleaned_lines)
    return cleaned_text

In [13]:
cleaned_text = remove_markdown_code_blocks(open_pdb.text)

In [14]:
import numpy as np
exec(cleaned_text)

[[  4.812  -8.317  19.129]
 [  6.511  -7.824  15.715]
 [  8.351  -4.579  15.207]
 ...
 [ 16.007  14.386 -22.258]
 [ 18.229  14.845 -19.134]
 [ 19.461  11.244 -18.624]]


Compute Radius of gyration

In [15]:
request_prompt='Please ask the user which property they want to compute and do not provide any options.'
analysis_request = model.generate_content(request_prompt)
print(analysis_request.text)

Which property do you want to compute?


In [16]:
name_property='Radius of gyration'
property_feed='The property to analyze is '+name_property
property_feed

'The property to analyze is Radius of gyration'

In [17]:
feed_property = model.generate_content(property_feed)

In [18]:
#specifiy analysis
specify_prompt='Please ask the user for which part of the system they want the property computed for. Do not provide any options'
specify_request = model.generate_content(specify_prompt)
print(specify_request.text)

For which part of the system do you want the property computed?


In [27]:
name_part='Chain A'
specify_feed='The relevant part is ' + name_part
specify_feed

'The relevant part is Chain A'

In [28]:
feed_specify=model.generate_content(specify_feed)

In [29]:
#check if pdb, property and specification are correctly stored 

In [30]:
#Ask which PDB ID has been provided
analysis_check_prompt='Which pdb has been provided? Which property should be computed? For which part of the system?'
analysis_check = model.generate_content([analysis_check_prompt,feed_pdb.text,feed_property.text,feed_specify.text])
print(analysis_check.text)

**PDB ID:** 2L0J

**Property:** Radius of Gyration (Rg)

**System Part:** Chain A


In [32]:
analysis_prompt='Please write code to perform the computation for the given PDB, Property and system part.'
pdb_analysis=model.generate_content([analysis_prompt,analysis_check.text])
print(pdb_analysis.text)

```
import numpy as np
import MDAnalysis as mda

# Load the PDB file
u = mda.Universe('2L0J.pdb')

# Select the chain of interest
chain = u.select_atoms('chainid A')

# Compute the radius of gyration
Rg = chain.radius_of_gyration()

# Print the result
print(Rg)

```


In [35]:
analysis_code = remove_markdown_code_blocks(pdb_analysis.text)
exec(analysis_code)

15.132645440412087
