In [4]:
import requests

def get_pdb_entities(pdb_id):
    url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print(f"Failed to retrieve data for PDB ID: {pdb_id}")
        return []



In [8]:
import requests

def get_pdb_entities(pdb_id):
    url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print(f"Failed to retrieve data for PDB ID: {pdb_id}")
        return []

def fetch_entity_info(pdb_id, entity_id, entity_type):
    """Fetch entity info from RCSB PDB API based on type."""
    url = f"https://data.rcsb.org/rest/v1/core/{entity_type}/{pdb_id}/{entity_id}"
    response = requests.get(url)
    
    if response.status_code == 200:
        entity_data = response.json()
        if entity_type == "polymer_entity":
            description = entity_data.get("rcsb_polymer_entity", {}).get("pdbx_description", "Unknown")
            return {
                "id": entity_data.get("rcsb_id"),
                "name": description,
                "type": entity_data.get("entity_poly", {}).get("rcsb_entity_polymer_type", "Unknown")
            }
        elif entity_type == "nonpolymer_entity":
            description = entity_data.get("rcsb_nonpolymer_entity", {}).get("pdbx_description", "Unknown")
            return {
                "id": entity_data.get("rcsb_id"),
                "name": description,
                "type": "non-polymer"
            }
    return None

def get_pdb_entities_info(api_response):
    """Extract entity IDs from the API response and fetch their details."""
    pdb_id = api_response["entry"]["id"]
    entity_ids = api_response["rcsb_entry_container_identifiers"]["entity_ids"]
    polymer_entity_ids = api_response["rcsb_entry_container_identifiers"]["polymer_entity_ids"]
    nonpolymer_entity_ids = api_response["rcsb_entry_container_identifiers"]["non_polymer_entity_ids"]

    entities_info = []
    for entity_id in entity_ids:
        entity_type = "polymer_entity" if entity_id in polymer_entity_ids else "nonpolymer_entity"
        entity_info = fetch_entity_info(pdb_id, entity_id, entity_type)
        if entity_info:
            entities_info.append(entity_info)

    return entities_info

# Example usage with the JSON response you have
api_response = get_pdb_entities("4UAI")

entities_info = get_pdb_entities_info(api_response)
print(entities_info)


[{'id': '4UAI_1', 'name': 'Stromal cell-derived factor 1', 'type': 'Protein'}, {'id': '4UAI_2', 'name': '1-phenyl-3-[4-(1H-tetrazol-5-yl)phenyl]urea', 'type': 'non-polymer'}, {'id': '4UAI_3', 'name': 'SULFATE ION', 'type': 'non-polymer'}]


In [10]:
import requests
from Bio.PDB import PDBList, PDBParser

def search_pdb_by_sequence(query_sequence):
    # Define the URL for the RCSB PDB BLAST service
    blast_url = "https://www.rcsb.org/pdb/rest/getBlastPDB1"

    # Perform BLAST search
    params = {
        'sequence': query_sequence,
        'eCutOff': 10,  # E-value cutoff
        'matrix': 'BLOSUM62',  # Scoring matrix
        'outputFormat': 'XML',  # Output format
        'service': 'pdb'
    }
    response = requests.post(blast_url, data=params)
    response.raise_for_status()

    # Parse the XML response to get PDB IDs
    from xml.etree import ElementTree as ET
    root = ET.fromstring(response.text)
    pdb_ids = set(hit.find('hitId').text for hit in root.findall('.//hit'))

    print(f"Found {len(pdb_ids)} PDB entries with similar sequences.")

    # Path to store downloaded PDB files
    pdb_path = 'pdb_files'
    os.makedirs(pdb_path, exist_ok=True)

    # Initialize PDBList for downloading structures
    pdbl = PDBList()

    # Download and parse PDB files
    parser = PDBParser()
    for pdb_id in pdb_ids:
        pdb_file = pdbl.retrieve_pdb_file(pdb_id, pdir=pdb_path, file_format='pdb')
        structure = parser.get_structure(pdb_id, pdb_file)
        print(f"Downloaded and parsed structure: {pdb_id}")

if __name__ == '__main__':
    # Example chemokine sequence (you can replace this with any sequence of interest)
    chemokine_sequence = "KPVSLSYRCPCRFFESHVARANVKHLKILNTPNCALQIVARLKNNNRQVCIDPKLKWIQEYLEKALNKRFKM"
    search_pdb_by_sequence(chemokine_sequence)


HTTPError: 404 Client Error: Not Found for url: https://www.rcsb.org/pdb/rest/getBlastPDB1

In [3]:
import requests

def get_generic_numbers(entry_name):
    url = f"https://gpcrdb.org/services/residues/{entry_name}/"
    headers = {
        "accept": "application/json",
    }
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        residues = response.json()
        #generic_numbers = [residue['generic_number'] for residue in residues if 'generic_number' in residue]
        return residues
    else:
        return f"Error: Unable to fetch data for {entry_name}, status code {response.status_code}"

# Example usage
entry_name = "ackr3_human"
generic_numbers = get_generic_numbers(entry_name)
print(generic_numbers)


[{'sequence_number': 1, 'amino_acid': 'M', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 2, 'amino_acid': 'D', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 3, 'amino_acid': 'L', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 4, 'amino_acid': 'H', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 5, 'amino_acid': 'L', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 6, 'amino_acid': 'F', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 7, 'amino_acid': 'D', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 8, 'amino_acid': 'Y', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 9, 'amino_acid': 'S', 'protein_segment': 'N-term', 'display_generic_number': None}, {'sequence_number': 10, 'amino_acid': 'E', 'protein_segment': '

In [12]:
import requests
import pandas as pd

def fetch_pdb_ids_by_pfam(pfam_accession):
    """
    Fetches PDB entries associated with a given PFAM accession number.

    Parameters:
    - pfam_accession: PFAM accession number as a string.

    Returns:
    - A list of PDB entry documents.
    """
    url = "https://www.ebi.ac.uk/pdbe/search/pdb/select"
    query = f"pfam_accession:{pfam_accession}"
    params = {
        'q': query,
        'wt': 'json',
        'rows': 1000
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return data['response']['docs']
    else:
        print("Failed to fetch data from PDBe API")
        return []

# Function to process the data from the response
def process_pdb_data(docs):
    details = []
    for doc in docs:
        pdb_id = doc.get('pdb_id', 'N/A')
        chemokine_uniprot = doc.get('uniprot_accession', ['N/A'])[0]  # Get only the first UniProt accession
        method = doc.get('experimental_method', ['N/A'])[0]  # Convert method to a string
        resolution = doc.get('resolution', 0)
        assembly_type = doc.get('assembly_type', [])
        state = "Dimer" if any("dimer" in at.lower() for at in assembly_type) else "Monomer"
        chain_ids = doc.get('chain_id', [])  # Keep ChainID as a list
        
        details.append({
            "PDB": pdb_id,
            "Chemokine_UniProt": chemokine_uniprot,
            "Method": method,
            "Resolution": resolution,
            "State": state,
            "ChainID": chain_ids
        })
    return details

# Main function to collect data
def main():
    pfam_accession = "PF00048"
    docs = fetch_pdb_ids_by_pfam(pfam_accession)
    
    if docs:
        # Process the fetched data
        details = process_pdb_data(docs)
        
        # Creating a DataFrame to store the details
        df = pd.DataFrame(details)
        # Save to CSV
        df.to_csv("chemokine_structures.csv", index=False)
        print("Data saved to chemokine_structures.csv")
    else:
        print("No data found.")

if __name__ == "__main__":
    main()


Data saved to chemokine_structures.csv
