In [2]:
import requests
import pandas as pd
from requests.exceptions import HTTPError, Timeout

# Root of the SASBDB REST API. Endpoint paths are appended to this string
# directly, so the trailing slash must be kept.
BASE_URL = "https://www.sasbdb.org/rest-api/"


def get_post(post_id: str):
    """Fetch the summary record for one entry code, reporting failures instead of raising.

    Sends a GET request to ``{BASE_URL}entry/summary/`` with ``post_id`` as the
    ``code`` query parameter and a 5-second timeout.

    Args:
        post_id: Entry code to look up (for example ``'SASDCM6'``).

    Returns:
        The decoded JSON body of the response on success. ``None`` when the
        request timed out, the server answered with a 4xx/5xx status, or any
        other exception was raised while fetching or decoding; a short message
        is printed in each of those cases.
    """
    url = f"{BASE_URL}entry/summary/"
    try:
        # Hand the code to requests as a query parameter so it is
        # percent-encoded; interpolating it into the URL would let characters
        # such as '&', '#' or spaces corrupt the query string.
        response = requests.get(url, params={"code": post_id}, timeout=5)
        response.raise_for_status()  # turn 4xx/5xx answers into HTTPError
        return response.json()
    except Timeout:
        print("Request timed out.")
    except HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        # Deliberate best-effort: callers treat None as "skip this entry".
        print(f"Unexpected error: {err}")
    return None


if __name__ == "__main__":
    # Manual smoke test: look up one known entry and echo whatever came back.
    post = get_post("SASDCM6")
    if not post:
        # Nothing to show: get_post returned None or an empty payload.
        pass
    else:
        print(post)


{'code': 'SASDCM6', 'status': 'Published', 'type_of_curve': 'Merged', 'angular_unit': '1/A', 'project': {'title': 'Site-specific monoubiquitination downregulates Rab5 by disrupting effector binding and guanine nucleotide conversion.', 'publication': {'title': 'Site-specific monoubiquitination downregulates Rab5 by disrupting effector binding and guanine nucleotide conversion.', 'author_list': 'Shin D, Na W, Lee JH, Kim G, Baek J, Park SH, Choi CY, Lee S', 'journal': 'Elife', 'doi': '10.7554/eLife.29154', 'pmid': '28968219', 'published_date': '2017 Oct 2'}, 'status': 'released', 'submitted_date': '2017-07-18', 'released_date': '2018-03-19'}, 'pddf_data': 'https://www.sasbdb.org/media/p_of_R_files/SASDCM6.out', 'intensities_data': 'https://www.sasbdb.org/media/intensities_files/SASDCM6.dat', 'intensities_log_plot': 'https://www.sasbdb.org/media/intensities_files/scattering_plots/SASDCM6_dat_img.png', 'intensities_kratky_plot': 'https://www.sasbdb.org/media/intensities_files/scattering_pl

In [37]:
# Flat list of every entry code found in the EOM code file.
sasdbdb_ids_eom_single = []
with open('./fwddynamicsinsasbdb/EOM codes single molecule.txt', "r") as f:
    # A useful line looks like "307 SASDC23,SASDC33,SASDC43,SASDC53":
    # a leading token, whitespace, then a comma-separated run of codes.
    for fields in map(str.split, f):
        # Lines with fewer than two whitespace-separated tokens carry no codes.
        if len(fields) >= 2:
            sasdbdb_ids_eom_single += fields[1].split(",")


In [69]:
# Flat list of every entry code found in the IDP code file.
sasdbdb_ids_idp_single = []
with open('./fwddynamicsinsasbdb/IDP codes single molecule.txt', "r") as f:
    # A useful line looks like "307 SASDC23,SASDC33,SASDC43,SASDC53":
    # a leading token, whitespace, then a comma-separated run of codes.
    for fields in map(str.split, f):
        # Lines with fewer than two whitespace-separated tokens carry no codes.
        if len(fields) >= 2:
            sasdbdb_ids_idp_single += fields[1].split(",")

In [45]:
# Flat list of every entry code found in the protein-monomer code file.
sasdbdb_ids_pro_monomer = []
with open('./fwddynamicsinsasbdb/Protein-monomer codes single molecule.txt', "r") as f:
    # A useful line looks like "307 SASDC23,SASDC33,SASDC43,SASDC53":
    # a leading token, whitespace, then a comma-separated run of codes.
    for fields in map(str.split, f):
        # Lines with fewer than two whitespace-separated tokens carry no codes.
        if len(fields) >= 2:
            sasdbdb_ids_pro_monomer += fields[1].split(",")

In [73]:
# Flat list of every entry code found in the SREFLEX code file.
sasdbdb_ids_SREFLEX = []
with open('./fwddynamicsinsasbdb/SREFLEX codes single molecule.txt', "r") as f:
    # A useful line looks like "307 SASDC23,SASDC33,SASDC43,SASDC53":
    # a leading token, whitespace, then a comma-separated run of codes.
    for fields in map(str.split, f):
        # Lines with fewer than two whitespace-separated tokens carry no codes.
        if len(fields) >= 2:
            sasdbdb_ids_SREFLEX += fields[1].split(",")

In [74]:
import pandas as pd

# Build one row per SREFLEX entry from the first molecule listed in its
# summary record. Entries that cannot be fetched, or that carry no molecule
# information, are reported and skipped so one bad entry does not abort the
# whole run and discard the rows collected so far.
records = []
for entry_id in sasdbdb_ids_SREFLEX:
    print(f"Fetching data for {entry_id}...")
    post = get_post(entry_id)
    if not post:
        # get_post returns None after a failed request.
        continue
    try:
        # Only the first molecule of the sample is used.
        mol = post['experiment']['sample']['molecule'][0]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty molecule list, or a null/non-container level.
        mol = None
    if not isinstance(mol, dict):
        print(f"No molecule information for {entry_id}; skipping.")
        continue
    records.append({
        "SASBDB_ID": entry_id,
        "uniprot_id": mol.get('uniprot_code'),
        "uniprot_range_first": mol.get('uniprot_range_first'),
        "uniprot_range_last": mol.get('uniprot_range_last'),
        "sequence": mol.get('sequence')
    })

# Naming the columns keeps the frame's layout stable even when no entry
# produced a record (an empty list would otherwise give a column-less frame).
df_sreflex = pd.DataFrame(
    records,
    columns=[
        "SASBDB_ID",
        "uniprot_id",
        "uniprot_range_first",
        "uniprot_range_last",
        "sequence",
    ],
)

Fetching data for SASDC36...
Fetching data for SASDC46...
Fetching data for SASDD43...
Fetching data for SASDF83...
Fetching data for SASDFC4...
Fetching data for SASDFF5...
Fetching data for SASDF99...
Fetching data for SASDLH4...
Fetching data for SASDLF4...
Fetching data for SASDLL4...
Fetching data for SASDLJ4...
Fetching data for SASDLK4...
Fetching data for SASDLG4...
Fetching data for SASDHK9...
Fetching data for SASDHG9...
Fetching data for SASDHH9...
Fetching data for SASDHJ9...
Fetching data for SASDJ22...
Fetching data for SASDJC3...
Fetching data for SASDJD3...
Fetching data for SASDJE3...
Fetching data for SASDJB3...
Fetching data for SASDJA6...
Fetching data for SASDJ95...
Fetching data for SASDJA5...
Fetching data for SASDJF5...
Fetching data for SASDJH5...
Fetching data for SASDLY3...
Fetching data for SASDLU3...
Fetching data for SASDL54...
Fetching data for SASDL64...
Fetching data for SASDLZ3...
Fetching data for SASDMH2...
Fetching data for SASDMJ2...
Fetching data 

In [100]:
# Reload the saved table from disk.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests
# the CSV was written with its index -- consider index_col=0; confirm first.
eom = pd.read_csv(filepath_or_buffer="./sasdbdb_ids_eom_single.csv")
eom

Unnamed: 0,SASBDB_ID,uniprot_id,uniprot_range_first,uniprot_range_last,sequence
0,SASDBQ2,Q5U5U6,,,MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFA...
1,SASDB94,A0A1Z1SYD5,64.0,243.0,SNAQFRQALASEHDALYNDAASPRIGAKDAKLVLVSFTDYNCPYCK...
2,SASDBZ6,Q8NBI3,,,MAGPAIHTAPMLFLVLLLPLELSLAGALAPGTPARNLPENHIDLPG...
3,SASDBH8,O95786,239.0,925.0,MHHHHHHAAASPFKPRNYQLELALPAMKGKNTIICAPTGCGKTFVS...
4,SASDC23,P08083,1.0,387.0,MGSNGADNAHNNAFGGGKNPGIGNTSGAGSNGSASSNRGNSNGWSW...
...,...,...,...,...,...
245,SASDV46,P10636,244.0,372.0,MQTAPVPMPDLKNVKSKIGSTENLKHQPGGGKVQIINKKLDLSNVQ...
246,SASDVA8,Q4WF55,1.0,200.0,MGSSHHHHHHSSGENLYFQGHMATQSSTELPQINMTTAEPTSANKR...
247,SASDVC8,P0C1C6,200.0,310.0,MSYYHHHHHHLESTSLYKKAGFTPTEEPPVIPEYYYGSGRRGDLSK...
248,SASDVD8,P0C1C6,200.0,310.0,MSYYHHHHHHLESTSLYKKAGFTPTREPPVIPEYYYGSGDKGDLSD...


In [101]:
def clean_sequence(seq: str) -> str:
    """Collapse a possibly FASTA-formatted sequence into one contiguous string.

    Header lines (those whose first non-blank character is ``>``) are dropped,
    and all whitespace -- line breaks as well as spaces or tabs inside a
    line -- is removed from the remaining lines.

    Args:
        seq: Sequence text, optionally multi-line and/or with FASTA headers.
            Non-string values (for example ``None`` or NaN from a DataFrame
            column) are accepted and passed through.

    Returns:
        The cleaned sequence, or ``seq`` unchanged when it is not a string.
    """
    if not isinstance(seq, str):
        return seq  # leave missing / non-text cells untouched
    pieces = []
    for raw_line in seq.splitlines():
        stripped = raw_line.strip()
        # Test for a header after stripping so an indented ">" line is still
        # recognised instead of leaking into the sequence.
        if stripped.startswith(">"):
            continue
        # split()/join removes whitespace inside the line as well.
        pieces.append("".join(stripped.split()))
    return "".join(pieces)

In [104]:
# Normalise every entry of the sequence column with clean_sequence, then
# display the resulting frame.
eom["sequence"] = eom["sequence"].map(clean_sequence)
eom

Unnamed: 0,SASBDB_ID,uniprot_id,uniprot_range_first,uniprot_range_last,sequence
0,SASDBQ2,Q5U5U6,,,MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFA...
1,SASDB94,A0A1Z1SYD5,64.0,243.0,SNAQFRQALASEHDALYNDAASPRIGAKDAKLVLVSFTDYNCPYCK...
2,SASDBZ6,Q8NBI3,,,MAGPAIHTAPMLFLVLLLPLELSLAGALAPGTPARNLPENHIDLPG...
3,SASDBH8,O95786,239.0,925.0,MHHHHHHAAASPFKPRNYQLELALPAMKGKNTIICAPTGCGKTFVS...
4,SASDC23,P08083,1.0,387.0,MGSNGADNAHNNAFGGGKNPGIGNTSGAGSNGSASSNRGNSNGWSW...
...,...,...,...,...,...
245,SASDV46,P10636,244.0,372.0,MQTAPVPMPDLKNVKSKIGSTENLKHQPGGGKVQIINKKLDLSNVQ...
246,SASDVA8,Q4WF55,1.0,200.0,MGSSHHHHHHSSGENLYFQGHMATQSSTELPQINMTTAEPTSANKR...
247,SASDVC8,P0C1C6,200.0,310.0,MSYYHHHHHHLESTSLYKKAGFTPTEEPPVIPEYYYGSGRRGDLSK...
248,SASDVD8,P0C1C6,200.0,310.0,MSYYHHHHHHLESTSLYKKAGFTPTREPPVIPEYYYGSGDKGDLSD...


In [103]:
# Persist the cleaned table without the row index.
# NOTE(review): `eom` was loaded from ./sasdbdb_ids_eom_single.csv but is
# written to pro_monomer.csv -- confirm the intended output file name.
eom.to_csv(path_or_buf="pro_monomer.csv", index=False)