This code was copied from: https://docs.immport.org/apidocumentation/
Watch the full tutorial at: (link to be posted soon)
Learn what an API is: https://aws.amazon.com/what-is/api/

In [9]:
import json
import os
import platform
import sys
import time
from io import StringIO

import pandas as pd
import requests
# Root of the ImmPort site; the two data URLs below are derived from it.
API_ENDPOINT_BASE_URL = "https://www.immport.org"
DATA_QUERY_URL = f"{API_ENDPOINT_BASE_URL}/data/query"
ASPERA_TOKEN_URL = f"{API_ENDPOINT_BASE_URL}/data/download/token"
# Endpoint that exchanges a username/password for an access token.
IMMPORT_TOKEN_URL = "https://www.immport.org/auth/token"


def request_immport_token(immport_token_url, username, password, timeout=30):
    '''Request an ImmPort access token.

       :param immport_token_url: URL of the ImmPort token endpoint.
       :param username: ImmPort user name.
       :param password: ImmPort user password.
       :param timeout: Seconds passed to requests as the request timeout
                       (default 30). Without it a stalled server would
                       block the notebook indefinitely.

       :return: the access token string, or None when the server does not
                answer with status 200 or the reply has no 'access_token'.
    '''
    r = requests.post(immport_token_url,
                      data={'username': username, 'password': password},
                      timeout=timeout)
    if r.status_code != 200:
        return None
    # .get() so a 200 reply that lacks the field yields None (the documented
    # failure value) instead of raising KeyError.
    return r.json().get('access_token')


def api_data_query(username, password, endpoint, immport_token_url, token=True, format="json", timeout=300):
    '''Call a Data Query API endpoint and return its results.

       When ``token`` is true, an ImmPort token is first requested with the
       given credentials and sent as a bearer Authorization header.

       param username: ImmPort username.
       param password: ImmPort password.
       param endpoint: Full URL of the Data Query endpoint (query string included).
       param immport_token_url: URL of the ImmPort token endpoint.
       param token: Indicates if endpoint requires a token
       param format: String (json, tsv). "json" returns the decoded JSON body;
                     any other value returns the body as text.
       param timeout: Seconds passed to requests as the request timeout
                      (default 300), so a stalled server cannot hang the
                      notebook forever.

       return: results, or None when the token cannot be obtained or the
               endpoint answers with a status other than 200 (an error
               message is printed to stderr in both cases).
    '''
    # The Content-Type depends only on the requested format; the
    # Authorization header is added on top of it when a token is required.
    headers = {
        'Content-Type': "application/json" if format == "json" else "text/plain"
    }

    if token:
        immport_token = request_immport_token(immport_token_url, username, password)
        if immport_token is None:
            print("ERROR: Credentials incorrect for ImmPort, unable to retrieve token", file=sys.stderr)
            return None
        headers['Authorization'] = "bearer " + immport_token

    results = requests.get(endpoint, headers=headers, timeout=timeout)
    if results.status_code != 200:
        print("ERROR: API Status Code: " + str(results.status_code), file=sys.stderr)
        return None

    # requests.get() returned a response object on which status_code was just
    # read, so the former "results is None" check here could never fire.
    if format == "json":
        return results.json()
    return results.text

In [2]:
# ImmPort credentials are read from the environment so that real secrets never
# have to be typed into (and saved or shared with) the notebook.
# Set IMMPORT_USERNAME and IMMPORT_PASSWORD before starting Jupyter; the
# "TEST" placeholders are used only when those variables are not set.
username = os.environ.get("IMMPORT_USERNAME", "TEST")
password = os.environ.get("IMMPORT_PASSWORD", "TEST")

In [10]:
# Study accessions whose ELISA results will be downloaded
hlthyelisa = [
    'SDY224',
    'SDY396',
    'SDY564',
]

In [11]:
# Destination of the combined ELISA results
csv_file_path = r'ELISATUTORIAL.csv'

# Accumulates one record per result row across all studies
results_list = []

# Start with an empty frame and write it out immediately,
# so the CSV file exists before any data is downloaded
elisaf_df = pd.DataFrame()
elisaf_df.to_csv(csv_file_path, index=False)

In [12]:
# Track time for each study
total_studies = len(hlthyelisa)
start_time = time.time()
no_results_studies = []  # List to track study accessions with no results

# Start from an empty record list every time this cell runs. Appending to a
# list created in an earlier cell meant that re-running this cell added every
# study a second time and duplicated rows in the DataFrame and the CSV.
results_list = []

# Loop through each study accession number
for i, study_accession in enumerate(hlthyelisa):
    study_start_time = time.time()
    endpoint = f"{DATA_QUERY_URL}/result/elisa?studyAccession={study_accession}&format=json"
    print(f"Loading data for {study_accession}...")
    results = api_data_query(username, password, endpoint, IMMPORT_TOKEN_URL, True, "json")

    # api_data_query returns None on failure; an empty reply is falsy as well,
    # so both cases are recorded as "no results".
    if results:
        for result in results:
            result['study_accession'] = study_accession  # Add study accession to the result
            results_list.append(result)
    else:
        print(f"No results for study accession {study_accession}")
        no_results_studies.append(study_accession)

    # Rebuild the DataFrame from everything collected so far and rewrite the
    # CSV after each study, so the file always holds the studies already done.
    elisaf_df = pd.DataFrame(results_list)
    elisaf_df.to_csv(csv_file_path, index=False)

    # Calculate and print timing information; the estimate is the average
    # time per finished study multiplied by the number of studies left.
    study_end_time = time.time()
    study_time_taken = study_end_time - study_start_time
    elapsed_time = study_end_time - start_time
    remaining_studies = total_studies - (i + 1)
    estimated_time_remaining = (elapsed_time / (i + 1)) * remaining_studies

    print(f"Time taken for {study_accession}: {study_time_taken:.2f} seconds")
    print(f"Estimated time remaining: {estimated_time_remaining:.2f} seconds")

# A bare `elisaf_df.head(2)` used to sit here, but an expression that is not
# the last statement of a cell is never rendered, so it had no effect.
# Preview the frame in a cell of its own instead.

if not no_results_studies:
    print("All studies returned results")
else:
    # Print the list of study accessions with no results
    print("Studies with no results:", no_results_studies)


Loading data for SDY224...
Time taken for SDY224: 10.10 seconds
Estimated time remaining: 20.19 seconds
Loading data for SDY396...
Time taken for SDY396: 10.58 seconds
Estimated time remaining: 10.34 seconds
Loading data for SDY564...
Time taken for SDY564: 9.78 seconds
Estimated time remaining: 0.00 seconds
All studies returned results


In [13]:
# Preview the first two rows of the combined ELISA results
elisaf_df.head(2)

Unnamed: 0,resultId,ageEvent,ageEventSpecify,ageUnit,ancestralPopulation,analyteAccession,analytePreferred,analyteReported,armAccession,armName,...,subjectAccession,subjectPhenotype,unitPreferred,unitReported,valuePreferred,valueReported,treatmentAccession,studyTimeT0EventSpecify,studyTimeT0Event,study_accession
0,184478,Age at enrollment,,Years,,,,IgA,ARM926,TIV 2010,...,SUB114672,Subjects receiving TIV seasonal vaccine in the...,ng/ml,ng/ml,20635.3,20635.34198,TRT831,,Time of initial vaccine administration,SDY224
1,184479,Age at enrollment,,Years,,,,IgG,ARM926,TIV 2010,...,SUB114672,Subjects receiving TIV seasonal vaccine in the...,ng/ml,ng/ml,117373.0,117372.6129,TRT831,,Time of initial vaccine administration,SDY224


In [14]:
# Workbook whose 'Studies' sheet maps each study accession to its condition
file_pathZ = r'All_Studies.xlsx'
studies_df = pd.read_excel(file_pathZ, sheet_name='Studies')

# Left-join the condition onto every ELISA row. Both frames carry a
# 'study_accession' column, so the merge suffixes them with _x (ELISA side)
# and _y (workbook side); the _y copy is dropped and _x gets its name back.
merged_df = (
    elisaf_df
    .merge(
        studies_df[['study_accession', 'condition_or_disease']],
        how='left',
        left_on='studyAccession',
        right_on='study_accession',
    )
    .drop(columns=['study_accession_y'])
    .rename(columns={'study_accession_x': 'study_accession'})
)
merged_df.head(2)

Unnamed: 0,resultId,ageEvent,ageEventSpecify,ageUnit,ancestralPopulation,analyteAccession,analytePreferred,analyteReported,armAccession,armName,...,subjectPhenotype,unitPreferred,unitReported,valuePreferred,valueReported,treatmentAccession,studyTimeT0EventSpecify,studyTimeT0Event,study_accession,condition_or_disease
0,184478,Age at enrollment,,Years,,,,IgA,ARM926,TIV 2010,...,Subjects receiving TIV seasonal vaccine in the...,ng/ml,ng/ml,20635.3,20635.34198,TRT831,,Time of initial vaccine administration,SDY224,influenza
1,184479,Age at enrollment,,Years,,,,IgG,ARM926,TIV 2010,...,Subjects receiving TIV seasonal vaccine in the...,ng/ml,ng/ml,117373.0,117372.6129,TRT831,,Time of initial vaccine administration,SDY224,influenza


Unnamed: 0,resultId,ageEvent,ageEventSpecify,ageUnit,ancestralPopulation,analyteAccession,analytePreferred,analyteReported,armAccession,armName,...,subjectPhenotype,unitPreferred,unitReported,valuePreferred,valueReported,treatmentAccession,studyTimeT0EventSpecify,studyTimeT0Event,study_accession,condition_or_disease
0,184478,Age at enrollment,,Years,,,,IgA,ARM926,TIV 2010,...,Subjects receiving TIV seasonal vaccine in the...,ng/ml,ng/ml,20635.3,20635.34198,TRT831,,Time of initial vaccine administration,SDY224,influenza
1,184479,Age at enrollment,,Years,,,,IgG,ARM926,TIV 2010,...,Subjects receiving TIV seasonal vaccine in the...,ng/ml,ng/ml,117373.0,117372.6129,TRT831,,Time of initial vaccine administration,SDY224,influenza


In [15]:
# Save the merged dataframe to a new Excel file.
# index=False keeps the DataFrame's row index out of the workbook.
output_file_pathx = r'ELISATUTLABELED.xlsx'
merged_df.to_excel(output_file_pathx, index=False)