In [2]:
!pip install google-genai pandas dspy litellm



Import libraries and configure DSPy for LiteLLM

In [37]:
import dspy
import pandas as pd
import os
from collections import Counter
import io
from google.colab import userdata
import litellm
import time

Fetch the secret and set it as an environment variable

In [16]:
# Model identifier in LiteLLM's "<provider>/<model>" form. Defined outside the
# try block so later cells can always reference it.
MODEL_TO_USE = 'gemini/gemini-2.5-flash-lite-preview-06-17'

try:
    # The key is stored as a Colab secret named GEMINI_API_KEY.
    google_api_key = userdata.get('GEMINI_API_KEY')
except Exception as exc:
    # Fail fast with an actionable message. Continuing would only postpone the
    # failure to the next cell, which needs `google_api_key` to build the LM.
    raise RuntimeError(
        "Could not read the Colab secret 'GEMINI_API_KEY'. Add it in the Colab "
        "Secrets panel and grant this notebook access to it."
    ) from exc

if not google_api_key:
    raise RuntimeError("The Colab secret 'GEMINI_API_KEY' exists but is empty.")

# Also exported as an environment variable, as in the original cell.
os.environ["GOOGLE_API_KEY"] = google_api_key
print(" Configured to use Google Gemini via LiteLLM.")

 Configured to use Google Gemini via LiteLLM.


Configure DSPy to use the LiteLLM client

In [21]:
# Build the language-model client for MODEL_TO_USE with the API key fetched
# earlier, and register it as DSPy's configured LM.
lm = dspy.LM(MODEL_TO_USE, api_key=google_api_key)
dspy.configure(lm=lm)
# Smoke test: send one request and discard the reply. If this call raises, the
# success message below is never printed.
lm("which model are you?", temperature=0.7)
print(f"✅ DSPy configured successfully with LiteLLM using model: {MODEL_TO_USE}")

✅ DSPy configured successfully with LiteLLM using model: gemini/gemini-2.5-flash-lite-preview-06-17


Rate Limit Configuration
 - Gemini's free tier can have a 10 RPM (Requests Per Minute) limit

In [35]:
# Request budget per minute that the screening loop must stay under.
SECONDS_PER_MINUTE = 60
RPM_LIMIT = 10

# Spread the budget evenly over the minute: with 10 RPM this is 6 seconds
# of pause per request.
RATE_LIMIT_DELAY = SECONDS_PER_MINUTE / RPM_LIMIT

print(f"Rate limit set to {RPM_LIMIT} RPM. A delay of {RATE_LIMIT_DELAY:.1f} seconds will be added after each API call.")

Rate limit set to 10 RPM. A delay of 6.0 seconds will be added after each API call.


Upload your CSV file

In [23]:
from google.colab import files

print("\nPlease upload your articles CSV file:")
# `uploaded` maps each uploaded file name to its raw bytes.
uploaded = files.upload()

# An empty result (e.g. the dialog was dismissed) would otherwise surface as an
# unhelpful StopIteration from next(iter(...)) below.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select your articles CSV file.")

# Only the first uploaded file is used by the rest of the notebook.
file_name = next(iter(uploaded))
if len(uploaded) > 1:
    print(f"⚠️ {len(uploaded)} files were uploaded; only '{file_name}' will be used.")
print(f"\n✅ File '{file_name}' uploaded successfully.")


Please upload your articles CSV file:


Saving articles.csv to articles (1).csv

✅ File 'articles (1).csv' uploaded successfully.


Load and prepare your data

In [24]:
# Parse the uploaded bytes into a DataFrame.
df_articles = pd.read_csv(io.BytesIO(uploaded[file_name]))

# The screener needs exactly these two columns; report all missing ones at once
# instead of failing on the first row access.
required_columns = ['title', 'abstract']
missing_columns = [col for col in required_columns if col not in df_articles.columns]
if missing_columns:
    raise KeyError(
        f"Uploaded CSV is missing required column(s): {missing_columns}. "
        f"Found columns: {list(df_articles.columns)}"
    )

# Empty CSV cells are read as float NaN. Convert them to empty strings so the
# model receives text rather than the literal value nan. df_articles itself is
# left unmodified.
missing_abstract_count = int(df_articles['abstract'].isna().sum())
if missing_abstract_count:
    print(f"⚠️ {missing_abstract_count} article(s) have no abstract and will be screened on title only.")
titles = df_articles['title'].fillna('').astype(str)
abstracts = df_articles['abstract'].fillna('').astype(str)

# One dspy.Example per row; both fields are marked as inputs.
articles = [
    dspy.Example(
        article_title=title,
        article_abstract=abstract
    ).with_inputs('article_title', 'article_abstract')
    for title, abstract in zip(titles, abstracts)
]
print(f"Loaded {len(articles)} articles to be screened.")

Loaded 89 articles to be screened.


Define your inclusion criteria




In [25]:
# Inclusion side of the screening protocol. This text is passed verbatim to the
# screener as the `inclusion_criteria` input and is embedded in every training
# example, so any edit here changes the prompt.
inclusion_criteria ="""
I am screening for a systematic review and meta-analysis on aortic valve replacement.
Please adhere strictly to the following criteria based on the study protocol.

**PICO Framework:**
*   **Population:** Adult patients with severe aortic stenosis classified as being at **LOW SURGICAL RISK** (e.g., STS score < 4%).
*   **Intervention:** Transcatheter Aortic Valve Replacement (TAVR or TAVI).
*   **Comparator:** Surgical Aortic Valve Replacement (SAVR). The study MUST be a direct comparison between TAVR and SAVR.
*   **Outcomes:** Must report on long-term (>=1 year) clinical outcomes such as mortality, stroke, reintervention, or MACCE.

**Inclusion Criteria:**
1.  **Study Design:** Must be a **Randomized Controlled Trial (RCT)**.
2.  **Population:** Must explicitly state that the patient cohort is **low-risk**.
3.  **Comparison:** Must compare TAVR directly against SAVR.
"""

Define your exclusion criteria

In [26]:
# Exclusion side of the screening protocol. This text is passed verbatim to the
# screener as the `exclusion_criteria` input; the numbering is referenced by the
# reasoning strings in the training examples, so keep the two in sync.
exclusion_criteria = """
1.  **Wrong Study Design:** Exclude ALL non-RCTs. This includes observational studies, cohort studies, registry analyses, case series, case reports, editorials, letters, and especially **systematic reviews or meta-analyses**.
2.  **Wrong Population:** Exclude studies focused on intermediate-risk or high-risk patients. Exclude pediatric studies or studies on conditions other than aortic stenosis.
3.  **Wrong Comparison:** Exclude studies that do not compare TAVR vs. SAVR (e.g., TAVR only, SAVR only, TAVR vs. medical therapy, comparisons between different TAVR devices).
4.  **Wrong Outcomes:** Exclude studies that only report on procedural details, imaging, or economic analyses without clinical outcomes.
5.  **Animal studies.**
6. **Non-English studies.**
"""

Training Examples to Match your Protocol

In [32]:
def _labelled_example(title, abstract, reasoning, decision):
    """Build one labelled few-shot example for the screener.

    Every example carries the same protocol text (`inclusion_criteria` and
    `exclusion_criteria`) plus the expected `reasoning` and `decision`. Only
    the title and abstract are marked as inputs.
    """
    return dspy.Example(
        article_title=title,
        article_abstract=abstract,
        inclusion_criteria=inclusion_criteria,
        exclusion_criteria=exclusion_criteria,
        reasoning=reasoning,
        decision=decision,
    ).with_inputs('article_title', 'article_abstract')


# One clear "Include" and two clear "Exclude" cases. The numbered steps in each
# reasoning string refer to the numbered inclusion/exclusion criteria, so they
# must only cite criteria that actually exist in the protocol text.
train_examples = [
    _labelled_example(
        title="Transcatheter versus Surgical Aortic Valve Replacement in Low-Risk Patients with Severe Aortic Stenosis: A Randomized Clinical Trial",
        abstract="This randomized controlled trial was conducted to compare the outcomes of transcatheter aortic valve replacement (TAVR) with those of surgical aortic valve replacement (SAVR) using tissue valves. We enrolled 1000 low-risk adult patients with severe, symptomatic aortic stenosis, defined by an STS-PROM score below 4%. The primary endpoint was a composite of mortality and stroke at 2 years.",
        # Fixed: the original cited a non-existent "inclusion criterion 4" and
        # labelled criterion 3 as "Intervention" (the protocol calls it Comparison).
        reasoning="1. Study Design: The abstract clearly states it is a 'randomized controlled trial', meeting inclusion criterion 1. 2. Population: The abstract specifies 'low-risk adult patients with severe, symptomatic aortic stenosis' and an STS score below 4%, meeting inclusion criterion 2. 3. Comparison: The study directly compares TAVR against SAVR with 'tissue valves', meeting inclusion criterion 3. 4. Outcomes: The primary endpoint is a composite of mortality and stroke at 2 years, satisfying the PICO requirement for long-term (>=1 year) clinical outcomes. No exclusion criteria apply. This article must be included.",
        decision="Include",
    ),

    _labelled_example(
        title="A Meta-Analysis of Long-Term Outcomes After Transcatheter Aortic Valve Replacement",
        abstract="We performed a comprehensive systematic review and meta-analysis to synthesize the available evidence on TAVR outcomes. Data from 30 studies, including both randomized trials and observational cohorts, were pooled to assess long-term mortality.",
        reasoning="1. Study Design: The title and abstract explicitly identify the study as a 'meta-analysis' and 'systematic review'. This meets exclusion criterion 1. Therefore, the article must be excluded.",
        decision="Exclude",
    ),

    _labelled_example(
        title="Outcomes of TAVR in High-Risk and Inoperable Patients with Aortic Stenosis",
        abstract="This prospective cohort study followed 500 patients with severe aortic stenosis deemed to be at high-risk or inoperable for traditional surgery. All patients received TAVR and were followed for 5 years to assess survival and quality of life. There was no surgical comparison arm.",
        reasoning="1. Study Design: The abstract describes this as a 'prospective cohort study', which meets exclusion criterion 1. 2. Population: The study focuses on 'high-risk and inoperable patients', which meets exclusion criterion 2. 3. Comparator: The study is single-arm with 'no surgical comparison arm', which meets exclusion criterion 3. The article must be excluded for multiple reasons.",
        decision="Exclude",
    ),
]

Define and Compile the DSPy Program

In [33]:
# Declares the screener's inputs (article text plus both criteria lists) and its
# outputs (a written rationale and a verdict).
# NOTE(review): DSPy is documented to use a Signature's docstring as the task
# instructions and each field's `desc` as prompt text, so treat the strings
# below as part of the prompt rather than as ordinary documentation.
class ScreeningSignature(dspy.Signature):
    """
    Screens a research article based on title and abstract against inclusion and exclusion criteria for a systematic review comparing TAVR and SAVR.
    Decides whether to 'Include' or 'Exclude' the article.
    """

    # --- Inputs ---
    article_title = dspy.InputField(desc="The title of the research article.")
    article_abstract = dspy.InputField(desc="The abstract of the research article.")
    inclusion_criteria = dspy.InputField(desc="Criteria that must be met for inclusion.")
    exclusion_criteria = dspy.InputField(desc="Criteria that, if met, will lead to exclusion.")
    # --- Outputs ---
    # `reasoning` is declared before `decision`, so the rationale precedes the verdict.
    reasoning = dspy.OutputField(desc="A step-by-step analysis of the article against each criterion. Conclude with the final decision's justification.")
    decision = dspy.OutputField(desc="The final verdict, which must be exactly 'Include' or 'Exclude'.")

class ArticleScreener(dspy.Module):
    """DSPy module that screens one article against the protocol criteria.

    Wraps a `dspy.ChainOfThought` predictor built from `ScreeningSignature`.
    The criteria are not parameters: they are read from the notebook-level
    `inclusion_criteria` and `exclusion_criteria` variables on every call.
    """

    def __init__(self):
        super().__init__()
        self.screener = dspy.ChainOfThought(ScreeningSignature)

    def forward(self, article_title, article_abstract):
        """Run the predictor on one article and return its prediction.

        Args:
            article_title: Title of the article to screen.
            article_abstract: Abstract of the article to screen.

        Returns:
            Whatever `self.screener` returns for the four signature inputs.
        """
        screening_inputs = dict(
            article_title=article_title,
            article_abstract=article_abstract,
            inclusion_criteria=inclusion_criteria,
            exclusion_criteria=exclusion_criteria,
        )
        return self.screener(**screening_inputs)

from dspy.teleprompt import BootstrapFewShot


def decision_matches_label(example, prediction, trace=None):
    """Bootstrap metric: accept a trace only if its verdict matches the label.

    Args:
        example: Training example carrying the expected `decision`.
        prediction: Screener output; its `decision` is compared to the label.
        trace: Unused; accepted because DSPy passes it to metrics.

    Returns:
        True when the predicted decision, ignoring case, surrounding whitespace
        and simple markdown/punctuation decoration, starts with the expected
        label ('include' or 'exclude').
    """
    decoration = " \t\n*'\"`."
    expected = str(example.decision).strip(decoration).lower()
    predicted = str(getattr(prediction, 'decision', '')).strip(decoration).lower()
    return bool(expected) and predicted.startswith(expected)


# With metric=None every bootstrapped trace is kept as a demonstration, even if
# the model's verdict contradicts the labelled decision. Supplying a metric
# keeps only traces that reach the correct verdict.
teleprompter = BootstrapFewShot(metric=decision_matches_label, max_bootstrapped_demos=3)
compiled_screener = teleprompter.compile(ArticleScreener(), trainset=train_examples)
print("\n--- Program Compiled. Ready for Screening --- \n")

100%|██████████| 3/3 [00:03<00:00,  1.01s/it]

Bootstrapped 3 full traces after 2 examples for up to 1 rounds, amounting to 3 attempts.

--- Program Compiled. Ready for Screening --- 






Run the Screening and Save Results

In [38]:
NUMBER_OF_VOTERS = 3
# Each voter samples at a slightly different temperature (see the loop below).
VOTER_BASE_TEMPERATURE = 0.7
results = []
from tqdm.auto import tqdm


def parse_decision(raw_decision):
    """Normalise the model's free-text verdict.

    Returns 'Include' or 'Exclude' when the text, ignoring case and simple
    markdown/punctuation decoration, starts with that word, and None otherwise.
    A prefix match is used instead of a substring match so that answers such
    as "Do not include" are not counted as Include.
    """
    cleaned = str(raw_decision).strip(" \t\n*'\"`.").lower()
    if cleaned.startswith("include"):
        return "Include"
    if cleaned.startswith("exclude"):
        return "Exclude"
    return None


for article in tqdm(articles, desc=f"Screening Articles with LiteLLM ({MODEL_TO_USE})"):
    votes, reasons = [], []
    for voter_idx in range(NUMBER_OF_VOTERS):
        try:
            # Identical requests are answered from dspy.LM's cache, so repeating
            # the same call would produce N copies of one vote. Giving each
            # voter its own temperature makes every request distinct (the same
            # approach BootstrapFewShot uses between rounds).
            voter_lm = dspy.settings.lm.copy(
                temperature=VOTER_BASE_TEMPERATURE + 0.001 * voter_idx
            )
            with dspy.context(lm=voter_lm):
                # Each call to the screener is one API request.
                prediction = compiled_screener(
                    article_title=article.article_title,
                    article_abstract=article.article_abstract,
                )

            decision = parse_decision(prediction.decision)
            if decision is None:
                # An unparseable verdict is recorded as an error vote rather
                # than silently counted as Exclude.
                votes.append("Error")
                reasons.append(
                    f"Unrecognised decision {prediction.decision!r}. Reasoning: {prediction.reasoning}"
                )
            else:
                votes.append(decision)
                reasons.append(prediction.reasoning)

        except Exception as e:
            print(f"Error screening article '{article.article_title}' (Voter {voter_idx + 1}): {e}")
            votes.append("Error")
            reasons.append(f"Failed due to error: {e}")

        # Pause after every request to stay under the RPM limit.
        time.sleep(RATE_LIMIT_DELAY)

    vote_counts = Counter(votes)
    include_votes = vote_counts.get("Include", 0)
    exclude_votes = vote_counts.get("Exclude", 0)
    error_votes = vote_counts.get("Error", 0)

    # Majority is taken over the votes that actually succeeded. An article with
    # no successful vote is flagged "Error" for manual review instead of being
    # reported as a screened "Exclude".
    valid_votes = include_votes + exclude_votes
    if valid_votes == 0:
        final_decision = "Error"
    else:
        final_decision = "Include" if include_votes > (valid_votes / 2) else "Exclude"

    results.append({
        'title': article.article_title,
        'abstract': article.article_abstract,
        'final_decision': final_decision,
        # Share of all voters (including failed ones) that voted Include.
        'include_percentage': f"{(include_votes / NUMBER_OF_VOTERS) * 100:.2f}%",
        'include_votes': include_votes,
        'exclude_votes': exclude_votes,
        'error_votes': error_votes,
        'all_reasons': " | ".join([f"Voter {idx + 1}: {reason}" for idx, reason in enumerate(reasons)])
    })

# --- Build the results table, show a summary, and write everything to CSV ---
from IPython.display import display

output_filename = 'screening_results_TAVR_protocol.csv'
summary_columns = ['title', 'final_decision', 'include_percentage', 'include_votes']

# Articles with the most Include votes come first.
df_results = pd.DataFrame(results).sort_values(by='include_votes', ascending=False)

print("\n--- Screening Complete. Results: ---")
display(df_results[summary_columns])

# The CSV keeps every column (including abstracts and per-voter reasoning).
df_results.to_csv(output_filename, index=False)
print(f"\n✅ Results saved to '{output_filename}'.")

Screening Articles with LiteLLM (gemini/gemini-2.5-flash-lite-preview-06-17):   0%|          | 0/89 [00:00<?, …


--- Screening Complete. Results: ---


Unnamed: 0,title,final_decision,include_percentage,include_votes
0,5-Year Outcomes After Transcatheter or Surgica...,Include,100.00%,3
1,Transcatheter aortic valve implantation in low...,Include,100.00%,3
2,Transcatheter or surgical aortic valve implant...,Include,100.00%,3
3,Transcatheter Aortic-Valve Replacement in Low-...,Include,100.00%,3
4,3-Year Outcomes After Transcatheter or Surgica...,Include,100.00%,3
...,...,...,...,...
68,Echocardiographic changes in right ventricular...,Exclude,0.00%,0
65,Two-year outcomes from the PARTNER 3 trial: wh...,Exclude,0.00%,0
64,Hemodynamic consequences following surgical ao...,Exclude,0.00%,0
62,Aortic Valve Replacement in Low-Risk Patients ...,Exclude,0.00%,0



✅ Results saved to 'screening_results_TAVR_protocol.csv'.



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



Display the saved CSV results as an interactive table

In [42]:
# Reload the saved screening results and render them as an interactive table.
from google.colab import data_table

# Read back the CSV written by the screening step.
df_results_from_csv = pd.read_csv(output_filename)

print("\n--- Interactive Screening Results Table ---")
# Make sure DataFrames are rendered with Colab's interactive formatter.
data_table.enable_dataframe_formatter()

# 'abstract' and 'all_reasons' are left out because they can be very long.
display_columns = [
    'title',
    'final_decision',
    'include_percentage',
    'include_votes',
    'exclude_votes',
    'error_votes',
]
# Keep only the requested columns that are actually present, in the order above.
available_columns = set(df_results_from_csv.columns)
existing_display_columns = [name for name in display_columns if name in available_columns]

if existing_display_columns:
    display(df_results_from_csv[existing_display_columns])
else:
    print("Warning: None of the selected display columns were found in the DataFrame. Displaying all columns.")
    display(df_results_from_csv)

print("--- Interactive Screening Results Table Displayed ---")



--- Interactive Screening Results Table ---


Unnamed: 0,title,final_decision,include_percentage,include_votes,exclude_votes,error_votes
0,5-Year Outcomes After Transcatheter or Surgica...,Include,100.00%,3,0,0
1,Transcatheter aortic valve implantation in low...,Include,100.00%,3,0,0
2,Transcatheter or surgical aortic valve implant...,Include,100.00%,3,0,0
3,Transcatheter Aortic-Valve Replacement in Low-...,Include,100.00%,3,0,0
4,3-Year Outcomes After Transcatheter or Surgica...,Include,100.00%,3,0,0
...,...,...,...,...,...,...
84,Echocardiographic changes in right ventricular...,Exclude,0.00%,0,3,0
85,Two-year outcomes from the PARTNER 3 trial: wh...,Exclude,0.00%,0,3,0
86,Hemodynamic consequences following surgical ao...,Exclude,0.00%,0,3,0
87,Aortic Valve Replacement in Low-Risk Patients ...,Exclude,0.00%,0,3,0


--- Interactive Screening Results Table Displayed ---
