In [12]:
import time
from typing import List, Dict
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [13]:
# Root URL of the Stanford TechFinder site; every listing request below is built from it.
base_url = "https://techfinder.stanford.edu/"
# Without a timeout requests waits indefinitely on an unresponsive server,
# which would freeze the whole notebook run.
response = requests.get(base_url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were a listing.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

In [14]:
def get_all_titles(timeout: float = 30.0) -> List[str]:
    """Fetch ``base_url`` and return the technology titles found on that page.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The stripped text of every ``<h3 class="teaser__title">`` element,
        in the order ``find_all`` yields them.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(base_url, timeout=timeout)
    # An error page contains no teaser headings; surface the failure instead
    # of silently returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    titles = soup.find_all('h3', class_='teaser__title')
    return [title.text.strip() for title in titles]

# Quick check: print the titles scraped from base_url (this performs a live HTTP request).
print(get_all_titles())

['An Adjustable Mitral Ring for Asymmetric and Continuous Off-Bypass Annulus Reduction', 'Anatomically conforming device for tricuspid valve annuloplasty', 'Diode-pumped photonic integrated titanium-sapphire waveguide amplifier', 'Wafer-Scale Thin-Film Titanium:Sapphire Photonics', 'Hypoxia-mediated resistance of Homologous recombination-deficient cancer cells to PARP inhibitors', 'Optoelectronic orchestrated microdroplet reactors for solid-phase reactions', 'Compositions and Methods Related to Coronavirus Therapies', 'Genetically Encoded Lysosome Targeting Chimeras for Cell-mediated Delivery', '3D Printing of Organoid Slurries', '3D Heat Spreading (Heat-plating) from Semiconductor Devices', 'Generation and tracking of cells with precise edits', 'Improved cfDNA methylation profiling through correction of misrepaired jagged-ends', 'B7H3-Targeting Peptides', 'Efficient wide-field nanosecond imaging methods using Pockels cells for low-light applications', 'Wide-field Resonant Electro-opti

In [15]:
def get_all_pages_soups(limit: int = 130, max_pages: int = 130, timeout: float = 30.0) -> "List[BeautifulSoup]":
    """Download listing pages ``?page=0 .. ?page=N-1`` and return them parsed.

    Args:
        limit: Number of pages the caller wants.
        max_pages: Upper bound on pages to request; ``limit`` is clamped to
            it. Defaults to 130, the cap previously hard-coded in the body.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the notebook.

    Returns:
        One ``BeautifulSoup`` object per downloaded page, in page order.
        An empty list when the effective page count is zero or negative.

    Raises:
        requests.HTTPError: If any page answers with a 4xx/5xx status.
        requests.Timeout: If a page does not respond within ``timeout``.
    """
    num_pages = min(limit, max_pages)
    if num_pages <= 0:
        # Nothing to fetch; avoid opening a session at all.
        return []

    soups = []
    # A single Session reuses the underlying connection across pages.
    with requests.Session() as session:
        for page in range(num_pages):
            # `params` lets requests build and encode the query string.
            response = session.get(base_url, params={"page": page}, timeout=timeout)
            # Do not let an error page masquerade as a (title-less) listing.
            response.raise_for_status()
            soups.append(BeautifulSoup(response.content, 'html.parser'))
    return soups

def get_titles_from_page_soup(soup):
    """Collect the whitespace-stripped text of each ``h3.teaser__title`` in ``soup``.

    The result preserves the order in which ``soup.find_all`` returns the headings.
    """
    cleaned = []
    for heading in soup.find_all('h3', class_='teaser__title'):
        cleaned.append(heading.text.strip())
    return cleaned
        
# Fetch a single listing page (limit=1, i.e. ?page=0 only) and keep its parsed soup for the cells below.
all_pages_soups = get_all_pages_soups(1)

In [16]:
# Print every technology title from each downloaded listing page, one per line.
# Note: `soup` and `title` are rebound at notebook scope by these loops.
for soup in all_pages_soups:
    for title in get_titles_from_page_soup(soup):
        print(title)


An Adjustable Mitral Ring for Asymmetric and Continuous Off-Bypass Annulus Reduction
Anatomically conforming device for tricuspid valve annuloplasty
Diode-pumped photonic integrated titanium-sapphire waveguide amplifier
Wafer-Scale Thin-Film Titanium:Sapphire Photonics
Hypoxia-mediated resistance of Homologous recombination-deficient cancer cells to PARP inhibitors
Optoelectronic orchestrated microdroplet reactors for solid-phase reactions
Compositions and Methods Related to Coronavirus Therapies
Genetically Encoded Lysosome Targeting Chimeras for Cell-mediated Delivery
3D Printing of Organoid Slurries
3D Heat Spreading (Heat-plating) from Semiconductor Devices
Generation and tracking of cells with precise edits
Improved cfDNA methylation profiling through correction of misrepaired jagged-ends
B7H3-Targeting Peptides
Efficient wide-field nanosecond imaging methods using Pockels cells for low-light applications
Wide-field Resonant Electro-optic Imaging Devices and Applications


In [17]:
def get_links_from_page_soup(soup, base: str = ""):
    """Return absolute URLs for the technologies listed in ``soup``.

    Args:
        soup: Parsed listing page; each ``<h3 class="teaser__title">`` is
            expected to wrap an ``<a href=...>`` pointing at the detail page.
        base: URL that relative hrefs are resolved against. When empty (the
            default) the notebook-level ``base_url`` is used.

    Returns:
        One absolute URL per heading that contains a link, in listing order.
        Headings without an ``<a href>`` are skipped instead of raising.
    """
    root = base or base_url
    links = []
    for heading in soup.find_all('h3', class_='teaser__title'):
        # href=True restricts the match to anchors that carry an href attribute.
        anchor = heading.find('a', href=True)
        if anchor is None:
            continue
        # urljoin copes with a base lacking the trailing slash and with hrefs
        # that are already absolute; slicing and concatenating did not.
        links.append(urljoin(root, anchor['href']))
    return links

# Reuse the pages already held in `all_pages_soups`: calling
# get_all_pages_soups(1) again here would only repeat the same HTTP request.
for soup in all_pages_soups:
    for link in get_links_from_page_soup(soup):
        print(link)

https://techfinder.stanford.edu/technology/adjustable-mitral-ring-asymmetric-and-continuous-bypass-annulus-reduction
https://techfinder.stanford.edu/technology/anatomically-conforming-device-tricuspid-valve-annuloplasty
https://techfinder.stanford.edu/technology/diode-pumped-photonic-integrated-titanium-sapphire-waveguide-amplifier
https://techfinder.stanford.edu/technology/wafer-scale-thin-film-titaniumsapphire-photonics
https://techfinder.stanford.edu/technology/hypoxia-mediated-resistance-homologous-recombination-deficient-cancer-cells-parp
https://techfinder.stanford.edu/technology/optoelectronic-orchestrated-microdroplet-reactors-solid-phase-reactions
https://techfinder.stanford.edu/technology/compositions-and-methods-related-coronavirus-therapies
https://techfinder.stanford.edu/technology/genetically-encoded-lysosome-targeting-chimeras-cell-mediated-delivery
https://techfinder.stanford.edu/technology/3d-printing-organoid-slurries
https://techfinder.stanford.edu/technology/3d-heat

In [18]:
def get_subpage_soup(link: str, timeout: float = 30.0):
    """Download ``link`` and return its HTML parsed with ``html.parser``.

    Args:
        link: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server; prevents an unresponsive
            host from blocking the notebook indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(link, timeout=timeout)
    # Surface HTTP failures here rather than handing back a parsed error page.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

# Flatten the per-page link lists into a single list, preserving page order.
all_links = []
for soup in all_pages_soups:
    all_links.extend(get_links_from_page_soup(soup))


In [25]:

# NOTE(review): index 1 selects the second entry of all_links, despite the "first_" naming.
first_link = all_links[1]
# Download and parse that technology's detail page.
first_soup = get_subpage_soup(first_link)

def _section_items(subpage_soup, heading: str) -> List[str]:
    """Return the stripped ``<li>`` texts of the first ``<ul>`` after the ``<h2>`` titled ``heading``, or [] if absent."""
    header = subpage_soup.find('h2', string=heading)
    if header is None:
        return []
    bullet_list = header.find_next('ul')
    if bullet_list is None:
        return []
    return [li.get_text().strip() for li in bullet_list.find_all('li')]


def get_descriptions_applications_advantages(subpage_soup: "BeautifulSoup"):
    """Build a one-paragraph summary of a technology detail page.

    The paragraph is the page description followed by an
    "Applications include ..." sentence and an
    "Advantages of the device are ..." sentence.

    Args:
        subpage_soup: Parsed detail page. The description is read from the
            ``<p>`` elements inside ``<div class="docket__text">``; the two
            lists come from the ``<ul>`` following the ``<h2>`` headings
            "Applications" and "Advantages".

    Returns:
        The assembled paragraph. A part whose source is missing from the page
        is left out rather than raising ``AttributeError``; a page with none
        of the three parts yields an empty string.
    """
    # Get applications and advantages (each may legitimately be absent).
    applications = _section_items(subpage_soup, "Applications")
    advantages = _section_items(subpage_soup, "Advantages")

    # Get descriptions; tolerate pages without the description container and
    # drop empty paragraphs so they do not produce doubled spaces.
    container = subpage_soup.find('div', class_='docket__text')
    paragraphs = container.find_all('p') if container is not None else []
    descriptions = [text for text in (para.get_text().strip() for para in paragraphs) if text]
    full_description = " ".join(descriptions)

    # Construct the final paragraph, emitting only the sentences that have content.
    parts = []
    if full_description:
        parts.append(full_description)
    if applications:
        parts.append(f"Applications include {', '.join(applications)}.")
    if advantages:
        parts.append(f"Advantages of the device are {', '.join(advantages)}.")
    return " ".join(parts)

# Show the assembled summary paragraph for the detail page parsed into first_soup.
print(get_descriptions_applications_advantages(first_soup))

Stanford researchers have designed a novel tricuspid annuloplasty ring that minimizes the risk of interrupting cardiac conduction during implantation. Tricuspid regurgitation is a prevalent heart condition in which the tricuspid valve fails to close completely during ventricular contraction. The disrupted unidirectional blood flow could lead to abnormal pressure or volume overload. Patients with moderate to severe cases may require annuloplasty, where a ring device is implanted over the annulus around the tricuspid valve to support valve leaflets. While securing the device, sutures often inadvertently cross the bundle of His in the septum, disrupting heart conduction. To avoid this complication, there is a critical need for alternate annuloplasty devices. Researchers in the Woo Lab at Stanford have created a modular annuloplasty device that avoids suture placement in conduction tissue. Its most basic form is a C-shaped band with a wide gap between the two ends to avoid traversing over 