In [None]:
from bs4 import BeautifulSoup
import requests
from neo4j import GraphDatabase
import re
import time
import os
from dotenv import load_dotenv

# Populate the process environment via python-dotenv before any setting is read.
load_dotenv()

# Neo4j connection settings. Only the password comes from the environment
# (NEO4J_PASSWORD), so no secret is stored in the notebook itself; it is None
# when that variable is not set.
URI = "neo4j://localhost:7687"
USERNAME = "neo4j"
PASSWORD = os.environ.get("NEO4J_PASSWORD")

# Driver object used by the cells below to open sessions.
neo4j_auth = (USERNAME, PASSWORD)
driver = GraphDatabase.driver(URI, auth=neo4j_auth)


In [2]:
# Crawl a small range of OEIS entries and mirror each page into Neo4j:
#   * one :Sequence node per crawled entry (keyed by `id`, carrying seq_name/seq_data)
#   * one :Sequence node per link found inside a page section (keyed by the link text)
#   * one relationship per link, typed with the name of the section it was found in
init_time = time.time()
with driver.session() as session:

    for i in range(1, 10):  # A000001 .. A000009; widen the range to crawl more entries

        sequence_number = f"A{i:06d}"  # "A" followed by the index zero-padded to 6 digits
        print(sequence_number)

        url = "https://oeis.org/" + sequence_number
        print(url)

        # A timeout keeps one stalled request from hanging the whole crawl, and an
        # HTTP error page must not be parsed as if it were a sequence entry.
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Skipping {sequence_number}: {exc}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")

        seqname = soup.find("div", class_="seqname")
        seqdata = soup.find("div", class_="seqdata")

        # Fall back to placeholder text when the page lacks the expected divs.
        seq_name = seqname.get_text(strip=True) if seqname else "No seqname found"
        seq_data = seqdata.get_text(strip=True) if seqdata else "No seqdata found"

        # MERGE matches on every property in the pattern, so the node is keyed on
        # `id` alone and the descriptive properties are applied with SET. Merging
        # on all three properties would create a second node whenever this id had
        # already been stored as a bare link target. Writing the node here (rather
        # than once per link) also records entries whose page yields no links.
        session.run(
            """
            MERGE (src:Sequence {id: $sequence_number})
            SET src.seq_name = $seq_name, src.seq_data = $seq_data
            """,
            sequence_number=sequence_number,
            seq_name=seq_name,
            seq_data=seq_data,
        )

        # ---------------------------------------------------------------------------
        for section in soup.find_all("div", class_="section"):
            sectname_div = section.find("div", class_="sectname")
            sectbody = section.find("div", class_="sectbody")
            if sectname_div is None or sectbody is None:
                # Not a name/body section; nothing to link from.
                continue

            sectname = sectname_div.get_text(strip=True)
            if not sectname:
                # An empty relationship type is not valid Cypher.
                continue

            # The relationship type is spliced into the query text, so any backtick
            # in the scraped section name is doubled to keep it inside the quoted
            # identifier instead of terminating it.
            rel_type = sectname.replace("`", "``")
            link_query = (
                "MERGE (src:Sequence {id: $sequence_number})\n"
                "MERGE (dst:Sequence {id: $to_id})\n"
                "SET dst.from_title = $title\n"
                "MERGE (src)-[:`" + rel_type + "`]->(dst)"
            )

            # NOTE(review): every link in the section becomes a :Sequence node, not
            # only links whose text is an A-number — confirm this is intended.
            for link in sectbody.find_all("a"):
                link_text = link.get_text(strip=True)  # visible text, used as the target id
                if not link_text:
                    # Text-less links (e.g. image links) would all collapse into one
                    # node with an empty id.
                    continue
                title = link.get("title", "--------------------------------")

                session.run(
                    link_query,
                    sequence_number=sequence_number,
                    to_id=link_text,
                    title=title,
                )
print(time.time()-init_time, "secs")


A000001
https://oeis.org/A000001
A000002
https://oeis.org/A000002
A000003
https://oeis.org/A000003
A000004
https://oeis.org/A000004
A000005
https://oeis.org/A000005
A000006
https://oeis.org/A000006
A000007
https://oeis.org/A000007
A000008
https://oeis.org/A000008
A000009
https://oeis.org/A000009
10.665527105331421 secs
