In [1]:
import random
import threading
import time
from sqlalchemy import create_engine, Column, Integer, String, DateTime, func
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import NoResultFound


In [2]:
#---- this seems to work!
import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import contextmanager

from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import NoResultFound
from sqlalchemy.sql import text

from sqlalchemy import Index


# --- Setup the SQLite database & SQLAlchemy ORM ---

# Using a file-based SQLite database and allow multithreaded access:
engine = create_engine("sqlite:///test2.db", connect_args={"check_same_thread": False})
#engine = create_engine('postgresql://postgres@localhost:5333/ajtest1')
SessionLocal = sessionmaker(bind=engine)

Base = declarative_base()

class Tile(Base):
    __tablename__ = 'tiles'
    id = Column(Integer, primary_key=True)
    annotation_class_id = Column(Integer, default=1)  # for demo, all tiles use 1
    hasgt = Column(Boolean, default=True)
    datetime = Column(DateTime, default=datetime.datetime.utcnow)
    status = Column(String, default="pending")  # can be "pending", "in_progress", etc.
    worker_id = Column(Integer, nullable=True)    # which worker claimed it

    # Correct way to define indexes
    __table_args__ = (
        Index("idx_datetime", datetime),
        Index("idx_status", status),
    )
# Drop and recreate the table (for demo purposes)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

In [3]:
Tile.__table__.indexes

{Index('idx_datetime', Column('datetime', DateTime(), table=<tiles>, default=CallableColumnDefault(<function datetime.utcnow at 0x7f0de1906050>))),
 Index('idx_status', Column('status', String(), table=<tiles>, default=ScalarElementColumnDefault('pending')))}

In [4]:
# --- Populate the database with some tiles ---
with SessionLocal() as session:
    tiles = []
    for i in range(1_000):  # create tiles
        tile = Tile(
            annotation_class_id=1,
            hasgt=True,
            # Newer tiles have a more recent datetime:
            datetime=datetime.datetime.utcnow() - datetime.timedelta(seconds=i)
        )
        tiles.append(tile)
    session.add_all(tiles)
    session.commit()

In [5]:
# --- Utility to get a session ---
@contextmanager
def get_session():
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception as e:
        session.rollback()
        raise e
    finally:
        session.close()

In [9]:
from sqlalchemy.orm import Session
from sqlalchemy import update, select, text
from sqlalchemy.engine import Engine

def getWorkersTile(worker_id: int):
    """
    Atomically retrieves and marks a tile as 'in_progress' so no two workers claim the same tile.
    """
    with get_session() as db_session:  # Ensure this provides a session context
        dialect = db_session.bind.dialect.name  # Get database type
        with db_session.begin():  # Explicit transaction
            subquery = (
                select(Tile.id)
                .where(Tile.annotation_class_id == 1,
                       Tile.hasgt == True,
                       Tile.status == 'pending')
                .order_by(Tile.datetime.desc())
                .limit(2).with_for_update(skip_locked=True)
            )
            
            tiles = db_session.execute(
                update(Tile)
                .where(Tile.id.in_(subquery)) #.scalar_subquery())
                .where(Tile.status == 'pending')  # Ensures another worker hasn't claimed it
                .values(status='in_progress', worker_id=worker_id)
                .returning(Tile)
            ).fetchall()
            
            if tiles:
                return f"Worker {worker_id} claimed Tile {tiles[0]["id"]}"
            else:
                return f"Worker {worker_id} found no tile"



SyntaxError: f-string: unmatched '[' (2878868240.py, line 30)

In [10]:
# --- Worker function ---
def worker_function(worker_id):
    result = getWorkersTile(worker_id)
    print(result)
    return result

In [11]:
# --- Main function: spawn many workers concurrently ---
def main():
    num_workers = 200  # simulate an aggressive scenario with 200 concurrent workers
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(worker_function, worker_id) for worker_id in range(num_workers)]
        for future in as_completed(futures):
            results.append(future.result())
    
    print("\nSummary:")
    claimed = [r for r in results if "claimed Tile" in r]
    for res in results:
        print(res)
    print(f"\nTotal claimed tiles: {len(claimed)}")

if __name__ == "__main__":
    main()

Worker 0 claimed Tile (<__main__.Tile object at 0x7f0de00c9750>,)
Worker 15 claimed Tile (<__main__.Tile object at 0x7f0de0391810>,)
Worker 55 claimed Tile (<__main__.Tile object at 0x7f0de03d3cd0>,)
Worker 9 claimed Tile (<__main__.Tile object at 0x7f0de00fc5b0>,)
Worker 18 claimed Tile (<__main__.Tile object at 0x7f0de01dfd60>,)
Worker 1 claimed Tile (<__main__.Tile object at 0x7f0de02842e0>,)
Worker 133 claimed Tile (<__main__.Tile object at 0x7f0de036b730>,)
Worker 11 claimed Tile (<__main__.Tile object at 0x7f0de03fc850>,)
Worker 190 claimed Tile (<__main__.Tile object at 0x7f0de18690f0>,)
Worker 21 claimed Tile (<__main__.Tile object at 0x7f0de01f23b0>,)
Worker 24 claimed Tile (<__main__.Tile object at 0x7f0de01f2710>,)
Worker 25 claimed Tile (<__main__.Tile object at 0x7f0de01f2380>,)
Worker 26 claimed Tile (<__main__.Tile object at 0x7f0de01f05e0>,)
Worker 27 claimed Tile (<__main__.Tile object at 0x7f0de186bd30>,)
Worker 28 claimed Tile (<__main__.Tile object at 0x7f0de18697b0

In [None]:

result = getWorkersTile(-1)

In [None]:
result