In [1]:
import random
import threading
import time
from sqlalchemy import create_engine, Column, Integer, String, DateTime, func
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import NoResultFound


In [None]:

# # Define database and ORM setup
# Base = declarative_base()
# engine = create_engine("sqlite:///tiles2.db", echo=False)
# SessionLocal = sessionmaker(bind=engine)


In [None]:

# class Tile(Base):
#     __tablename__ = "tiles"

#     id = Column(Integer, primary_key=True, autoincrement=True)
#     status = Column(String, default="pending")  # pending, in_progress, done
#     worker_id = Column(Integer, nullable=True)
#     datetime = Column(DateTime, server_default=func.now())  # Simulate new tiles

# # Create the table
# Base.metadata.create_all(engine)

# # Populate the database with 1,000 pending tiles
# def populate_database():
#     with SessionLocal() as session:
#         if session.query(Tile).count() == 0:  # Prevent duplicate runs
#             tiles = [Tile(status="pending") for _ in range(10)]
#             session.add_all(tiles)
#             session.commit()
#             print("Database populated with 1,000 tiles.")

# populate_database()


In [7]:
#---- this seems to work!
import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import contextmanager

from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import NoResultFound
from sqlalchemy.sql import text

# --- Setup the SQLite database & SQLAlchemy ORM ---

# Using a file-based SQLite database and allow multithreaded access:
#engine = create_engine("sqlite:///test5.db", connect_args={"check_same_thread": False})
engine = create_engine('postgresql://postgres@localhost:5333/test_tile')
SessionLocal = sessionmaker(bind=engine)

Base = declarative_base()

class Tile(Base):
    __tablename__ = 'tiles'
    id = Column(Integer, primary_key=True)
    annotation_class_id = Column(Integer, default=1)  # for demo, all tiles use 1
    hasgt = Column(Boolean, default=True)
    datetime = Column(DateTime, default=datetime.datetime.utcnow)
    status = Column(String, default="pending")  # can be "pending", "in_progress", etc.
    worker_id = Column(Integer, nullable=True)    # which worker claimed it

# Drop and recreate the table (for demo purposes)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

# --- Populate the database with some tiles ---
with SessionLocal() as session:
    tiles = []
    for i in range(100):  # create 50 tiles
        tile = Tile(
            annotation_class_id=1,
            hasgt=True,
            # Newer tiles have a more recent datetime:
            datetime=datetime.datetime.utcnow() - datetime.timedelta(seconds=i)
        )
        tiles.append(tile)
    session.add_all(tiles)
    session.commit()

# --- Utility to get a session ---
@contextmanager
def get_session():
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception as e:
        session.rollback()
        raise e
    finally:
        session.close()

# --- The tile-claiming function ---
def getWorkersTile(worker_id):
    """
    Atomically retrieves and marks a tile as 'in_progress' so no two workers claim the same tile.
    
    Since SQLite does not support row-level locking (FOR UPDATE SKIP LOCKED), we use an atomic update query.
    """
    with get_session() as db_session:
        # Atomically select and update a tile in one query
        tile = db_session.execute(
            text("""
                UPDATE tiles 
                SET status = 'in_progress', worker_id = :worker_id 
                WHERE id = (
                    SELECT id FROM tiles 
                    WHERE annotation_class_id = 1 
                    AND hasgt = True 
                    AND status = 'pending' 
                    ORDER BY datetime DESC 
                    LIMIT 1
                )
                RETURNING id, annotation_class_id, hasgt, datetime, status, worker_id
            """),
            {"worker_id": worker_id}
        ).fetchone()

        if tile:
            return f"Worker {worker_id} claimed Tile {tile[0]}"
        else:
            return f"Worker {worker_id} found no tile"

# --- Worker function ---
def worker_function(worker_id):
    result = getWorkersTile(worker_id)
    print(result)
    return result

# --- Main function: spawn many workers concurrently ---
def main():
    num_workers = 200  # simulate an aggressive scenario with 200 concurrent workers
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(worker_function, worker_id) for worker_id in range(num_workers)]
        for future in as_completed(futures):
            results.append(future.result())
    
    print("\nSummary:")
    claimed = [r for r in results if "claimed Tile" in r]
    for res in results:
        print(res)
    print(f"\nTotal claimed tiles: {len(claimed)}")

if __name__ == "__main__":
    main()

Worker 0 claimed Tile 1
Worker 7 claimed Tile 2
Worker 1 claimed Tile 2
Worker 2 claimed Tile 3
Worker 4 claimed Tile 2
Worker 6 claimed Tile 3
Worker 9 claimed Tile 4
Worker 3 claimed Tile 2
Worker 12 claimed Tile 4
Worker 20 claimed Tile 5
Worker 11 claimed Tile 4
Worker 8 claimed Tile 3
Worker 19 claimed Tile 6
Worker 18 claimed Tile 5
Worker 27 claimed Tile 6
Worker 16 claimed Tile 4
Worker 13 claimed Tile 3
Worker 17 claimed Tile 5
Worker 10 claimed Tile 4
Worker 29 claimed Tile 6
Worker 5 claimed Tile 3
Worker 31 claimed Tile 7
Worker 14 claimed Tile 5
Worker 25 claimed Tile 7
Worker 39 claimed Tile 8
Worker 15 claimed Tile 5
Worker 26 claimed Tile 7
Worker 43 claimed Tile 9
Worker 41 claimed Tile 8
Worker 22 claimed Tile 5
Worker 32 claimed Tile 10
Worker 30 claimed Tile 7
Worker 53 claimed Tile 9
Worker 21 claimed Tile 5
Worker 36 claimed Tile 7
Worker 37 claimed Tile 8
Worker 58 claimed Tile 11
Worker 46 claimed Tile 9
Worker 35 claimed Tile 10
Worker 24 claimed Tile 7
Worker 

In [None]:

# # Function for a worker to claim tiles
# def worker_process(worker_id):
#     with SessionLocal() as session:
#         while True:
#             try:
#                 # Atomically select a tile and mark it as in_progress
#                 tile = (
#                     session.query(Tile)
#                     .filter(Tile.status == "pending")
#                     .order_by(Tile.datetime.desc())  # Prioritize newer tiles
#                     .with_for_update(skip_locked=True)  # Avoid race conditions
#                     .first()
#                 )

#                 if not tile:
#                     print(f"Worker {worker_id}: No more tiles left.")
#                     break  # Exit if no tiles available

#                 # Mark tile as in_progress
#                 tile.status = "in_progress"
#                 tile.worker_id = worker_id
#                 session.commit()
#                 print(f"Worker {worker_id} claimed tile {tile.id}")

#                 # Simulate work
#                 time.sleep(random.uniform(0.01, 0.1))  # Random small delay

#                 # Mark tile as done
#                 tile.status = "done"
#                 session.commit()
#                 print(f"Worker {worker_id} completed tile {tile.id}")

#             except NoResultFound:
#                 break  # No more tiles to process

# # Launch 100+ aggressive workers
# threads = []
# num_workers = 100  # Adjust for more/less workers
# for i in range(num_workers):
#     t = threading.Thread(target=worker_process, args=(i,))
#     threads.append(t)
#     t.start()

# # Wait for all threads to finish
# for t in threads:
#     t.join()

# print("All tiles processed.")


In [None]:
# import datetime
# from concurrent.futures import ThreadPoolExecutor, as_completed
# from contextlib import contextmanager

# from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
# from sqlalchemy.orm import sessionmaker, declarative_base
# from sqlalchemy.exc import NoResultFound

# # --- Setup the SQLite database & SQLAlchemy ORM ---

# # Using a file-based SQLite database and allow multithreaded access:
# engine = create_engine("sqlite:///test11.db", connect_args={"check_same_thread": False})
# SessionLocal = sessionmaker(bind=engine)

# Base = declarative_base()

# class Tile(Base):
#     __tablename__ = 'tiles'
#     id = Column(Integer, primary_key=True)
#     annotation_class_id = Column(Integer, default=1)  # for demo, all tiles use 1
#     hasgt = Column(Boolean, default=True)
#     datetime = Column(DateTime, default=datetime.datetime.utcnow)
#     status = Column(String, default="pending")  # can be "pending", "in_progress", etc.
#     worker_id = Column(Integer, nullable=True)    # which worker claimed it

# # Drop and recreate the table (for demo purposes)
# Base.metadata.drop_all(engine)
# Base.metadata.create_all(engine)

# # --- Populate the database with some tiles ---
# with SessionLocal() as session:
#     tiles = []
#     for i in range(50):  # create 50 tiles
#         tile = Tile(
#             annotation_class_id=1,
#             hasgt=True,
#             # Newer tiles have a more recent datetime:
#             datetime=datetime.datetime.utcnow() - datetime.timedelta(seconds=i)
#         )
#         tiles.append(tile)
#     session.add_all(tiles)
#     session.commit()

# # --- Utility to get a session ---
# @contextmanager
# def get_session():
#     session = SessionLocal()
#     try:
#         yield session
#         session.commit()
#     except Exception as e:
#         session.rollback()
#         raise e
#     finally:
#         session.close()

# # --- The tile-claiming function ---
# def getWorkersTile(worker_id):
#     """
#     Atomically retrieves and marks a tile as 'in_progress', so no two workers claim the same tile.
    
#     For SQLite, note that with_for_update is not supported, so this is just for demonstration.
#     """
#     with get_session() as db_session:
#         # Select the latest available tile (pending)
#         tile = (
#             db_session.query(Tile)
#             .filter(Tile.annotation_class_id == 1,
#                     Tile.hasgt == True,
#                     Tile.status == "pending")
#             .order_by(Tile.datetime.desc())
#             # .with_for_update(skip_locked=True)  <-- Not supported by SQLite
#             .first()
#         )
#         if tile:
#             # Claim the tile by marking it as in progress
#             tile.status = "in_progress"
#             tile.worker_id = worker_id
#             db_session.commit()  # persist the update
#             return tile
#         else:
#             return None

# # --- Worker function ---
# def worker_function(worker_id):
#     tile = getWorkersTile(worker_id)
#     if tile:
#         result = f"Worker {worker_id} claimed Tile {tile.id}"
#     else:
#         result = f"Worker {worker_id} found no tile"
#     print(result)
#     return result

# # --- Main function: spawn many workers concurrently ---

# num_workers = 200  # simulate an aggressive scenario with 200 concurrent workers
# results = []
# with ThreadPoolExecutor(max_workers=num_workers) as executor:
#     futures = [executor.submit(worker_function, worker_id) for worker_id in range(num_workers)]
#     for future in as_completed(futures):
#         results.append(future.result())

# print("\nSummary:")
# claimed = [r for r in results if "claimed Tile" in r]
# for res in results:
#     print(res)
# print(f"\nTotal claimed tiles: {len(claimed)}")



In [None]:
# import datetime
# from concurrent.futures import ThreadPoolExecutor, as_completed
# from contextlib import contextmanager

# from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
# from sqlalchemy.orm import sessionmaker, declarative_base
# from sqlalchemy.exc import NoResultFound

# # --- Setup the SQLite database & SQLAlchemy ORM ---

# # Using a file-based SQLite database and allow multithreaded access:
# engine = create_engine("sqlite:///test.db", connect_args={"check_same_thread": False})
# SessionLocal = sessionmaker(bind=engine)

# Base = declarative_base()

# class Tile(Base):
#     __tablename__ = 'tiles'
#     id = Column(Integer, primary_key=True)
#     annotation_class_id = Column(Integer, default=1)  # for demo, all tiles use 1
#     hasgt = Column(Boolean, default=True)
#     datetime = Column(DateTime, default=datetime.datetime.utcnow)
#     status = Column(String, default="pending")  # can be "pending", "in_progress", etc.
#     worker_id = Column(Integer, nullable=True)    # which worker claimed it

# # Drop and recreate the table (for demo purposes)
# Base.metadata.drop_all(engine)
# Base.metadata.create_all(engine)

# # --- Populate the database with some tiles ---
# with SessionLocal() as session:
#     tiles = []
#     for i in range(50):  # create 50 tiles
#         tile = Tile(
#             annotation_class_id=1,
#             hasgt=True,
#             # Newer tiles have a more recent datetime:
#             datetime=datetime.datetime.utcnow() - datetime.timedelta(seconds=i)
#         )
#         tiles.append(tile)
#     session.add_all(tiles)
#     session.commit()

# # --- Utility to get a session ---
# @contextmanager
# def get_session():
#     session = SessionLocal()
#     try:
#         yield session
#         session.commit()
#     except Exception as e:
#         session.rollback()
#         raise e
#     finally:
#         session.close()

# # --- The tile-claiming function ---
# def getWorkersTile(worker_id):
#     """
#     Atomically retrieves and marks a tile as 'in_progress', so no two workers claim the same tile.
    
#     For SQLite, note that with_for_update is not supported, so this is just for demonstration.
#     """
#     with get_session() as db_session:
#         # Select the latest available tile (pending)
#         tile = (
#             db_session.query(Tile)
#             .filter(Tile.annotation_class_id == 1,
#                     Tile.hasgt == True,
#                     Tile.status == "pending")
#             .order_by(Tile.datetime.desc())
#             # .with_for_update(skip_locked=True)  <-- Not supported by SQLite
#             .first()
#         )
#         if tile:
#             # Claim the tile by marking it as in progress
#             tile.status = "in_progress"
#             tile.worker_id = worker_id
#             db_session.commit()  # persist the update
#             return tile
#         else:
#             return None

# # --- Worker function ---
# def worker_function(worker_id):
#     tile = getWorkersTile(worker_id)
#     if tile:
#         result = f"Worker {worker_id} claimed Tile {tile.id}"
#     else:
#         result = f"Worker {worker_id} found no tile"
#     print(result)
#     return result

# # --- Main function: spawn many workers concurrently ---
# def main():
#     num_workers = 200  # simulate an aggressive scenario with 200 concurrent workers
#     results = []
#     with ThreadPoolExecutor(max_workers=num_workers) as executor:
#         futures = [executor.submit(worker_function, worker_id) for worker_id in range(num_workers)]
#         for future in as_completed(futures):
#             results.append(future.result())
    
#     print("\nSummary:")
#     claimed = [r for r in results if "claimed Tile" in r]
#     for res in results:
#         print(res)
#     print(f"\nTotal claimed tiles: {len(claimed)}")

# if __name__ == "__main__":
#     main()


In [8]:
#---- this seems to work!
import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import contextmanager

from sqlalchemy import create_engine, Column, Integer, String, Boolean, DateTime
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import NoResultFound
from sqlalchemy.sql import text

from sqlalchemy import update, select

# --- Setup the SQLite database & SQLAlchemy ORM ---

# Using a file-based SQLite database and allow multithreaded access:
#engine = create_engine("sqlite:///test5.db", connect_args={"check_same_thread": False})
engine = create_engine('postgresql://postgres@localhost:5333/test_tile')
SessionLocal = sessionmaker(bind=engine)

Base = declarative_base()

class Tile(Base):
    __tablename__ = 'tiles'
    id = Column(Integer, primary_key=True)
    annotation_class_id = Column(Integer, default=1)  # for demo, all tiles use 1
    hasgt = Column(Boolean, default=True)
    datetime = Column(DateTime, default=datetime.datetime.utcnow)
    status = Column(String, default="pending")  # can be "pending", "in_progress", etc.
    worker_id = Column(Integer, nullable=True)    # which worker claimed it

# Drop and recreate the table (for demo purposes)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

# --- Populate the database with some tiles ---
with SessionLocal() as session:
    tiles = []
    for i in range(10_000):  # create 50 tiles
        tile = Tile(
            annotation_class_id=1,
            hasgt=True,
            # Newer tiles have a more recent datetime:
            datetime=datetime.datetime.utcnow() - datetime.timedelta(seconds=i)
        )
        tiles.append(tile)
    session.add_all(tiles)
    session.commit()

# --- Utility to get a session ---
@contextmanager
def get_session():
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception as e:
        session.rollback()
        raise e
    finally:
        session.close()

def getWorkersTile(worker_id: int):
    subquery = (
        select(Tile.id)
        .where(Tile.annotation_class_id == 1)
        .where(Tile.hasgt == True)
        .where(Tile.status == 'pending')
        .order_by(Tile.datetime.desc())
        .limit(1)
        .scalar_subquery()
    )

    # Perform the update
    stmt = (
        update(Tile)
        .where(Tile.id == subquery)
        .values(status='in_progress', worker_id=worker_id)
        .returning(Tile.id, Tile.annotation_class_id, Tile.hasgt, Tile.datetime, Tile.status, Tile.worker_id)
    )
    with get_session() as db_session:
        result = db_session.execute(stmt).fetchone()
    if result:
        return f"Worker {worker_id} claimed Tile {result[0]}"
    else:
        return f"Worker {worker_id} found no tile"

# # --- The tile-claiming function ---
# def getWorkersTile(worker_id):
#     """
#     Atomically retrieves and marks a tile as 'in_progress' so no two workers claim the same tile.
    
#     Since SQLite does not support row-level locking (FOR UPDATE SKIP LOCKED), we use an atomic update query.
#     """
#     with get_session() as db_session:
#         # Atomically select and update a tile in one query
#         tile = db_session.execute(
#             text("""
#                 UPDATE tiles 
#                 SET status = 'in_progress', worker_id = :worker_id 
#                 WHERE id = (
#                     SELECT id FROM tiles 
#                     WHERE annotation_class_id = 1 
#                     AND hasgt = 1 
#                     AND status = 'pending' 
#                     ORDER BY datetime DESC 
#                     LIMIT 1
#                 )
#                 RETURNING id, annotation_class_id, hasgt, datetime, status, worker_id
#             """),
#             {"worker_id": worker_id}
#         ).fetchone()

#         if tile:
#             return f"Worker {worker_id} claimed Tile {tile[0]}"
#         else:
#             return f"Worker {worker_id} found no tile"

# --- Worker function ---
def worker_function(worker_id):
    result = getWorkersTile(worker_id)
    print(result)
    return result

# --- Main function: spawn many workers concurrently ---
def main():
    num_workers = 200  # simulate an aggressive scenario with 200 concurrent workers
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(worker_function, worker_id) for worker_id in range(num_workers)]
        for future in as_completed(futures):
            results.append(future.result())
    
    print("\nSummary:")
    claimed = [r for r in results if "claimed Tile" in r]
    for res in results:
        print(res)
    print(f"\nTotal claimed tiles: {len(claimed)}")

if __name__ == "__main__":
    main()

Worker 0 claimed Tile 1
Worker 1 claimed Tile 2
Worker 2 claimed Tile 2
Worker 7 claimed Tile 3
Worker 5 claimed Tile 2
Worker 3 claimed Tile 2
Worker 4 claimed Tile 3
Worker 6 claimed Tile 3
Worker 8 claimed Tile 4
Worker 10 claimed Tile 3
Worker 12 claimed Tile 4
Worker 19 claimed Tile 5
Worker 13 claimed Tile 4
Worker 18 claimed Tile 5
Worker 11 claimed Tile 4
Worker 29 claimed Tile 6
Worker 17 claimed Tile 4
Worker 20 claimed Tile 5
Worker 14 claimed Tile 4
Worker 21 claimed Tile 5
Worker 9 claimed Tile 4
Worker 35 claimed Tile 7
Worker 32 claimed Tile 6
Worker 22 claimed Tile 5
Worker 25 claimed Tile 7
Worker 24 claimed Tile 6
Worker 36 claimed Tile 7
Worker 34 claimed Tile 6
Worker 42 claimed Tile 8
Worker 26 claimed Tile 5
Worker 38 claimed Tile 7
Worker 48 claimed Tile 9
Worker 16 claimed Tile 5
Worker 41 claimed Tile 9
Worker 44 claimed Tile 8
Worker 40 claimed Tile 8
Worker 46 claimed Tile 8
Worker 23 claimed Tile 5
Worker 15 claimed Tile 5
Worker 45 claimed Tile 8
Worker 53 

In [None]:
# from sqlalchemy import update, select
# from sqlalchemy.orm import aliased
# from sqlalchemy.orm import Session

# def update_tile(db_session: Session, worker_id: int):
#     # Subquery to find the tile that meets the condition
#     subquery = (
#         select(Tile.id)
#         .where(Tile.annotation_class_id == 1)
#         .where(Tile.hasgt == 1)
#         .where(Tile.status == 'pending')
#         .order_by(Tile.datetime.desc())
#         .limit(1)
#         .scalar_subquery()
#     )

#     # Perform the update
#     stmt = (
#         update(Tile)
#         .where(Tile.id == subquery)
#         .values(status='in_progress', worker_id=worker_id)
#         .returning(Tile.id, Tile.annotation_class_id, Tile.hasgt, Tile.datetime, Tile.status, Tile.worker_id)
#     )

#     # Execute the update and fetch the result
#     result = db_session.execute(stmt).fetchone()
    
#     return result
