In [None]:
from sqlalchemy import create_engine, desc
from idetect.model import db_url, Base, Article, Session, Status

# create an engine for the DB that db_url() points at (set in the docker.env file)
database_url = db_url()
engine = create_engine(database_url)
# bind the shared Session factory to that engine
Session.configure(bind=engine)

# emit the schema for every model registered on Base; tables that already exist are left alone
Base.metadata.create_all(engine)

In [None]:
# open a session from the Session factory configured above; the cells below reuse it
session = Session()

In [None]:
# grab the first Article returned by an unfiltered query
# (SQLAlchemy's first() gives None when the query has no rows)
a = session.query(Article).first()

In [None]:
# show the contents of the input URL list, one line of output per line of the file
with open('/home/idetect/data/input_urls.csv') as f:
    # iterate the file object directly so lines are streamed rather than
    # loaded into a list with readlines()
    for line in f:
        # each line still carries its own newline; strip it so print() does not
        # produce a blank line between every row
        print(line.rstrip("\n"))

In [None]:
# add two example articles to the database, committing after each one
new_articles = []
for example_url_id, example_url in enumerate(
    ["http://www.internal-displacement.org/", "http://datafordemocracy.org/"],
    start=1,
):
    new_article = Article(url=example_url, url_id=example_url_id, status=Status.NEW)
    session.add(new_article)
    session.commit()
    new_articles.append(new_article)

# keep the individual names that later cells refer to
article1, article2 = new_articles

In [None]:
# look up the latest version of the article with url_id 1 and show its status
latest_article1 = Article.get_latest_version(session, url_id=1)
latest_article1.status

In [None]:
# fetch the updated version of the second article added above and show its status
updated_article2 = article2.get_updated_version()
updated_article2.status

In [None]:
# latest version of every article, ordered by URL, shown as (description, status) pairs
latest_by_url = Article.select_latest_version(session).order_by(Article.url)
[(str(article), article.status) for article in latest_by_url.all()]

In [None]:
# every stored version of every article, most recently updated first
all_versions = session.query(Article).order_by(desc(Article.updated))
[
    (version.id, version.url_id, version.status, str(version.updated))
    for version in all_versions.all()
]

In [None]:
# articles whose latest version is in the PROCESSED state, ordered by URL
processed_latest = (
    Article.select_latest_version(session)
    .filter(Article.status == Status.PROCESSED)
    .order_by(Article.url)
)
[
    (str(article), article.status, str(article.updated))
    for article in processed_latest.all()
]

In [None]:
from idetect.worker import Worker
from idetect.scraper import scrape

In [None]:
# build a worker that looks for NEW articles and hands them to `scrape`
# NOTE(review): the other three statuses look like the in-progress / success /
# failure states the worker moves an article through — confirm against Worker
worker = Worker(
    Status.NEW,
    Status.SCRAPING,
    Status.SCRAPED,
    Status.SCRAPING_FAILED,
    scrape,
    engine,
)

In [None]:
# run the worker defined above a single time
worker.work()

# NOTE: if the workers container is running, the workers inside it will compete with this one
# for the same articles, so start only the services needed here with
# `docker compose up notebooks localdb` instead of a plain `docker compose up`