In [None]:
import json
from sqlalchemy import func, select, sql
import database
import logbook

In [None]:
# Experiment configuration shared by the cells below.
exp_name = "11_find_performance"  # name handed to logbook.Experiment
project_file = "data/projects-2016-10-14-dedup.json"  # parsed one JSON record per line
out_file = "fa_ga_transitions.csv"  # CSV name, resolved through exp.get_filename()

In [None]:
# Create the experiment handle for `exp_name` and fetch its logger.
# NOTE(review): `logbook` is presumably a project-local experiment helper
# (it exposes Experiment/get_logger/get_filename) rather than the PyPI
# "Logbook" package -- confirm.
exp = logbook.Experiment(exp_name)
log = exp.get_logger()

In [None]:
# Build the project_id -> project_name lookup from the project dump, which is
# parsed as one JSON object per line. If an id appears more than once, the
# last record wins.
log.info("Loading project info")
project_names = {}
with open(project_file, "rb") as project_fh:
    records = (json.loads(line) for line in project_fh)
    project_names.update(
        (record["project_id"], record["project_name"]) for record in records
    )

In [None]:
def _count_transitions(conn, ratings, quality, incoming):
    """Count rating changes into or out of ``quality``, grouped by project.

    Changes that stay inside the GA/FA pair (e.g. GA -> FA) are never
    counted, and rows whose ``added_ga`` flag is set are skipped.
    NOTE(review): the meaning of ``added_ga`` is not visible from this file;
    the filter is carried over unchanged from the original queries -- confirm.

    Args:
        conn: Open SQLAlchemy connection used to execute the query.
        ratings: The ratings ``Table`` (needs the columns ``project_id``,
            ``internal_id``, ``old_quality``, ``new_quality``, ``added_ga``).
        quality: Quality class to match, e.g. ``'GA-Class'``.
        incoming: If true, count rows whose ``new_quality`` equals
            ``quality``; otherwise count rows whose ``old_quality`` does.

    Returns:
        dict mapping project_id to the number of matching rows. Projects
        with no matching rows are absent from the dict.
    """
    if incoming:
        matched = ratings.c.new_quality
        other = ratings.c.old_quality
        # Incoming counts also ignore rows with an empty old_quality.
        # NOTE(review): presumably '' marks a first assessment -- confirm.
        excluded = ['GA-Class', 'FA-Class', '']
    else:
        matched = ratings.c.old_quality
        other = ratings.c.new_quality
        excluded = ['GA-Class', 'FA-Class']
    stmt = select([ratings.c.project_id, func.count(ratings.c.internal_id)]) \
        .where(sql.and_(
            matched == quality,
            ~other.in_(excluded),
            ~ratings.c.added_ga)) \
        .group_by(ratings.c.project_id)
    return {project_id: num for project_id, num in conn.execute(stmt)}


# Query the four per-project transition counts and export them as CSV.
log.info("Querying database")
ratings = database.schema.Rating.__table__
conn = database.engine.connect()
try:
    # Transitions into GA from outside the GA/FA pair.
    log.info("  Counting GA transitions")
    to_ga = _count_transitions(conn, ratings, 'GA-Class', incoming=True)
    # Transitions out of GA to something outside the GA/FA pair. These are
    # reported in their own column, not subtracted from to_ga.
    log.info("  Counting transitions from GA")
    from_ga = _count_transitions(conn, ratings, 'GA-Class', incoming=False)
    # Transitions into FA from outside the GA/FA pair.
    log.info("  Counting FA transitions")
    to_fa = _count_transitions(conn, ratings, 'FA-Class', incoming=True)
    # Transitions out of FA to something outside the GA/FA pair.
    log.info("  Counting transitions from FA")
    from_fa = _count_transitions(conn, ratings, 'FA-Class', incoming=False)

    log.info("Writing output")
    # Text mode: the rows are written as str, which a binary-mode ("wb")
    # handle rejects with TypeError on Python 3.
    with open(exp.get_filename(out_file), "w") as f:
        f.write("project_id,to_ga,to_fa,from_ga,from_fa\n")
        # One row per known project; a project missing from a result set
        # had no matching transitions, so it is written as 0.
        for project_id in sorted(project_names):
            f.write("%d,%d,%d,%d,%d\n" % (
                project_id,
                to_ga.get(project_id, 0),
                to_fa.get(project_id, 0),
                from_ga.get(project_id, 0),
                from_fa.get(project_id, 0)))
    log.info("Completed successfully")
finally:
    conn.close()