In [1]:
import pandas

from IPython.display import clear_output
from matplotlib import pyplot
from panoptes_client import Classification, Panoptes

from datetime import datetime
from pathlib import Path

import json
import pytz
import time
import yaml

Broken libmagic installation detected. The python-magic module is installed but can't be imported. Please check that both python-magic and the libmagic shared library are installed correctly. Uploading media other than images may not work.


In [2]:
# Numeric ID of the Panoptes project this notebook reports on.
PROJECT_ID = 14770
# Session start as a timezone-aware UTC datetime; later cells keep only
# classifications and statistics created after this instant.
START_DATETIME = datetime(2022, 3, 7, 17, 38, tzinfo=pytz.UTC)
START_DATETIME

datetime.datetime(2022, 3, 7, 17, 38, tzinfo=<UTC>)

In [3]:
# Read the connection settings from the local YAML file and hand them to
# Panoptes.connect as keyword arguments.
env = yaml.safe_load(Path('env.yaml').read_text())
Panoptes.connect(**env)

<panoptes_client.panoptes.Panoptes at 0x1609a3c81f0>

## Pre-load the last classifications export

In [4]:
# Reuse the pickled classification cache when it exists; otherwise build it
# from the CSV export and write the pickle for subsequent runs.
try:
    previous_classifications = pandas.read_pickle('data/classifications.pkl')
except FileNotFoundError:
    exported = pandas.read_csv(
        "data/superwasp-black-hole-hunters-classifications.csv",
        converters={'annotations': json.loads},
        parse_dates=['created_at'],
    )
    exported = exported.set_index('classification_id')
    # A classification is "lensed" when its first annotation's value is 'Yes'.
    exported['lensed'] = exported['annotations'].apply(
        lambda annotation: annotation[0]['value'] == 'Yes'
    )
    # Only these three columns are carried forward into the cache.
    previous_classifications = exported[['lensed', 'subject_ids', 'created_at']]
    previous_classifications.to_pickle('data/classifications.pkl')

In [5]:
# Resume the session statistics saved by an earlier run when the pickle is
# present; otherwise start from an empty table with the expected columns.
try:
    session_stats = pandas.read_pickle('data/stats.pkl')
except FileNotFoundError:
    session_stats = pandas.DataFrame(
        {column: [] for column in ('timestamp', 'agreement', 'total')}
    )

## Catch up on classifications since the last export

In [6]:
# Poll Panoptes for classifications newer than the cached ones, fold them into
# the cache, recompute the session statistics and regenerate the plots and the
# HTML dashboard. The loop has no exit condition; it runs until interrupted.
MAX_CLASSIFICATIONS_PER_POLL = 1000  # stop fetching after this many per pass
POLL_INTERVAL_SECONDS = 10  # pause between passes

while True:
    new_rows = {
        'classification_id': [],
        'subject_ids': [],
        'lensed': [],
        'created_at': [],
    }
    for i, classification in enumerate(Classification.where(
        scope='project',
        project_id=PROJECT_ID,
        # Resume after the most recent classification already in the cache.
        last_id=previous_classifications.index[-1],
        page_size=100,
    ), start=1):
        # Store IDs as ints, matching the conversion applied to the subject ID.
        # NOTE(review): assumes classification.id is a numeric string like the
        # subject ID — confirm against the Panoptes client.
        new_rows['classification_id'].append(int(classification.id))
        new_rows['subject_ids'].append(int(classification.links.subjects[0].id))
        new_rows['lensed'].append(classification.annotations[0]['value'] in ('Yes', 0, '0'))
        new_rows['created_at'].append(classification.created_at)
        if i == MAX_CLASSIFICATIONS_PER_POLL:
            break

    new_classifications = pandas.DataFrame(new_rows).set_index('classification_id')
    if not new_classifications.empty:
        # utc=True guarantees timezone-aware timestamps, which the comparison
        # against the timezone-aware START_DATETIME below requires.
        new_classifications['created_at'] = pandas.to_datetime(
            new_classifications['created_at'], utc=True
        )
        previous_classifications = pandas.concat([previous_classifications, new_classifications])
        # Write to a temporary name and rename over the cache, so the cache
        # file is never left half-written.
        previous_classifications.to_pickle('data/classifications.pkl.new')
        Path('data/classifications.pkl.new').replace('data/classifications.pkl')

    session_classifications = previous_classifications[
        previous_classifications['created_at'].gt(START_DATETIME)
    ]
    if len(session_classifications) == 0:
        # Nothing has been classified in this session yet; try again later.
        time.sleep(POLL_INTERVAL_SECONDS)
        continue

    # Every classification (from any time) of the subjects seen this session.
    session_subjects = session_classifications['subject_ids'].drop_duplicates()
    all_subject_classifications = previous_classifications[
        previous_classifications['subject_ids'].isin(session_subjects)
    ]

    # One row per subject, one column per answer (True/False), cells = vote counts.
    aggregated_classifications = all_subject_classifications[['subject_ids', 'lensed']].pivot_table(
        columns=['lensed'],
        values='lensed',
        index='subject_ids',
        aggfunc=len,
        fill_value=0,
    )
    # The pivot only creates columns for answers that actually occur. Address
    # them by their boolean labels throughout rather than by 1.0/0.0.
    for answer in (True, False):
        if answer not in aggregated_classifications:
            aggregated_classifications[answer] = 0
    yes_votes = aggregated_classifications[True]
    no_votes = aggregated_classifications[False]

    # Consensus is "lensed" only on a strict majority of yes votes.
    aggregated_classifications['consensus'] = yes_votes > no_votes
    aggregated_classifications['total'] = yes_votes + no_votes
    # Percentage of votes that match the consensus answer for each subject.
    consensus_votes = yes_votes.where(aggregated_classifications['consensus'], no_votes)
    aggregated_classifications['agreement'] = (
        consensus_votes / aggregated_classifications['total']
    ) * 100

    mean_agreement = aggregated_classifications['agreement'].mean()

    session_stats = pandas.concat((
        session_stats,
        pandas.DataFrame({
            # Take the current time in UTC directly; localizing a naive local
            # time as UTC is wrong on any machine not running in UTC.
            'timestamp': [datetime.now(pytz.UTC)],
            'total': [len(session_classifications)],
            'agreement': [mean_agreement],
        })
    ))
    filtered_session_stats = session_stats[session_stats['timestamp'].gt(START_DATETIME)]
    filtered_session_stats.to_pickle('data/stats.pkl.new')
    Path('data/stats.pkl.new').replace('data/stats.pkl')

    # One time-series plot per statistic, saved next to the dashboard.
    for column in ('total', 'agreement'):
        axes = filtered_session_stats.plot(x='timestamp', y=column)
        axes.figure.savefig(f'output/{column}.png', dpi=200)
        pyplot.close(axes.figure)

    with open('output/dashboard.html', 'w') as dashboard:
        dashboard.write(f"""
            <!doctype html>
            <html>
            <head>
            <meta http-equiv="refresh" content="5" />
            <!-- CSS only -->
            <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
            <!-- JavaScript Bundle with Popper -->
            <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p" crossorigin="anonymous"></script>
            </head>
            <body>
            <div class="container">
                <div class="row justify-content-md-center mb-5">
                    <h1>Black Hole Hunters: British Science Week 2022</h1>
                </div>
                <div class="row justify-content-md-center">
                    <div class="col">
                    <h2>Total Classifications: {filtered_session_stats.iloc[-1]['total']:.0f}</h2>
                    <p><img src="total.png" style="width: 100%"></p>
                    </div>
                    <div class="col">
                    <h2>Agreement: {filtered_session_stats.iloc[-1]['agreement']:.1f}%</h2>
                    <p><img src="agreement.png" style="width: 100%"></p>
                    </div>
                </div>
                <div class="row justify-content-md-center mt-5">
                    <p><small>Last update: {datetime.now()}</small></p>
                    <p><small>Last classification: {previous_classifications.iloc[-1]['created_at']}</small></p>
                </div>
            </div>
            </body>
            </html>
        """)
    time.sleep(POLL_INTERVAL_SECONDS)

KeyboardInterrupt: 