In [None]:
from pathlib import Path

import ujson as json
import pandas

from swasputils import DATA_LOCATION
# Wrap the imported location in a Path so the `/` operator can be used to
# build file paths in the cells below.
DATA_LOCATION = Path(DATA_LOCATION)

In [None]:
# Load the classification export. The JSON-encoded columns are decoded while
# reading, `created_at` is parsed as dates, and rows are keyed by
# classification ID.
classifications_csv = DATA_LOCATION / 'superwasp-variable-stars-classifications.csv'
json_columns = {
    'annotations': json.loads,
    'subject_data': json.loads,
}
orig_classifications = pandas.read_csv(
    classifications_csv,
    converters=json_columns,
    parse_dates=['created_at'],
)
orig_classifications = orig_classifications.set_index('classification_id')

In [None]:
# Load the subject lookup table: a whitespace-delimited file with no header
# row, so the column names are assigned after reading.
zoo_subjects = pandas.read_csv(
    DATA_LOCATION / 'lookup.dat',
    # Regex separator; equivalent to the deprecated `delim_whitespace=True`.
    sep=r'\s+',
    header=None,
)
zoo_subjects.columns = [
    'subject_id',
    'SWASP ID',
    'Period',
    'Period Number',
]
# Period in this file is rounded differently to the others,
# so drop it here so it doesn't stop us from merging later.
# The `columns=` keyword is used because recent pandas releases no longer
# accept the axis as a positional argument to `drop`.
zoo_subjects = zoo_subjects.drop(columns='Period')

In [None]:
# Load the periodicity results: a whitespace-delimited file with no header
# row, so the column names are assigned after reading.
periodicity_results = pandas.read_csv(
    DATA_LOCATION / 'results_total.dat',
    # Regex separator; equivalent to the deprecated `delim_whitespace=True`.
    sep=r'\s+',
    header=None,
)
periodicity_results.columns = [
    'Camera Number',
    'SWASP',
    'ID',
    'Period Number',
    'Period',
    'Sigma',
    'Chi Squared',
    'Period Flag',
]
# The identifier is read as two separate fields; concatenate them into a
# single 'SWASP ID' column.
# NOTE(review): presumably this matches the 'SWASP ID' format in lookup.dat,
# which the later merge relies on -- confirm against the data files.
periodicity_results['SWASP ID'] = (
    periodicity_results['SWASP'] + periodicity_results['ID']
)

In [None]:
def _fold_image_name(row):
    """Build the folded light-curve image filename for one subject row."""
    return f"{row['SWASP ID']}_P{row['Period Number']}_fold.gif"


# Attach the period to every subject. No join keys are given, so pandas
# merges on the columns the two frames have in common.
period_columns = periodicity_results[['SWASP ID', 'Period', 'Period Number']]
zoo_subjects = (
    zoo_subjects
    .merge(period_columns)
    .assign(Filename=lambda frame: frame.apply(_fold_image_name, axis=1))
    .set_index('subject_id')
)

In [None]:
# Load the subject export, decoding the JSON-encoded `locations` column while
# reading, and key the rows by subject ID.
zoo_subject_export = pandas.read_csv(
    DATA_LOCATION / 'superwasp-variable-stars-subjects.csv',
    converters={'locations': json.loads},
).set_index('subject_id')
# Keep only subjects from workflow 17313. The explicit copy makes the filtered
# frame independent of the full export, so adding a column below does not
# trigger pandas' SettingWithCopyWarning.
zoo_subject_export = zoo_subject_export[
    zoo_subject_export['workflow_id'] == 17313.0
].copy()
# Each `locations` value is a mapping; the entry under key "0" is used as the
# subject's image URL.
zoo_subject_export['Image URL'] = zoo_subject_export['locations'].apply(
    lambda d: d["0"],
)

In [None]:
# Add the image URL to each subject by joining on the subject-ID index of
# both frames; only subjects present in both are kept.
image_urls = zoo_subject_export[['Image URL']]
zoo_subjects = zoo_subjects.merge(
    image_urls,
    how='inner',
    left_index=True,
    right_index=True,
)

In [None]:
# Restrict to classifications made in workflow 17313.
workflow_classifications = orig_classifications[
    orig_classifications['workflow_id'] == 17313
]
# Use the value of the first annotation as the classification. `assign`
# returns a new frame, so the column is never written onto a filtered slice
# (which would trigger pandas' SettingWithCopyWarning).
classifications = workflow_classifications.assign(
    classification=workflow_classifications['annotations'].apply(
        lambda d: d[0]['value'],
    ),
)[['classification', 'subject_ids']]

In [None]:
def _consensus_label(counts):
    """Return the consensus for one subject from its per-answer vote counts.

    Any 'Real' vote wins outright; otherwise at least three 'Junk' votes are
    needed for a 'Junk' consensus. With neither, the result is an empty string.
    """
    if counts['Real'] > 0:
        return 'Real'
    if counts['Junk'] >= 3:
        return 'Junk'
    return ''


# Count how many times each answer was given for every subject: one row per
# subject, one column per answer, zero where an answer was never given.
aggregated_classifications = classifications.pivot_table(
    index='subject_ids',
    columns=['classification'],
    values='classification',
    aggfunc=len,
    fill_value=0,
)
aggregated_classifications['consensus'] = aggregated_classifications.apply(
    _consensus_label,
    axis=1,
)

In [None]:
# Attach each subject's filename and image URL by joining on the index of
# both frames; only subjects present in both are kept.
subject_details = zoo_subjects[['Filename', 'Image URL']]
aggregated_classifications = aggregated_classifications.merge(
    subject_details,
    how='inner',
    left_index=True,
    right_index=True,
)

In [None]:
# Write the aggregated results to CSV, labelling the index column
# 'subject_id' in the output file.
output_csv = DATA_LOCATION / 'real-or-junk-aggregated-classifications.csv'
aggregated_classifications.to_csv(output_csv, index_label='subject_id')