In [9]:
import json
from pathlib import Path

import pandas as pd

from constants import RAW_DIR

In [10]:
# Load the raw light-curve records. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's locale default for text files.
with open(RAW_DIR / "Pallas_lc.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

In [4]:
# Display the raw record at index 1 to see the structure of a single entry.
raw_data[1]

{'LightCurve': {'id': '2',
  'asteroid_id': '101',
  'scale': '0',
  'filter': None,
  'points': '2435711.643685  9.818130e-01    -2.71504147   1.72278374  -0.98182916   -1.81102044   1.27146541  -0.98187312\n2435711.646018  9.728124e-01    -2.71505124   1.72277029  -0.98181912   -1.81101294   1.27148772  -0.98186308\n2435711.650685  9.936407e-01    -2.71507078   1.72274338  -0.98179905   -1.81099793   1.27153236  -0.98184299\n2435711.652685  9.845316e-01    -2.71507915   1.72273185  -0.98179044   -1.81099150   1.27155150  -0.98183438\n2435711.656351  9.945582e-01    -2.71509450   1.72271071  -0.98177467   -1.81097971   1.27158656  -0.98181860\n2435711.657351  9.991483e-01    -2.71509869   1.72270495  -0.98177037   -1.81097650   1.27159613  -0.98181430\n2435711.662351  1.000071e+00    -2.71511962   1.72267612  -0.98174886   -1.81096043   1.27164396  -0.98179278\n2435711.664351  1.004686e+00    -2.71512799   1.72266459  -0.98174025   -1.81095400   1.27166310  -0.98178417\n2435711.668685

In [5]:
# Keys used to navigate a raw record and its cleaned counterpart.
LIGHTCURVE = "LightCurve"
ID = "id"
SCALE = "scale"
POINTS = "points"
POINTS_COUNT = "points_count"
# Keys of the inner "LightCurve" mapping that are kept; every other key is dropped.
to_leave = [ID, SCALE, POINTS, "created", "modified", POINTS_COUNT]


def clean(raw_data: dict) -> dict:
    """Turn one raw light-curve record into a trimmed, typed dictionary.

    Args:
        raw_data: A single record holding a ``"LightCurve"`` mapping. The
            ``id``, ``scale`` and ``points_count`` entries must be convertible
            with ``int``; ``points`` must be a string with one point per line
            and whitespace-separated numeric columns.

    Returns:
        A new dict containing only the keys listed in ``to_leave``, where
        ``id`` is shifted to be zero-based, ``scale`` and ``points_count`` are
        ints, and ``points`` is a list of rows, each a list of floats. The
        input record is not modified.

    Raises:
        KeyError: If ``"LightCurve"`` or one of the converted keys is missing.
        ValueError: If a numeric field or a point column cannot be parsed.
    """
    record = raw_data[LIGHTCURVE]
    cleaned_data = {key: value for key, value in record.items() if key in to_leave}
    cleaned_data[ID] = int(cleaned_data[ID]) - 1  # id starts from 1, not 0
    cleaned_data[SCALE] = int(cleaned_data[SCALE])
    cleaned_data[POINTS_COUNT] = int(cleaned_data[POINTS_COUNT])
    # Parse in a single pass. Blank or whitespace-only lines are skipped so
    # they do not turn into empty rows that inflate the number of points.
    cleaned_data[POINTS] = [
        [float(column) for column in line.split()]
        for line in cleaned_data[POINTS].splitlines()
        if line.strip()
    ]
    return cleaned_data

In [6]:
# Clean every raw record.
cleaned_data = [clean(record) for record in raw_data]

# Collect the ids of offending light curves so that a failing check reports
# exactly which records are affected instead of only that something is wrong.
bad_point_counts = [lc[ID] for lc in cleaned_data if len(lc[POINTS]) != lc[POINTS_COUNT]]
assert not bad_point_counts, f"Wrong number of points in some light curve! ids: {bad_point_counts}"

nonzero_scale = [lc[ID] for lc in cleaned_data if lc[SCALE] != 0]
assert not nonzero_scale, f"Calibrated light curves detected! ids: {nonzero_scale}"

In [7]:
# Diagnostic pass: print the (zero-based) id of every cleaned light curve that
# violates one of the two checks.
for data in cleaned_data:
    # Number of parsed rows disagrees with the count declared in the record.
    if len(data[POINTS]) != data[POINTS_COUNT]:
        print("Points -", data[ID])

    # Scale is expected to be 0 for every light curve.
    if data[SCALE] != 0:
        print("Scale -", data[ID])

In [8]:
# Display the cleaned record at index 1 as a visual check of the conversion.
cleaned_data[1]

{'id': 1,
 'scale': 0,
 'points': [[2435711.643685,
   0.981813,
   -2.71504147,
   1.72278374,
   -0.98182916,
   -1.81102044,
   1.27146541,
   -0.98187312],
  [2435711.646018,
   0.9728124,
   -2.71505124,
   1.72277029,
   -0.98181912,
   -1.81101294,
   1.27148772,
   -0.98186308],
  [2435711.650685,
   0.9936407,
   -2.71507078,
   1.72274338,
   -0.98179905,
   -1.81099793,
   1.27153236,
   -0.98184299],
  [2435711.652685,
   0.9845316,
   -2.71507915,
   1.72273185,
   -0.98179044,
   -1.8109915,
   1.2715515,
   -0.98183438],
  [2435711.656351,
   0.9945582,
   -2.7150945,
   1.72271071,
   -0.98177467,
   -1.81097971,
   1.27158656,
   -0.9818186],
  [2435711.657351,
   0.9991483,
   -2.71509869,
   1.72270495,
   -0.98177037,
   -1.8109765,
   1.27159613,
   -0.9818143],
  [2435711.662351,
   1.000071,
   -2.71511962,
   1.72267612,
   -0.98174886,
   -1.81096043,
   1.27164396,
   -0.98179278],
  [2435711.664351,
   1.004686,
   -2.71512799,
   1.72266459,
   -0.98174025,
