In [1]:
import json
from pathlib import Path

import pandas as pd

from constants import RAW_DIR
from src.model import Asteroid, LightCurve

In [2]:
# Load the exported light-curve records for Pallas.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding (JSON interchange text is UTF-8).
with open(RAW_DIR / "Pallas" / "lc.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

In [3]:
# Peek at the first raw record to see which fields the export provides.
# NOTE(review): this displays the whole record, including the full multi-line
# 'points' string, so the rendered output is very long.
raw_data[0]

{'LightCurve': {'id': '1',
  'asteroid_id': '101',
  'scale': '0',
  'filter': None,
  'points': '2433827.771536  9.882148e-01    -1.52407483   2.56166596  -1.65498099   -1.37631303   1.55577772  -1.65510792\n2433827.773953  9.665995e-01    -1.52409281   2.56165996  -1.65497547   -1.37629054   1.55577761  -1.65510240\n2433827.783286  9.621180e-01    -1.52416223   2.56163678  -1.65495417   -1.37620371   1.55577719  -1.65508110\n2433827.790454  9.550239e-01    -1.52421554   2.56161898  -1.65493781   -1.37613703   1.55577688  -1.65506473\n2433827.795662  9.479908e-01    -1.52425428   2.56160604  -1.65492593   -1.37608857   1.55577666  -1.65505285\n2433827.803037  9.470855e-01    -1.52430913   2.56158772  -1.65490909   -1.37601996   1.55577637  -1.65503601\n2433827.809745  9.357856e-01    -1.52435902   2.56157106  -1.65489378   -1.37595756   1.55577612  -1.65502070\n2433827.816038  9.280344e-01    -1.52440582   2.56155543  -1.65487942   -1.37589902   1.55577589  -1.65500633\n2433827.822538

In [8]:
# Build a LightCurve from the first record by unpacking the dictionary stored
# under its "LightCurve" key as keyword arguments.
# NOTE(review): LightCurve comes from src.model; whatever parsing/validation it
# applies to the string fields is not visible in this notebook.
lc = LightCurve(**raw_data[0]["LightCurve"])

In [10]:
# Wrap the single light curve in an Asteroid and display it; id and name are
# hard-coded here rather than read from the data.
# NOTE(review): 101 matches the 'asteroid_id' shown for the first raw record —
# confirm it holds for every record in the file.
Asteroid(
    id=101,
    name="Pallas",
    lightcurves=[lc],
)

Asteroid(id=101, name='Pallas', lightcurves=[LightCurve(id=1, scale=0, points=[[2433827.771536, 0.9882148, -1.52407483, 2.56166596, -1.65498099, -1.37631303, 1.55577772, -1.65510792], [2433827.773953, 0.9665995, -1.52409281, 2.56165996, -1.65497547, -1.37629054, 1.55577761, -1.6551024], [2433827.783286, 0.962118, -1.52416223, 2.56163678, -1.65495417, -1.37620371, 1.55577719, -1.6550811], [2433827.790454, 0.9550239, -1.52421554, 2.56161898, -1.65493781, -1.37613703, 1.55577688, -1.65506473], [2433827.795662, 0.9479908, -1.52425428, 2.56160604, -1.65492593, -1.37608857, 1.55577666, -1.65505285], [2433827.803037, 0.9470855, -1.52430913, 2.56158772, -1.65490909, -1.37601996, 1.55577637, -1.65503601], [2433827.809745, 0.9357856, -1.52435902, 2.56157106, -1.65489378, -1.37595756, 1.55577612, -1.6550207], [2433827.816038, 0.9280344, -1.52440582, 2.56155543, -1.65487942, -1.37589902, 1.55577589, -1.65500633], [2433827.822538, 0.9314326, -1.52445417, 2.56153928, -1.65486458, -1.37583855, 1.5557

In [6]:
# Display the points attribute of the LightCurve bound to `lc`.
# NOTE(review): this cell's execution count (6) is lower than that of the cell
# assigning `lc` (8), so the notebook was run out of order — verify it still
# works with Restart Kernel -> Run All.
lc.points

[[2433827.771536,
  0.9882148,
  -1.52407483,
  2.56166596,
  -1.65498099,
  -1.37631303,
  1.55577772,
  -1.65510792],
 [2433827.773953,
  0.9665995,
  -1.52409281,
  2.56165996,
  -1.65497547,
  -1.37629054,
  1.55577761,
  -1.6551024],
 [2433827.783286,
  0.962118,
  -1.52416223,
  2.56163678,
  -1.65495417,
  -1.37620371,
  1.55577719,
  -1.6550811],
 [2433827.790454,
  0.9550239,
  -1.52421554,
  2.56161898,
  -1.65493781,
  -1.37613703,
  1.55577688,
  -1.65506473],
 [2433827.795662,
  0.9479908,
  -1.52425428,
  2.56160604,
  -1.65492593,
  -1.37608857,
  1.55577666,
  -1.65505285],
 [2433827.803037,
  0.9470855,
  -1.52430913,
  2.56158772,
  -1.65490909,
  -1.37601996,
  1.55577637,
  -1.65503601],
 [2433827.809745,
  0.9357856,
  -1.52435902,
  2.56157106,
  -1.65489378,
  -1.37595756,
  1.55577612,
  -1.6550207],
 [2433827.816038,
  0.9280344,
  -1.52440582,
  2.56155543,
  -1.65487942,
  -1.37589902,
  1.55577589,
  -1.65500633],
 [2433827.822538,
  0.9314326,
  -1.52445417

In [5]:
# Keys used in the raw JSON export; every record keeps its fields under "LightCurve".
LIGHTCURVE = "LightCurve"
ID = "id"
SCALE = "scale"
POINTS = "points"
POINTS_COUNT = "points_count"
# Fields retained by clean(); any other key of the raw record is dropped.
to_leave = [ID, SCALE, POINTS, "created", "modified", POINTS_COUNT]


def clean(raw_data: dict) -> dict:
    """Convert one raw light-curve record into a dictionary with typed values.

    Args:
        raw_data: A single record from the export. Its fields are read from
            ``raw_data["LightCurve"]``; the input itself is not modified.

    Returns:
        A new dictionary holding only the keys listed in ``to_leave``, where

        * ``"id"`` is an ``int`` shifted to be zero-based,
        * ``"scale"`` and ``"points_count"`` are ``int``,
        * ``"points"`` is a list of rows, each row a list of ``float`` parsed
          from one whitespace-separated line of the original text.

    Raises:
        KeyError: If ``"LightCurve"``, ``"id"``, ``"scale"``, ``"points"`` or
            ``"points_count"`` is missing.
        ValueError: If a numeric field or a point token cannot be converted.
    """
    record = raw_data[LIGHTCURVE]
    cleaned_data = {key: value for key, value in record.items() if key in to_leave}

    cleaned_data[ID] = int(cleaned_data[ID]) - 1  # id starts from 1, not 0
    cleaned_data[SCALE] = int(cleaned_data[SCALE])
    cleaned_data[POINTS_COUNT] = int(cleaned_data[POINTS_COUNT])

    # Tokenise line by line. Blank or whitespace-only lines produce no tokens
    # and are skipped, so they cannot show up as empty rows and distort the
    # number of parsed points.
    tokenised_lines = (line.split() for line in cleaned_data[POINTS].splitlines())
    cleaned_data[POINTS] = [
        [float(token) for token in tokens]
        for tokens in tokenised_lines
        if tokens
    ]
    return cleaned_data

In [6]:
# Run every raw record through clean(), then check two invariants of the result.
cleaned_data = [clean(record) for record in raw_data]

# Each light curve must hold exactly as many parsed points as it declares.
assert all(
    len(entry[POINTS]) == entry[POINTS_COUNT] for entry in cleaned_data
), "Wrong number of points in some light curve!"

# Every light curve is expected to have scale 0; per the message below, any
# other value is treated as a calibrated curve.
assert all(
    entry[SCALE] == 0 for entry in cleaned_data
), "Calibrated light curves detected!"

In [7]:
# Diagnostic pass: print the id (zero-based, as produced by clean) of every
# light curve that violates one of the invariants, so the offending records
# can be located.
for data in cleaned_data:
    # Parsed point count disagrees with the declared count.
    if len(data[POINTS]) != data[POINTS_COUNT]:
        print("Points -", data[ID])

    # Scale differs from the expected value 0.
    if data[SCALE] != 0:
        print("Scale -", data[ID])