Merge pull request #13 from asas-sn/normalization
Normalization
gonzodeveloper committed Jul 17, 2023
2 parents 474f711 + fae2b4d commit 3db0b91
Showing 7 changed files with 703 additions and 608 deletions.
33 changes: 20 additions & 13 deletions pyasassn/client.py
@@ -9,20 +9,19 @@
import re
import os
import io
import traceback
from time import sleep

-from .utils import LightCurveCollection
+from .collection import LightCurveCollection


class SkyPatrolClient:
"""
-    The SkyPatrolClient allows users to interact with the ASAS-SN Sky Patrol photometry database.
-    This client enables users to use ADQL, cone searches, random samples, and catalog ID lookups on the input catalogs.
+    The SkyPatrolClient allows users to interact with the ASAS-SN Sky Patrol photometry database.
+    This client enables users to use ADQL, cone searches, random samples, and catalog ID lookups on the input catalogs.
-    Queries to the input catalogs will either be returned as pandas DataFrames containing aggregate information
-    on astronomical targets, or they will be returned as LightCurveCollections containing photometry data from all
-    queried targets.
+    Queries to the input catalogs will either be returned as pandas DataFrames containing aggregate information
+    on astronomical targets, or they will be returned as LightCurveCollections containing photometry data from all
+    queried targets.
"""

def __init__(self, verbose=True):
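A minimal usage sketch of the client described in the docstring above. The cone_search method and its download flag are part of the pyasassn API, but treat the exact argument values here as assumptions:

from pyasassn.client import SkyPatrolClient

client = SkyPatrolClient()

# Aggregate target information comes back as a pandas DataFrame...
targets = client.cone_search(ra_deg=270.0, dec_deg=-88.0, radius=4.0, catalog="master_list")

# ...while download=True returns a LightCurveCollection holding the photometry itself.
lcs = client.cone_search(ra_deg=270.0, dec_deg=-88.0, radius=4.0, catalog="master_list", download=True)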
@@ -434,7 +433,13 @@ def _get_curves(
results = [
pool.apply_async(
self._get_lightcurve_chunk,
-                args=(query_hash, idx, catalog, save_dir, file_format,),
+                args=(
+                    query_hash,
+                    idx,
+                    catalog,
+                    save_dir,
+                    file_format,
+                ),
)
for idx in range(n_chunks)
]
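The hunk above only reformats how per-chunk download jobs are dispatched onto a multiprocessing pool. A self-contained sketch of the same pattern, with a hypothetical fetch_chunk standing in for self._get_lightcurve_chunk:

from multiprocessing import Pool

def fetch_chunk(query_hash, idx):
    # Stand-in for SkyPatrolClient._get_lightcurve_chunk: fetch one block of curves.
    return f"{query_hash}:{idx}"

if __name__ == "__main__":
    n_chunks = 4
    with Pool(processes=4) as pool:
        # Dispatch every chunk asynchronously, then block until all results arrive.
        results = [
            pool.apply_async(fetch_chunk, args=("abc123", idx))
            for idx in range(n_chunks)
        ]
        chunks = [r.get() for r in results]
    print(chunks)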
@@ -473,7 +478,7 @@ def _get_lightcurve_chunk(

# Loop through until we download or timeout
while success is False and timeout <= 5:
-            try:
+            try:
# Query API with (POST METHOD)
url = (
f"http://{self.block_servers[server_idx]}:9006/get_block/"
@@ -484,19 +489,21 @@
# Pandas dataframe
data = _deserialize(response.content)
# ID and count
-                id_col = "asas_sn_id" if catalog not in ["asteroids", "comets"] else "name"
+                id_col = (
+                    "asas_sn_id" if catalog not in ["asteroids", "comets"] else "name"
+                )
count = len(data[id_col].unique())

success = True
except:
sleep(timeout)
server_idx = (server_idx + 1) % n_servers

# Raise timeout if we've tried all servers once
if (server_idx % n_servers) == (block_idx % n_servers):
timeout += 1
-        # If download fails, raise exception
+
+        # If download fails, raise exception
if success is False:
raise TimeoutError("Lightcurve servers unavailable, try again later")

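The loop above rotates through the block servers on failure and raises the backoff by one second after each full rotation, giving up once the backoff exceeds five seconds. A minimal sketch of the same retry pattern, with a hypothetical download callable in place of the HTTP request:

from time import sleep

def download_with_rotation(servers, block_idx, download):
    """Try servers in turn; back off one second per full rotation, give up after 5."""
    n_servers = len(servers)
    server_idx = block_idx % n_servers
    timeout = 1
    while timeout <= 5:
        try:
            return download(servers[server_idx])
        except Exception:
            sleep(timeout)
            server_idx = (server_idx + 1) % n_servers
            # Completing a full pass over the servers raises the backoff by one second.
            if server_idx == block_idx % n_servers:
                timeout += 1
    raise TimeoutError("Lightcurve servers unavailable, try again later")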
194 changes: 194 additions & 0 deletions pyasassn/collection.py
@@ -0,0 +1,194 @@
from __future__ import division, print_function
import os
from astropy.timeseries import LombScargle
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from .lightcurve import LightCurve
from .utils import Vcams, gcams


class LightCurveCollection(object):
"""
Object for storing and analysing ASAS-SN Sky Patrol light curves.
Returned by any SkyPatrolClient query where download=True
"""

def __init__(self, data, catalog_info, id_col):
self.id_col = id_col
self.data = data
self.catalog_info = catalog_info

self.data["phot_filter"] = self.data.camera.apply(
lambda x: "V" if x in Vcams else "g" if x in gcams else None
)
self.data = self.data[self.data["phot_filter"].notna()]

def __repr__(self):
f = f"LightCurveCollection with {len(self)} light curves \n"
return f + self.catalog_info.__repr__()
# return f"LightCurveCollection with {len(self)} light curves"

def __getitem__(self, item):
        if isinstance(item, (pd.Series, list, np.ndarray)):
            # Vectorized membership test selects every curve whose ID appears in `item`.
            data = self.data[self.data[self.id_col].isin(item)]
            catalog_info = self.catalog_info[self.catalog_info[self.id_col].isin(item)]
            return LightCurveCollection(data, catalog_info, self.id_col)
else:
return self.__get_lc(item)

def __get_lc(self, key):
source = self.catalog_info[self.id_col] == key
meta = self.catalog_info[source]
data = self.data[self.data[self.id_col] == key]
return LightCurve(data, meta)

def __len__(self):
return len(self.catalog_info)

@property
def ids(self):
"""
Returns a list of all IDs in the collection.
:return: list of IDs
"""
return self.catalog_info[self.id_col].values

def apply_function(
self,
func,
col="mag",
include_non_det=False,
include_poor_images=False,
phot_filter="all",
):
"""
Apply a custom aggregate function to all light curves in the collection.
:param func: custom aggregate function
        :param col: column to apply the aggregate function to; defaults to 'mag'
:param include_non_det: whether or not to include non-detection events in analysis; defaults to False
        :param include_poor_images: whether or not to include images of poor or unknown quality; defaults to False
        :param phot_filter: specify the bandpass filter for photometry, either 'g', 'V', or 'all'; defaults to 'all'
:return: pandas Dataframe with results
"""

# Filter preferences for this function call only
data = self.data
if not include_non_det:
data = data[data["mag_err"] < 99]
if not include_poor_images:
data = data[data["quality"] == "G"]

# Filter by filter
if phot_filter == "g":
data = data[data["phot_filter"] == "g"]
elif phot_filter == "V":
data = data[data["phot_filter"] == "V"]
elif phot_filter == "all":
pass
else:
raise ValueError("phot_filter must be in ['g', 'V', 'all']")

return data.groupby(self.id_col).agg({col: func})

def stats(
self, include_non_det=False, include_poor_images=False, phot_filter="all"
):
"""
Calculate simple aggregate statistics on the collection.
Gets the mean and stddev magnitude for each curve as well as the total number of epochs observed.
:param include_poor_images: whether or not to include images of poor or unknown quality; defaults to False
:param include_non_det: whether or not to include non-detection events in analysis; defaults to False
        :param phot_filter: specify the bandpass filter for photometry, either 'g', 'V', or 'all'; defaults to 'all'
:return: pandas Dataframe with results
"""
# Filter preferences for this function call only
data = self.data
if not include_non_det:
data = data[data["mag_err"] < 99]
if not include_poor_images:
data = data[data["quality"] == "G"]

# Filter by filter
if phot_filter == "g":
data = data[data["phot_filter"] == "g"]
elif phot_filter == "V":
data = data[data["phot_filter"] == "V"]
elif phot_filter == "all":
pass
else:
raise ValueError("phot_filter must be in ['g', 'V', 'all']")

return data.groupby(self.id_col).agg(
mean_mag=("mag", "mean"), std_mag=("mag", "std"), epochs=("mag", "count")
)

def itercurves(self):
"""
Generator to iterate through all light curves in the collection.
:return: a generator that iterates over the collection
"""
for key, data in self.data.groupby(self.id_col):
source = self.catalog_info[self.id_col] == key
meta = self.catalog_info[source]
yield LightCurve(data, meta)

def save(self, save_dir, file_format="parquet", include_index=True):
"""
        Saves the entire light curve collection to a given directory.
:param save_dir: directory name
:param file_format: file format of saved objects ['parquet', 'csv', 'pickle']
:param include_index: whether or not to save index (catalog_info)
:return: a list of file names
"""
filenames = []
if file_format == "parquet":
if include_index:
self.catalog_info.to_parquet(os.path.join(save_dir, "index.parq"))
filenames.append("index.parq")
for lc in self.itercurves():
file = os.path.join(save_dir, f"{lc.meta[self.id_col].values[0]}.parq")
lc.save(file, file_format="parquet")
filenames.append(file)

elif file_format == "pickle":
if include_index:
self.catalog_info.to_pickle(os.path.join(save_dir, "index.pkl"))
filenames.append("index.pkl")
for lc in self.itercurves():
file = os.path.join(save_dir, f"{lc.meta[self.id_col].values[0]}.pkl")
lc.save(file, file_format="pickle")
filenames.append(file)

elif file_format == "csv":
if include_index:
self.catalog_info.to_csv(
os.path.join(save_dir, "index.csv"), index=False
)
filenames.append("index.csv")
for lc in self.itercurves():
file = os.path.join(save_dir, f"{lc.meta[self.id_col].values[0]}.csv")
lc.save(file, file_format="csv")
filenames.append(file)
else:
raise ValueError(
f"invalid format: '{file_format}' not in ['parquet', 'csv', 'pickle']"
)

return filenames

def merge(self, name):
"""
        Merge a LightCurveCollection or list of LightCurves into a single object.
Useful for solar system objects with multiple designations.
:param name: new name of the object
:return: LightCurve
"""
# Get all phot data
lcs_data = [lc.data for lc in self.itercurves()]

return LightCurve(data=pd.concat(lcs_data), meta=pd.DataFrame({"name": [name]}))
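A hedged usage sketch of the new collection API, assuming lcs is a LightCurveCollection returned by a SkyPatrolClient query with download=True:

import numpy as np

print(lcs.ids)  # all catalog IDs in the collection

# Mean/stddev magnitude and epoch counts, using only good-quality g-band detections.
summary = lcs.stats(phot_filter="g")

# Apply a custom aggregate to the 'mag' column, here a peak-to-peak amplitude.
amplitudes = lcs.apply_function(np.ptp, col="mag")

# Iterate individual LightCurve objects, or persist the whole collection to disk.
for lc in lcs.itercurves():
    print(lc.meta)
filenames = lcs.save("./curves", file_format="csv")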