Merge pull request #13 from asas-sn/normalization
Normalization
gonzodeveloper committed Jul 17, 2023
2 parents 474f711 + fae2b4d commit 3db0b91
Showing 7 changed files with 703 additions and 608 deletions.
33 changes: 20 additions & 13 deletions pyasassn/client.py
@@ -9,20 +9,19 @@
import re
import os
import io
import traceback
from time import sleep

-from .utils import LightCurveCollection
+from .collection import LightCurveCollection


class SkyPatrolClient:
"""
-    The SkyPatrolClient allows users to interact with the ASAS-SN Sky Patrol photometry database.
-    This client enables users to use ADQL, cone searches, random samples, and catalog ID lookups on the input catalogs.
+    The SkyPatrolClient allows users to interact with the ASAS-SN Sky Patrol photometry database.
+    This client enables users to use ADQL, cone searches, random samples, and catalog ID lookups on the input catalogs.
-    Queries to the input catalogs will either be returned as pandas DataFrames containing aggregate information
-    on astronomical targets, or they will be returned as LightCurveCollections containing photometry data from all
-    queried targets.
+    Queries to the input catalogs will either be returned as pandas DataFrames containing aggregate information
+    on astronomical targets, or they will be returned as LightCurveCollections containing photometry data from all
+    queried targets.
"""

def __init__(self, verbose=True):
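A minimal usage sketch of the client described in the docstring above. The cone_search method and its download flag are part of the pyasassn API, but treat the exact argument values here as assumptions:

from pyasassn.client import SkyPatrolClient

client = SkyPatrolClient()

# Aggregate target information comes back as a pandas DataFrame...
targets = client.cone_search(ra_deg=270.0, dec_deg=-88.0, radius=4.0, catalog="master_list")

# ...while download=True returns a LightCurveCollection holding the photometry itself.
lcs = client.cone_search(ra_deg=270.0, dec_deg=-88.0, radius=4.0, catalog="master_list", download=True)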
@@ -434,7 +433,13 @@ def _get_curves(
results = [
pool.apply_async(
self._get_lightcurve_chunk,
-                args=(query_hash, idx, catalog, save_dir, file_format,),
+                args=(
+                    query_hash,
+                    idx,
+                    catalog,
+                    save_dir,
+                    file_format,
+                ),
)
for idx in range(n_chunks)
]
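The hunk above only reformats how per-chunk download jobs are dispatched onto a multiprocessing pool. A self-contained sketch of the same pattern, with a hypothetical fetch_chunk standing in for self._get_lightcurve_chunk:

from multiprocessing import Pool

def fetch_chunk(query_hash, idx):
    # Stand-in for SkyPatrolClient._get_lightcurve_chunk: fetch one block of curves.
    return f"{query_hash}:{idx}"

if __name__ == "__main__":
    n_chunks = 4
    with Pool(processes=4) as pool:
        # Dispatch every chunk asynchronously, then block until all results arrive.
        results = [
            pool.apply_async(fetch_chunk, args=("abc123", idx))
            for idx in range(n_chunks)
        ]
        chunks = [r.get() for r in results]
    print(chunks)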
@@ -473,7 +478,7 @@ def _get_lightcurve_chunk(

# Loop through until we download or timeout
while success is False and timeout <= 5:
-            try:
+            try:
# Query API with (POST METHOD)
url = (
f"http://{self.block_servers[server_idx]}:9006/get_block/"
@@ -484,19 +489,21 @@
# Pandas dataframe
data = _deserialize(response.content)
# ID and count
-                id_col = "asas_sn_id" if catalog not in ["asteroids", "comets"] else "name"
+                id_col = (
+                    "asas_sn_id" if catalog not in ["asteroids", "comets"] else "name"
+                )
count = len(data[id_col].unique())

success = True
except:
sleep(timeout)
server_idx = (server_idx + 1) % n_servers

# Raise timeout if we've tried all servers once
if (server_idx % n_servers) == (block_idx % n_servers):
timeout += 1
-        # If download fails, raise exception
+
+        # If download fails, raise exception
if success is False:
raise TimeoutError("Lightcurve servers unavailable, try again later")

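The loop above rotates through the block servers on failure and raises the backoff by one second after each full rotation, giving up once the backoff exceeds five seconds. A minimal sketch of the same retry pattern, with a hypothetical download callable in place of the HTTP request:

from time import sleep

def download_with_rotation(servers, block_idx, download):
    """Try servers in turn; back off one second per full rotation, give up after 5."""
    n_servers = len(servers)
    server_idx = block_idx % n_servers
    timeout = 1
    while timeout <= 5:
        try:
            return download(servers[server_idx])
        except Exception:
            sleep(timeout)
            server_idx = (server_idx + 1) % n_servers
            # Completing a full pass over the servers raises the backoff by one second.
            if server_idx == block_idx % n_servers:
                timeout += 1
    raise TimeoutError("Lightcurve servers unavailable, try again later")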
194 changes: 194 additions & 0 deletions pyasassn/collection.py
@@ -0,0 +1,194 @@
from __future__ import division, print_function
import os
from astropy.timeseries import LombScargle
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from .lightcurve import LightCurve
from .utils import Vcams, gcams


class LightCurveCollection(object):
"""
Object for storing and analysing ASAS-SN Sky Patrol light curves.
Returned by any SkyPatrolClient query where download=True
"""

def __init__(self, data, catalog_info, id_col):
self.id_col = id_col
self.data = data
self.catalog_info = catalog_info

self.data["phot_filter"] = self.data.camera.apply(
lambda x: "V" if x in Vcams else "g" if x in gcams else None
)
self.data = self.data[self.data["phot_filter"].notna()]

def __repr__(self):
f = f"LightCurveCollection with {len(self)} light curves \n"
return f + self.catalog_info.__repr__()
# return f"LightCurveCollection with {len(self)} light curves"

def __getitem__(self, item):
        if isinstance(item, (pd.Series, list, np.ndarray)):
            # Vectorized membership test selects every curve whose ID appears in `item`.
            data = self.data[self.data[self.id_col].isin(item)]
            catalog_info = self.catalog_info[self.catalog_info[self.id_col].isin(item)]
            return LightCurveCollection(data, catalog_info, self.id_col)
else:
return self.__get_lc(item)

def __get_lc(self, key):
source = self.catalog_info[self.id_col] == key
meta = self.catalog_info[source]
data = self.data[self.data[self.id_col] == key]
return LightCurve(data, meta)

def __len__(self):
return len(self.catalog_info)

@property
def ids(self):
"""
Returns a list of all IDs in the collection.
:return: list of IDs
"""
return self.catalog_info[self.id_col].values

def apply_function(
self,
func,
col="mag",
include_non_det=False,
include_poor_images=False,
phot_filter="all",
):
"""
Apply a custom aggregate function to all light curves in the collection.
:param func: custom aggregate function
        :param col: column to apply the aggregate function to; defaults to 'mag'
:param include_non_det: whether or not to include non-detection events in analysis; defaults to False
        :param include_poor_images: whether or not to include images of poor or unknown quality; defaults to False
        :param phot_filter: specify the bandpass filter for photometry, either 'g', 'V', or 'all'; defaults to 'all'
:return: pandas Dataframe with results
"""

# Filter preferences for this function call only
data = self.data
if not include_non_det:
data = data[data["mag_err"] < 99]
if not include_poor_images:
data = data[data["quality"] == "G"]

# Filter by filter
if phot_filter == "g":
data = data[data["phot_filter"] == "g"]
elif phot_filter == "V":
data = data[data["phot_filter"] == "V"]
elif phot_filter == "all":
pass
else:
raise ValueError("phot_filter must be in ['g', 'V', 'all']")

return data.groupby(self.id_col).agg({col: func})

def stats(
self, include_non_det=False, include_poor_images=False, phot_filter="all"
):
"""
Calculate simple aggregate statistics on the collection.
Gets the mean and stddev magnitude for each curve as well as the total number of epochs observed.
:param include_poor_images: whether or not to include images of poor or unknown quality; defaults to False
:param include_non_det: whether or not to include non-detection events in analysis; defaults to False
        :param phot_filter: specify the bandpass filter for photometry, either 'g', 'V', or 'all'; defaults to 'all'
:return: pandas Dataframe with results
"""
# Filter preferences for this function call only
data = self.data
if not include_non_det:
data = data[data["mag_err"] < 99]
if not include_poor_images:
data = data[data["quality"] == "G"]

# Filter by filter
if phot_filter == "g":
data = data[data["phot_filter"] == "g"]
elif phot_filter == "V":
data = data[data["phot_filter"] == "V"]
elif phot_filter == "all":
pass
else:
raise ValueError("phot_filter must be in ['g', 'V', 'all']")

return data.groupby(self.id_col).agg(
mean_mag=("mag", "mean"), std_mag=("mag", "std"), epochs=("mag", "count")
)

def itercurves(self):
"""
Generator to iterate through all light curves in the collection.
:return: a generator that iterates over the collection
"""
for key, data in self.data.groupby(self.id_col):
source = self.catalog_info[self.id_col] == key
meta = self.catalog_info[source]
yield LightCurve(data, meta)

def save(self, save_dir, file_format="parquet", include_index=True):
"""
        Saves the entire light curve collection to a given directory.
:param save_dir: directory name
:param file_format: file format of saved objects ['parquet', 'csv', 'pickle']
:param include_index: whether or not to save index (catalog_info)
:return: a list of file names
"""
filenames = []
if file_format == "parquet":
if include_index:
self.catalog_info.to_parquet(os.path.join(save_dir, "index.parq"))
filenames.append("index.parq")
for lc in self.itercurves():
file = os.path.join(save_dir, f"{lc.meta[self.id_col].values[0]}.parq")
lc.save(file, file_format="parquet")
filenames.append(file)

elif file_format == "pickle":
if include_index:
self.catalog_info.to_pickle(os.path.join(save_dir, "index.pkl"))
filenames.append("index.pkl")
for lc in self.itercurves():
file = os.path.join(save_dir, f"{lc.meta[self.id_col].values[0]}.pkl")
lc.save(file, file_format="pickle")
filenames.append(file)

elif file_format == "csv":
if include_index:
self.catalog_info.to_csv(
os.path.join(save_dir, "index.csv"), index=False
)
filenames.append("index.csv")
for lc in self.itercurves():
file = os.path.join(save_dir, f"{lc.meta[self.id_col].values[0]}.csv")
lc.save(file, file_format="csv")
filenames.append(file)
else:
raise ValueError(
f"invalid format: '{file_format}' not in ['parquet', 'csv', 'pickle']"
)

return filenames

def merge(self, name):
"""
        Merge a LightCurveCollection or list of LightCurves into a single object.
Useful for solar system objects with multiple designations.
:param name: new name of the object
:return: LightCurve
"""
# Get all phot data
lcs_data = [lc.data for lc in self.itercurves()]

return LightCurve(data=pd.concat(lcs_data), meta=pd.DataFrame({"name": [name]}))
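A hedged usage sketch of the new collection API, assuming lcs is a LightCurveCollection returned by a SkyPatrolClient query with download=True:

import numpy as np

print(lcs.ids)  # all catalog IDs in the collection

# Mean/stddev magnitude and epoch counts, using only good-quality g-band detections.
summary = lcs.stats(phot_filter="g")

# Apply a custom aggregate to the 'mag' column, here a peak-to-peak amplitude.
amplitudes = lcs.apply_function(np.ptp, col="mag")

# Iterate individual LightCurve objects, or persist the whole collection to disk.
for lc in lcs.itercurves():
    print(lc.meta)
filenames = lcs.save("./curves", file_format="csv")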