astrolabsoftware · JulienPeloton · Jun 7, 2022 · Apr 13, 2022 · Apr 13, 2022 · Apr 13, 2022
diff --git a/apps/api/api.py b/apps/api/api.py
@@ -22,13 +22,15 @@
 
 from apps.api.doc import api_doc_summary, api_doc_object, api_doc_explorer
 from apps.api.doc import api_doc_latests, api_doc_sso, api_doc_tracklets
-from apps.api.doc import api_doc_cutout, api_doc_xmatch, api_doc_bayestar, api_doc_stats
+from apps.api.doc import api_doc_cutout, api_doc_xmatch, api_doc_bayestar
+from apps.api.doc import api_doc_stats, api_doc_random
 
 from apps.api.utils import return_object_pdf, return_explorer_pdf
 from apps.api.utils import return_latests_pdf, return_sso_pdf
 from apps.api.utils import return_tracklet_pdf, format_and_send_cutout
 from apps.api.utils import perform_xmatch, return_bayestar_pdf
 from apps.api.utils import return_statistics_pdf, send_data
+from apps.api.utils import return_random_pdf
 
 import io
 import requests
@@ -168,6 +170,17 @@ def layout(is_mobile):
                                     ),
                                 ], label="Statistics"
                             ),
+                            dbc.Tab(
+                                [
+                                    dbc.Card(
+                                        dbc.CardBody(
+                                            dcc.Markdown(api_doc_random)
+                                        ), style={
+                                            'backgroundColor': 'rgb(248, 248, 248, .7)'
+                                        }
+                                    ),
+                                ], label="Random objects"
+                            ),
                         ]
                     )
                 ], className="mb-8", fluid=True, style={'width': width}
@@ -434,11 +447,39 @@ def layout(is_mobile):
         'required': True,
         'description': 'Observing date. This can be either a given night (YYYYMMDD), month (YYYYMM), year (YYYY), or eveything (empty string)'
     },
+    {
+        'name': 'columns',
+        'required': False,
+        'description': 'Comma-separated data columns to transfer. Default is all columns.'
+    },
+    {
+        'name': 'output-format',
+        'required': False,
+        'description': 'Output format among json[default], csv, parquet, votable'
+    }
+]
+
+args_random = [
+    {
+        'name': 'n',
+        'required': True,
+        'description': 'Number of objects to return. Maximum is 16 for performance.'
+    },
     {
         'name': 'columns',
         'required': False,
         'description': 'Comma-separated data columns to transfer. Default is all columns. See {}/api/v1/columns for more information.'.format(APIURL)
     },
+    {
+        'name': 'class',
+        'required': False,
+        'description': 'Fink derived class. Default is empty string, namely all classes are considered. See {}/api/v1/classes for more information'.format(APIURL)
+    },
+    {
+        'name': 'seed',
+        'required': False,
+        'description': 'Seed number for random number generator. By default, the seed is not fixed.'
+    },
     {
         'name': 'output-format',
         'required': False,
@@ -829,3 +870,52 @@ def return_statistics(payload=None):
 
     output_format = payload.get('output-format', 'json')
     return send_data(pdf, output_format)
+
+@api_bp.route('/api/v1/random', methods=['GET'])
+def return_random_arguments():
+    """ Obtain information about retrieving random object data
+
+    Without query parameters, the list of arguments (`args_random`) is
+    returned as JSON. Otherwise, the query parameters are used as payload
+    for `return_random`.
+    """
+    if len(request.args) > 0:
+        # POST from query URL
+        return return_random(payload=request.args)
+    else:
+        return jsonify({'args': args_random})
+
+@api_bp.route('/api/v1/random', methods=['POST'])
+def return_random(payload=None):
+    """ Retrieve random object data from the Fink database
+
+    Parameters
+    ----------
+    payload: dict-like, optional
+        Arguments of the query. If None (default), the JSON body of the
+        current request is used.
+    """
+    # get payload from the JSON
+    if payload is None:
+        # A missing or invalid JSON body is treated as an empty payload,
+        # so that the check below reports the missing required arguments.
+        payload = request.get_json(silent=True) or {}
+
+    # Check all required args are here
+    required_args = [i['name'] for i in args_random if i['required'] is True]
+    for required_arg in required_args:
+        if required_arg not in payload:
+            rep = {
+                'status': 'error',
+                'text': "A value for `{}` is required. Use GET to check arguments.\n".format(required_arg)
+            }
+            return Response(str(rep), 400)
+
+    pdf = return_random_pdf(payload)
+
+    # Error propagation
+    if isinstance(pdf, Response):
+        return pdf
+
+    output_format = payload.get('output-format', 'json')
+    return send_data(pdf, output_format)
diff --git a/apps/api/doc.py b/apps/api/doc.py
@@ -34,9 +34,10 @@
 | POST/GET | {}/api/v1/xmatch | Cross-match user-defined catalog with Fink alert data| &#x2611;&#xFE0F; |
 | POST/GET | {}/api/v1/bayestar | Cross-match LIGO/Virgo sky map with Fink alert data| &#x2611;&#xFE0F; |
 | POST/GET | {}/api/v1/statistics | Statistics concerning Fink alert data| &#x2611;&#xFE0F; |
+| POST/GET | {}/api/v1/random | Draw random objects from the Fink database| &#x2611;&#xFE0F; |
 | GET  | {}/api/v1/classes  | Display all Fink derived classification | &#x2611;&#xFE0F; |
 | GET  | {}/api/v1/columns  | Display all available alert fields and their type | &#x2611;&#xFE0F; |
-""".format(APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL)
+""".format(APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL, APIURL)
 
 api_doc_object = """
 ## Retrieve object data
@@ -1080,3 +1081,66 @@
 
 All other fields starting with `class:` are crossmatch from the SIMBAD database.
 """.format(pd.DataFrame([dic_names]).T.rename(columns={0: 'description'}).to_markdown())
+
+api_doc_random = """
+## Draw random objects
+
+This service lets you draw random objects (full lightcurve) from the Fink database (120+ million alerts). This is still largely experimental.
+
+The list of arguments for drawing random objects can be found at https://fink-portal.org/api/v1/random.
+
+In a unix shell, you would simply use
+
+```bash
+# Get the data for 8 *objects* randomly drawn from the 120+ million alerts in Fink
+curl -H "Content-Type: application/json" -X POST -d '{"n":8, "output-format":"csv"}' https://fink-portal.org/api/v1/random -o random.csv
+
+# you can also specify parameters in the URL, e.g. with wget:
+wget "https://fink-portal.org/api/v1/random?n=8&output-format=json" -O random.json
+```
+
+In python, you would use
+
+```python
+import requests
+import pandas as pd
+
+r = requests.post(
+  'https://fink-portal.org/api/v1/random',
+  json={
+    'n': integer, # Number of random objects to get. Maximum is 16.
+    'class': classname, # Optional, specify a Fink class.
+    'seed': integer, # Optional, the seed for reproducibility
+    'columns': str, # Optional, comma-separated column names
+    'output-format': output_format, # Optional [json[default], csv, parquet, votable]
+  }
+)
+
+# Format output in a DataFrame
+pdf = pd.read_json(r.content)
+```
+
+As this service is experimental, the number of random objects returned for a single
+call cannot be greater than 16. Concerning the classname, see https://fink-portal.org/api/v1/classes.
+If you do not specify the parameter `class`, you will get random objects from all classes.
+For better performance, we advise choosing a classname and limiting the columns to transfer, e.g.:
+
+```python
+# random Early SN Ia candidate
+r = requests.post(
+  'https://fink-portal.org/api/v1/random',
+  json={
+    'n': 16, # Number of random objects to get
+    'class': 'Early SN Ia candidate', # Optional, specify a Fink class.
+    'seed': 0, # Optional, the seed for reproducibility
+    'columns': 'i:objectId,i:jd,i:magpsf,i:fid', # Optional, comma-separated column names
+  }
+)
+```
+
+Note that this returns data for *objects* (and not just alerts).
+
+Note also that the `seed` is used to fix the date boundaries, hence it is valid only over a small period of time, as the database is updated every day and more dates are added.
+So consider your seed valid for 24 hours (this might change in the future).
+
+"""
diff --git a/apps/api/utils.py b/apps/api/utils.py
@@ -361,7 +361,7 @@ def return_explorer_pdf(payload: dict, user_group: int) -> pd.DataFrame:
 
     return pdfs
 
-def return_latests_pdf(payload: dict) -> pd.DataFrame:
+def return_latests_pdf(payload: dict, return_raw: bool = False) -> pd.DataFrame:
     """ Extract data returned by HBase and format it in a Pandas dataframe
 
     Data is from /api/v1/latests
@@ -370,6 +370,8 @@ def return_latests_pdf(payload: dict) -> pd.DataFrame:
     ----------
     payload: dict
         See https://fink-portal.org/api/v1/latests
+    return_raw: bool
+        If True, return the HBase output, else pandas DataFrame. Default is False.
 
     Return
     ----------
@@ -464,6 +466,9 @@ def return_latests_pdf(payload: dict) -> pd.DataFrame:
         # Restore default limits
         clientT.setLimit(nlimit)
 
+    if return_raw:
+        return results
+
     # We want to return alerts
     # color computation is disabled
     pdfs = format_hbase_output(
@@ -1014,3 +1019,130 @@ def send_data(pdf, output_format):
         'text': "Output format `{}` is not supported. Choose among json, csv, or parquet\n".format(output_format)
     }
     return Response(str(rep), 400)
+
+def return_random_pdf(payload: dict, max_attempts: int = 100) -> pd.DataFrame:
+    """ Extract data returned by HBase and format it in a Pandas dataframe
+
+    Data is from /api/v1/random
+
+    A 30-day time window, whose start is drawn at random between 2019-11-02
+    and now, is scanned for alerts (for a single class if `class` is given).
+    Objects are then drawn without replacement among the scanned alerts,
+    and their data is read from the main table.
+
+    Parameters
+    ----------
+    payload: dict
+        See https://fink-portal.org/api/v1/random
+    max_attempts: int
+        Maximum number of time windows to try before giving up.
+        Default is 100.
+
+    Return
+    ----------
+    out: pandas dataframe
+        A flask Response (status 400) is returned instead if `n` or `seed`
+        is invalid, or if no alert is found after `max_attempts` windows.
+    """
+    if 'columns' in payload:
+        cols = payload['columns'].replace(" ", "")
+    else:
+        cols = '*'
+
+    classsearch = 'class' in payload and str(payload['class']) != ""
+    truncated = cols != '*'
+
+    try:
+        # No more than 16 objects per call
+        number = min(int(payload['n']), 16)
+        if number < 0:
+            raise ValueError("`n` is negative")
+
+        seed = payload.get('seed', None)
+        if seed is not None:
+            seed = int(seed)
+
+        # Use a generator local to the call: seeding the global numpy
+        # generator would also fix the draws of the subsequent calls.
+        rng = np.random.RandomState(seed)
+    except (TypeError, ValueError):
+        rep = {
+            'status': 'error',
+            'text': "`n` must be a non-negative integer, and `seed` an integer between 0 and 2**32 - 1.\n"
+        }
+        return Response(str(rep), 400)
+
+    jd_low = Time('2019-11-02 03:00:00.0').jd
+    jd_high = Time.now().jd
+
+    # 1 month
+    delta_min = 43200
+    delta_jd = TimeDelta(delta_min * 60, format='sec').jd
+
+    try:
+        clientT.setLimit(1000)
+        clientT.setRangeScan(True)
+
+        # Bounded number of draws: a class without any alert would
+        # otherwise loop forever.
+        results = []
+        for _ in range(max_attempts):
+            jdstart = rng.uniform(jd_low, jd_high)
+            jdstop = jdstart + delta_jd
+
+            if classsearch:
+                payload_data = {
+                    'class': payload['class'],
+                    'n': number,
+                    'startdate': Time(jdstart, format='jd').iso,
+                    'stopdate': Time(jdstop, format='jd').iso,
+                    'columns': "",
+                    'output-format': 'json'
+                }
+                results = return_latests_pdf(payload_data, return_raw=True)
+
+                # Error propagation
+                if isinstance(results, Response):
+                    return results
+            else:
+                results = clientT.scan(
+                    "",
+                    "key:key:{},key:key:{}".format(jdstart, jdstop),
+                    "", 0, False, False
+                )
+
+            if len(results) > 0:
+                break
+        else:
+            rep = {
+                'status': 'error',
+                'text': "No alerts found after {} random time windows.\n".format(max_attempts)
+            }
+            return Response(str(rep), 400)
+
+        # Several alerts can belong to the same object: draw among
+        # distinct objects, and without replacement.
+        oids = np.unique([str(i).split('_')[-1] for i in dict(results).keys()])
+        oid = rng.choice(oids, size=min(number, len(oids)), replace=False)
+
+        client.setLimit(2000)
+        # Get data from the main table
+        results = java.util.TreeMap()
+        for oid_ in oid:
+            result = client.scan(
+                "",
+                "key:key:{}".format(oid_),
+                "{}".format(cols),
+                0, False, False
+            )
+            results.putAll(result)
+
+        pdf = format_hbase_output(
+            results, client.schema(), group_alerts=False, truncated=truncated
+        )
+    finally:
+        # Restore default limits, also when something went wrong
+        clientT.setLimit(nlimit)
+        client.setLimit(nlimit)
+
+    return pdf
diff --git a/tests/api_performance_test.py b/tests/api_performance_test.py
@@ -22,12 +22,13 @@
 
 APIURL = sys.argv[1]
 
-def classsearch(myclass='Solar System MPC', n=100000, startdate='2022-03-03', stopdate='2022-03-04', output_format='json'):
+def classsearch(myclass='Solar System MPC', n=100000, startdate='2022-03-03', stopdate='2022-03-04', output_format='json', columns='*'):
     """ Perform a heavy class search in the Science Portal using the Fink REST API
     """
     payload = {
         'class': myclass,
         'n': n,
+        'columns': columns,
         'output-format': output_format
     }
 
@@ -61,7 +62,7 @@ def test_heavy_classsearch() -> None:
     >>> test_heavy_classsearch()
     """
     t0 = time.time()
-    pdf = classsearch()
+    pdf = classsearch(columns='i:objectId,i:magpsf,i:jd,d:rf_snia_vs_nonia')
     dt = time.time() - t0
 
     # less than 45 seconds to get 21,000 objects