In [None]:
# Make sure we are working in the module directory.
# The `!` shell capture returns the command's output lines as a list, so
# element 0 is the repository root reported by git.
repo_root = !git rev-parse --show-toplevel
module_path = repo_root[0] + "/backend/heatflask"
# Change the kernel's working directory; presumably this is what lets the
# project-local imports below (DataAPIs, Strava, Utility) resolve -- confirm.
%cd $module_path

In [None]:
# %%writefile Index.py
"""
***  For Jupyter notebook ***
Paste one of these Jupyter magic directives at the top of a cell
 and run it to do the following:
    %%cython --annotate    # Compile and run the cell
    %load Index.py         # Load Index.py file into this (empty) cell
    %%writefile Index.py   # Write the contents of this cell to Index.py
"""

import os
import polyline
import numpy as np
from logging import getLogger
import datetime
import time
from pymongo import DESCENDING

import DataAPIs
import Strava
import Utility

# Module-level logger. propagate=True (the logging default, set explicitly
# here) hands records on to the handlers of ancestor loggers.
log = getLogger(__name__)
log.propagate = True

# APP_NAME is not referenced by the code visible in this notebook.
APP_NAME = "heatflask"
# Name passed to DataAPIs.init_collection / stats / drop.
COLLECTION_NAME = "index"

SECS_IN_HOUR = 60 * 60
SECS_IN_DAY = 24 * SECS_IN_HOUR

# How long we store an Index entry in MongoDB, in seconds.
# The INDEX_TTL environment variable is read as a whole number of days
# (default 10) and converted to seconds here.
INDEX_TTL = int(os.environ.get("INDEX_TTL", 10)) * SECS_IN_DAY
# Module-level cache; get_collection() stores the collection handle under "col".
DATA = {}


async def get_collection():
    """Return the index collection handle, creating it on first use.

    The handle is cached in the module-level ``DATA`` dict under ``"col"``,
    so ``DataAPIs.init_collection`` is only awaited when no handle is cached
    yet. It is called with ``COLLECTION_NAME`` and ``ttl=INDEX_TTL``.
    """
    try:
        return DATA["col"]
    except KeyError:
        pass

    collection = await DataAPIs.init_collection(COLLECTION_NAME, ttl=INDEX_TTL)
    DATA["col"] = collection
    return collection


def polyline_bounds(poly):
    """Compute the lat/lng bounding box of an encoded polyline.

    Args:
        poly: encoded polyline string, handed to ``polyline.decode``.

    Returns:
        ``{"SW": (min_lat, min_lng), "NE": (max_lat, max_lng)}`` with plain
        Python floats, or ``None`` when the polyline cannot be decoded or
        decodes to no points.
    """
    try:
        latlngs = np.array(polyline.decode(poly), dtype=np.float32)
    except Exception:
        # Best effort: an undecodable polyline simply has no bounds.
        return None

    # A decode result with zero points gives a 1-D empty array; column
    # indexing below would raise IndexError, so treat it as "no bounds".
    if latlngs.ndim != 2 or latlngs.shape[0] == 0 or latlngs.shape[1] < 2:
        return None

    lats = latlngs[:, 0]
    lngs = latlngs[:, 1]

    return {
        "SW": (float(lats.min()), float(lngs.min())),
        "NE": (float(lats.max()), float(lngs.max())),
    }


# Field names of the documents stored in the index collection.
# MongoDB short field names speed up data transfer to/from
# remote DB server
ACTIVITY_ID = "_id"  # Strava activity id, stored as an int
TIMESTAMP = "ts"  # set when the doc is built; refreshed by query(update_ts=True)
USER_ID = "U"  # int id taken from the activity's "athlete" record
ACTIVITY_NAME = "N"
DISTANCE_METERS = "D"
TIME_SECONDS = "T"  # filled from elapsed_time (moving_time is not stored)
ACTIVITY_TYPE = "t"
UTC_START_TIME = "s"  # start_date converted to an int via datetime.timestamp()
UTC_LOCAL_OFFSET = "o"  # Strava's utc_offset value, stored as given
N_ATHLETES = "#a"
N_PHOTOS = "#p"  # filled from total_photo_count
FLAG_COMMUTE = "c"
FLAG_PRIVATE = "p"
LATLNG_BOUNDS = "B"  # result of polyline_bounds() on the summary polyline


# see https://developers.strava.com/docs/reference/#api-models-SummaryActivity
def mongo_doc(
    # From Strava SummaryActivity record
    id=None,
    athlete=None,
    name=None,
    distance=None,
    moving_time=None,
    elapsed_time=None,
    type=None,
    start_date=None,
    utc_offset=None,
    athlete_count=None,
    total_photo_count=None,
    map=None,
    commute=None,
    private=None,
    # my additions
    _id=None,
    ts=None,
    **and_more
):
    """Turn one Strava activity summary into an index document.

    Call it with the activity's fields as keyword arguments (``**activity``);
    fields not named in the signature land in ``and_more`` and are ignored,
    as is ``moving_time``.

    Returns:
        The document keyed by the short field-name constants and passed
        through ``Utility.cleandict``, or ``None`` when the activity has no
        ``start_date`` or no non-empty ``map["summary_polyline"]``.
    """
    # Activities without a start date or a summary polyline are not indexed.
    if not start_date:
        return None
    if not map:
        return None
    if not map.get("summary_polyline"):
        return None

    started_at = Utility.to_datetime(start_date)
    epoch_start = int(started_at.timestamp())

    # Filled in one field at a time, in the order the document is stored.
    fields = {}
    fields[TIMESTAMP] = ts if ts else datetime.datetime.utcnow()
    # `_id` (an already-built doc) takes precedence over Strava's `id`.
    fields[ACTIVITY_ID] = int(_id or id)
    fields[USER_ID] = int(athlete["id"])
    fields[ACTIVITY_NAME] = name
    fields[DISTANCE_METERS] = distance
    fields[TIME_SECONDS] = elapsed_time
    fields[ACTIVITY_TYPE] = type
    fields[UTC_START_TIME] = epoch_start
    fields[UTC_LOCAL_OFFSET] = utc_offset
    fields[N_ATHLETES] = athlete_count
    fields[N_PHOTOS] = total_photo_count
    fields[FLAG_COMMUTE] = commute
    fields[FLAG_PRIVATE] = private
    fields[LATLNG_BOUNDS] = polyline_bounds(map["summary_polyline"])
    return Utility.cleandict(fields)


async def import_user_entries(**user):
    """Rebuild one user's index entries from their Strava activity list.

    Fetches every activity summary from ``strava.get_index()``, converts each
    with ``mongo_doc``, deletes the user's existing index entries, then inserts
    the new documents in one unordered ``insert_many``.

    Args:
        **user: the user record; ``user["_id"]`` is the Strava athlete id and
            ``user["auth"]`` is unpacked into ``Strava.AsyncClient``.

    Returns:
        None. Fetch and insert timings are logged at DEBUG level.
    """
    t0 = time.perf_counter()

    uid = int(user["_id"])

    strava = Strava.AsyncClient(uid, **user["auth"])
    # presumably refreshes the stored token when needed -- confirm in Strava
    await strava.update_access_token()
    now = datetime.datetime.utcnow()

    # Build a concrete list (not a lazy filter object) so we can tell whether
    # there is anything to insert. mongo_doc returns None for activities it
    # cannot index; those are dropped here.
    docs = []
    async for activity in strava.get_index():
        if activity is None:
            continue
        doc = mongo_doc(**activity, ts=now)
        if doc:
            docs.append(doc)
    t1 = time.perf_counter()
    fetch_time = (t1 - t0) * 1000

    index = await get_collection()
    # Stale entries are removed even when nothing new will be inserted.
    await delete_user_entries(**user)

    count = 0
    if docs:
        # insert_many rejects an empty batch, so only call it with documents.
        insert_result = await index.insert_many(docs, ordered=False)
        count = len(insert_result.inserted_ids)
    insert_time = (time.perf_counter() - t1) * 1000
    log.debug(
        "fetched %s entries in %dms, insert_many %dms", count, fetch_time, insert_time
    )


async def delete_user_entries(**user):
    """Delete every index entry owned by the given user.

    Args:
        **user: the user record; only ``user["_id"]`` is read.

    Returns:
        Whatever the collection's ``delete_many`` call returns.
    """
    owner_id = int(user["_id"])
    collection = await get_collection()
    selector = {USER_ID: owner_id}
    outcome = await collection.delete_many(selector)
    return outcome


# Sort order used by every find() in query(): latest activity start time first.
SORT_SPECS = [(UTC_START_TIME, DESCENDING)]


async def query(
    user_id=None,
    activity_ids=None,
    exclude_ids=None,
    after=None,
    before=None,
    limit=None,
    update_ts=True,
):
    """Find index entries, optionally diffing against ids the caller already has.

    Args:
        user_id: restrict to this user's activities; when given, the USER_ID
            field is also left out of the returned docs.
        activity_ids: restrict to these activity ids (each coerced to int).
        exclude_ids: ids the caller already holds. Matching ids are found
            first; only matches not in this set are fetched in full, and ids
            in this set that did not match are reported under ``"triage"``.
        after: lower bound (inclusive) on start time, via ``Utility.to_epoch``.
        before: upper bound (exclusive) on start time, via ``Utility.to_epoch``.
        limit: maximum number of results; falsy means 0, i.e. no limit.
        update_ts: when true, set TIMESTAMP to now on every returned doc.

    Returns:
        dict with ``"docs"`` (list of matching documents, latest start first)
        and, only when ``exclude_ids`` was given, ``"triage"`` (list of ids).
    """
    if activity_ids:
        activity_ids = {int(aid) for aid in activity_ids}

    if exclude_ids:
        exclude_ids = {int(aid) for aid in exclude_ids}

    limit = int(limit) if limit else 0

    # Called `criteria` so the function's own name is not shadowed.
    criteria = {}
    projection = None

    if user_id:
        criteria[USER_ID] = int(user_id)
        projection = {USER_ID: False}

    if before or after:
        # cleandict is relied on to drop whichever bound is None.
        criteria[UTC_START_TIME] = Utility.cleandict(
            {
                "$lt": None if before is None else Utility.to_epoch(before),
                "$gte": None if after is None else Utility.to_epoch(after),
            }
        )

    if activity_ids:
        criteria[ACTIVITY_ID] = {"$in": list(activity_ids)}

    index = await get_collection()

    result = {}

    if exclude_ids:
        t0 = time.perf_counter()
        # Ids only: cheap first pass to work out what the caller is missing.
        cursor = index.find(
            filter=criteria,
            projection={ACTIVITY_ID: True},
            sort=SORT_SPECS,
            limit=limit,
        )

        # These are the ids of activities that matched the query
        query_ids = {doc[ACTIVITY_ID] async for doc in cursor}

        to_fetch = list(query_ids - exclude_ids)
        to_delete = list(exclude_ids - query_ids)

        result["triage"] = to_delete
        # From here on, fetch exactly the ids the caller does not have yet.
        criteria = {ACTIVITY_ID: {"$in": to_fetch}}

        elapsed = (time.perf_counter() - t0) * 1000
        log.debug("queried %d ids in %dms", len(query_ids), elapsed)

    t0 = time.perf_counter()
    cursor = index.find(
        filter=criteria,
        projection=projection,
        sort=SORT_SPECS,
        limit=limit,
    )

    docs = await cursor.to_list(length=None)
    result["docs"] = docs

    t1 = time.perf_counter()
    elapsed = (t1 - t0) * 1000
    log.debug("queried %d activities in %dms", len(docs), elapsed)

    # Skip the write entirely when nothing matched: an empty $in updates
    # nothing and only costs a round trip to the DB server.
    if update_ts and docs:
        # presumably refreshing TIMESTAMP postpones TTL expiry of entries that
        # are still being read -- confirm against DataAPIs.init_collection
        await index.update_many(
            {ACTIVITY_ID: {"$in": [a[ACTIVITY_ID] for a in docs]}},
            {"$set": {TIMESTAMP: datetime.datetime.utcnow()}},
        )
        elapsed = (time.perf_counter() - t1) * 1000
        log.debug("ts update in %dms", elapsed)
    return result


def stats():
    """Return ``DataAPIs.stats`` for the index collection.

    The result is passed through untouched. The notebook awaits the same
    ``DataAPIs.stats`` call directly, so the value is presumably an awaitable
    the caller must await -- confirm against DataAPIs.
    """
    return DataAPIs.stats(COLLECTION_NAME)


def drop():
    """Return ``DataAPIs.drop`` for the index collection.

    The result is passed through untouched. Presumably this removes the whole
    collection and returns an awaitable -- confirm against DataAPIs.
    """
    return DataAPIs.drop(COLLECTION_NAME)


In [None]:
# Example Strava SummaryActivity record (the input shape mongo_doc expects)
A = {'resource_state': 2,
 'athlete': {'id': 15972102, 'resource_state': 1},
 'name': 'Afternoon Shred',
 'distance': 3301.7,
 'moving_time': 1346,
 'elapsed_time': 1378,
 'total_elevation_gain': 50.1,
 'type': 'Surfing',
 'id': 6663463299,
 'start_date': '2022-02-10T21:49:17Z',
 'start_date_local': '2022-02-10T13:49:17Z',
 'timezone': '(GMT-08:00) America/Los_Angeles',
 'utc_offset': -28800.0,
 'location_city': None,
 'location_state': None,
 'location_country': 'United States',
 'achievement_count': 0,
 'kudos_count': 0,
 'comment_count': 0,
 'athlete_count': 1,
 'photo_count': 0,
 'map': {'id': 'a6663463299',
  'summary_polyline': 'cr{eFjowhVd@RVED]K[A[BMHKLAZBTJHAHGCEPK@k@LUFCVEP?DBL@DFd@FVNLCFIDCJWPMBa@G[O]EY?SBOd@a@FQBYH[AUIi@BQFILGNBPHNCh@WLARKDEBQCKBMNc@XMJBFH@f@Kr@c@lACN@XRf@?`@K`@@HDHZPV`@JFF@DIPCLg@NUn@YJALD\\N\\TF?HCN@JMVE@EBk@AWFYFIXSLGXENIJ]Jg@FKFCJHNVF^`@vANPLXRJJJP\\DDNBBERCF?FDVBHBHPBb@DTDfACz@Gp@IPHGBMAAGPORKZS\\e@~A_AXe@D]Cc@K_@OM?G@IJYhAMHi@Hc@TYHk@D[?g@LqAr@_@d@CRA\\Jl@?NEVOZWR_ANcAX[@QNO^CZ`@`BD`@@\\GnAQrAIb@OLI?GCKMGo@GSU]IU?UGQBc@EUe@o@Qm@E]RuBC[O_@IEMBUA]J{@^[DOASGEGIWIc@GMGEKA_@VQR{@|A]d@EBG?_@OKIQ@OCKIGY?i@DaBM]e@g@_@{@DYFQb@_ABWCMe@_@_@MSCs@Ps@Ce@?Cb@FC@FD@?E?BBB',
  'resource_state': 2},
 'trainer': False,
 'commute': False,
 'manual': False,
 'private': False,
 'visibility': 'everyone',
 'flagged': False,
 'gear_id': None,
 'start_latlng': [37.829625, -122.18629333],
 'end_latlng': [37.83219167, -122.187075],
 'start_latitude': 37.829625,
 'start_longitude': -122.187075,
 'average_speed': 2.453,
 'max_speed': 6.48,
 'has_heartrate': True,
 'average_heartrate': 115.2,
 'max_heartrate': 142.0,
 'heartrate_opt_out': False,
 'display_hide_heartrate_option': True,
 'elev_high': 428.1,
 'elev_low': 353.1,
 'upload_id': 7086217566,
 'upload_id_str': '7086217566',
 'external_id': '2022-02-10_22-13-35_2404963f-543a-485f-aca1-04e4184b7c08.tcx',
 'from_accepted_tag': False,
 'pr_count': 0,
 'total_photo_count': 0,
 'has_kudoed': False,
 'suffer_score': 7.0}

In [None]:
import logging
# Show DEBUG-level records in the notebook output.
logging.basicConfig(level="DEBUG")
# NOTE: this rebinds the global name `log` to the root logger. Functions
# defined in this same kernel namespace look `log` up at call time, so they
# log through the root logger from here on.
log = logging.getLogger()

# Convert the example activity above into an index document.
mongo_doc(**A)

In [None]:
import Users
# Load the user record for the first entry of Users.ADMIN.
efrem = await Users.get(Users.ADMIN[0])
# NOTE(review): import_user_entries reads user["auth"] from this record, so
# the displayed value may contain Strava credentials -- clear this output
# before sharing or committing the notebook.
efrem

In [None]:
# Replaces this user's index entries: existing ones are deleted, then the
# activities freshly fetched from Strava are inserted.
await import_user_entries(**efrem)

In [None]:
# No user_id is given, so this matches entries of every user with a start
# time at or after Utility.to_epoch("2021"). update_ts defaults to True, so
# the matched entries also get their timestamp rewritten.
result = await query(after="2021")

In [None]:
# Display the query result: a dict with a "docs" list ("triage" is only
# present when exclude_ids was passed).
result

In [None]:
# DataAPIs is already imported by the module cell; the import is repeated so
# this cell also runs on its own.
import DataAPIs
# Same call stats() makes, with the collection name ("index") spelled out.
await DataAPIs.stats("index")