In [3]:
# Make sure we are working in the module directory: ask git for the
# repository root and change into <root>/backend/heatflask.
repo_root = !git rev-parse --show-toplevel
module_path = repo_root[0] + "/backend/heatflask"
%cd $module_path

import sys
# Pretend this notebook is part of the "heatflask" package and put the
# parent directory on sys.path, so that the relative imports used by the
# module cells below ("from . import ...") can be resolved.
__package__ = "heatflask"
if ".." not in sys.path:
    sys.path.insert(0, "..")


# Make cells wider
from IPython.display import display, HTML
display(HTML("<style>.container { width:96% !important; }</style>"))

/home/efrem/dev/heatflask/backend/heatflask


In [35]:
# %%writefile Users.py
"""
***  For Jupyter notebook ***
Paste one of these Jupyter magic directives to the top of a cell
 and run it, to do these things:
    %%cython --annotate       # Compile and run the cell
    %load Users.py            # Load Users.py file into this (empty) cell
    %%writefile Users.py      # Write the contents of this cell to Users.py
"""

from logging import getLogger
import datetime
import pymongo
import types
import asyncio
from aiohttp.client_exceptions import ClientResponseError

from . import DataAPIs
from . import Utility
from . import Strava
from . import Index

# Module-level logger
log = getLogger(__name__)
log.propagate = True

# Name passed to DataAPIs (init_collection / stats / drop) for the
# collection that holds the user documents
COLLECTION_NAME = "users"

# Inactivity window in seconds (365 days).  triage() selects users whose
# last login is older than this.
TTL = 365 * 24 * 3600

# These are IDs of users we consider to be admin users
ADMIN = [15972102]

# Default cap on the number of users handled by one triage() batch.
# This limits the number of de-auths so we don't go over our hit quota.
MAX_TRIAGE = 10

# Holder for the lazily-initialized collection handle (see get_collection)
myBox = types.SimpleNamespace(collection=None)


async def get_collection():
    """Return the users collection handle, creating it on first use.

    The handle obtained from ``DataAPIs.init_collection`` is cached on
    ``myBox`` and reused by every later call.
    """
    cached = myBox.collection
    if cached is not None:
        return cached
    myBox.collection = await DataAPIs.init_collection(COLLECTION_NAME)
    return myBox.collection


# Short field names used in the stored user documents.
# Each assignment expression below does two things at once: it binds a
# module-level constant (e.g. ID == "_id") and it contributes that value
# to the `fields` list.  See mongo_doc() for how each field is populated.
fields = [
    ID := "_id",
    LAST_LOGIN := "ts",
    LOGIN_COUNT := "#",
    LAST_INDEX_ACCESS := "I",
    FIRSTNAME := "f",
    LASTNAME := "l",
    PROFILE := "P",
    CITY := "c",
    STATE := "s",
    COUNTRY := "C",
    AUTH := "@",
    PRIVATE := "p",
]


def mongo_doc(
    # From Strava Athlete record
    id=None,
    firstname=None,
    lastname=None,
    profile_medium=None,
    profile=None,
    city=None,
    state=None,
    country=None,
    # my additions
    _id=None,
    last_login=None,
    login_count=None,
    last_index_access=None,
    private=None,
    auth=None,
    **extras,
):
    """Translate athlete/user attributes into a user document.

    The document is keyed by the short field-name constants (ID,
    FIRSTNAME, ...).  ``_id`` takes precedence over ``id`` and
    ``profile_medium`` over ``profile``.  Any other keyword arguments are
    accepted and ignored.

    Returns:
        The result of ``Utility.cleandict`` applied to the assembled
        mapping (judging by the notebook output this drops the entries
        that were not supplied -- confirm in Utility), or None, after
        logging an error, when neither ``id`` nor ``_id`` is truthy.
    """
    user_id = _id or id
    if not user_id:
        log.error("cannot create user with no id")
        return None

    picture = profile_medium or profile
    doc = dict(
        [
            (ID, int(user_id)),
            (FIRSTNAME, firstname),
            (LASTNAME, lastname),
            (PROFILE, picture),
            (CITY, city),
            (STATE, state),
            (COUNTRY, country),
            (LAST_LOGIN, last_login),
            (LOGIN_COUNT, login_count),
            (LAST_INDEX_ACCESS, last_index_access),
            (AUTH, auth),
            (PRIVATE, private),
        ]
    )
    return Utility.cleandict(doc)


def is_admin(user_id):
    """Tell whether ``user_id`` (coerced with ``int()``) is listed in ADMIN."""
    uid = int(user_id)
    return uid in ADMIN


async def add_or_update(
    update_last_login=False,
    update_index_access=False,
    inc_login_count=False,
    **strava_athlete,
):
    """Create the user if it does not exist yet, otherwise update it.

    Args:
        update_last_login: when true, set LAST_LOGIN to the current time.
        update_index_access: when true, set LAST_INDEX_ACCESS to the
            current time.
        inc_login_count: when true, increment LOGIN_COUNT by one.
        **strava_athlete: forwarded to mongo_doc(); must contain a truthy
            ``id`` or ``_id``.  Keyword names that mongo_doc() does not
            know are silently ignored, so a misspelled flag has no effect.

    Returns:
        The document as it looks after the upsert, or None when no valid
        document could be built or the database call failed (both cases
        are logged).
    """
    users = await get_collection()
    # log.debug("Athlete: %s", strava_athlete)
    doc = mongo_doc(**strava_athlete)
    if not doc:
        # Not inside an exception handler, so log.error rather than
        # log.exception (which would append a bogus "NoneType: None").
        log.error("error adding/updating user: %s", doc)
        return

    # Epoch seconds.  Use an aware datetime: calling .timestamp() on the
    # naive value returned by utcnow() treats it as local time and shifts
    # the result by the machine's UTC offset.
    now_ts = datetime.datetime.now(datetime.timezone.utc).timestamp()
    if update_last_login:
        doc[LAST_LOGIN] = now_ts

    if update_index_access:
        doc[LAST_INDEX_ACCESS] = now_ts

    # We cannot technically "update" the _id field if this user exists
    # in the database, so we need to remove that field from the updates
    user_info = {**doc}
    user_id = user_info.pop(ID)
    updates = {"$set": user_info}

    if inc_login_count:
        updates["$inc"] = {LOGIN_COUNT: 1}

    log.debug("calling mongodb update_one with updates %s", updates)

    # Creates a new user or updates an existing user (with the same id)
    try:
        return await users.find_one_and_update(
            {ID: user_id},
            updates,
            upsert=True,
            return_document=pymongo.ReturnDocument.AFTER,
        )
    except Exception:
        log.exception("error adding/updating user: %s", doc)


async def get(user_id):
    """Look up one user document by id.

    Returns None when ``user_id`` is falsy, when the query raises (the
    failure is logged), or whatever ``find_one`` returns otherwise.
    ``user_id`` is coerced with ``int()`` before querying.
    """
    if user_id:
        users = await get_collection()
        query = {ID: int(user_id)}
        try:
            return await users.find_one(query)
        except Exception:
            log.exception("Failed mongodb query: %s", query)
    return None


async def get_all():
    """Return the cursor produced by an unfiltered ``find()`` on the users collection.

    The cursor is handed back un-consumed; the caller iterates it
    (``async for``) or materializes it (``to_list``).
    """
    collection = await get_collection()
    cursor = collection.find()
    return cursor


# Projection used by dump(): the fields included for every caller.
# dump(admin=True) adds LAST_LOGIN, LOGIN_COUNT, LAST_INDEX_ACCESS and
# PRIVATE on top of these.
default_out_fields = {
    ID: True,
    FIRSTNAME: True,
    LASTNAME: True,
    PROFILE: True,
    CITY: True,
    STATE: True,
    COUNTRY: True,
    #
    # Fields deliberately left out of the default projection:
    # LAST_LOGIN: False,
    # LOGIN_COUNT: False,
    # LAST_INDEX_ACCESS: False,
    # AUTH: False,
    # PRIVATE: False,
}


async def dump(admin=False, output="json"):
    """Async generator over user records.

    Args:
        admin: when false, only documents whose PRIVATE field is False are
            selected and only the default fields are projected; when true,
            every document is selected and the login/index/private fields
            are projected as well.
        output: ``"csv"`` yields a header row (the list of field names)
            followed by one list of values per user, with ``""`` for
            missing fields; any other value yields the documents as-is.
    """
    if admin:
        query = {}
        out_fields = {
            **default_out_fields,
            LAST_LOGIN: True,
            LOGIN_COUNT: True,
            LAST_INDEX_ACCESS: True,
            PRIVATE: True,
        }
    else:
        query = {PRIVATE: False}
        out_fields = dict(default_out_fields)

    users = await get_collection()
    cursor = users.find(filter=query, projection=out_fields)
    keys = list(out_fields)

    if output == "csv":
        yield keys
        async for user in cursor:
            yield [user.get(key, "") for key in keys]
    else:
        async for user in cursor:
            yield user



async def delete(user_id, deauthenticate=True):
    """Remove a user: their index entries, their Strava authorization, and their record.

    Args:
        user_id: id of the user; coerced with ``int()`` once so that the
            lookup and the deletion use the same key (get() also queries
            by int).  A falsy id is rejected with a logged error.
        deauthenticate: when true and the stored record has AUTH info,
            ask Strava to revoke our access before the record is deleted.

    Returns:
        None.  Failures of the Strava call or of the database deletion
        are logged, not raised.
    """
    if not user_id:
        log.error("cannot delete user with no id")
        return
    uid = int(user_id)

    # First we delete the user's index
    await Index.delete_user_entries(**{ID: uid})

    user = await get(uid)

    # attempt to de-authenticate the user. we
    #  will no longer access data on their behalf.
    #  We need their stored access_token in order to do this
    #  and we won't be able to if we delete that info, so we must
    #  make sure it is done before deleting this user from mongodb.
    #  Afterwards it is useless so we can delete it.
    if user and (AUTH in user) and deauthenticate:
        client = Strava.AsyncClient(uid, **user[AUTH])
        async with Strava.get_limiter():
            try:
                await client.deauthenticate(raise_exception=True)
            except ClientResponseError as e:
                log.info(
                    "user %s is already deauthenticated? (%s, %s)",
                    uid,
                    e.status,
                    e.message,
                )
            except Exception:
                log.exception("strava error?")

    users = await get_collection()
    try:
        await users.delete_one({ID: uid})

    except Exception:
        log.exception("error deleting user %d", uid)
    else:
        log.info("deleted user %s", uid)


async def triage(*args, only_find=False, deauthenticate=True, max_triage=MAX_TRIAGE):
    """Find users whose last login is older than TTL and, unless told otherwise, delete them.

    Args:
        *args: accepted and ignored; all options are keyword-only.
        only_find: when true, nothing is deleted.
        deauthenticate: forwarded to delete() for each selected user.
        max_triage: maximum number of users fetched (and so handled) in
            this batch.

    Returns:
        The list of selected user documents (ID and LAST_LOGIN only),
        whether or not they were deleted, so the caller can see what the
        batch covered.
    """
    cutoff = datetime.datetime.now().timestamp() - TTL
    users = await get_collection()
    cursor = users.find({LAST_LOGIN: {"$lt": cutoff}}, {ID: True, LAST_LOGIN: True})
    bad_users = await cursor.to_list(length=max_triage)
    log.debug(
        {
            u[ID]: str(datetime.datetime.fromtimestamp(u[LAST_LOGIN]).date())
            for u in bad_users
        }
    )
    if only_find or not bad_users:
        return bad_users

    # The deletions run concurrently; gather() schedules each coroutine
    # as a task itself.
    await asyncio.gather(
        *(delete(bu[ID], deauthenticate=deauthenticate) for bu in bad_users)
    )
    return bad_users


def stats():
    """Return whatever ``DataAPIs.stats`` gives for the users collection."""
    collection_stats = DataAPIs.stats(COLLECTION_NAME)
    return collection_stats


def drop():
    """Return the result of ``DataAPIs.drop`` for the users collection."""
    drop_result = DataAPIs.drop(COLLECTION_NAME)
    return drop_result


#  #### Legacy ######
import os
from sqlalchemy import create_engine, text
import json

async def migrate():
    """Copy users from the legacy Postgres database into the users collection.

    Reads the connection URL from the REMOTE_POSTGRES_URL environment
    variable (KeyError if unset), selects every row of the legacy
    ``users`` table and converts it with mongo_doc().  Rows are skipped
    when the id is in ADMIN, when ``dt_last_active`` is NULL, or when
    ``access_token`` is missing or not valid JSON.  Any existing documents
    with the same ids are deleted before the new ones are inserted.
    """
    # Import legacy Users database
    log.info("Importing users from legacy db")
    pgurl = os.environ["REMOTE_POSTGRES_URL"]

    engine = create_engine(pgurl)
    try:
        with engine.connect() as conn:
            # Fetch the rows while the connection is still open
            rows = conn.execute(text("select * from users")).all()
    finally:
        # Release the pooled connection(s); the engine is not used again
        engine.dispose()

    docs = []

    # The unpacking relies on the column order of the legacy table
    for (
        legacy_id,
        username,
        firstname,
        lastname,
        profile,
        access_token,
        measurement_preference,
        city,
        state,
        country,
        email,
        dt_last_active,
        app_activity_count,
        share_profile,
        xxx,
    ) in rows:
        if (legacy_id in ADMIN) or (dt_last_active is None):
            log.info("skipping %d", legacy_id)
            continue

        try:
            auth = json.loads(access_token)
        except (TypeError, json.JSONDecodeError):
            # TypeError: access_token is NULL; JSONDecodeError: not JSON
            log.info("skipping %d: unusable access_token", legacy_id)
            continue

        doc = mongo_doc(
            # From Strava Athlete record
            id=legacy_id,
            firstname=firstname,
            lastname=lastname,
            profile=profile,
            city=city,
            state=state,
            country=country,
            #
            # NOTE(review): if dt_last_active is a naive datetime,
            # .timestamp() interprets it in the local timezone -- confirm
            last_login=dt_last_active.timestamp(),
            login_count=app_activity_count,
            private=not share_profile,
            auth=auth,
        )
        if doc:
            docs.append(doc)

    if not docs:
        # insert_many() rejects an empty list
        log.info("Done migrating %d users", 0)
        return

    ids = [u[ID] for u in docs]
    users = await get_collection()
    await users.delete_many({ID: {"$in": ids}})
    insert_result = await users.insert_many(docs)
    log.info("Done migrating %d users", len(insert_result.inserted_ids))


Overwriting Users.py


In [19]:
# await DataAPIs.connect()
# await drop()
# await DataAPIs.disconnect()

In [20]:
# Smoke test for add_or_update(): show DEBUG logging, connect, then try
# an upsert without and with an id.
import logging
logging.basicConfig(level="DEBUG")

await DataAPIs.connect()

new_user_dict = {"username": "guy"}
await add_or_update(**new_user_dict)  # no id: logs an error and returns None

new_user_dict = {**new_user_dict, "id": 222}
await add_or_update(**new_user_dict)

ERROR:__main__:cannot create user with no id
ERROR:__main__:error adding/updating user: None
NoneType: None
DEBUG:__main__:calling mongodb update_one with updates {'$set': {}}


{'_id': 222}

In [21]:
# Read back the test user upserted in the previous cell
await get(222)

{'_id': 222}

In [22]:
await add_or_update(_id=222, private=False)

await add_or_update(id=222, inc_access_count=True)

result = await add_or_update(id=222, update_ts=True)
user_info = await get(222)
result, user_info

DEBUG:__main__:calling mongodb update_one with updates {'$set': {'p': False}}
DEBUG:__main__:calling mongodb update_one with updates {'$set': {}}
DEBUG:__main__:calling mongodb update_one with updates {'$set': {}}


({'_id': 222, 'p': False}, {'_id': 222, 'p': False})

In [23]:
# Clean up the test user.  delete() has no return statement, so `result`
# is None; success or failure is reported through the log.
result = await delete(222)

DEBUG:heatflask.Index:222 deleted 0 entries
INFO:__main__:deleted user 222


In [24]:
# import real user data from Strava
# Step 1: print the authorization URL.  The 'code' query parameter of the
# page Strava redirects to is exchanged for a token in the next cell.
from . import Strava
print("Paste this URL into your browser and retrieve the code:\n", Strava.auth_url())
print("You will be redirected to an error page. copy the 'code' parameter from the url")

Paste this URL into your browser and retrieve the code:
 https://www.strava.com/oauth/authorize?client_id=12700&response_type=code&approval_prompt=force&scope=read,activity:read,activity:read_all&redirect_uri=http:%2F%2Flocalhost%2Fexchange_token
You will be redirected to an error page. copy the 'code' parameter from the url


In [25]:
# exchange code for token
CODE = "cda3bbe70722fa345770211c65f940f605c64598"
C = Strava.AsyncClient("admin")
auth_info = await C.update_access_token(code=CODE)
strava_athlete = auth_info.pop("athlete")
auth_info, strava_athlete

DEBUG:heatflask.Strava:refreshing access token from code
INFO:heatflask.Strava:admin token refresh took 533


({'expires_at': 1646432785,
  'refresh_token': '05867993a2d0c5b60c51653636a9c295348551f3',
  'access_token': 'e3d51cfe2f97616b0dfc6e44a0ce4ec7ac07a2e6'},
 {'id': 15972102,
  'username': 'bfef',
  'resource_state': 2,
  'firstname': '👣',
  'lastname': 'Efrem',
  'bio': '* runs barefoot 👣\n\nhttps://www.heatflask.com',
  'city': 'Oakland',
  'state': 'California',
  'country': 'United States',
  'sex': 'M',
  'premium': True,
  'summit': True,
  'created_at': '2016-06-25T03:48:55Z',
  'updated_at': '2021-09-27T06:08:53Z',
  'badge_type_id': 1,
  'weight': 81.22,
  'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/medium.jpg',
  'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/large.jpg',
  'friend': None,
  'follower': None})

In [26]:
# Preview the document that would be stored for this athlete.
# NOTE: the output includes the auth tokens -- clear it before sharing.
mongo_doc(**strava_athlete, auth=auth_info)

{'_id': 15972102,
 'f': '👣',
 'l': 'Efrem',
 'P': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/medium.jpg',
 'c': 'Oakland',
 's': 'California',
 'C': 'United States',
 '@': {'expires_at': 1646432785,
  'refresh_token': '05867993a2d0c5b60c51653636a9c295348551f3',
  'access_token': 'e3d51cfe2f97616b0dfc6e44a0ce4ec7ac07a2e6'}}

In [27]:
# Upsert the real athlete with every flag on: both timestamps are set and
# the login counter is incremented.
# NOTE: the returned document includes the auth tokens -- clear the
# output before sharing.
await add_or_update(
    update_last_login=True,
    update_index_access=True,
    inc_login_count=True,
    **strava_athlete,
    auth=auth_info
)

DEBUG:__main__:calling mongodb update_one with updates {'$set': {'f': '👣', 'l': 'Efrem', 'P': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/medium.jpg', 'c': 'Oakland', 's': 'California', 'C': 'United States', '@': {'expires_at': 1646432785, 'refresh_token': '05867993a2d0c5b60c51653636a9c295348551f3', 'access_token': 'e3d51cfe2f97616b0dfc6e44a0ce4ec7ac07a2e6'}, 'ts': 1646455348.60307, 'I': 1646455348.60307}, '$inc': {'#': 1}}


{'_id': 15972102,
 '#': 3,
 'C': 'United States',
 'I': 1646455348.60307,
 'P': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/medium.jpg',
 'c': 'Oakland',
 'f': '👣',
 'l': 'Efrem',
 's': 'California',
 'ts': 1646455348.60307,
 '@': {'expires_at': 1646432785,
  'refresh_token': '05867993a2d0c5b60c51653636a9c295348551f3',
  'access_token': 'e3d51cfe2f97616b0dfc6e44a0ce4ec7ac07a2e6'}}

In [29]:
# Two ways to consume the cursor returned by get_all().
# Alternative 1 -- async comprehension:
# async_cursor = await get_all()
# userslist1 = [user async for user in async_cursor]

# Alternative 2 -- let the cursor build the list (length=None: no limit)
async_cursor = await get_all()
userslist2 = await async_cursor.to_list(length=None)
# NOTE: this displays every stored user, including auth tokens
userslist2

[{'_id': 15972102,
  '#': 3,
  'C': 'United States',
  'I': 1646455348.60307,
  'P': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/medium.jpg',
  'c': 'Oakland',
  'f': '👣',
  'l': 'Efrem',
  's': 'California',
  'ts': 1646455348.60307,
  '@': {'expires_at': 1646432785,
   'refresh_token': '05867993a2d0c5b60c51653636a9c295348551f3',
   'access_token': 'e3d51cfe2f97616b0dfc6e44a0ce4ec7ac07a2e6'}},
 {'_id': 3380054,
  'f': 'Jon',
  'l': 'Gauthier',
  'P': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/3380054/2967244/3/medium.jpg',
  'c': 'Boston',
  's': 'Massachusetts',
  'C': 'United States',
  'ts': 1619572215.834646,
  '#': 1,
  '@': {'access_token': '94cecf1e499deb6a7a96606e5084d6e66b32cf6d',
   'refresh_token': '45374dd96c2daf8ac4efdeba26a1545c842abdca',
   'expires_at': 1646439503},
  'p': True},
 {'_id': 21161919,
  'f': 'Алексей',
  'l': 'А',
  'P': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/21161919/11248743/4/medium.jpg',
  '

In [30]:
# Import the legacy users; needs the REMOTE_POSTGRES_URL environment variable
await migrate()

INFO:__main__:Importing users from legacy db
INFO:__main__:skipping 97298375
INFO:__main__:skipping 6851964
INFO:__main__:skipping 15972102
INFO:__main__:Done migrating 5722 users


In [31]:
# Close the connections opened by DataAPIs.connect()
await DataAPIs.disconnect()

INFO:heatflask.DataAPIs:Disconnecing from MongoDB and Redis


In [33]:
# Collect the admin CSV dump: the first row is the header (field names),
# followed by one row per user.
await DataAPIs.connect()
all_users = [a async for a in dump(admin=True, output="csv")]
await DataAPIs.disconnect()
# NOTE: this displays every user's name and location
all_users

INFO:heatflask.DataAPIs:Connected to MongoDB and Redis
INFO:heatflask.DataAPIs:Disconnecing from MongoDB and Redis


[['_id', 'f', 'l', 'P', 'c', 's', 'C', 'ts', '#', 'I', 'p'],
 [15972102,
  '👣',
  'Efrem',
  'https://dgalywyr863hv.cloudfront.net/pictures/athletes/15972102/9131294/7/medium.jpg',
  'Oakland',
  'California',
  'United States',
  1646455348.60307,
  3,
  1646455348.60307,
  ''],
 [3045216,
  'Hans',
  'Heinermann',
  'https://dgalywyr863hv.cloudfront.net/pictures/athletes/3045216/1917955/2/medium.jpg',
  'Münster',
  'NRW',
  'Germany',
  1576697670.032849,
  1,
  '',
  True],
 [37634167,
  'Markus',
  'Bock',
  'https://dgalywyr863hv.cloudfront.net/pictures/athletes/37634167/11068631/22/medium.jpg',
  'Hannover',
  'Niedersachsen',
  'Deutschland',
  1574306869.800961,
  1,
  '',
  True],
 [14569586,
  'Derek',
  'Dean',
  'https://graph.facebook.com/10203230987472749/picture?height=256&width=256',
  'Southbury',
  'Connecticut',
  'United States',
  1606683522.531564,
  2,
  '',
  True],
 [7222491,
  'Magnus',
  'Klinge',
  'https://dgalywyr863hv.cloudfront.net/pictures/athletes/722

In [34]:
# Run one triage batch.
# NOTE: without only_find=True this call is destructive -- it deletes
# (and by default de-authenticates) up to MAX_TRIAGE stale users.
await DataAPIs.connect()
stale_users = await triage()
await DataAPIs.disconnect()
stale_users

INFO:heatflask.DataAPIs:Connected to MongoDB and Redis
DEBUG:__main__:{3045216: '2019-12-18', 37634167: '2019-11-20', 14569586: '2020-11-29', 7222491: '2020-08-12', 1667371: '2020-05-23', 6483336: '2020-02-28', 10007779: '2020-05-02', 2276263: '2020-07-26', 5938463: '2020-08-19', 322591: '2020-06-11'}
DEBUG:heatflask.Index:3045216 deleted 0 entries
DEBUG:heatflask.Index:2276263 deleted 0 entries
DEBUG:heatflask.Index:6483336 deleted 0 entries
DEBUG:heatflask.Index:7222491 deleted 0 entries
DEBUG:heatflask.Index:37634167 deleted 0 entries
DEBUG:heatflask.Index:1667371 deleted 0 entries
DEBUG:heatflask.Index:14569586 deleted 0 entries
DEBUG:heatflask.Index:322591 deleted 0 entries
DEBUG:heatflask.Strava:opening new aiohttp session
DEBUG:heatflask.Strava:refreshing access token from refresh_token
DEBUG:heatflask.Index:10007779 deleted 0 entries
DEBUG:heatflask.Index:5938463 deleted 0 entries
DEBUG:heatflask.Strava:opening new aiohttp session
DEBUG:heatflask.Strava:refreshing access token 