Skip to content

Commit

Permalink
Cli: add user anonymization command
Browse files Browse the repository at this point in the history
  • Loading branch information
KonstantinaStoikou committed Aug 3, 2020
1 parent c3221ca commit ed3867d
Show file tree
Hide file tree
Showing 8 changed files with 519 additions and 198 deletions.
23 changes: 13 additions & 10 deletions invenio_app_ils/acquisition/jsonresolvers/order_order_lines.py
Expand Up @@ -11,6 +11,7 @@
from werkzeug.routing import Rule

from invenio_app_ils.acquisition.proxies import current_ils_acq
from invenio_app_ils.patrons.api import get_patron_or_unknown
from invenio_app_ils.proxies import current_app_ils
from invenio_app_ils.records.jsonresolvers.api import \
get_field_value_for_record as get_field_value
Expand All @@ -22,36 +23,38 @@ def jsonresolver_loader(url_map):
"""Resolve the referred document and patron for an Order Line."""
from flask import current_app

def patron_resolver(order_line, patron):
def patron_resolver(order_line, patron_pid):
"""Resolve the Patron for the given Order Line."""
order_line["patron"] = patron.dumps_loader()
patron = get_patron_or_unknown(patron_pid)

order_line["patron"] = patron
return patron

def document_resolver(order_line, doc):
"""Resolve the Document for the given Order Line."""
order_line["document"] = pick(doc, 'cover_metadata', 'pid', 'title')
order_line["document"] = pick(doc, "cover_metadata", "pid", "title")
return doc

def order_lines_resolver(order_pid):
Order = current_ils_acq.order_record_cls
Document = current_app_ils.document_record_cls
Patron = current_app_ils.patron_cls
order_lines = get_field_value(Order, order_pid, "order_lines")

documents = {}
patrons = {}
for order_line in order_lines:
doc_pid = order_line.get('document_pid')
doc_pid = order_line.get("document_pid")
doc = documents.get(doc_pid) or Document.get_record_by_pid(doc_pid)
documents[doc["pid"]] = doc
document_resolver(order_line, doc)

patron_pid = order_line.get('patron_pid')
patron_pid = order_line.get("patron_pid")
if not patron_pid:
continue
patron = patrons.get(patron_pid) or Patron.get_patron(patron_pid)
patrons[patron_pid] = patron
patron_resolver(order_line, patron)
patron = get_patron_or_unknown(patron_pid)
if not patron:
patron_resolver(order_line, None)
continue
patron_resolver(order_line, patron_pid)
return order_lines

url_map.add(
Expand Down
177 changes: 177 additions & 0 deletions invenio_app_ils/anonymization.py
@@ -0,0 +1,177 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018-2020 CERN.
#
# invenio-app-ils is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Functions to anonymize user data and activity."""


from copy import deepcopy

from invenio_accounts.models import SessionActivity, User, userrole
from invenio_circulation.api import Loan
from invenio_circulation.proxies import current_circulation
from invenio_db import db
from invenio_oauthclient.models import RemoteAccount, UserIdentity
from invenio_userprofiles.models import UserProfile

from invenio_app_ils.acquisition.api import Order
from invenio_app_ils.acquisition.proxies import current_ils_acq
from invenio_app_ils.document_requests.api import DocumentRequest
from invenio_app_ils.errors import PatronNotFoundError
from invenio_app_ils.ill.api import BorrowingRequest
from invenio_app_ils.ill.proxies import current_ils_ill
from invenio_app_ils.patrons.api import (Patron, get_anonymous_patron_dict,
get_patron_or_unknown)

from .acquisition.search import OrderSearch
from .circulation.search import get_loans_by_patron_pid
from .document_requests.search import DocumentRequestSearch
from .ill.search import BorrowingRequestsSearch
from .patrons.indexer import PatronIndexer
from .proxies import current_app_ils


def get_patron_activity(patron_pid):
    """Get activity related to the given patron pid.

    :param patron_pid: the pid of the patron whose activity is collected.
    :returns: ``None`` when ``get_patron_or_unknown`` yields a falsy value,
        otherwise a dict with the keys ``patron``, ``profile``,
        ``document_requests``, ``borrowing_requests``, ``acquisitions`` and
        ``loans``. Each activity entry is a list of search hits converted
        with ``to_dict()``. ``profile`` is ``None`` when the patron has no
        user profile row.
    :raises ValueError: if ``patron_pid`` is ``None``.
    """
    if patron_pid is None:
        raise ValueError("No patron pid was provided.")

    patron = get_patron_or_unknown(patron_pid)
    if not patron:
        return None

    def dump(search):
        """Scan the whole search and return its hits as plain dicts."""
        return [hit.to_dict() for hit in search.scan()]

    patron_document_requests = dump(
        DocumentRequestSearch().search_by_patron_pid(patron_pid)
    )
    patron_borrowing_requests = dump(
        BorrowingRequestsSearch().search_by_patron_pid(patron_pid)
    )
    patron_acquisitions = dump(OrderSearch().search_by_patron_pid(patron_pid))
    patron_loans = dump(get_loans_by_patron_pid(patron_pid))

    # The profile lookup may find no row; dereferencing ``__dict__`` on the
    # missing result used to crash with AttributeError.
    profile = UserProfile.get_by_userid(patron_pid)
    # Hand out a shallow copy rather than the instance's live ``__dict__`` so
    # that callers editing the result cannot corrupt the model object.
    patron_profile = dict(vars(profile)) if profile is not None else None

    return {
        "patron": patron,
        "profile": patron_profile,
        "document_requests": patron_document_requests,
        "borrowing_requests": patron_borrowing_requests,
        "acquisitions": patron_acquisitions,
        "loans": patron_loans,
    }


def _reindex_patron_records(hits, record_cls, indexer, patron_fields=None):
    """Re-commit and re-index the record behind each hit; return the count.

    When ``patron_fields`` is given it replaces the ``patron`` field of every
    record before the record is committed.
    """
    reindexed = 0
    for hit in hits:
        record = record_cls.get_record_by_pid(hit.pid)
        if patron_fields is not None:
            record["patron"] = patron_fields
        record.commit()
        indexer.index(record)
        reindexed += 1
    return reindexed


def anonymize_patron_data(patron_pid, force=False):
    """Anonymize all the data/activity related to the given patron pid.

    Deletes the account rows of the user (roles, sessions, identities, remote
    accounts, profile and the user itself), removes the patron from the
    patron index and re-indexes the patron's loans, borrowing requests,
    document requests and acquisition orders. The first three get their
    ``patron`` field replaced with the anonymous patron values.

    :param patron_pid: the pid of the patron to anonymize.
    :param force: when ``True``, continue even if the patron cannot be found.
    :returns: ``None`` when ``get_patron_or_unknown`` yields a falsy value
        and ``force`` is not set, otherwise a tuple ``(dropped, indices)``
        with the number of deleted db rows and of re-indexed records.
    :raises ValueError: if ``patron_pid`` is ``None``.
    :raises PatronNotFoundError: if the patron does not exist and ``force``
        is not set.
    :raises AssertionError: if the patron still has ongoing loans.
    """
    if patron_pid is None:
        raise ValueError("No patron pid was provided.")

    patron = get_patron_or_unknown(patron_pid)
    if not patron and not force:
        return None

    patron_object = None
    try:
        patron_object = current_app_ils.patron_cls.get_patron(patron_pid)
    except PatronNotFoundError:
        if not force:
            # Re-raise the original error to keep its traceback.
            raise

    # Refuse to anonymize before touching the database. Only the number of
    # ongoing loans is needed, so ask for a count instead of scanning hits.
    ongoing_loans = (
        get_loans_by_patron_pid(patron_pid)
        .filter("term", state="ITEM_ON_LOAN")
        .count()
    )
    if ongoing_loans > 0:
        # AssertionError is kept as the exception type for existing callers.
        raise AssertionError(
            "Cannot delete user %s: they have %s ongoing loans."
            % (patron_pid, ongoing_loans)
        )

    # Delete rows from db
    dropped = 0
    with db.session.begin_nested():
        # ``userrole`` is a table, not a model, hence the explicit query.
        roles = db.session.query(userrole).filter(
            userrole.c.user_id == patron_pid
        )
        dropped += roles.delete(synchronize_session=False)

        dropped += SessionActivity.query.filter(
            SessionActivity.user_id == patron_pid
        ).delete()
        dropped += UserIdentity.query.filter(
            UserIdentity.id_user == patron_pid
        ).delete()
        dropped += RemoteAccount.query.filter(
            RemoteAccount.user_id == patron_pid
        ).delete()
        dropped += UserProfile.query.filter(
            UserProfile.user_id == patron_pid
        ).delete()
        dropped += User.query.filter(User.id == patron_pid).delete()

    db.session.commit()
    if patron_object:
        PatronIndexer().delete(patron_object)

    # Serialize empty patron values
    anonymous_patron_fields = get_anonymous_patron_dict(patron_pid)

    indices = 0
    indices += _reindex_patron_records(
        get_loans_by_patron_pid(patron_pid).scan(),
        Loan,
        current_circulation.loan_indexer(),
        anonymous_patron_fields,
    )
    indices += _reindex_patron_records(
        BorrowingRequestsSearch().search_by_patron_pid(patron_pid).scan(),
        BorrowingRequest,
        current_ils_ill.borrowing_request_indexer_cls(),
        anonymous_patron_fields,
    )
    indices += _reindex_patron_records(
        DocumentRequestSearch().search_by_patron_pid(patron_pid).scan(),
        DocumentRequest,
        current_app_ils.document_request_indexer,
        anonymous_patron_fields,
    )
    # Orders are only committed and re-indexed; no field is rewritten here.
    indices += _reindex_patron_records(
        OrderSearch().search_by_patron_pid(patron_pid).scan(),
        Order,
        current_ils_acq.order_indexer,
    )

    return dropped, indices
11 changes: 3 additions & 8 deletions invenio_app_ils/circulation/jsonresolvers/loan.py
Expand Up @@ -11,7 +11,7 @@
from invenio_pidstore.errors import PIDDeletedError

from invenio_app_ils.circulation.utils import resolve_item_from_loan
from invenio_app_ils.patrons.api import get_patron_or_empty_dict
from invenio_app_ils.patrons.api import get_patron_or_unknown
from invenio_app_ils.proxies import current_app_ils
from invenio_app_ils.records.jsonresolvers.api import \
get_field_value_for_record as get_field_value
Expand All @@ -34,12 +34,7 @@ def item_resolver(loan_pid):
# if the item is a BorrowingRequest, then some of these
# fields might not be there
item = pick(
item,
"barcode",
"description",
"document_pid",
"medium",
"pid",
item, "barcode", "description", "document_pid", "medium", "pid",
)

return item
Expand All @@ -54,7 +49,7 @@ def loan_patron_resolver(loan_pid):
except KeyError:
return {}

return get_patron_or_empty_dict(patron_pid)
return get_patron_or_unknown(patron_pid)


@get_pid_or_default(default_value=dict())
Expand Down
14 changes: 13 additions & 1 deletion invenio_app_ils/circulation/search.py
Expand Up @@ -118,11 +118,23 @@ def get_most_loaned_documents(from_date, to_date, bucket_size):
def get_loans_aggregated_by_states(document_pid, states, patron_pid=None):
    """Returns loans aggregated by states for a given document.

    :param document_pid: only loans of this document are matched.
    :param states: only loans whose state is one of these are matched.
    :param patron_pid: when truthy, additionally restrict to this patron.
    :returns: the loan search with a ``states`` terms aggregation on the
        ``state`` field attached.
    """
    search_cls = current_circulation.loan_search_cls
    # Build the base query once.
    search = (
        search_cls()
        .filter("terms", state=states)
        .filter("term", document_pid=document_pid)
    )
    if patron_pid:
        search = search.filter("term", patron_pid=patron_pid)
    # Aggregation
    aggs = A("terms", field="state")
    search.aggs.bucket("states", aggs)

    return search


def get_loans_by_patron_pid(patron_pid):
    """Build the search for all loans (past and current) of a patron.

    :param patron_pid: the pid of the patron whose loans are matched.
    :returns: the loan search filtered on ``patron_pid``.
    """
    loan_search = current_circulation.loan_search_cls()
    return loan_search.filter("term", patron_pid=patron_pid)

0 comments on commit ed3867d

Please sign in to comment.