-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapplying.py
288 lines (223 loc) · 8.98 KB
/
applying.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# See config/data_retention.yaml
#
# Terminology:
#
# In data_retention.yaml, we use more "human friendly" terminology.
# As part of load_data_retention_policy, we translate that into the more
# precise, useful terminology that we want to use in code.
# We use 'erase' in this code to distinguish it from 'delete':
#
# - delete refers specifically to a database delete,
# in which the entire row is removed.
#
# - erasure is a more general concept that can refer
# to a range of different erasing methods.
from __future__ import annotations
from datetime import date, datetime
from django.db import models, transaction
from django.db.models.expressions import RawSQL
from django.db.models.fields import Field
from django.utils import timezone
from django_countries.fields import CountryField
from mailer import models as mailer_models
from paypal.standard.ipn.models import PayPalIPN
from cciw.accounts.models import User
from cciw.bookings.models import Booking, BookingAccount, SupportingInformation, SupportingInformationDocument
from cciw.contact_us.models import Message
from cciw.officers.models import Application
from .datatypes import ErasureMethod, ForeverType, ModelDetail, Rules
def load_actual_data_retention_policy():
    """
    Load the data retention policy, making CUSTOM_ERASURE_METHODS available to it.

    The loader is imported when this function is called rather than at
    module import time.
    """
    from .loading import load_data_retention_policy as load_policy

    policy = load_policy(available_erasure_methods=CUSTOM_ERASURE_METHODS)
    return policy
def apply_data_retention(policy=None, ignore_missing_models=False):
    """
    Apply the data retention policy to every model listed in it.

    policy: the policy to apply; when None, the actual policy is loaded
        via load_actual_data_retention_policy().
    ignore_missing_models: when true, policy issues whose message contains
        "Missing models" are disregarded.

    Raises AssertionError if the policy check reports any (remaining) issues.

    All models are processed inside a single transaction. Returns a list
    holding the result of apply_data_retention_single_model for each model,
    in policy order.
    """
    from .checking import get_data_retention_policy_issues as find_policy_issues

    if policy is None:
        policy = load_actual_data_retention_policy()

    problems = find_policy_issues(policy)
    if problems and ignore_missing_models:
        # Easier testing
        problems = [problem for problem in problems if "Missing models" not in problem.msg]
    if problems:
        raise AssertionError("Invalid data retention policy, aborting", problems)

    # One timestamp is shared by every model so they all use the same cut-off base.
    now = timezone.now()
    with transaction.atomic():
        results = [
            apply_data_retention_single_model(now, rules=group.rules, model_detail=model_detail)
            for group in policy.groups
            for model_detail in group.models
        ]
    return results
def apply_data_retention_single_model(now: datetime, *, rules: Rules, model_detail: ModelDetail):
    """
    Erase the expired records of a single model.

    now: the reference time; records are erasable relative to `now - rules.keep`.
    rules: retention rules; if `rules.keep` is a ForeverType nothing is
        erased and an empty list is returned.
    model_detail: the model, plus either `delete_row` or the fields to erase
        (with any per-field custom erasure methods).

    Returns whatever `.delete()` or `.update()` returns on the erasable
    records, depending on `model_detail.delete_row`.
    """
    keep = rules.keep
    if isinstance(keep, ForeverType):
        return []

    # TODO probably want separate method for manual erasure requests,
    # need to be careful about things that are still needed and
    # how to respect `keep`
    cutoff = now - keep
    expired = get_erasable(cutoff, model_detail.model)

    if model_detail.delete_row:
        return expired.delete()

    custom_methods = model_detail.custom_erasure_methods
    updates = {}
    for field in model_detail.fields:
        # A custom method configured for the field wins over the defaults.
        method = custom_methods[field] if field in custom_methods else find_erasure_method(field)
        updates.update(method.build_update_dict(field))

    if model_detail.model not in ERASED_ON_EXCEPTIONS:
        updates["erased_on"] = update_erased_on_field(now)
    return expired.update(**updates)
def get_erasable(before_datetime: date, model: type):
    """
    Return the erasable records of `model` for the given cut-off.

    Looks up the retrieval callable registered for `model` in
    ERASABLE_RECORDS (KeyError if there is none) and calls it with
    `before_datetime`. Asserts that the result's `.model` is `model`.
    """
    retrieve = ERASABLE_RECORDS[model]
    erasable = retrieve(before_datetime)
    assert erasable.model == model
    return erasable
def update_erased_on_field(now: datetime):
    """
    Build the SQL expression used to update `erased_on`.

    The expression keeps an existing non-NULL `erased_on` value and
    only sets `now` where the column is still NULL.
    """
    # The SQL text is kept flush-left so the literal stays unchanged.
    sql = """
CASE WHEN erased_on IS NULL THEN %s
ELSE erased_on
END
"""
    params = [now]
    return RawSQL(sql, params)
# --- Default erasure methods ---
# Marker written into erased text columns (used by CharFieldErasure and TextFieldErasure).
DELETED_STRING = "[deleted]"
# Marker written into erased binary columns (used by BinaryFieldErasure).
DELETED_BYTES = b"[deleted]"
# For EmailFieldErasure and CountryFieldErasure we avoid validation errors in
# admin by setting something that is valid, rather than just deleting.
class EmailFieldErasure(ErasureMethod):
    """Erase email fields: None when the field is both nullable and blank-able, else a placeholder address."""

    def allowed_for_field(self, field: Field):
        return isinstance(field, models.EmailField)

    def build_update_dict(self, field: Field):
        can_be_empty = field.null and field.blank
        replacement = None if can_be_empty else "deleted@example.com"
        return {field.name: replacement}
class CountryFieldErasure(ErasureMethod):
    """Erase country fields: None when the field is both nullable and blank-able, else "GB"."""

    def allowed_for_field(self, field: Field):
        return isinstance(field, CountryField)

    def build_update_dict(self, field: Field):
        can_be_empty = field.null and field.blank
        # "GB" is the United Kingdom
        replacement = None if can_be_empty else "GB"
        return {field.name: replacement}
class CharFieldErasure(ErasureMethod):
    """
    Erase CharField values.

    Nullable fields are set to None. Otherwise the field gets DELETED_STRING,
    or the empty string when the field is too short to hold that marker.
    """

    def allowed_for_field(self, field: Field):
        return isinstance(field, models.CharField)

    def build_update_dict(self, field: Field):
        key = field.name
        if field.null:
            return {key: None}
        max_length = field.max_length
        # max_length may be None (unbounded CharField); comparing None with an
        # int would raise TypeError, and an unbounded column fits the marker.
        if max_length is not None and max_length < len(DELETED_STRING):
            return {key: ""}
        return {key: DELETED_STRING}
class TextFieldErasure(ErasureMethod):
    """Erase TextField values: None for nullable fields, DELETED_STRING otherwise."""

    def allowed_for_field(self, field: Field):
        return isinstance(field, models.TextField)

    def build_update_dict(self, field: Field):
        replacement = None if field.null else DELETED_STRING
        return {field.name: replacement}
class BinaryFieldErasure(ErasureMethod):
    """Erase BinaryField values: None for nullable fields, DELETED_BYTES otherwise."""

    def allowed_for_field(self, field: Field):
        return isinstance(field, models.BinaryField)

    def build_update_dict(self, field: Field):
        replacement = None if field.null else DELETED_BYTES
        return {field.name: replacement}
class BooleanFieldErasure(ErasureMethod):
    """Erase BooleanField values by resetting them to the field's `default` attribute."""

    def allowed_for_field(self, field: Field):
        return isinstance(field, models.BooleanField)

    def build_update_dict(self, field: Field):
        key = field.name
        # Set to default value.
        reset_value = field.default
        return {key: reset_value}
class IntegerFieldErasure(ErasureMethod):
    """Erase IntegerField values by overwriting them with 0."""

    def allowed_for_field(self, field: Field):
        return isinstance(field, models.IntegerField)

    def build_update_dict(self, field: Field):
        key = field.name
        return {key: 0}
class NullableFieldErasure(ErasureMethod):
    """Erase any nullable field by setting it to None."""

    def allowed_for_field(self, field: Field):
        return field.null

    def build_update_dict(self, field: Field):
        key = field.name
        return {key: None}
# This list is ordered to prioritise more specific methods:
# find_erasure_method returns the first entry that accepts a field.
DEFAULT_ERASURE_METHODS: list[ErasureMethod] = [
    erasure_class()
    for erasure_class in (
        EmailFieldErasure,
        CountryFieldErasure,
        CharFieldErasure,
        TextFieldErasure,
        BinaryFieldErasure,
        BooleanFieldErasure,
        IntegerFieldErasure,
        NullableFieldErasure,
    )
]
def find_erasure_method(field):
    """
    Return the first default erasure method that accepts `field`.

    Candidates are tried in DEFAULT_ERASURE_METHODS order.
    Raises LookupError when none of them allows the field.
    """
    candidates = (method for method in DEFAULT_ERASURE_METHODS if method.allowed_for_field(field))
    chosen = next(candidates, None)
    if chosen is None:
        raise LookupError(f"No erasure method found for field {field.model.__name__}.{field.name}")
    return chosen
# --- Domain specific knowledge ---
# Dictionary from model to callable that retrieves the erasable records:
#
# Each callable is invoked by get_erasable with the cut-off value and must
# return an object whose `.model` is the dictionary key (get_erasable asserts
# this). The lambdas defer building the queryset until they are called.
ERASABLE_RECORDS = {
    Message: lambda before_datetime: Message.objects.older_than(before_datetime),
    Application: lambda before_datetime: Application.objects.older_than(before_datetime),
    # Bookings and accounts are additionally restricted with not_in_use()
    Booking: lambda before_datetime: Booking.objects.not_in_use().older_than(before_datetime),
    BookingAccount: lambda before_datetime: BookingAccount.objects.not_in_use().older_than(before_datetime),
    User: lambda before_datetime: User.objects.older_than(before_datetime),
    SupportingInformation: lambda before_datetime: SupportingInformation.objects.older_than(before_datetime),
    SupportingInformationDocument: lambda before_datetime: SupportingInformationDocument.objects.older_than(
        before_datetime
    ),
    # 3rd party:
    # These are filtered directly on their timestamp columns.
    mailer_models.Message: lambda before_datetime: mailer_models.Message.objects.filter(
        when_added__lt=before_datetime,
    ),
    mailer_models.MessageLog: lambda before_datetime: mailer_models.MessageLog.objects.filter(
        when_added__lt=before_datetime,
    ),
    PayPalIPN: lambda before_datetime: PayPalIPN.objects.filter(
        created_at__lt=before_datetime,
    ),
}
# Models for which we don't expect an 'erased_on' field:
# (apply_data_retention_single_model skips the 'erased_on' update for these)
ERASED_ON_EXCEPTIONS = [
    # This is in a 3rd party library, can't add a field to it:
    PayPalIPN,
]
class PreserveAgeOnCamp(ErasureMethod):
    """
    Custom erasure for Booking.date_of_birth.

    Instead of removing the date, it is coarsened to the first day of
    either January or December of the same year.
    """

    def allowed_for_field(self, field: Field):
        return field.model == Booking and field.name == "date_of_birth"

    def build_update_dict(self, field: Field):
        # Birthdates after YYYY-08-31 get counted as next school year,
        # so we anonymise those to YYYY-12-01, everything else to YYYY-01-01
        # See also Booking.age_base_date
        # See also BookingManager.need_approving
        anonymised_date_of_birth = RawSQL(
            """
make_date(
EXTRACT(YEAR FROM date_of_birth)::int,
CASE WHEN EXTRACT(MONTH FROM date_of_birth) > 8 THEN 12
ELSE 1
END,
1
)
""",
            [],
            # make_date() produces a date, not a timestamp, so the expression
            # is declared as a DateField.
            models.DateField(),
        )
        return {"date_of_birth": anonymised_date_of_birth}
# Custom erasure methods by name. load_actual_data_retention_policy passes
# this mapping to the policy loader as `available_erasure_methods`.
CUSTOM_ERASURE_METHODS = {
    "preserve age on camp": PreserveAgeOnCamp(),
}