-
Notifications
You must be signed in to change notification settings - Fork 4
/
magic.py
87 lines (67 loc) · 2.71 KB
/
magic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# This module contains the magic needed to handle the database used by the old
# DDCZ application, which connects using a latin2 connection but then proceeds
# to write cp1250-encoded data into those fields
# ...aaaaand a few other, let's call them, missteps?
import logging
from django.db import models
logger = logging.getLogger(__name__)
class MisencodedTextField(models.TextField):
    """
    TextField for columns holding cp1250 bytes behind a latin2 connection.

    Strings coming from the database are re-encoded as latin2 and the
    resulting bytes decoded as cp1250; strings going to the database get the
    reverse treatment. Non-string values pass through untouched.
    """

    def from_db_value(self, value, expression, connection):
        """Return the database string reinterpreted as cp1250 text."""
        if not isinstance(value, str):
            return value
        raw_bytes = value.encode("latin2")
        return raw_bytes.decode("cp1250")

    def get_db_prep_value(self, value, connection, prepared=False):
        """
        Return the string in the form the latin2 connection should receive.

        The conversion is applied to every string, regardless of `prepared`.
        """
        if not isinstance(value, str):
            return value
        raw_bytes = value.encode("cp1250")
        return raw_bytes.decode("latin2")
class MisencodedCharField(models.CharField):
    """
    CharField for columns holding cp1250 bytes behind a latin2 connection.

    Strings coming from the database are re-encoded as latin2 and the
    resulting bytes decoded as cp1250; strings going to the database get the
    reverse treatment unless they are flagged as already prepared.
    """

    def from_db_value(self, value, expression, connection):
        """Return the database string reinterpreted as cp1250 text."""
        if not isinstance(value, str):
            return value
        raw_bytes = value.encode("latin2")
        return raw_bytes.decode("cp1250")

    def get_db_prep_value(self, value, connection, prepared=False):
        """
        Return the string in the form the latin2 connection should receive.

        Values flagged as `prepared`, and anything that is not a string,
        are returned unchanged.
        """
        if prepared or not isinstance(value, str):
            return value
        raw_bytes = value.encode("cp1250")
        return raw_bytes.decode("latin2")
# Note: Derived from CharField and not BooleanField since the underlying
# storage field is still CharField and not database-native boolean type!
# Migrate as part of cleanup
class MisencodedBooleanField(models.CharField):
    """
    Boolean flag stored as the strings "1" / "0" in a character column.

    Reading maps the stored string to a bool; writing maps a bool (or a
    legacy string value) back to "1" / "0".
    """

    def from_db_value(self, value, expression, connection):
        """Return True for the stored string "1", False for any other string.

        Non-string values (e.g. None for NULL) are returned unchanged.
        """
        if isinstance(value, str):
            return value == "1"
        return value

    def get_db_prep_value(self, value, connection, prepared=False):
        """Return "1" or "0" for boolean-like input that is not yet prepared.

        Values flagged as `prepared`, and values that are neither bool nor
        str (e.g. None), are returned unchanged.
        """
        if prepared:
            return value
        if isinstance(value, bool):
            # from_db_value hands the application a bool, so this is the
            # common case on save; store it in the column's own format.
            return "1" if value else "0"
        if isinstance(value, str):
            # "0" is a non-empty (truthy) string; a plain truthiness test
            # would write it back as "1" and silently flip the flag.
            return "0" if value in ("", "0") else "1"
        return value
class MisencodedIntegerField(models.CharField):
    """
    This represents a field that should be an integer, but somehow ended up
    being VARCHAR() on the database level. For one reason or another, data
    integrity problems DO exist there.

    This field exists to represent valid values while choking on invalid
    ones: those are represented as 0 and the problem is reported to the
    developer. NULL is passed through as None in both directions.

    One day, all data will be cleaned up and this field will be ALTER TABLEd
    to (SMALL)INT/IntegerField.

    One can dream!
    """

    def from_db_value(self, value, expression, connection):
        """Return the stored value as an int.

        Returns None for NULL, and 0 (after logging the problem) when the
        stored text cannot be parsed as an integer.
        """
        if value is None:
            # int(None) raises TypeError, which is not a data problem;
            # NULL simply has no integer value.
            return None
        try:
            return int(value)
        except ValueError:
            logger.exception(
                "Integer in VARCHAR is not an integer, but %s: %s",
                type(value),
                value,
            )
            return 0

    def get_db_prep_value(self, value, connection, prepared=False):
        """Return the value as a string for the VARCHAR column.

        None is returned as None so that it is stored as NULL instead of
        the literal text "None".
        """
        if value is None:
            return None
        return str(value)