-
-
Notifications
You must be signed in to change notification settings - Fork 143
/
updatecalaccessrawdata.py
108 lines (88 loc) · 2.78 KB
/
updatecalaccessrawdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Download, unzip, clean and load the latest CAL-ACCESS database ZIP.
"""
# Files
import os
# Commands
from django.core.management import call_command
from calaccess_raw.management.commands import CalAccessCommand
# Models
from calaccess_raw import get_model_list
# Logging
import logging
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class Command(CalAccessCommand):
    """
    Download, unzip, clean and load the latest CAL-ACCESS database ZIP.

    The work is delegated to other management commands, run in this order:
    ``downloadcalaccessrawdata``, ``extractcalaccessrawfiles``, then
    ``cleancalaccessrawfile`` once per TSV file and ``loadcalaccessrawfile``
    once per model whose CSV file exists.
    """

    help = "Download, unzip, clean and load the latest CAL-ACCESS database ZIP"

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.

        Registers ``--keep-files`` (stored as ``keep_files``, default False)
        on top of whatever arguments the parent command defines.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "--keep-files",
            action="store_true",
            dest="keep_files",
            default=False,
            help="Keep zip, unzipped, TSV and CSV files",
        )

    def handle(self, *args, **options):
        """
        Make it happen.

        Runs the download, extract, clean and load steps in sequence and
        reports the elapsed time after each one when verbosity is non-zero.
        """
        # NOTE(review): self.verbosity and self.tsv_dir are not assigned in
        # this class; presumably the parent's handle() sets them - confirm.
        super(Command, self).handle(*args, **options)

        # set / compute any attributes that multiple class methods need
        self.keep_files = options["keep_files"]

        # Download
        call_command(
            "downloadcalaccessrawdata",
            verbosity=self.verbosity,
        )
        self._report_duration()

        # Extract
        # Forward verbosity here as well, so this step honours the same
        # --verbosity setting as the download, clean and load steps.
        call_command(
            "extractcalaccessrawfiles",
            verbosity=self.verbosity,
            keep_files=self.keep_files,
        )
        self._report_duration()

        # Clean
        self.clean()
        self._report_duration()

        # Load
        self.load()
        self._report_duration()

        self.success("Done!")

    def _report_duration(self):
        """
        Calls self.duration() unless verbosity is zero.
        """
        if self.verbosity:
            self.duration()

    def clean(self):
        """
        Clean up the raw data files from the state so they are ready to get loaded into the database.

        Calls ``cleancalaccessrawfile`` once for every file in ``self.tsv_dir``
        whose name ends in ``.TSV`` (case-insensitive).
        """
        if self.verbosity:
            self.header("Cleaning data files")

        # Match on the suffix rather than a substring so that stray files
        # such as "FOO.TSV.BAK" are not handed to the cleaner. Sorting makes
        # the processing order reproducible; os.listdir order is arbitrary.
        tsv_list = sorted(
            name
            for name in os.listdir(self.tsv_dir)
            if name.upper().endswith(".TSV")
        )

        # Loop through all the files in the source directory
        for name in tsv_list:
            call_command(
                "cleancalaccessrawfile",
                name,
                verbosity=self.verbosity,
                keep_file=self.keep_files,
            )

    def load(self):
        """
        Loads the cleaned up csv files into the database.

        Calls ``loadcalaccessrawfile`` once for every model returned by
        ``get_model_list()`` whose CSV path exists on disk; models without a
        CSV file are skipped.
        """
        if self.verbosity:
            self.header("Loading data files")

        model_list = [
            x for x in get_model_list() if os.path.exists(x.objects.get_csv_path())
        ]

        for model in model_list:
            call_command(
                "loadcalaccessrawfile",
                model.__name__,
                verbosity=self.verbosity,
                keep_file=self.keep_files,
            )