Skip to content
This repository has been archived by the owner on Nov 14, 2020. It is now read-only.

Commit

Permalink
[refs #95885] Extract file types
Browse files Browse the repository at this point in the history
  • Loading branch information
andrei-duhnea committed Jun 12, 2018
1 parent 7afe194 commit 7d765e0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
13 changes: 13 additions & 0 deletions search/management/commands/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
DKeyword,
DLanguage,
Organization,
DFileType,
)


Expand Down Expand Up @@ -222,3 +223,15 @@ def update_organizations(records):
Organization.objects.create(name=o[0], email=o[1])
new += 1
return new


def update_file_types(records):
    """Register file types (extensions) found in the records' internal locators.

    For each record, the extension is taken from the final path component of
    ``resource_locator_internal``, normalised to lower case with surrounding
    whitespace removed. Extensions not already stored as a ``DFileType.name``
    are handed to ``_update_data`` for creation.

    Args:
        records: Iterable of objects exposing a ``resource_locator_internal``
            attribute (a string; ``None`` or empty values are skipped).

    Returns:
        Whatever ``_update_data`` returns; the caller compares it with 0 and
        reports it as the number of file types added.
    """
    extensions = set()
    for r in records:
        # Tolerate records that carry no internal locator at all.
        locator = (r.resource_locator_internal or '').strip()

        # Only the last path component can carry an extension. Looking at the
        # whole locator would let a dot in a directory or host name
        # (e.g. "http://example.com/file") be mistaken for one.
        filename = locator.replace('\\', '/').rsplit('/', 1)[-1]

        # No dot means no extension; previously the entire locator was
        # recorded as a "file type" in that case.
        _, dot, ext = filename.rpartition('.')
        if not dot:
            continue

        ext = ext.strip().lower()
        if ext:
            extensions.add(ext)

    # Fetch only the name column, as a set, so the difference below is cheap.
    existing = set(
        DFileType.objects.filter(name__in=extensions).values_list('name', flat=True)
    )
    # Sorted so that new rows are created in a deterministic order.
    data = sorted(extensions - existing)
    return _update_data(DFileType, 'name', data)
5 changes: 5 additions & 0 deletions search/management/commands/load_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
update_keywords,
update_languages,
update_organizations,
update_file_types,
)

defusedxml.defuse_stdlib()
Expand Down Expand Up @@ -76,6 +77,10 @@ def update_dictionaries(self, records):
if new > 0:
self.stdout.write(f'Added {new} organizations')

new = update_file_types(records)
if new > 0:
self.stdout.write(f'Added {new} file types')

def handle(self, *args, **options):
start_row = options['startrow']
try:
Expand Down

0 comments on commit 7d765e0

Please sign in to comment.