Skip to content

Commit

Permalink
FIX: Fixes DateTime time conversions for CSV parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
gethvi committed Mar 8, 2023
1 parent eba91a6 commit d3ff5ab
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 11 deletions.
28 changes: 28 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,34 @@
CHANGELOG
==========

3.1.1 (unreleased)
------------------

### Core

### Development

### Bots

#### Collectors

#### Parsers
- `intelmq.bots.parsers.generic.parser_csv`: Changed to use the shared `DateTime.TIME_CONVERSIONS` from `intelmq.lib.harmonization` instead of a module-local `TIME_CONVERSIONS` table (PR#2329 by Filip Pokorný).

#### Experts

#### Outputs

### Documentation

### Tests

### Packaging

### Tools

### Known Errors

3.1.0 (2023-02-10)
------------------

Expand Down
14 changes: 3 additions & 11 deletions intelmq/bots/parsers/generic/parser_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,12 @@
import re
from typing import Optional, Union, Iterable

from dateutil.parser import parse

from intelmq.lib import utils
from intelmq.lib.bot import ParserBot
from intelmq.lib.exceptions import InvalidArgument, InvalidValue
from intelmq.lib.harmonization import DateTime
from intelmq.lib.utils import RewindableFileHandle

TIME_CONVERSIONS = {'timestamp': DateTime.from_timestamp,
'windows_nt': DateTime.from_windows_nt,
'epoch_millis': DateTime.from_epoch_millis,
None: lambda value: parse(value, fuzzy=True).isoformat() + " UTC"}

DATA_CONVERSIONS = {'json': lambda data: json.loads(data)}
DOCS = "https://intelmq.readthedocs.io/en/latest/guides/Bots.html#generic-csv-parser"

Expand Down Expand Up @@ -71,9 +64,9 @@ def init(self):
# handle empty strings, false etc.
if not self.time_format:
self.time_format = None
if self.time_format not in TIME_CONVERSIONS.keys():
if self.time_format and self.time_format.split('|')[0] not in DateTime.TIME_CONVERSIONS.keys():
raise InvalidArgument('time_format', got=self.time_format,
expected=list(TIME_CONVERSIONS.keys()),
expected=list(DateTime.TIME_CONVERSIONS.keys()),
docs=DOCS)
if self.filter_type and self.filter_type not in ('blacklist', 'whitelist'):
raise InvalidArgument('filter_type', got=self.filter_type,
Expand Down Expand Up @@ -137,7 +130,6 @@ def parse_line(self, row: list, report):
if search:
value = search.group(0)
else:
type = None
value = None

if key in ("__IGNORE__", ""):
Expand All @@ -147,7 +139,7 @@ def parse_line(self, row: list, report):
value = DATA_CONVERSIONS[self.data_type[key]](value)

if key in ("time.source", "time.destination"):
value = TIME_CONVERSIONS[self.time_format](value)
value = DateTime.TIME_CONVERSIONS[self.time_format](value)
elif key.endswith('.url'):
if not value:
continue
Expand Down

0 comments on commit d3ff5ab

Please sign in to comment.