From 3c2d5f6af903f1e0cc7904fa4af1478f32f95be1 Mon Sep 17 00:00:00 2001 From: gethvi Date: Wed, 8 Mar 2023 15:42:19 +0100 Subject: [PATCH] FIX: Fixes DateTime time conversions for CSV parser. --- CHANGELOG.md | 28 ++++++++++++++++++++++ intelmq/bots/parsers/generic/parser_csv.py | 14 +++-------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d2ddfb8c..30d44c198 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,34 @@ CHANGELOG ========== +3.1.1 (unreleased) +------------------ + +### Core + +### Development + +### Bots + +#### Collectors + +#### Parsers +- `intelmq.bots.parsers.generic.parser_csv`: Changed to use `DateTime.TIME_CONVERSIONS` (PR#2329 by Filip Pokorný). + +#### Experts + +#### Outputs + +### Documentation + +### Tests + +### Packaging + +### Tools + +### Known Errors + 3.1.0 (2023-02-10) ------------------ diff --git a/intelmq/bots/parsers/generic/parser_csv.py b/intelmq/bots/parsers/generic/parser_csv.py index 28482c6a1..9d8264cc8 100644 --- a/intelmq/bots/parsers/generic/parser_csv.py +++ b/intelmq/bots/parsers/generic/parser_csv.py @@ -21,19 +21,12 @@ import re from typing import Optional, Union, Iterable -from dateutil.parser import parse - from intelmq.lib import utils from intelmq.lib.bot import ParserBot from intelmq.lib.exceptions import InvalidArgument, InvalidValue from intelmq.lib.harmonization import DateTime from intelmq.lib.utils import RewindableFileHandle -TIME_CONVERSIONS = {'timestamp': DateTime.from_timestamp, - 'windows_nt': DateTime.from_windows_nt, - 'epoch_millis': DateTime.from_epoch_millis, - None: lambda value: parse(value, fuzzy=True).isoformat() + " UTC"} - DATA_CONVERSIONS = {'json': lambda data: json.loads(data)} DOCS = "https://intelmq.readthedocs.io/en/latest/guides/Bots.html#generic-csv-parser" @@ -71,9 +64,9 @@ def init(self): # handle empty strings, false etc. if not self.time_format: self.time_format = None - if self.time_format not in TIME_CONVERSIONS.keys(): + if self.time_format and self.time_format.split('|')[0] not in DateTime.TIME_CONVERSIONS.keys(): raise InvalidArgument('time_format', got=self.time_format, - expected=list(TIME_CONVERSIONS.keys()), + expected=list(DateTime.TIME_CONVERSIONS.keys()), docs=DOCS) if self.filter_type and self.filter_type not in ('blacklist', 'whitelist'): raise InvalidArgument('filter_type', got=self.filter_type, @@ -137,7 +130,6 @@ def parse_line(self, row: list, report): if search: value = search.group(0) else: - type = None value = None if key in ("__IGNORE__", ""): @@ -147,7 +139,7 @@ def parse_line(self, row: list, report): value = DATA_CONVERSIONS[self.data_type[key]](value) if key in ("time.source", "time.destination"): - value = TIME_CONVERSIONS[self.time_format](value) + value = DateTime.TIME_CONVERSIONS[self.time_format](value) elif key.endswith('.url'): if not value: continue