Skip to content

Commit

Permalink
Merge branch 'feature/add_type_hints'
Browse files Browse the repository at this point in the history
  • Loading branch information
GjjvdBurg committed Sep 24, 2023
2 parents 0982707 + aba1362 commit c600cf6
Show file tree
Hide file tree
Showing 41 changed files with 647 additions and 377 deletions.
2 changes: 1 addition & 1 deletion clevercsv/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ._optional import import_optional_dependency


def main():
def main() -> None:
# Check that necessary dependencies are available
import_optional_dependency("wilderness")

Expand Down
38 changes: 20 additions & 18 deletions clevercsv/_regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Regular expressions for number formats #
##########################################

PATTERN_NUMBER_1: Pattern = regex.compile(
PATTERN_NUMBER_1: Pattern[str] = regex.compile(
r"^(?=[+-\.\d])"
r"[+-]?"
r"(?:0|[1-9]\d*)?"
Expand All @@ -35,19 +35,19 @@
r"$"
)

PATTERN_NUMBER_2: Pattern = regex.compile(
PATTERN_NUMBER_2: Pattern[str] = regex.compile(
r"[+-]?(?:[1-9]|[1-9]\d{0,2})(?:\,\d{3})+\.\d*"
)

PATTERN_NUMBER_3: Pattern = regex.compile(
PATTERN_NUMBER_3: Pattern[str] = regex.compile(
r"[+-]?(?:[1-9]|[1-9]\d{0,2})(?:\.\d{3})+\,\d*"
)

##############################################
# Regular expressions for url, email, and ip #
##############################################

PATTERN_URL: Pattern = regex.compile(
PATTERN_URL: Pattern[str] = regex.compile(
r"("
r"(https?|ftp):\/\/(?!\-)"
r")?"
Expand All @@ -62,17 +62,17 @@
r"(\.[a-z]+)?"
)

PATTERN_EMAIL: Pattern = regex.compile(
PATTERN_EMAIL: Pattern[str] = regex.compile(
r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
)

PATTERN_IPV4: Pattern = regex.compile(r"(?:\d{1,3}\.){3}\d{1,3}")
PATTERN_IPV4: Pattern[str] = regex.compile(r"(?:\d{1,3}\.){3}\d{1,3}")

#################################################
# Regular expressions related to time notations #
#################################################

PATTERN_TIME_HHMMSSZZ: Pattern = regex.compile(
PATTERN_TIME_HHMMSSZZ: Pattern[str] = regex.compile(
r"(0[0-9]|1[0-9]|2[0-3])"
r":"
r"([0-5][0-9])"
Expand All @@ -84,21 +84,23 @@
r"([0-5][0-9])"
)

PATTERN_TIME_HHMMSS: Pattern = regex.compile(
PATTERN_TIME_HHMMSS: Pattern[str] = regex.compile(
r"(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])"
)

PATTERN_TIME_HHMM_1: Pattern = regex.compile(
PATTERN_TIME_HHMM_1: Pattern[str] = regex.compile(
r"(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])"
)

PATTERN_TIME_HHMM_2: Pattern = regex.compile(
PATTERN_TIME_HHMM_2: Pattern[str] = regex.compile(
r"(0[0-9]|1[0-9]|2[0-3])([0-5][0-9])"
)

PATTERN_TIME_HH: Pattern = regex.compile(r"(0[0-9]|1[0-9]|2[0-3])([0-5][0-9])")
PATTERN_TIME_HH: Pattern[str] = regex.compile(
r"(0[0-9]|1[0-9]|2[0-3])([0-5][0-9])"
)

PATTERN_TIME_HMM: Pattern = regex.compile(
PATTERN_TIME_HMM: Pattern[str] = regex.compile(
r"([0-9]|1[0-9]|2[0-3]):([0-5][0-9])"
)

Expand All @@ -109,7 +111,7 @@
# Regex for various date formats. See
# https://github.com/alan-turing-institute/CleverCSV/blob/master/notes/date_regex/dateregex_annotated.txt
# for an explanation.
PATTERN_DATE: Pattern = regex.compile(
PATTERN_DATE: Pattern[str] = regex.compile(
r"("
r"(0[1-9]|1[0-2])"
r"("
Expand Down Expand Up @@ -238,7 +240,7 @@
ALPHANUM_SPECIALS: str = regex.escape(r"".join(SPECIALS_ALLOWED))

# Regex for alphanumeric text
PATTERN_ALPHANUM: Pattern = regex.compile(
PATTERN_ALPHANUM: Pattern[str] = regex.compile(
r"("
r"\p{N}?\p{L}+"
r"["
Expand All @@ -254,7 +256,7 @@
r"".join(SPECIALS_ALLOWED) + r"".join(QUOTED_SPECIALS_ALLOWED)
)
# Regex for alphanumeric text in quoted strings
PATTERN_ALPHANUM_QUOTED: Pattern = regex.compile(
PATTERN_ALPHANUM_QUOTED: Pattern[str] = regex.compile(
r"("
r"\p{N}?\p{L}+"
r"["
Expand All @@ -270,21 +272,21 @@
# Regular expression for currency #
###################################

PATTERN_CURRENCY: Pattern = regex.compile(r"\p{Sc}\s?(.*)")
PATTERN_CURRENCY: Pattern[str] = regex.compile(r"\p{Sc}\s?(.*)")

#####################################
# Regular expression for unix paths #
#####################################

PATTERN_UNIX_PATH: Pattern = regex.compile(
PATTERN_UNIX_PATH: Pattern[str] = regex.compile(
r"[~.]?(?:\/[a-zA-Z0-9\.\-\_]+)+\/?"
)

#################################################
# Map of regular expressions for type detection #
#################################################

DEFAULT_TYPE_REGEXES: Dict[str, Pattern] = {
DEFAULT_TYPE_REGEXES: Dict[str, Pattern[str]] = {
"number_1": PATTERN_NUMBER_1,
"number_2": PATTERN_NUMBER_2,
"number_3": PATTERN_NUMBER_3,
Expand Down
3 changes: 3 additions & 0 deletions clevercsv/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@
from typing import Any
from typing import Mapping
from typing import Type
from typing import TypeVar
from typing import Union

from clevercsv.dialect import SimpleDialect

AnyPath = Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"]
StrPath = Union[str, "os.PathLike[str]"]
_OpenFile = Union[AnyPath, int]
_DictRow = Mapping[str, Any]
_DialectLike = Union[str, csv.Dialect, Type[csv.Dialect], SimpleDialect]
_T = TypeVar("_T")

if sys.version_info >= (3, 8):
from typing import Dict as _DictReadMapping
Expand Down
8 changes: 5 additions & 3 deletions clevercsv/consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def cached_is_known_type(cell: str, is_quoted: bool) -> bool:
self._cached_is_known_type = cached_is_known_type

def detect(
self, data: str, delimiters: Optional[Iterable[str]] = None
self, data: str, delimiters: Optional[List[str]] = None
) -> Optional[SimpleDialect]:
"""Detect the dialect using the consistency measure
Expand Down Expand Up @@ -192,7 +192,7 @@ def get_best_dialects(
return [d for d, score in scores.items() if score.Q == Qmax]

def compute_type_score(
self, data: str, dialect: SimpleDialect, eps=DEFAULT_EPS_TYPE
self, data: str, dialect: SimpleDialect, eps: float = DEFAULT_EPS_TYPE
) -> float:
"""Compute the type score"""
total = known = 0
Expand All @@ -211,8 +211,10 @@ def detect_dialect_consistency(
delimiters: Optional[Iterable[str]] = None,
skip: bool = True,
verbose: bool = False,
):
) -> Optional[SimpleDialect]:
"""Helper function that wraps ConsistencyDetector"""
# Mostly kept for backwards compatibility
consistency_detector = ConsistencyDetector(skip=skip, verbose=verbose)
if delimiters is not None:
delimiters = list(delimiters)
return consistency_detector.detect(data, delimiters=delimiters)
2 changes: 1 addition & 1 deletion clevercsv/console/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
from .application import build_application


def main():
def main() -> int:
app = build_application()
return app.run()
4 changes: 2 additions & 2 deletions clevercsv/console/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class CleverCSVApplication(Application):
),
}

def __init__(self):
def __init__(self) -> None:
super().__init__(
"clevercsv",
version=__version__,
Expand All @@ -74,7 +74,7 @@ def __init__(self):
extra_sections=self._extra,
)

def register(self):
def register(self) -> None:
self.add_argument(
"-V",
"--version",
Expand Down
21 changes: 17 additions & 4 deletions clevercsv/console/commands/_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# -*- coding: utf-8 -*-

from typing import Any
from typing import List
from typing import Optional

from clevercsv import __version__
from clevercsv.dialect import SimpleDialect


def parse_int(val, name):
def parse_int(val: Any, name: str) -> Optional[int]:
"""Parse a number to an integer if possible"""
if val is None:
return val
Expand All @@ -15,7 +20,13 @@ def parse_int(val, name):
)


def generate_code(filename, dialect, encoding, use_pandas=False):
def generate_code(
filename: str,
dialect: SimpleDialect,
encoding: Optional[str],
use_pandas: bool = False,
) -> List[str]:
assert dialect.quotechar is not None
d = '"\\t"' if dialect.delimiter == "\t" else f'"{dialect.delimiter}"'
q = '"%s"' % (dialect.quotechar.replace('"', '\\"'))
e = repr(f"{dialect.escapechar}").replace("'", '"')
Expand All @@ -26,15 +37,17 @@ def generate_code(filename, dialect, encoding, use_pandas=False):
"import clevercsv",
]
if use_pandas:
return base + [
return [
*base,
"",
f'df = clevercsv.read_dataframe("{filename}", delimiter={d}, '
f"quotechar={q}, escapechar={e})",
"",
]

enc = "None" if encoding is None else f'"{encoding}"'
lines = base + [
lines = [
*base,
"",
f'with open("{filename}", "r", newline="", encoding={enc}) as fp:',
" reader = clevercsv.reader(fp, "
Expand Down
4 changes: 2 additions & 2 deletions clevercsv/console/commands/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ class CodeCommand(Command):
"and copy the generated code to a Python script."
)

def __init__(self):
def __init__(self) -> None:
super().__init__(
name="code",
title="Generate Python code to import a CSV file",
description=self._description,
extra_sections={"CleverCSV": "Part of the CleverCSV suite"},
)

def register(self):
def register(self) -> None:
self.add_argument("path", help="Path to the CSV file")
self.add_argument(
"-e",
Expand Down
6 changes: 3 additions & 3 deletions clevercsv/console/commands/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
class DetectCommand(Command):
_description = "Detect the dialect of a CSV file."

def __init__(self):
def __init__(self) -> None:
super().__init__(
name="detect",
title="Detect the dialect of a CSV file",
description=self._description,
extra_sections={"CleverCSV": "Part of the CleverCSV suite"},
)

def register(self):
def register(self) -> None:
self.add_argument("path", help="Path to the CSV file")
self.add_argument(
"-c",
Expand Down Expand Up @@ -100,7 +100,7 @@ def register(self):
help="Add the runtime of the detection to the detection output.",
)

def handle(self):
def handle(self) -> int:
verbose = self.args.verbose
num_chars = parse_int(self.args.num_chars, "num-chars")
method = "consistency" if self.args.consistency else "auto"
Expand Down
4 changes: 2 additions & 2 deletions clevercsv/console/commands/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ class ExploreCommand(Command):
"to read the file as a Pandas dataframe."
)

def __init__(self):
def __init__(self) -> None:
super().__init__(
name="explore",
title="Explore the CSV file in an interactive Python shell",
description=self._description,
extra_sections={"CleverCSV": "Part of the CleverCSV suite"},
)

def register(self):
def register(self) -> None:
self.add_argument("path", help="Path to the CSV file")
self.add_argument(
"-e",
Expand Down
Loading

0 comments on commit c600cf6

Please sign in to comment.