Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ v0.2.6b1:
v0.2.6:
2023-05-24 -- Use token for PyPI in Makefile
v0.2.7b1:
2025-03-01 -- https://github.com/hapi-server/client-python/issues/76
2025-03-01 -- https://github.com/hapi-server/client-python/issues/78
2025-03-01 -- Unrecognized ISO 8601 time format: '00:00:00.Z' https://github.com/hapi-server/client-python/issues/76
2025-03-01 -- 'infer_datetime_format' is deprecated ... https://github.com/hapi-server/client-python/issues/78
2026-04-08 -- Improve NaN handling https://github.com/hapi-server/client-python/issues/88
74 changes: 64 additions & 10 deletions hapiclient/hapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def hapi(*args, **kwargs):
Cadence = PT1M and request for

start/stop=1999-11-12T00:10:00/stop=1999-11-12T12:09:00

Chunk size is P1D and requested time range < 1/2 of this
=> Default behavior

Expand Down Expand Up @@ -812,13 +812,32 @@ def nhapi(SERVER, DATASET, PARAMETERS, pSTART, pDELTA, i, **opts):
error('Malformed response? Could not read response: {}'.format(urlcsv))
if opts['method'] == '' or opts['method'] == 'pandas':
# Read file into Pandas DataFrame
csv_kwargs = {
'sep': ',',
'header': None,
'encoding': 'utf-8',
'skipinitialspace': True,
'keep_default_na': False,
'na_values': ['NaN', 'nan', 'Nan', 'naN', ' "NaN"', ' "nan"', ' "Nan"', ' "naN"', '"NaN"', '"nan"', '"Nan"', '"naN"']
}
"""
Note that this does not handle trailing whitespace after
any of the na_values. (There is no skiptrailingspace option).
Stripping trailing whitespace would require adding something
like
def strip_field(x):
# Strip whitespace and normalize NaN values.
x = x.strip()
return np.nan if x.lower() == "nan" else x

ncols = cols[-1][1] + 1
csv_kwargs["converters"] = {i: strip_field for i in range(ncols)}
"""
try:
df = pandas.read_csv(fnamecsv,
sep=',',
header=None,
encoding='utf-8')
df = pandas.read_csv(fnamecsv, **csv_kwargs)
except:
error('Malformed response? Could not read response: {}'.format(urlcsv))

# Allocate output N-D array (It is not possible to pass dtype=dt
# as computed to pandas.read_csv; pandas dtype is different
# from numpy's dtype.)
Expand Down Expand Up @@ -865,7 +884,14 @@ def nhapi(SERVER, DATASET, PARAMETERS, pSTART, pDELTA, i, **opts):
pickle.dump(meta, f, protocol=2)

log('Writing %s' % fnamenpy, opts)
np.save(fnamenpy, data)
with warnings.catch_warnings():
# Ignore warning that occurs when saving Unicode data.
warnings.filterwarnings("ignore",
message=r"Stored array in format 3\.0.*",
category=UserWarning,
module=r"numpy\.lib\.format",
)
np.save(fnamenpy, data)

meta['x_totalTime'] = time.time() - tic_totalTime

Expand Down Expand Up @@ -963,9 +989,28 @@ def parse_missing_length(fnamecsv, dt, cols, psizes, pnames, ptypes, opts):
if opts['method'] == 'numpy' or opts['method'] == 'numpynolength':
# If requested method was numpy, use numpynolength method.

# With dtype='None', the data type is determined automatically
table = np.genfromtxt(fnamecsv, dtype=None, deletechars='',
delimiter=',', encoding='utf-8')
ncols = cols[-1][1] + 1

def normalize_field(value):
if isinstance(value, bytes):
value = value.decode('utf-8')
value = value.strip()
if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
value = value[1:-1].strip()
if value.lower() == 'nan':
return 'nan'
return value

converters = {i: normalize_field for i in range(ncols)}

table = np.genfromtxt(fnamecsv,
dtype=None,
deletechars='',
replace_space=' ',
delimiter=',',
encoding='utf-8',
converters=converters)

# table is a 1-D array. Each element is a row in the file.
# - If the data types are not the same for each column,
# the elements are tuples with length equal to the number
Expand Down Expand Up @@ -1010,8 +1055,17 @@ def parse_missing_length(fnamecsv, dt, cols, psizes, pnames, ptypes, opts):
if opts['method'] == '' or opts['method'] == 'pandas' or opts['method'] == 'pandasnolength':
# If requested method was pandas, use pandasnolength method.

# TODO: Duplicate code.
# Read file into Pandas DataFrame
df = pandas.read_csv(fnamecsv, sep=',', header=None, encoding='utf-8')
csv_kwargs = {
'sep': ',',
'header': None,
'encoding': 'utf-8',
'skipinitialspace': True,
'keep_default_na': False,
'na_values': ['NaN', 'nan', 'Nan', 'naN', ' "NaN"', ' "nan"', ' "Nan"', ' "naN"', '"NaN"', '"nan"', '"Nan"', '"naN"']
}
df = pandas.read_csv(fnamecsv, **csv_kwargs)

# Allocate output N-D array (It is not possible to pass dtype=dt
# as computed to pandas.read_csv, so need to create new ND array.)
Expand Down
132 changes: 51 additions & 81 deletions hapiclient/test/compare.log
Original file line number Diff line number Diff line change
Expand Up @@ -3,118 +3,88 @@ Dataset = dataset1; Parameter(s) = scalar; run = short. cache = False; usecache
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1405 0.1138 0.0001
csv; pandas 0.1644 0.1104 0.0393
csv; pandas; no len. 0.1285 0.1107 0.0026
csv; numpy 0.1448 0.1293 0.0006
csv; numpy; no len. 0.1528 0.1366 0.0005
binary 0.1548 0.1229 0.0004
csv; pandas 0.2000 0.1644 0.0166
csv; pandas; no len. 0.1888 0.1641 0.0061
csv; numpy 0.1809 0.1590 0.0017
csv; numpy; no len. 0.1929 0.1725 0.0020

Dataset = dataset1; Parameter(s) = scalar,vector; run = short. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1406 0.1120 0.0001
csv; pandas 0.1477 0.1280 0.0042
csv; pandas; no len. 0.1406 0.1207 0.0032
csv; numpy 0.1837 0.1678 0.0007
csv; numpy; no len. 0.1488 0.1316 0.0008
binary 0.1703 0.1293 0.0002
csv; pandas 0.1936 0.1694 0.0066
csv; pandas; no len. 0.1802 0.1545 0.0057
csv; numpy 0.1839 0.1629 0.0012
csv; numpy; no len. 0.1489 0.1258 0.0016

Dataset = dataset1; Parameter(s) = ; run = short. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.2053 0.1687 0.0004
csv; pandas 0.1406 0.1138 0.0089
csv; pandas; no len. 0.1630 0.1371 0.0081
csv; numpy 0.1675 0.1461 0.0042
csv; numpy; no len. 0.1487 0.1238 0.0069
binary 0.2016 0.1568 0.0013
csv; pandas 0.2054 0.1538 0.0274
csv; pandas; no len. 0.1879 0.1448 0.0257
csv; numpy 0.1544 0.1323 0.0063
csv; numpy; no len. 0.1882 0.1580 0.0101

Dataset = dataset1; Parameter(s) = scalar; run = short. cache = True; usecache = False
_____________________________________________________________
Method total d/l->file read & parse file
_____________________________________________________________
binary 0.2047 0.1489 0.0109
csv; pandas 0.1789 0.1320 0.0142
csv; pandas; no len. 0.2006 0.1373 0.0066
csv; numpy 0.1893 0.1298 0.0045
csv; numpy; no len. 0.1836 0.1260 0.0053
binary 0.1788 0.1163 0.0110
csv; pandas 0.1874 0.0966 0.0156
csv; pandas; no len. 0.1971 0.1236 0.0097
csv; numpy 0.1892 0.1209 0.0061
csv; numpy; no len. 0.1884 0.1235 0.0051

Dataset = dataset1; Parameter(s) = scalar,vector; run = short. cache = True; usecache = False
_____________________________________________________________
Method total d/l->file read & parse file
_____________________________________________________________
binary 0.1687 0.1163 0.0117
csv; pandas 0.1722 0.1198 0.0143
csv; pandas; no len. 0.2083 0.1543 0.0073
csv; numpy 0.1875 0.1297 0.0057
csv; numpy; no len. 0.1960 0.1360 0.0058
binary 0.2295 0.1647 0.0102
csv; pandas 0.1900 0.0980 0.0170
csv; pandas; no len. 0.2089 0.1368 0.0097
csv; numpy 0.1739 0.1215 0.0071
csv; numpy; no len. 0.1863 0.1193 0.0058

Dataset = dataset1; Parameter(s) = ; run = short. cache = True; usecache = False
_____________________________________________________________
Method total d/l->file read & parse file
_____________________________________________________________
binary 0.1738 0.1211 0.0087
csv; pandas 0.1728 0.1248 0.0148
csv; pandas; no len. 0.2127 0.1561 0.0131
csv; numpy 0.1439 0.0850 0.0103
csv; numpy; no len. 0.1147 0.0782 0.0110
binary 0.2398 0.1726 0.0102
csv; pandas 0.2350 0.1312 0.0378
csv; pandas; no len. 0.2157 0.1226 0.0293
csv; numpy 0.2084 0.1470 0.0114
csv; numpy; no len. 0.1379 0.0799 0.0135

Dataset = dataset1-Aα☃; Parameter(s) = Time; run = short. cache = False; usecache = False
Dataset = dataset1; Parameter(s) = scalar,vector,spectra; run = long. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1639 0.1324 0.0001
csv; pandas 0.1486 0.1280 0.0027
csv; pandas; no len. 0.1335 0.1132 0.0027
csv; numpy 0.1447 0.1268 0.0004
csv; numpy; no len. 0.1046 0.0866 0.0006
binary 4.1120 4.0676 0.0107
csv; pandas 1.6467 1.4163 0.2102
csv; pandas; no len. 1.8892 1.6304 0.2431
csv; numpy 2.2088 1.5670 0.6262
csv; numpy; no len. 3.6609 1.4510 2.1950

Dataset = dataset1-Aα☃; Parameter(s) = unicodescalar-1-byte (A); run = short. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1387 0.1048 0.0001
csv; pandas 0.0882 0.0660 0.0017
csv; pandas; no len. 0.0993 0.0777 0.0022
csv; numpy 0.1299 0.1115 0.0003
csv; numpy; no len. 0.1212 0.1044 0.0007

Dataset = dataset1-Aα☃; Parameter(s) = unicodescalar-2-byte (α); run = short. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1160 0.0692 0.0001
csv; pandas 0.1288 0.1082 0.0021
csv; pandas; no len. 0.1348 0.1153 0.0021
csv; numpy 0.1344 0.1175 0.0003
csv; numpy; no len. 0.2600 0.2430 0.0007

Dataset = dataset1-Aα☃; Parameter(s) = unicodescalar-3-byte (☃); run = short. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1149 0.0827 0.0001
csv; pandas 0.1327 0.1142 0.0021
csv; pandas; no len. 0.0873 0.0678 0.0020
csv; numpy 0.1518 0.1332 0.0005
csv; numpy; no len. 0.0924 0.0728 0.0008

Dataset = dataset1-Aα☃; Parameter(s) = unicodescalar-4-byte (👍); run = short. cache = False; usecache = False
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1154 0.0808 0.0001
csv; pandas 0.0945 0.0724 0.0022
csv; pandas; no len. 0.1325 0.1134 0.0019
csv; numpy 0.1116 0.0938 0.0003
csv; numpy; no len. 0.1330 0.1148 0.0008
Dataset = dataset1; Parameter(s) = scalar,vector,spectra; run = long. cache = True; usecache = False
_____________________________________________________________
Method total d/l->file read & parse file
_____________________________________________________________
binary 3.6810 3.6196 0.0259
csv; pandas 1.8802 1.6261 0.2286
csv; pandas; no len. 1.6921 1.4040 0.2675
csv; numpy 2.3263 1.6836 0.6233
csv; numpy; no len. 3.5179 1.3522 2.1474

Dataset = dataset1-Aα☃; Parameter(s) = unicodevector (A;α;☃;👍); run = short. cache = False; usecache = False
Dataset = dataset1; Parameter(s) = scalar,vector,spectra; run = long. cache = False; usecache = True
___________________________________________________________
Method total d/l->buff parse buff
___________________________________________________________
binary 0.1416 0.1073 0.0002
csv; pandas 0.1306 0.1102 0.0027
csv; pandas; no len. 0.0904 0.0697 0.0024
csv; numpy 0.0921 0.0738 0.0005
csv; numpy; no len. 0.0990 0.0811 0.0008
binary 0.0192 1.3522 2.1474
csv; pandas 0.0019 1.3522 2.1474
csv; pandas; no len. 0.0023 1.3522 2.1474
csv; numpy 0.0021 1.3522 2.1474
csv; numpy; no len. 0.0019 1.3522 2.1474
18 changes: 15 additions & 3 deletions hapiclient/test/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from hapiclient import hapi

debug = False
debug = True

def comparisonOK(a, b, nolength=False, a_name="First", b_name="Second"):

Expand Down Expand Up @@ -56,9 +56,21 @@ def comparisonOK(a, b, nolength=False, a_name="First", b_name="Second"):
def equal(a, b):
allequal = True
for name in a.dtype.names:
if not np.array_equal(a[name], b[name]):
if np.issubdtype(a[name].dtype, np.double) or np.issubdtype(a[name].dtype, np.floating):
try:
np.testing.assert_array_equal(a[name], b[name])
except AssertionError:
allequal = False
# np.array_equal() only gained an equal_nan option in NumPy 1.19; use
# assert_array_equal for NaN-aware comparison on older NumPy versions.
#ok = np.array_equal(a[name], b[name], equal_nan=True)
else:
ok = np.array_equal(a[name], b[name])
if not ok:
allequal = False
if debug: print(name + ' values differ.')
if debug:
print(name + ' values differ.')
print(a[name])
print(b[name])

return allequal

Expand Down
Loading