Skip to content

Commit

Permalink
Merge pull request #2818 from ericpre/fix_encoding_JEOL_reader
Browse files Browse the repository at this point in the history
Fix encoding jeol reader
  • Loading branch information
jlaehne committed Oct 4, 2021
2 parents a030666 + d497b80 commit bcdb232
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 33 deletions.
52 changes: 31 additions & 21 deletions hyperspy/io_plugins/jeol.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def read_img(filename, scale=None, **kwargs):
file_magic = np.fromfile(fd, "<I", 1)[0]
if file_magic == 52:
# fileformat
_ = fd.read(32).rstrip(b"\x00").decode("utf-8")
_ = decode(fd.read(32).rstrip(b"\x00"))
head_pos, head_len, data_pos = np.fromfile(fd, "<I", 3)
fd.seek(data_pos + 12)
header_long = parsejeol(fd)
Expand Down Expand Up @@ -197,12 +197,12 @@ def check_multiple(factor, number, string):

if file_magic == 304:
# fileformat
_ = fd.read(8).rstrip(b"\x00").decode("utf-8")
_ = decode(fd.read(8).rstrip(b"\x00"))
a, b, head_pos, head_len, data_pos, data_len = np.fromfile(fd, "<I", 6)
# groupname
_ = fd.read(128).rstrip(b"\x00").decode("utf-8")
_ = decode(fd.read(128).rstrip(b"\x00"))
# memo
_ = fd.read(132).rstrip(b"\x00").decode("utf-8")
_ = decode(fd.read(132).rstrip(b"\x00"))
datefile = datetime(1899, 12, 30) + timedelta(days=np.fromfile(fd, "d", 1)[0])
fd.seek(head_pos + 12)
header = parsejeol(fd)
Expand Down Expand Up @@ -378,9 +378,9 @@ def parsejeol(fd):
elif (
kwrd[-1] == 222
): # remove undecodable byte at the end of first ScanSize variable
kwrd = kwrd[:-1].decode("utf-8")
kwrd = decode(kwrd[:-1])
else:
kwrd = kwrd.decode("utf-8")
kwrd = decode(kwrd)
val_type, val_len = np.fromfile(fd, "<i", 2)
tmp_list.append(kwrd)
if val_type == 0:
Expand All @@ -390,7 +390,7 @@ def parsejeol(fd):
arr_len = val_len // np.dtype(c_type).itemsize
if c_type == "c":
value = fd.read(val_len).rstrip(b"\x00")
value = value.decode("utf-8").split("\x00")
value = decode(value).split("\x00")
# value = os.path.normpath(value.replace('\\','/')).split('\x00')
else:
value = np.fromfile(fd, c_type, arr_len)
Expand Down Expand Up @@ -494,8 +494,8 @@ def read_eds(filename, **kwargs):
header["filedate"] = datetime(1899, 12, 30) + timedelta(
days=np.fromfile(fd, "<d", 1)[0]
)
header["sp_name"] = fd.read(80).rstrip(b"\x00").decode("utf-8")
header["username"] = fd.read(32).rstrip(b"\x00").decode("utf-8")
header["sp_name"] = decode(fd.read(80).rstrip(b"\x00"))
header["username"] = decode(fd.read(32).rstrip(b"\x00"))

np.fromfile(fd, "<i", 1) # 1
header["arr"] = np.fromfile(fd, "<d", 10)
Expand All @@ -520,10 +520,10 @@ def read_eds(filename, **kwargs):
np.fromfile(fd, "<d", 1)[0]
header["CoefA"] = np.fromfile(fd, "<d", 1)[0]
header["CoefB"] = np.fromfile(fd, "<d", 1)[0]
header["State"] = fd.read(32).rstrip(b"\x00").decode("utf-8")
header["State"] = decode(fd.read(32).rstrip(b"\x00"))
np.fromfile(fd, "<i", 1)[0]
np.fromfile(fd, "<d", 1)[0]
header["Tpl"] = fd.read(32).rstrip(b"\x00").decode("utf-8")
header["Tpl"] = decode(fd.read(32).rstrip(b"\x00"))
header["NumCH"] = np.fromfile(fd, "<i", 1)[0]
data = np.fromfile(fd, "<i", header["NumCH"])

Expand All @@ -548,7 +548,7 @@ def read_eds(filename, **kwargs):
# unknown
_ = np.fromfile(fd, "<b", 14)
energy, unknow1, unknow2, unknow3 = np.fromfile(fd, "<d", 4)
elem_name = fd.read(32).rstrip(b"\x00").decode("utf-8")
elem_name = decode(fd.read(32).rstrip(b"\x00"))
# mark3?
_ = np.fromfile(fd, "<i", 1)[0]
n_line = np.fromfile(fd, "<i", 1)[0]
Expand All @@ -559,9 +559,9 @@ def read_eds(filename, **kwargs):
e_line = np.fromfile(fd, "<d", 1)[0]
z = np.fromfile(fd, "<H", 1)[0]
e_length = np.fromfile(fd, "<b", 1)[0]
e_name = fd.read(e_length).rstrip(b"\x00").decode("utf-8")
e_name = decode(fd.read(e_length).rstrip(b"\x00"))
l_length = np.fromfile(fd, "<b", 1)[0]
l_name = fd.read(l_length).rstrip(b"\x00").decode("utf-8")
l_name = decode(fd.read(l_length).rstrip(b"\x00"))
detect = np.fromfile(fd, "<i", 1)[0]
lines[e_name + "_" + l_name] = {
"energy": e_line,
Expand Down Expand Up @@ -596,7 +596,7 @@ def read_eds(filename, **kwargs):
mass1 = np.fromfile(fd, "<d", 1)[0]
error = np.fromfile(fd, "<d", 1)[0]
atom = np.fromfile(fd, "<d", 1)[0]
ox_name = fd.read(16).rstrip(b"\x00").decode("utf-8")
ox_name = decode(fd.read(16).rstrip(b"\x00"))
mass2 = np.fromfile(fd, "<d", 1)[0]
# K
_ = np.fromfile(fd, "<d", 1)[0]
Expand All @@ -621,9 +621,9 @@ def read_eds(filename, **kwargs):
e = np.fromfile(fd, "<i", 1)
if e == 5:
footer["Parameters"] = {
"DetT": fd.read(16).rstrip(b"\x00").decode("utf-8"),
"SEM": fd.read(16).rstrip(b"\x00").decode("utf-8"),
"Port": fd.read(16).rstrip(b"\x00").decode("utf-8"),
"DetT": decode(fd.read(16).rstrip(b"\x00")),
"SEM": decode(fd.read(16).rstrip(b"\x00")),
"Port": decode(fd.read(16).rstrip(b"\x00")),
"AccKV": np.fromfile(fd, "<d", 1)[0],
"AccNA": np.fromfile(fd, "<d", 1)[0],
"skip": np.fromfile(fd, "<b", 38),
Expand All @@ -645,10 +645,10 @@ def read_eds(filename, **kwargs):
"XtalAng": np.fromfile(fd, "d", 1)[0],
"ElevAng": np.fromfile(fd, "d", 1)[0],
"ValidSize": np.fromfile(fd, "d", 1)[0],
"WinCMat": fd.read(4).rstrip(b"\x00").decode("utf-8"),
"WinCMat": decode(fd.read(4).rstrip(b"\x00")),
"WinCZ": np.fromfile(fd, "<H", 1)[0],
"WinCThic": np.fromfile(fd, "d", 1)[0],
"WinChem": fd.read(16).rstrip(b"\x00").decode("utf-8"),
"WinChem": decode(fd.read(16).rstrip(b"\x00")),
"WinChem_nelem": np.fromfile(fd, "<H", 1)[0],
"WinChem_Z1": np.fromfile(fd, "<H", 1)[0],
"WinChem_Z2": np.fromfile(fd, "<H", 1)[0],
Expand All @@ -662,7 +662,7 @@ def read_eds(filename, **kwargs):
"WinChem_m5": np.fromfile(fd, "d", 1)[0],
"WinThic": np.fromfile(fd, "d", 1)[0],
"WinDens": np.fromfile(fd, "d", 1)[0],
"SpatMat": fd.read(4).rstrip(b"\x00").decode("utf-8"),
"SpatMat": decode(fd.read(4).rstrip(b"\x00")),
"SpatZ": np.fromfile(fd, "<H", 1)[0],
"SpatThic": np.fromfile(fd, "d", 1)[0],
"SiDead": np.fromfile(fd, "d", 1)[0],
Expand Down Expand Up @@ -727,3 +727,13 @@ def read_eds(filename, **kwargs):
"map": read_img,
"pts": read_pts,
"eds": read_eds}


def decode(bytes_string):
    """Decode raw metadata bytes to ``str``, trying UTF-8 then Shift-JIS.

    JEOL acquisition software running on Japanese Windows can write
    metadata strings encoded as Shift-JIS rather than UTF-8.
    See https://github.com/hyperspy/hyperspy/issues/2812

    Parameters
    ----------
    bytes_string : bytes
        Raw bytes read from the file (typically already stripped of
        trailing NUL padding by the caller).

    Returns
    -------
    str
        The decoded string.

    Raises
    ------
    UnicodeDecodeError
        If the bytes are valid in neither UTF-8 nor Shift-JIS.
    """
    try:
        return bytes_string.decode("utf-8")
    except UnicodeDecodeError:
        # Catch only decoding failures (the original bare ``except:``
        # also swallowed KeyboardInterrupt/SystemExit) and fall back to
        # the Japanese Shift-JIS codepage.
        return bytes_string.decode("shift_jis")
Binary file added hyperspy/tests/io/JEOL_files/181019-BN.ASW
Binary file not shown.
41 changes: 29 additions & 12 deletions hyperspy/tests/io/test_jeol.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@
# You should have received a copy of the GNU General Public License
# along with HyperSpy. If not, see <http://www.gnu.org/licenses/>.

import os

from pathlib import Path
import pytest
import numpy as np

import hyperspy.api as hs

my_path = os.path.dirname(__file__)

TESTS_FILE_PATH = Path(__file__).resolve().parent / 'JEOL_files'


test_files = ['rawdata.ASW',
'View000_0000000.img',
Expand All @@ -38,7 +39,7 @@

def test_load_project():
# test load all elements of the project rawdata.ASW
filename = os.path.join(my_path, 'JEOL_files', test_files[0])
filename = TESTS_FILE_PATH / test_files[0]
s = hs.load(filename)
# first file is always a 16bit image of the work area
assert s[0].data.dtype == np.uint8
Expand Down Expand Up @@ -74,7 +75,8 @@ def test_load_project():

def test_load_image():
# test load work area haadf image
filename = os.path.join(my_path, 'JEOL_files', 'Sample', '00_View000', test_files[1])
filename = TESTS_FILE_PATH / 'Sample' / '00_View000' / test_files[1]
print(filename)
s = hs.load(filename)
assert s.data.dtype == np.uint8
assert s.data.shape == (512, 512)
Expand All @@ -90,7 +92,7 @@ def test_load_image():
@pytest.mark.parametrize('SI_dtype', [np.int8, np.uint8])
def test_load_datacube(SI_dtype):
# test load eds datacube
filename = os.path.join(my_path, 'JEOL_files', 'Sample', '00_View000', test_files[-1])
filename = TESTS_FILE_PATH / 'Sample' / '00_View000' / test_files[7]
s = hs.load(filename, SI_dtype=SI_dtype)
assert s.data.dtype == SI_dtype
assert s.data.shape == (512, 512, 4096)
Expand All @@ -109,7 +111,7 @@ def test_load_datacube(SI_dtype):


def test_load_datacube_rebin_energy():
filename = os.path.join(my_path, 'JEOL_files', 'Sample', '00_View000', test_files[-1])
filename = TESTS_FILE_PATH / 'Sample' / '00_View000' / test_files[7]
s = hs.load(filename)
s_sum = s.sum()

Expand All @@ -132,7 +134,7 @@ def test_load_datacube_rebin_energy():

def test_load_datacube_cutoff_at_kV():
cutoff_at_kV = 10.
filename = os.path.join(my_path, 'JEOL_files', 'Sample', '00_View000', test_files[-1])
filename = TESTS_FILE_PATH / 'Sample' / '00_View000' / test_files[7]
s = hs.load(filename, cutoff_at_kV=None)
s2 = hs.load(filename, cutoff_at_kV=cutoff_at_kV)

Expand All @@ -145,7 +147,7 @@ def test_load_datacube_cutoff_at_kV():

def test_load_datacube_downsample():
downsample = 8
filename = os.path.join(my_path, 'JEOL_files', test_files[0])
filename = TESTS_FILE_PATH / test_files[0]
s = hs.load(filename, downsample=1)[-1]
s2 = hs.load(filename, downsample=downsample)[-1]

Expand Down Expand Up @@ -180,7 +182,7 @@ def test_load_datacube_downsample():

def test_load_datacube_frames():
rebin_energy = 2048
filename = os.path.join(my_path, 'JEOL_files', 'Sample', '00_View000', test_files[-1])
filename = TESTS_FILE_PATH / 'Sample' / '00_View000' / test_files[7]
s = hs.load(filename, sum_frames=True, rebin_energy=rebin_energy)
assert s.data.shape == (512, 512, 2)
s_frame = hs.load(filename, sum_frames=False, rebin_energy=rebin_energy)
Expand All @@ -192,8 +194,11 @@ def test_load_datacube_frames():
22141, 22024, 22086, 21797]))


def test_load_eds_file():
filename = os.path.join(my_path, 'JEOL_files', 'met03.EDS')
@pytest.mark.parametrize('filename_as_string', [True, False])
def test_load_eds_file(filename_as_string):
filename = TESTS_FILE_PATH / 'met03.EDS'
if filename_as_string:
filename = str(filename)
s = hs.load(filename)
assert isinstance(s, hs.signals.EDSTEMSpectrum)
assert s.data.shape == (2048,)
Expand All @@ -217,3 +222,15 @@ def test_load_eds_file():
'live_time': 30.0}},
'Stage': {'tilt_alpha': 0.0}}


def test_shift_jis_encoding():
    """Load a project file whose strings are Shift-JIS encoded.

    Regression test for https://github.com/hyperspy/hyperspy/issues/2812
    """
    filename = TESTS_FILE_PATH / '181019-BN.ASW'
    # First verify the fixture file itself is readable.
    with open(filename, "rb"):
        pass
    try:
        _ = hs.load(filename)
    except FileNotFoundError:
        # The companion data files referenced by the .ASW project are
        # not shipped with the test suite; reaching this point already
        # exercised the Shift-JIS decoding path without raising a
        # UnicodeDecodeError, which is what this test is about.
        pass

0 comments on commit bcdb232

Please sign in to comment.