Skip to content

Commit

Permalink
Merge pull request #184 from V-F/feature-supporting_of_different_char…
Browse files Browse the repository at this point in the history
…sets

Feature supporting of different charsets
  • Loading branch information
lesserwhirls committed Mar 1, 2020
2 parents 3bfb098 + 7a1e506 commit 7c76c60
Show file tree
Hide file tree
Showing 7 changed files with 254 additions and 30 deletions.
37 changes: 30 additions & 7 deletions cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf4/H4header.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -104,7 +105,29 @@ public static void useHdfEos(boolean val) {
private Map<Short, Vinfo> refnoMap = new HashMap<>();

private MemTracker memTracker;
private PrintWriter debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
private PrintWriter debugOut;

private final Charset valueCharset;

/** Creates a header reader that decodes string values as UTF-8 and writes debug output to System.out. */
public H4header() {
  this.debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
  this.valueCharset = StandardCharsets.UTF_8;
}

/**
 * Creates a header reader that takes its value charset from the given iosp.
 * Falls back to UTF-8 when {@code h4iosp.getValueCharset()} is empty.
 *
 * @param h4iosp the iosp whose charset setting is consulted
 */
H4header(H4iosp h4iosp) {
  this.debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
  this.valueCharset = h4iosp.getValueCharset().orElse(StandardCharsets.UTF_8);
}

/**
 * Returns the {@link Charset} this header uses when decoding string values
 * read from the HDF4 file. The charset is fixed when the header is constructed.
 *
 * @return the value charset, never {@code null} for headers built by the visible constructors
 */
protected Charset getValueCharset() {
  return this.valueCharset;
}

public boolean isEos() {
return isEos;
Expand Down Expand Up @@ -429,11 +452,11 @@ private Attribute makeAttribute(TagVH vh) throws IOException {
case 3:
case 4:
if (nelems == 1)
att = new Attribute(name, raf.readStringMax(size));
att = new Attribute(name, raf.readStringMax(size, valueCharset));
else {
String[] vals = new String[nelems];
for (int i = 0; i < nelems; i++)
vals[i] = raf.readStringMax(size);
vals[i] = raf.readStringMax(size, valueCharset);
att = new Attribute(name, Array.factory(DataType.STRING, new int[] {nelems}, vals));
}
break;
Expand Down Expand Up @@ -1139,7 +1162,7 @@ List<DataChunk> readChunks(NetcdfFile ncfile) throws IOException {

/**
 * Reads this element's data as a string.
 * Seeks to {@code data.offset} and reads {@code data.length} bytes through
 * {@code raf.readString}, decoding with the header's value charset.
 *
 * @return the decoded string
 * @throws IOException if seeking or reading fails
 */
String read() throws IOException {
  raf.seek(data.offset);
  // Single charset-aware read; a second return statement after this would be unreachable.
  return raf.readString(data.length, valueCharset);
}

public String toString() {
Expand Down Expand Up @@ -1661,7 +1684,7 @@ private class TagText extends Tag {

/**
 * Loads this tag's text from the file.
 * Seeks to {@code offset} and reads up to {@code length} bytes through
 * {@code raf.readStringMax}, decoding with the header's value charset.
 *
 * @throws IOException if seeking or reading fails
 */
protected void read() throws IOException {
  raf.seek(offset);
  // Read exactly once: a second read without re-seeking would start past this tag's data.
  text = raf.readStringMax(length, valueCharset);
}

public String detail() {
Expand All @@ -1683,7 +1706,7 @@ protected void read() throws IOException {
raf.seek(offset);
obj_tagno = raf.readShort();
obj_refno = raf.readShort();
text = raf.readStringMax(length - 4).trim();
text = raf.readStringMax(length - 4, valueCharset).trim();
}

public String detail() {
Expand Down Expand Up @@ -1840,7 +1863,7 @@ protected void read(int n) throws IOException {
int start = 0;
for (int i = 0; i < length; i++) {
if (b[i] == 0) {
text[count] = new String(b, start, i - start, StandardCharsets.UTF_8);
text[count] = new String(b, start, i - start, valueCharset);
count++;
if (count == n)
break;
Expand Down
54 changes: 47 additions & 7 deletions cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf4/H4iosp.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Optional;
import ucar.ma2.Array;
import ucar.ma2.ArrayStructure;
import ucar.ma2.ArrayStructureBB;
Expand All @@ -35,13 +37,15 @@
import ucar.nc2.util.IO;
import ucar.unidata.io.PositioningDataInputStream;
import ucar.unidata.io.RandomAccessFile;
import javax.annotation.Nullable;

/** HDF4 iosp */
public class H4iosp extends AbstractIOServiceProvider {
private static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(H4iosp.class);
private static boolean showLayoutTypes;

private H4header header = new H4header();
private H4header header;
private Charset valueCharset;

@Override
public boolean isValidFile(RandomAccessFile raf) throws IOException {
Expand All @@ -50,8 +54,9 @@ public boolean isValidFile(RandomAccessFile raf) throws IOException {

@Override
public String getFileTypeId() {
  // The header is created lazily, so it may still be null when this is called;
  // only consult isEos() once a header exists.
  boolean eos = header != null && header.isEos();
  return eos ? "HDF4-EOS" : DataFormatType.HDF4.getDescription();
}

Expand All @@ -64,11 +69,24 @@ public String getFileTypeDescription() {
public void open(RandomAccessFile raf, NetcdfFile ncfile, CancelTask cancelTask) throws IOException {
  super.open(raf, ncfile, cancelTask);
  Group.Builder rootGroup = Group.builder(null).setName("").setNcfile(ncfile);
  // Go through getHeader() so the header exists (and carries the configured charset)
  // before reading; read exactly once to avoid populating rootGroup twice.
  getHeader().read(raf, rootGroup, null);
  ncfile.setRootGroup(rootGroup.build(null));
  ncfile.finish();
}

/**
 * Returns the header used to read the HDF4 file, creating it on first use.
 * A newly created header is constructed from this iosp, so it picks up the
 * value charset defined at that moment.
 *
 * @return the header for reading the HDF4 file
 */
private H4header getHeader() {
  if (header != null) {
    return header;
  }
  header = new H4header(this);
  return header;
}

@Override
public boolean isBuilder() {
return true;
Expand All @@ -79,7 +97,7 @@ public void build(RandomAccessFile raf, Group.Builder rootGroup, CancelTask canc
super.open(raf, rootGroup.getNcfile(), cancelTask);

raf.order(RandomAccessFile.BIG_ENDIAN);
header = new H4header();
header = new H4header(this);
header.read(raf, rootGroup, null);
}

Expand Down Expand Up @@ -475,14 +493,36 @@ public ByteBuffer getByteBuffer() throws IOException {
@Override
public void reacquire() throws IOException {
  super.reacquire();
  // Use the lazy accessor: the header field may not have been created yet,
  // so dereferencing it directly could throw a NullPointerException.
  getHeader().raf = this.raf;
}

/**
 * Handles messages sent to this iosp.
 * <ul>
 * <li>A {@link Charset} message defines the value charset. A header reads the charset
 * when it is constructed, so this only affects headers created afterwards.</li>
 * <li>A message whose string form is {@code "header"} returns the header, creating it if needed.</li>
 * <li>Anything else (including a Charset, after it is recorded) is passed to the superclass.</li>
 * </ul>
 *
 * @param message the message object
 * @return the header for a "header" message, otherwise the superclass result
 */
public Object sendIospMessage(Object message) {
  if (message instanceof Charset) {
    setValueCharset((Charset) message);
  }
  // Return via getHeader() rather than the raw field, which may still be null.
  if (message.toString().equals("header")) {
    return getHeader();
  }
  return super.sendIospMessage(message);
}

/**
 * Returns the {@link Charset value charset}, if one has been defined.
 * A charset is defined by sending a {@link Charset} as the message to
 * {@link #sendIospMessage}.
 *
 * @return the defined value charset, or an empty Optional when none was set
 */
protected Optional<Charset> getValueCharset() {
  return Optional.ofNullable(this.valueCharset);
}

/**
 * Defines the {@link Charset value charset}.
 *
 * @param charset the charset to store; {@code null} clears the definition
 */
protected void setValueCharset(@Nullable Charset charset) {
  valueCharset = charset;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.nio.IntBuffer;
import java.nio.LongBuffer;
import java.nio.ShortBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Formatter;
Expand Down Expand Up @@ -177,10 +178,23 @@ public static boolean isValidFile(RandomAccessFile raf) throws IOException {
private PrintWriter debugOut;
private MemTracker memTracker;

private final Charset valueCharset;

/**
 * Creates a header reader for the given file, root group builder and iosp.
 * The value charset is taken from {@code h5iosp.getValueCharset()}, defaulting to UTF-8.
 *
 * @param myRaf the file to read from
 * @param root the root group builder
 * @param h5iosp the owning iosp, consulted for the value charset
 */
H5headerNew(RandomAccessFile myRaf, Group.Builder root, H5iospNew h5iosp) {
  this.valueCharset = h5iosp.getValueCharset().orElse(StandardCharsets.UTF_8);
  this.h5iosp = h5iosp;
  this.root = root;
  this.raf = myRaf;
}

/**
 * Returns the {@link Charset} this header uses when decoding string values
 * read from the HDF5 file. The charset is fixed when the header is constructed.
 *
 * @return the value charset
 */
protected Charset getValueCharset() {
  return this.valueCharset;
}

public void read(PrintWriter debugPS) throws IOException {
Expand All @@ -189,7 +203,7 @@ public void read(PrintWriter debugPS) throws IOException {
} else if (debug1 || debugContinueMessage || debugCreationOrder || debugDetail || debugDimensionScales
|| debugGroupBtree || debugHardLink || debugHeap || debugPos || debugReference || debugTracker || debugV
|| debugSoftLink || warnings) {
debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
debugOut = new PrintWriter(new OutputStreamWriter(System.out));
}
h5objects = new H5objects(this, debugOut, memTracker);

Expand Down Expand Up @@ -1194,7 +1208,7 @@ private String convertString(byte[] b) {
break;
count++;
}
return new String(b, 0, count, StandardCharsets.UTF_8); // all strings are considered to be UTF-8 unicode
return new String(b, 0, count, valueCharset); // all strings are considered to be UTF-8 unicode
}

private String convertString(byte[] b, int start, int len) {
Expand All @@ -1205,8 +1219,8 @@ private String convertString(byte[] b, int start, int len) {
break;
count++;
}
return new String(b, start, count - start, StandardCharsets.UTF_8); // all strings are considered to be UTF-8
// unicode
return new String(b, start, count - start, valueCharset); // all strings are considered to be UTF-8
// unicode
}

protected Array convertEnums(Map<Integer, String> map, DataType dataType, Array values) {
Expand Down Expand Up @@ -2104,7 +2118,7 @@ String readHeapString(long heapIdAddress) throws IOException {
if (ho.dataSize > 1000 * 1000)
return String.format("Bad HeapObject.dataSize=%s", ho);
raf.seek(ho.dataPos);
return raf.readString((int) ho.dataSize);
return raf.readString((int) ho.dataSize, valueCharset);
}

/**
Expand All @@ -2121,7 +2135,7 @@ String readHeapString(ByteBuffer bb, int pos) throws IOException {
if (ho == null)
throw new IllegalStateException("Cant find Heap Object,heapId=" + heapId);
raf.seek(ho.dataPos);
return raf.readString((int) ho.dataSize);
return raf.readString((int) ho.dataSize, valueCharset);
}

Array readHeapVlen(ByteBuffer bb, int pos, DataType dataType, int endian) throws IOException, InvalidRangeException {
Expand Down
32 changes: 32 additions & 0 deletions cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf5/H5iospNew.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.util.Optional;
import ucar.ma2.Array;
import ucar.ma2.ArrayStructure;
import ucar.ma2.ArrayStructureBB;
Expand All @@ -33,6 +35,7 @@
import ucar.nc2.time.CalendarDate;
import ucar.nc2.util.CancelTask;
import ucar.unidata.io.RandomAccessFile;
import javax.annotation.Nullable;

/**
* HDF5 I/O
Expand Down Expand Up @@ -105,6 +108,7 @@ public boolean isBuilder() {
private H5headerNew header;
private boolean isEos;
boolean includeOriginalAttributes;
private Charset valueCharset;

@Override
public void build(RandomAccessFile raf, Group.Builder rootGroup, CancelTask cancelTask) throws IOException {
Expand All @@ -126,6 +130,34 @@ public void build(RandomAccessFile raf, Group.Builder rootGroup, CancelTask canc
}
}

@Override
public Object sendIospMessage(Object message) {
  // A Charset message defines the value charset; every message is then passed to the superclass.
  boolean isCharset = message instanceof Charset;
  if (isCharset) {
    setValueCharset((Charset) message);
  }
  return super.sendIospMessage(message);
}

/**
 * Returns the {@link Charset value charset}, if one has been defined.
 * A charset is defined by sending a {@link Charset} as the message to
 * {@link #sendIospMessage}.
 *
 * @return the defined value charset, or an empty Optional when none was set
 */
protected Optional<Charset> getValueCharset() {
  return Optional.ofNullable(this.valueCharset);
}

/**
 * Defines the {@link Charset value charset}.
 *
 * @param charset the charset to store; {@code null} clears the definition
 */
protected void setValueCharset(@Nullable Charset charset) {
  valueCharset = charset;
}

@Override
public void open(RandomAccessFile raf, NetcdfFile ncfile, CancelTask cancelTask) throws IOException {
super.open(raf, ncfile, cancelTask);
Expand Down
Loading

0 comments on commit 7c76c60

Please sign in to comment.