Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions aliyun/src/main/java/org/apache/iceberg/aliyun/oss/OSSInputStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import com.aliyun.oss.OSS;
import com.aliyun.oss.model.GetObjectRequest;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.apache.iceberg.io.SeekableInputStream;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link SeekableInputStream} that reads an object addressed by an {@link OSSURI} through an
 * {@link OSS} client.
 *
 * <p>Seeks are lazy: {@link #seek(long)} only records the requested offset, and the underlying
 * stream is repositioned (by skipping forward or by re-opening at the new offset) on the next read.
 */
public class OSSInputStream extends SeekableInputStream {
  private static final Logger LOG = LoggerFactory.getLogger(OSSInputStream.class);

  // Forward seeks of at most this many bytes are served by skipping within the open stream
  // instead of issuing a new request.
  private static final int SKIP_SIZE = 1024 * 1024;

  // Stack trace captured at construction, reported by finalize() if the stream was never closed.
  private final StackTraceElement[] createStack;
  private final OSS client;
  private final OSSURI uri;

  // Currently open object content stream, or null if none has been opened yet.
  private InputStream stream = null;
  // Offset the underlying stream is currently positioned at.
  private long pos = 0;
  // Offset the next read should start from; differs from pos after a seek.
  private long next = 0;
  private boolean closed = false;

  /**
   * Creates a stream over the object identified by {@code uri}. No request is issued until the
   * first read.
   *
   * @param client OSS client used to fetch the object content
   * @param uri location (bucket and key) of the object to read
   */
  public OSSInputStream(OSS client, OSSURI uri) {
    this.client = client;
    this.uri = uri;
    this.createStack = Thread.currentThread().getStackTrace();
  }

  /**
   * Returns the offset the next read will start from.
   */
  @Override
  public long getPos() {
    return next;
  }

  /**
   * Records {@code newPos} as the offset for the next read; the underlying stream is not touched
   * until that read happens.
   *
   * @param newPos non-negative offset to read from next
   * @throws IllegalStateException if this stream is already closed
   * @throws IllegalArgumentException if {@code newPos} is negative
   */
  @Override
  public void seek(long newPos) {
    Preconditions.checkState(!closed, "Cannot seek: already closed");
    Preconditions.checkArgument(newPos >= 0, "Position is negative: %s", newPos);

    // this allows a seek beyond the end of the stream but the next read will fail
    next = newPos;
  }

  /**
   * Reads a single byte at the current position.
   *
   * @return the byte read as an int, or whatever end-of-stream marker the underlying stream
   *     returns (-1 per the {@link InputStream} contract)
   * @throws IOException if positioning or reading the underlying stream fails
   * @throws IllegalStateException if this stream is already closed
   */
  @Override
  public int read() throws IOException {
    Preconditions.checkState(!closed, "Cannot read: already closed");
    positionStream();

    int value = stream.read();
    // Only advance when a byte was actually consumed; at end of stream the position must not move.
    if (value >= 0) {
      pos += 1;
      next += 1;
    }

    return value;
  }

  /**
   * Reads up to {@code len} bytes into {@code b} starting at {@code off}.
   *
   * @param b destination buffer
   * @param off offset in {@code b} at which to start storing bytes
   * @param len maximum number of bytes to read
   * @return the value returned by the underlying stream: the number of bytes read, or -1 at end of
   *     stream per the {@link InputStream} contract
   * @throws IOException if positioning or reading the underlying stream fails
   * @throws IllegalStateException if this stream is already closed
   */
  @Override
  public int read(byte[] b, int off, int len) throws IOException {
    Preconditions.checkState(!closed, "Cannot read: already closed");
    positionStream();

    int bytesRead = stream.read(b, off, len);
    // A negative result signals end of stream; adding it would move the position backwards.
    if (bytesRead > 0) {
      pos += bytesRead;
      next += bytesRead;
    }

    return bytesRead;
  }

  /**
   * Closes the underlying stream, if one is open, and marks this stream closed. Calling it again
   * after a successful close is a no-op.
   *
   * @throws IOException if closing the underlying stream fails
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }

    super.close();
    closeStream();
    closed = true;
  }

  /**
   * Makes the underlying stream's position ({@code pos}) match the requested position
   * ({@code next}), either by skipping forward in the open stream or by opening a new one.
   */
  private void positionStream() throws IOException {
    if ((stream != null) && (next == pos)) {
      // already at specified position.
      return;
    }

    if ((stream != null) && (next > pos)) {
      // seeking forwards
      long skip = next - pos;
      if (skip <= Math.max(stream.available(), SKIP_SIZE)) {
        // already buffered or seek is small enough
        LOG.debug("Read-through seek for {} from {} to offset {}", uri, pos, next);
        try {
          ByteStreams.skipFully(stream, skip);
          pos = next;
          return;
        } catch (IOException ignored) {
          // will retry by re-opening the stream.
        }
      }
    }

    // close the stream and open at desired position.
    LOG.debug("Seek with new stream for {} to offset {}", uri, next);
    pos = next;
    openStream();
  }

  /**
   * Closes any open stream and requests the object content starting at {@code pos}.
   */
  private void openStream() throws IOException {
    closeStream();

    // NOTE(review): the -1 end value presumably means "until the end of the object";
    // confirm against the OSS SDK documentation for GetObjectRequest ranges.
    GetObjectRequest request = new GetObjectRequest(uri.bucket(), uri.key()).withRange(pos, -1);
    stream = client.getObject(request).getObjectContent();
  }

  /**
   * Closes the underlying stream if one is open and clears the reference.
   */
  private void closeStream() throws IOException {
    if (stream != null) {
      stream.close();
      stream = null;
    }
  }

  /**
   * Safety net for streams that were never closed: closes the stream and logs the stack trace
   * captured when it was created.
   */
  @SuppressWarnings("checkstyle:NoFinalizer")
  @Override
  protected void finalize() throws Throwable {
    super.finalize();
    if (!closed) {
      close(); // releasing resources is more important than printing the warning
      String trace = Joiner.on("\n\t").join(Arrays.copyOfRange(createStack, 1, createStack.length));
      LOG.warn("Unclosed input stream created by: \n\t{}", trace);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.commons.io.IOUtils;
import org.apache.iceberg.io.SeekableInputStream;
import org.junit.Test;

import static org.apache.iceberg.AssertHelpers.assertThrows;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;

/**
 * Tests for {@link OSSInputStream}: sequential and seeking reads, plus behavior after close.
 */
public class TestOSSInputStream extends AliyunOSSTestBase {

  /**
   * Writes 10 MiB of random data and verifies reads at the current position, after small and
   * large forward seeks, and after seeking back to the start, using both single-byte and
   * bulk reads.
   */
  @Test
  public void testRead() throws Exception {
    OSSURI uri = new OSSURI(location("read.dat"));
    int dataSize = 1024 * 1024 * 10;
    byte[] data = randomData(dataSize);

    writeOSSData(uri, data);

    try (SeekableInputStream in = new OSSInputStream(ossClient().get(), uri)) {
      int readSize = 1024;

      readAndCheck(in, in.getPos(), readSize, data, false);
      readAndCheck(in, in.getPos(), readSize, data, true);

      // Seek forward in current stream
      int seekSize = 1024;
      readAndCheck(in, in.getPos() + seekSize, readSize, data, false);
      readAndCheck(in, in.getPos() + seekSize, readSize, data, true);

      // Buffered read
      readAndCheck(in, in.getPos(), readSize, data, true);
      readAndCheck(in, in.getPos(), readSize, data, false);

      // Seek with new stream
      long seekNewStreamPosition = 2 * 1024 * 1024;
      readAndCheck(in, in.getPos() + seekNewStreamPosition, readSize, data, true);
      readAndCheck(in, in.getPos() + seekNewStreamPosition, readSize, data, false);

      // Backseek and read
      readAndCheck(in, 0, readSize, data, true);
      readAndCheck(in, 0, readSize, data, false);
    }
  }

  /**
   * Seeks to {@code rangeStart}, reads {@code size} bytes and checks both the reported position
   * and the bytes read against the matching slice of {@code original}.
   *
   * @param in stream under test
   * @param rangeStart offset to seek to before reading
   * @param size number of bytes to read
   * @param original full content the stream is expected to serve
   * @param buffered if true, read with one bulk {@code readFully}; otherwise read byte by byte
   */
  private void readAndCheck(SeekableInputStream in, long rangeStart, int size, byte[] original, boolean buffered)
      throws IOException {
    in.seek(rangeStart);
    assertEquals("Should have the correct position", rangeStart, in.getPos());

    long rangeEnd = rangeStart + size;
    byte[] actual = new byte[size];

    if (buffered) {
      IOUtils.readFully(in, actual);
    } else {
      int read = 0;
      while (read < size) {
        int value = in.read();
        // Fail loudly on a premature end of stream instead of storing (byte) -1 as data.
        if (value < 0) {
          throw new AssertionError("Unexpected end of stream at offset " + (rangeStart + read));
        }
        actual[read++] = (byte) value;
      }
    }

    assertEquals("Should have the correct position", rangeEnd, in.getPos());

    assertArrayEquals("Should have expected range data",
        Arrays.copyOfRange(original, (int) rangeStart, (int) rangeEnd), actual);
  }

  /**
   * Verifies that seeking on a closed stream is rejected with an {@link IllegalStateException}.
   */
  @Test
  public void testClose() throws Exception {
    OSSURI uri = new OSSURI(location("closed.dat"));
    SeekableInputStream closed = new OSSInputStream(ossClient().get(), uri);
    closed.close();
    assertThrows("Cannot seek the input stream after closed.", IllegalStateException.class,
        "Cannot seek: already closed",
        () -> {
          closed.seek(0);
          return null;
        });
  }

  /**
   * Seeks to the middle of a 1 MiB object and verifies the second half is read back unchanged.
   */
  @Test
  public void testSeek() throws Exception {
    OSSURI uri = new OSSURI(location("seek.dat"));
    byte[] expected = randomData(1024 * 1024);

    writeOSSData(uri, expected);

    try (SeekableInputStream in = new OSSInputStream(ossClient().get(), uri)) {
      in.seek(expected.length / 2);
      byte[] actual = new byte[expected.length / 2];
      IOUtils.readFully(in, actual);
      assertArrayEquals("Should have expected seeking stream",
          Arrays.copyOfRange(expected, expected.length / 2, expected.length), actual);
    }
  }

  /**
   * Returns {@code size} random bytes.
   */
  private byte[] randomData(int size) {
    byte[] data = new byte[size];
    // Obtain the generator on the calling thread rather than caching it in a field:
    // a ThreadLocalRandom is only meant to be used by the thread that fetched it.
    Random random = ThreadLocalRandom.current();
    random.nextBytes(data);
    return data;
  }

  /**
   * Uploads {@code data} as the object at {@code uri}.
   */
  private void writeOSSData(OSSURI uri, byte[] data) {
    ossClient().get().putObject(uri.bucket(), uri.key(), new ByteArrayInputStream(data));
  }
}