Delegate creation of segmentPath/LoadSpec to DataSegmentPushers and add S3a support #4116

Merged
15 commits merged on Jun 4, 2017
30 changes: 30 additions & 0 deletions api/src/main/java/io/druid/segment/loading/DataSegmentPusher.java
@@ -19,15 +19,45 @@

package io.druid.segment.loading;

+import com.google.common.base.Joiner;
import io.druid.timeline.DataSegment;

import java.io.File;
import java.io.IOException;
+import java.net.URI;
+import java.util.Map;

public interface DataSegmentPusher
{
+Joiner JOINER = Joiner.on("/").skipNulls();
Member commented:
static final ?

Contributor Author replied:
It is by definition, since it is part of an interface (fields declared in an interface are implicitly public static final).

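To illustrate the point in that exchange, a minimal sketch (hypothetical Flags interface, not part of this PR):

public interface Flags
{
  // Fields declared in a Java interface are implicitly public, static and final,
  // so the explicit modifiers below are redundant; both constants behave identically.
  String SEPARATOR = "/";
  public static final String EXPLICIT_SEPARATOR = "/";
}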

@Deprecated
String getPathForHadoop(String dataSource);
String getPathForHadoop();
DataSegment push(File file, DataSegment segment) throws IOException;

+//use map instead of LoadSpec class to avoid dependency pollution.
+Map<String, Object> makeLoadSpec(URI finalIndexZipFilePath);
+
+default String getStorageDir(DataSegment dataSegment) {
+  return getDefaultStorageDir(dataSegment);
+}
+
+default String makeIndexPathName(DataSegment dataSegment, String indexName) {
+  return String.format("./%s/%s", getStorageDir(dataSegment), indexName);
+}

+// Note: the storage directory structure format is .../dataSource/interval/version/partitionNumber/
+// If the above format is ever changed, make sure to change it appropriately in other places,
+// e.g. HDFSDataSegmentKiller uses this information to clean up the version, interval and dataSource directories
+// on segment deletion if the segment being deleted was the only segment there.
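+// Illustrative example of a resulting path (hypothetical datasource, interval and version):
+//   wikipedia/2015-09-12T00:00:00.000Z_2015-09-13T00:00:00.000Z/2017-06-04T00:00:00.000Z/0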
+static String getDefaultStorageDir(DataSegment segment) {
+  return JOINER.join(
+      segment.getDataSource(),
+      String.format(
+          "%s_%s",
+          segment.getInterval().getStart(),
+          segment.getInterval().getEnd()
+      ),
+      segment.getVersion(),
+      segment.getShardSpec().getPartitionNum()
+  );
+}
}
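For context, a minimal pusher built against the new interface might look like this (hypothetical LocalExamplePusher with an illustrative base directory; only the abstract methods are implemented, the storage layout comes from the defaults above, and the "local" load-spec type mirrors Druid's local deep storage):

import com.google.common.collect.ImmutableMap;
import io.druid.timeline.DataSegment;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Map;

public class LocalExamplePusher implements DataSegmentPusher
{
  // Illustrative base directory; a real pusher would read this from its config.
  private final File baseDir = new File("/tmp/druid/segments");

  @Deprecated
  @Override
  public String getPathForHadoop(String dataSource)
  {
    return getPathForHadoop();
  }

  @Override
  public String getPathForHadoop()
  {
    return baseDir.toURI().toString();
  }

  @Override
  public DataSegment push(File file, DataSegment segment) throws IOException
  {
    // getStorageDir() is the interface default, so this pusher lays segments
    // out exactly as getDefaultStorageDir() describes above.
    final File outFile = new File(baseDir, getStorageDir(segment) + "/index.zip");
    // ... zip the segment directory and copy it to outFile ...
    return segment.withLoadSpec(makeLoadSpec(outFile.toURI()));
  }

  @Override
  public Map<String, Object> makeLoadSpec(URI uri)
  {
    return ImmutableMap.<String, Object>of("type", "local", "path", uri.getPath());
  }
}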

This file was deleted.

This file was deleted.

@@ -17,4 +17,4 @@ druid.processing.numThreads=2

# Hadoop indexing
druid.indexer.task.hadoopWorkingPath=var/druid/hadoop-tmp
-druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.3.0"]
+druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.7.3"]
2 changes: 1 addition & 1 deletion examples/conf/druid/middleManager/runtime.properties
@@ -17,4 +17,4 @@ druid.processing.numThreads=2

# Hadoop indexing
druid.indexer.task.hadoopWorkingPath=var/druid/hadoop-tmp
-druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.3.0"]
+druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.7.3"]
@@ -29,12 +29,12 @@
import io.druid.java.util.common.logger.Logger;
import io.druid.segment.SegmentUtils;
import io.druid.segment.loading.DataSegmentPusher;
-import io.druid.segment.loading.DataSegmentPusherUtil;
import io.druid.timeline.DataSegment;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.concurrent.Callable;
@@ -85,7 +85,7 @@ public File createSegmentDescriptorFile(final ObjectMapper jsonMapper, final Dat

public Map<String, String> getAzurePaths(final DataSegment segment)
{
-final String storageDir = DataSegmentPusherUtil.getStorageDir(segment);
+final String storageDir = this.getStorageDir(segment);

return ImmutableMap.of(
"index", String.format("%s/%s", storageDir, AzureStorageDruidModule.INDEX_ZIP_FILE_NAME),
@@ -109,16 +109,7 @@ public DataSegment uploadDataSegment(

final DataSegment outSegment = segment
.withSize(size)
-.withLoadSpec(
-    ImmutableMap.<String, Object>of(
-        "type",
-        AzureStorageDruidModule.SCHEME,
-        "containerName",
-        config.getContainer(),
-        "blobPath",
-        azurePaths.get("index")
-    )
-)
+.withLoadSpec(this.makeLoadSpec(new URI(azurePaths.get("index"))))
.withBinaryVersion(version);

log.info("Deleting file [%s]", compressedSegmentData);
@@ -174,4 +165,17 @@ public DataSegment call() throws Exception
}
}
}

+@Override
+public Map<String, Object> makeLoadSpec(URI uri)
+{
+  return ImmutableMap.<String, Object>of(
+      "type",
+      AzureStorageDruidModule.SCHEME,
+      "containerName",
+      config.getContainer(),
+      "blobPath",
+      uri.toString()
+  );
+}
}
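For illustration, the load spec this override yields (hypothetical container name and blob path, assuming AzureStorageDruidModule.SCHEME is "azure"):

// Assuming config.getContainer() returns "druid-segments":
Map<String, Object> spec = pusher.makeLoadSpec(
    URI.create("wikipedia/2015-09-12T00:00:00.000Z_2015-09-13T00:00:00.000Z/v1/0/index.zip")
);
// spec: {type=azure, containerName=druid-segments, blobPath=wikipedia/.../index.zip}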
@@ -27,7 +27,6 @@
import com.microsoft.azure.storage.StorageException;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.MapUtils;
-import io.druid.segment.loading.DataSegmentPusherUtil;
import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.NoneShardSpec;
import org.easymock.EasyMockSupport;
@@ -112,9 +111,9 @@ public void testPush() throws Exception
@Test
public void getAzurePathsTest()
{
-final String storageDir = DataSegmentPusherUtil.getStorageDir(dataSegment);
AzureDataSegmentPusher pusher = new AzureDataSegmentPusher(azureStorage, azureAccountConfig, jsonMapper);
+final String storageDir = pusher.getStorageDir(dataSegment);
Map<String, String> paths = pusher.getAzurePaths(dataSegment);

assertEquals(String.format("%s/%s", storageDir, AzureStorageDruidModule.INDEX_ZIP_FILE_NAME), paths.get("index"));
@@ -30,12 +30,13 @@
import io.druid.java.util.common.logger.Logger;
import io.druid.segment.SegmentUtils;
import io.druid.segment.loading.DataSegmentPusher;
-import io.druid.segment.loading.DataSegmentPusherUtil;
import io.druid.timeline.DataSegment;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.net.URI;
+import java.util.Map;

/**
* Cassandra Segment Pusher
@@ -77,7 +78,7 @@ public DataSegment push(final File indexFilesDir, DataSegment segment) throws IO
log.info("Writing [%s] to C*", indexFilesDir);
String key = JOINER.join(
config.getKeyspace().isEmpty() ? null : config.getKeyspace(),
-DataSegmentPusherUtil.getStorageDir(segment)
+this.getStorageDir(segment)
);

// Create index
@@ -114,4 +115,10 @@ ImmutableMap.<String, Object> of("type", "c*", "key", key)
compressedIndexFile.delete();
return segment;
}

+@Override
+public Map<String, Object> makeLoadSpec(URI uri)
+{
+  throw new UnsupportedOperationException("not supported");
+}
}
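Note: the Cassandra pusher still builds its load spec inline in push() (the ImmutableMap.<String, Object> of("type", "c*", "key", key) call above), so the new makeLoadSpec hook deliberately throws here.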
@@ -34,6 +34,8 @@
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.net.URI;
+import java.util.Map;
import java.util.concurrent.Callable;

public class CloudFilesDataSegmentPusher implements DataSegmentPusher
@@ -75,7 +77,7 @@ public String getPathForHadoop(final String dataSource)
@Override
public DataSegment push(final File indexFilesDir, final DataSegment inSegment) throws IOException
{
-final String segmentPath = CloudFilesUtils.buildCloudFilesPath(this.config.getBasePath(), inSegment);
+final String segmentPath = CloudFilesUtils.buildCloudFilesPath(this.config.getBasePath(), getStorageDir(inSegment));

File descriptorFile = null;
File zipOutFile = null;
@@ -112,18 +114,7 @@ public DataSegment call() throws Exception

final DataSegment outSegment = inSegment
.withSize(indexSize)
-.withLoadSpec(
-    ImmutableMap.<String, Object>of(
-        "type",
-        CloudFilesStorageDruidModule.SCHEME,
-        "region",
-        segmentData.getRegion(),
-        "container",
-        segmentData.getContainer(),
-        "path",
-        segmentData.getPath()
-    )
-)
+.withLoadSpec(makeLoadSpec(new URI(segmentData.getPath())))
.withBinaryVersion(SegmentUtils.getVersionFromDir(indexFilesDir));

return outSegment;
@@ -146,4 +137,19 @@ public DataSegment call() throws Exception
}
}
}

+@Override
+public Map<String, Object> makeLoadSpec(URI uri)
+{
+  return ImmutableMap.<String, Object>of(
+      "type",
+      CloudFilesStorageDruidModule.SCHEME,
+      "region",
+      objectApi.getRegion(),
+      "container",
+      objectApi.getContainer(),
+      "path",
+      uri.toString()
+  );
+}
}
@@ -22,8 +22,6 @@
import com.google.common.base.Predicate;

import io.druid.java.util.common.RetryUtils;
-import io.druid.segment.loading.DataSegmentPusherUtil;
-import io.druid.timeline.DataSegment;

import java.io.IOException;
import java.util.concurrent.Callable;
@@ -70,9 +68,4 @@ public static String buildCloudFilesPath(String basePath, final String fileName)
return path;
}

-public static String buildCloudFilesPath(String basePath, final DataSegment segment)
-{
-  return buildCloudFilesPath(basePath, DataSegmentPusherUtil.getStorageDir(segment));
-}

}