Skip to content
Permalink
Browse files
Create Reference interface for new GC classes (#2767)
* Create ReferenceFile class to implement Reference interface
* Make ReferenceDirectory extend ReferenceFile
* Create AllVolumesDirectory class to extend ReferenceFile and move GcVolumeUtil method to class
* Comment and clean up GC code
* Update MetadataSchema.isValidDirCol regex to be more strict
* Make TableGroupWatcher use ReferenceFile for calls to GC
* Updates to various relevant tests
  • Loading branch information
milleruntime committed Jun 23, 2022
1 parent ad686b9 commit afe78566f85f9e9fd5b43165305bc70cd3a9c878
Showing 25 changed files with 352 additions and 187 deletions.
@@ -39,6 +39,7 @@
import org.apache.accumulo.core.crypto.CryptoServiceFactory;
import org.apache.accumulo.core.crypto.CryptoServiceFactory.ClassloaderType;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.metadata.ValidationUtil;
import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
import org.apache.accumulo.core.spi.crypto.CryptoService;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -131,11 +132,7 @@ public WriterOptions withFileSystem(FileSystem fs) {

@Override
public WriterFSOptions to(String filename) {
Objects.requireNonNull(filename);
if (!filename.endsWith(".rf")) {
throw new IllegalArgumentException(
"Provided filename (" + filename + ") does not end with '.rf'");
}
ValidationUtil.validateRFileName(filename);
this.out = new OutputArgs(filename);
return this;
}
@@ -19,50 +19,29 @@
package org.apache.accumulo.core.gc;

import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.metadata.schema.MetadataSchema;

/**
* A GC reference to a tablet file or directory.
* A GC reference used for collecting files and directories into a single stream. The GC deals with
* two inputs conceptually: candidates and references. Candidates are files that could possibly
* be deleted if they are not defeated by a reference.
*/
public class Reference implements Comparable<Reference> {
// parts of an absolute URI, like "hdfs://1.2.3.4/accumulo/tables/2a/t-0003"
public final TableId tableId; // 2a
public interface Reference {
/**
* Only return true if the reference is a directory.
*/
boolean isDirectory();

// the exact string that is stored in the metadata
public final String metadataEntry;
/**
* Get the {@link TableId} of the reference.
*/
TableId getTableId();

public Reference(TableId tableId, String metadataEntry) {
MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(tableId.canonical());
this.tableId = tableId;
this.metadataEntry = metadataEntry;
}

@Override
public int compareTo(Reference that) {
if (equals(that)) {
return 0;
} else {
return this.metadataEntry.compareTo(that.metadataEntry);
}
}

@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Reference other = (Reference) obj;
if (metadataEntry == null) {
return other.metadataEntry == null;
} else
return metadataEntry.equals(other.metadataEntry);
}

@Override
public int hashCode() {
return this.metadataEntry.hashCode();
}
/**
* Get the exact string stored in the metadata table for this file or directory. A file will be
* read from the Tablet "file" column family:
* {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily}
* A directory will be read from the "srv:dir" column family:
* {@link org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily}
*/
String getMetadataEntry();
}
@@ -19,15 +19,38 @@
package org.apache.accumulo.core.gc;

import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.metadata.schema.MetadataSchema;

/**
* Part of the Tablet File path that is definitely a directory.
* A GC reference to a Tablet directory, like t-0003.
*/
public class ReferenceDirectory extends Reference {
public final String tabletDir; // t-0003
public class ReferenceDirectory extends ReferenceFile {
private final String tabletDir; // t-0003

public ReferenceDirectory(TableId tableId, String dirName) {
super(tableId, dirName);
MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(dirName);
this.tabletDir = dirName;
}

@Override
public boolean isDirectory() {
return true;
}

public String getTabletDir() {
return tabletDir;
}

/**
* A Tablet directory should have a metadata entry equal to the dirName.
*/
@Override
public String getMetadataEntry() {
if (!tabletDir.equals(metadataEntry)) {
throw new IllegalStateException(
"Tablet dir " + tabletDir + " is not equal to metadataEntry: " + metadataEntry);
}
return metadataEntry;
}
}
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.accumulo.core.gc;

import java.util.Objects;

import org.apache.accumulo.core.data.TableId;

/**
 * A {@link Reference} implementation representing a file; {@link #isDirectory()} always returns
 * false here. Equality and hashing are based solely on the metadata entry string (and, for
 * equality, on the exact runtime class); the table id does not take part in either. Ordering is
 * by the metadata entry string.
 */
public class ReferenceFile implements Reference, Comparable<ReferenceFile> {
  // table id portion of an absolute URI such as "hdfs://1.2.3.4/accumulo/tables/2a/t-0003",
  // i.e. "2a" in that example
  public final TableId tableId;

  // the string exactly as it is stored in the metadata
  protected final String metadataEntry;

  /**
   * Create a reference from a table id and its metadata entry.
   *
   * @param tableId
   *          id of the table, must not be null
   * @param metadataEntry
   *          exact metadata string, must not be null
   * @throws NullPointerException
   *           if either argument is null
   */
  public ReferenceFile(TableId tableId, String metadataEntry) {
    this.tableId = Objects.requireNonNull(tableId);
    this.metadataEntry = Objects.requireNonNull(metadataEntry);
  }

  /**
   * @return false, this type is not a directory
   */
  @Override
  public boolean isDirectory() {
    return false;
  }

  /**
   * @return the table id given at construction
   */
  @Override
  public TableId getTableId() {
    return tableId;
  }

  /**
   * @return the metadata entry string given at construction
   */
  @Override
  public String getMetadataEntry() {
    return metadataEntry;
  }

  /**
   * Order by metadata entry. Objects that are {@link #equals(Object) equal} always compare as 0.
   */
  @Override
  public int compareTo(ReferenceFile that) {
    return equals(that) ? 0 : this.metadataEntry.compareTo(that.metadataEntry);
  }

  /**
   * Two references are equal when they are of the same runtime class and carry the same metadata
   * entry.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    ReferenceFile other = (ReferenceFile) obj;
    return metadataEntry.equals(other.metadataEntry);
  }

  /**
   * Hash of the metadata entry only, matching the field used by {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    return this.metadataEntry.hashCode();
  }
}
@@ -23,9 +23,10 @@
import java.util.Objects;

import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;

@@ -46,17 +47,20 @@ public class TabletFile implements Comparable<TabletFile> {
protected final Path metaPath;
private final String normalizedPath;

private static final Logger log = LoggerFactory.getLogger(TabletFile.class);

/**
* Construct new tablet file using a Path. Used in the case where we had to use Path object to
* qualify an absolute path or create a new file.
*/
public TabletFile(Path metaPath) {
this.metaPath = Objects.requireNonNull(metaPath);
String errorMsg = "Missing or invalid part of tablet file metadata entry: " + metaPath;
log.debug("Parsing TabletFile from {}", metaPath);

// use Path object to step backwards from the filename through all the parts
this.fileName = metaPath.getName();
ServerColumnFamily.validateDirCol(fileName);
ValidationUtil.validateFileName(fileName);

Path tabletDirPath = Objects.requireNonNull(metaPath.getParent(), errorMsg);

@@ -18,7 +18,9 @@
*/
package org.apache.accumulo.core.metadata;

import org.apache.accumulo.core.gc.Reference;
import java.util.Objects;

import org.apache.accumulo.core.gc.ReferenceFile;
import org.apache.hadoop.fs.Path;

/**
@@ -37,8 +39,8 @@ public static String validate(String path) {
return validate(p).toString();
}

public static Reference validate(Reference reference) {
validate(new Path(reference.metadataEntry));
public static ReferenceFile validate(ReferenceFile reference) {
validate(new Path(reference.getMetadataEntry()));
return reference;
}

@@ -48,4 +50,20 @@ public static Path validate(Path path) {
}
return path;
}

/**
 * Check that the given file name ends with either ".rf" or "_tmp".
 *
 * @param fileName
 *          name to check, must not be null
 * @throws NullPointerException
 *           if fileName is null
 * @throws IllegalArgumentException
 *           if fileName has neither of the accepted suffixes
 */
public static void validateRFileName(String fileName) {
  Objects.requireNonNull(fileName);
  boolean hasRFileSuffix = fileName.endsWith(".rf");
  boolean hasTmpSuffix = fileName.endsWith("_tmp");
  if (hasRFileSuffix || hasTmpSuffix) {
    return;
  }
  throw new IllegalArgumentException(
      "Provided filename (" + fileName + ") does not end with '.rf' or '_tmp'");
}

// Compiled once; String.matches would recompile this expression on every call.
private static final java.util.regex.Pattern VALID_FILE_NAME_CHARS =
    java.util.regex.Pattern.compile("[\\dA-Za-z._-]+");

/**
 * Check that the given file name is non-empty and consists only of ASCII letters, digits, '.',
 * '_' and '-'.
 *
 * @param fileName
 *          name to check, must not be null
 * @throws NullPointerException
 *           if fileName is null
 * @throws IllegalArgumentException
 *           if fileName is empty or contains any other character
 */
public static void validateFileName(String fileName) {
  Objects.requireNonNull(fileName);
  if (!VALID_FILE_NAME_CHARS.matcher(fileName).matches()) {
    throw new IllegalArgumentException(
        "Provided filename (" + fileName + ") contains invalid characters.");
  }
}
}
@@ -25,7 +25,7 @@
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.gc.Reference;
import org.apache.accumulo.core.gc.ReferenceFile;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.metadata.RootTable;
import org.apache.accumulo.core.metadata.StoredTabletFile;
@@ -191,7 +191,7 @@ default void putGcCandidates(TableId tableId, Collection<StoredTabletFile> candi
/**
* Unlike {@link #putGcCandidates(TableId, Collection)} this takes file and dir GC candidates.
*/
default void putGcFileAndDirCandidates(TableId tableId, Collection<Reference> candidates) {
default void putGcFileAndDirCandidates(TableId tableId, Collection<ReferenceFile> candidates) {
throw new UnsupportedOperationException();
}

@@ -218,16 +218,16 @@ default Stream<ExternalCompactionFinalState> getExternalCompactionFinalStates()
}

/**
* Return an encoded delete marker Mutation to delete the specified TabletFile path. A Reference
* is used for the parameter because the Garbage Collector is optimized to store a directory for
* Tablet File. Otherwise, a {@link TabletFile} object could be used. The tabletFilePathToRemove
* is validated and normalized before creating the mutation.
* Return an encoded delete marker Mutation to delete the specified TabletFile path. A
* ReferenceFile is used for the parameter because the Garbage Collector is optimized to store a
* directory for Tablet File. Otherwise, a {@link TabletFile} object could be used. The
* tabletFilePathToRemove is validated and normalized before creating the mutation.
*
* @param tabletFilePathToRemove
* String full path of the TabletFile
* @return Mutation with encoded delete marker
*/
default Mutation createDeleteMutation(Reference tabletFilePathToRemove) {
default Mutation createDeleteMutation(ReferenceFile tabletFilePathToRemove) {
throw new UnsupportedOperationException();
}

@@ -192,11 +192,13 @@ public static class ServerColumnFamily {
public static final String DEFAULT_TABLET_DIR_NAME = "default_tablet";

/**
* Matches regex for a tablet directory like "default_tablet" or "t-000009x"
*
* @return true if dirName is a valid value for the {@link #DIRECTORY_COLUMN} in the metadata
* table. Returns false otherwise.
*/
public static boolean isValidDirCol(String dirName) {
return !dirName.contains("/");
return dirName.matches("[\\dA-Za-z_-]+");
}

/**

0 comments on commit afe7856

Please sign in to comment.