Skip to content

Commit

Permalink
HIVE-2213. Optimize partial specification metastore functions (Sohan …
Browse files Browse the repository at this point in the history
…Jain via pauly)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1137826 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Paul Yang committed Jun 20, 2011
1 parent caa0107 commit 4b08a30
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 55 deletions.
39 changes: 34 additions & 5 deletions common/src/java/org/apache/hadoop/hive/common/FileUtils.java
Expand Up @@ -94,15 +94,27 @@ private FileUtils() {


/**
 * Makes a partition name from the given keys and values without a
 * caller-supplied default for empty or null entries.
 *
 * @param partCols The partition keys' names
 * @param vals The partition values
 * @return The result of the three-argument overload invoked with a null default.
 */
public static String makePartName(List<String> partCols, List<String> vals) {
  // No explicit default: the three-argument overload decides the fallback.
  final String noDefault = null;
  return makePartName(partCols, vals, noDefault);
}

/**
 * Makes a valid partition name of the form key1=val1/key2=val2/...
 * Key names are lower-cased, and both keys and values are passed through
 * {@code escapePathName}.
 *
 * @param partCols The partition keys' names
 * @param vals The partition values; must have at least as many entries as partCols
 * @param defaultStr
 *          The default name given to a partition value if the respective value is empty
 *          or null. When this is null, escapePathName falls back to
 *          "__HIVE_DEFAULT_PARTITION__".
 * @return An escaped, valid partition name.
 */
public static String makePartName(List<String> partCols, List<String> vals,
    String defaultStr) {
  StringBuilder name = new StringBuilder();
  for (int i = 0; i < partCols.size(); i++) {
    if (i > 0) {
      name.append(Path.SEPARATOR);
    }
    // Each key and each value is appended exactly once, always through the
    // default-aware escapePathName overload so defaultStr is honoured.
    name.append(escapePathName(partCols.get(i).toLowerCase(), defaultStr));
    name.append('=');
    name.append(escapePathName(vals.get(i), defaultStr));
  }
  return name.toString();
}
Expand All @@ -121,7 +133,7 @@ public static String makePartName(List<String> partCols, List<String> vals) {
for (char c = 0; c < ' '; c++) {
charToEscape.set(c);
}

/**
* ASCII 01-1F are HTTP control characters that need to be escaped.
* \u000A and \u000D are \n and \r, respectively.
Expand All @@ -143,11 +155,28 @@ static boolean needsEscaping(char c) {
}

/**
 * Escapes a path name without a caller-supplied default for empty or null input.
 *
 * @param path The path to escape.
 * @return The result of the two-argument overload invoked with a null default path.
 */
public static String escapePathName(String path) {
  // No explicit default: the two-argument overload picks the fallback name.
  final String noDefaultPath = null;
  return escapePathName(path, noDefaultPath);
}

// __HIVE_DEFAULT_NULL__ is the system default value for null and empty string. We should
/**
* Escapes a path name.
* @param path The path to escape.
* @param defaultPath
* The default name for the path, if the given path is empty or null.
* @return An escaped path name.
*/
public static String escapePathName(String path, String defaultPath) {

// __HIVE_DEFAULT_PARTITION__ is the system default value for null and empty string.
// TODO: we should allow user to specify default partition or HDFS file location.
if (path == null || path.length() == 0) {
return "__HIVE_DEFAULT_PARTITION__";
if (defaultPath == null) {
//previously, when path is empty or null and no default path is specified,
// __HIVE_DEFAULT_PARTITION__ was the return value for escapePathName
return "__HIVE_DEFAULT_PARTITION__";
} else {
return defaultPath;
}
}

StringBuilder sb = new StringBuilder();
Expand Down
Expand Up @@ -2133,7 +2133,7 @@ public List<Partition> get_partitions_ps(final String db_name,
final short max_parts) throws MetaException, TException {
startPartitionFunction("get_partitions_ps", db_name, tbl_name, part_vals);
try {
return this.get_partitions_ps_with_auth(db_name, tbl_name, part_vals,
return get_partitions_ps_with_auth(db_name, tbl_name, part_vals,
max_parts, null, null);
}
finally {
Expand All @@ -2148,67 +2148,50 @@ public List<Partition> get_partitions_ps_with_auth(final String db_name,
final List<String> groupNames) throws MetaException, TException {
startPartitionFunction("get_partitions_ps_with_auth", db_name, tbl_name,
part_vals);
List<Partition> parts = null;
List<Partition> matchingParts = new ArrayList<Partition>();

List<Partition> ret;
try {
// This gets all the partitions and then filters based on the specified
// criteria. An alternative approach would be to get all the partition
// names, do the filtering on the names, and get the partition for each
// of the names. that match.

try {
parts = get_partitions(db_name, tbl_name, (short) -1);
} catch (NoSuchObjectException e) {
throw new MetaException(e.getMessage());
}

for (Partition p : parts) {
if (MetaStoreUtils.pvalMatches(part_vals, p.getValues())) {
matchingParts.add(p);
ret = executeWithRetry(new Command<List<Partition>>() {
@Override
public List<Partition> run(RawStore ms) throws Exception {
return ms.listPartitionsPsWithAuth(db_name, tbl_name, part_vals, max_parts,
userName, groupNames);
}
}

return matchingParts;
}
finally {
});
} catch (MetaException e) {
throw e;
} catch (InvalidObjectException e) {
throw new MetaException(e.getMessage());
} catch (Exception e) {
assert(e instanceof RuntimeException);
throw (RuntimeException)e;
} finally {
endFunction("get_partitions_ps_with_auth");
}
return ret;
}

@Override
public List<String> get_partition_names_ps(final String db_name,
final String tbl_name, final List<String> part_vals, final short max_parts)
throws MetaException, TException {
startPartitionFunction("get_partitions_names_ps", db_name, tbl_name, part_vals);
List<String> ret;
try {
Table t;
try {
t = get_table(db_name, tbl_name);
} catch (NoSuchObjectException e) {
throw new MetaException(e.getMessage());
}

List<String> partNames = get_partition_names(db_name, tbl_name, max_parts);
List<String> filteredPartNames = new ArrayList<String>();

for(String name : partNames) {
LinkedHashMap<String, String> spec = Warehouse.makeSpecFromName(name);
List<String> vals = new ArrayList<String>();
// Since we are iterating through a LinkedHashMap, iteration should
// return the partition values in the correct order for comparison.
for (String val : spec.values()) {
vals.add(val);
}
if (MetaStoreUtils.pvalMatches(part_vals, vals)) {
filteredPartNames.add(name);
ret = executeWithRetry(new Command<List<String>>() {
@Override
public List<String> run(RawStore ms) throws Exception {
return ms.listPartitionNamesPs(db_name, tbl_name, part_vals, max_parts);
}
}

return filteredPartNames;
});
} catch (MetaException e) {
throw e;
} catch (Exception e) {
assert(e instanceof RuntimeException);
throw (RuntimeException)e;
} finally {
endFunction("get_partitions_names_ps");
}
return ret;
}

@Override
Expand Down
115 changes: 114 additions & 1 deletion metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
Expand Up @@ -1282,6 +1282,120 @@ public List<String> listPartitionNames(String dbName, String tableName,
return pns;
}

/**
 * Retrieves a Collection of partition-related results from the database that match
 * the partial specification given for a specific table.
 * @param dbName the name of the database
 * @param tableName the name of the table
 * @param part_vals the partial specification values, in partition-key order; an empty
 *          or null entry matches any value for that key
 * @param max_parts the maximum number of partitions to return; a negative value
 *          means no limit is set on the query
 * @param resultsCol the metadata column of the data to return, e.g. partitionName, etc.
 *          if resultsCol is empty or null, a collection of MPartition objects is returned
 * @return A Collection of partition-related items from the db that match the partial spec
 *          for a table. The type of each item in the collection corresponds to the column
 *          you want results for. E.g., if resultsCol is partitionName, the Collection
 *          has types of String, and if resultsCol is null, the types are MPartition.
 * @throws MetaException if the table cannot be found, if more values are given than the
 *          table has partition keys, or if Warehouse.makePartName rejects the
 *          specification (it rejects an empty list of values)
 */
private Collection getPartitionPsQueryResults(String dbName, String tableName,
    List<String> part_vals, short max_parts, String resultsCol)
    throws MetaException {
  dbName = dbName.toLowerCase().trim();
  tableName = tableName.toLowerCase().trim();
  Table table = getTable(dbName, tableName);
  // Fail with a descriptive error instead of a NullPointerException below.
  // NOTE(review): assumes getTable returns null for an unknown table - confirm.
  if (table == null) {
    throw new MetaException(dbName + "." + tableName + " table not found");
  }

  List<FieldSchema> partCols = table.getPartitionKeys();
  int numPartKeys = partCols.size();
  if (part_vals.size() > numPartKeys) {
    throw new MetaException("Incorrect number of partition values");
  }

  // Only the leading keys covered by the partial spec take part in the pattern.
  partCols = partCols.subList(0, part_vals.size());
  //Construct a pattern of the form: partKey=partVal/partKey2=partVal2/...
  // where partVal is either the escaped partition value given as input,
  // or a regex of the form ".*"
  //This works because the "=" and "/" separating key names and partition key/values
  // are not escaped.
  // NOTE(review): given values are path-escaped, not regex-quoted; confirm that
  // values containing regex metacharacters cannot over-match.
  String partNameMatcher = Warehouse.makePartName(partCols, part_vals, ".*");
  //add ".*" to the regex to match anything else afterwards the partial spec.
  if (part_vals.size() < numPartKeys) {
    partNameMatcher += ".*";
  }

  Query q = pm.newQuery(MPartition.class);
  StringBuilder queryFilter = new StringBuilder("table.database.name == dbName");
  queryFilter.append(" && table.tableName == tableName");
  queryFilter.append(" && partitionName.matches(partialRegex)");
  q.setFilter(queryFilter.toString());
  q.declareParameters("java.lang.String dbName, " +
      "java.lang.String tableName, java.lang.String partialRegex");

  if (max_parts >= 0) {
    //User specified a row limit, set it on the Query
    q.setRange(0, max_parts);
  }
  // Project a single column only when one was requested; otherwise the query
  // yields whole MPartition objects.
  if (resultsCol != null && !resultsCol.isEmpty()) {
    q.setResult(resultsCol);
  }

  return (Collection) q.execute(dbName, tableName, partNameMatcher);
}

/**
 * Lists the partitions of a table that match a partial specification and, when
 * requested, attaches the privilege set of the given user/groups to each one.
 * Privileges are only set when both userName and groupNames are non-null and the
 * table parameter PARTITION_LEVEL_PRIVILEGE equals "TRUE" (case-insensitive).
 * The work runs inside a transaction that is rolled back unless the commit succeeds.
 *
 * @param db_name the name of the database
 * @param tbl_name the name of the table
 * @param part_vals the partial specification values
 * @param max_parts the maximum number of partitions to return
 * @param userName the user whose privileges are looked up; may be null
 * @param groupNames the groups whose privileges are looked up; may be null
 * @return the matching partitions, converted from their MPartition form
 * @throws MetaException
 * @throws InvalidObjectException
 */
@Override
public List<Partition> listPartitionsPsWithAuth(String db_name, String tbl_name,
    List<String> part_vals, short max_parts, String userName, List<String> groupNames)
    throws MetaException, InvalidObjectException {
  List<Partition> partitions = new ArrayList<Partition>();
  boolean success = false;
  try {
    openTransaction();
    LOG.debug("executing listPartitionsPsWithAuth");
    Collection parts = getPartitionPsQueryResults(db_name, tbl_name,
        part_vals, max_parts, null);
    MTable mtbl = getMTable(db_name, tbl_name);
    // The table's partition keys do not change between partitions, so they are
    // converted at most once, and only if privileges are actually needed.
    List<FieldSchema> partKeys = null;
    for (Object o : parts) {
      Partition part = convertToPart((MPartition) o);
      //set auth privileges
      if (null != userName && null != groupNames &&
          "TRUE".equalsIgnoreCase(mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) {
        if (partKeys == null) {
          partKeys = this.convertToFieldSchemas(mtbl.getPartitionKeys());
        }
        String partName = Warehouse.makePartName(partKeys, part.getValues());
        PrincipalPrivilegeSet partAuth = getPartitionPrivilegeSet(db_name,
            tbl_name, partName, userName, groupNames);
        part.setPrivileges(partAuth);
      }
      partitions.add(part);
    }
    success = commitTransaction();
  } finally {
    if (!success) {
      rollbackTransaction();
    }
  }
  return partitions;
}

/**
 * Lists the names of the partitions of a table that match a partial specification.
 * The lookup runs inside a transaction that is rolled back unless the commit succeeds.
 *
 * @param dbName the name of the database
 * @param tableName the name of the table
 * @param part_vals the partial specification values
 * @param max_parts the maximum number of partition names to return
 * @return the matching partition names
 * @throws MetaException
 */
@Override
public List<String> listPartitionNamesPs(String dbName, String tableName,
    List<String> part_vals, short max_parts) throws MetaException {
  final List<String> matchedNames = new ArrayList<String>();
  boolean committed = false;
  try {
    openTransaction();
    LOG.debug("Executing listPartitionNamesPs");
    // Ask the shared query helper for the partitionName column only.
    final Collection queried = getPartitionPsQueryResults(
        dbName, tableName, part_vals, max_parts, "partitionName");
    for (Object rawName : queried) {
      final String name = (String) rawName;
      matchedNames.add(name);
    }
    committed = commitTransaction();
  } finally {
    if (!committed) {
      rollbackTransaction();
    }
  }
  return matchedNames;
}

// TODO:pc implement max
private List<MPartition> listMPartitions(String dbName, String tableName,
int max) {
Expand Down Expand Up @@ -1484,7 +1598,6 @@ public List<String> listPartitionNamesByFilter(String dbName, String tableName,
Map<String, String> params = new HashMap<String, String>();
String queryFilterString =
makeQueryFilterString(mtable, filter, params);

Query query = pm.newQuery(
"select partitionName from org.apache.hadoop.hive.metastore.model.MPartition "
+ "where " + queryFilterString);
Expand Down
47 changes: 45 additions & 2 deletions metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
Expand Up @@ -241,6 +241,49 @@ public abstract Partition getPartitionWithAuth(String dbName, String tblName,

public abstract List<Partition> getPartitionsWithAuth(String dbName,
String tblName, short maxParts, String userName, List<String> groupNames)
throws MetaException, NoSuchObjectException, InvalidObjectException;;
throws MetaException, NoSuchObjectException, InvalidObjectException;

}
/**
* Lists partition names that match a given partial specification.
* @param db_name
* The name of the database which has the partitions
* @param tbl_name
* The name of the table which has the partitions
* @param part_vals
* A partial list of values for partitions in order of the table's partition keys.
* An entry can be left empty to match any value for that key, which allows
* specifying only the later partition keys.
* @param max_parts
* The maximum number of partitions to return
* @return A list of partition names that match the partial spec.
* @throws MetaException
*/
public abstract List<String> listPartitionNamesPs(String db_name, String tbl_name,
List<String> part_vals, short max_parts)
throws MetaException;

/**
* Lists partitions that match a given partial specification and sets their auth privileges.
* If userName or groupNames is null, then no auth privileges are set.
* @param db_name
* The name of the database which has the partitions
* @param tbl_name
* The name of the table which has the partitions
* @param part_vals
* A partial list of values for partitions in order of the table's partition keys.
* An entry can be left empty to match any value for that key, which allows
* specifying only the later partition keys.
* @param max_parts
* The maximum number of partitions to return
* @param userName
* The user name for the partition for authentication privileges
* @param groupNames
* The groupNames for the partition for authentication privileges
* @return A list of partitions that match the partial spec.
* @throws MetaException
* @throws InvalidObjectException
*/
public abstract List<Partition> listPartitionsPsWithAuth(String db_name, String tbl_name,
List<String> part_vals, short max_parts, String userName, List<String> groupNames)
throws MetaException, InvalidObjectException;
}
Expand Up @@ -390,14 +390,28 @@ public boolean isDir(Path f) throws MetaException {

/**
 * Makes a partition name from the given partition columns and values without a
 * caller-supplied default for empty or null values.
 *
 * @param partCols The partition columns
 * @param vals The partition values
 * @return The result of the three-argument overload invoked with a null default.
 * @throws MetaException propagated from the three-argument overload
 */
public static String makePartName(List<FieldSchema> partCols,
    List<String> vals) throws MetaException {
  // No explicit default string: defer the fallback choice to the overload.
  final String noDefault = null;
  return makePartName(partCols, vals, noDefault);
}

/**
 * Makes a valid partition name.
 * @param partCols The partition columns
 * @param vals The partition values
 * @param defaultStr
 *          The default name given to a partition value if the respective value is empty
 *          or null.
 * @return An escaped, valid partition name.
 * @throws MetaException if partCols is empty or its size differs from that of vals
 */
public static String makePartName(List<FieldSchema> partCols,
    List<String> vals, String defaultStr) throws MetaException {
  if ((partCols.size() != vals.size()) || (partCols.size() == 0)) {
    throw new MetaException("Invalid partition key & values");
  }
  // FileUtils works on plain column names, so strip them out of the schemas.
  List<String> colNames = new ArrayList<String>(partCols.size());
  for (FieldSchema col: partCols) {
    colNames.add(col.getName());
  }
  // Single return that forwards defaultStr to the name builder.
  return FileUtils.makePartName(colNames, vals, defaultStr);
}

public static List<String> getPartValuesFromPartName(String partName)
Expand Down

0 comments on commit 4b08a30

Please sign in to comment.