Skip to content

Commit

Permalink
Updated DOC for No-Sort Compaction
Browse files Browse the repository at this point in the history
  • Loading branch information
NamanRastogi committed Jan 21, 2019
1 parent 1b45c41 commit 62fc013
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 9 deletions.
Expand Up @@ -36,7 +36,7 @@ public static SortScope getSortScope(String sortScope) {
case "NO_SORT":
return SortScope.NO_SORT;
default:
return SortScope.LOCAL_SORT;
return getSortScope(CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT);
}
}

Expand Down
Expand Up @@ -1347,12 +1347,20 @@ public SortScopeOptions.SortScope getSortScope() {
if (getNumberOfSortColumns() == 0) {
return SortScopeOptions.SortScope.NO_SORT;
} else {
return SortScopeOptions.getSortScope(
CarbonProperties.getInstance().getProperty(
CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
CarbonProperties.getInstance().getProperty(
CarbonCommonConstants.LOAD_SORT_SCOPE,
CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT)));
// Check SORT_SCOPE in Session Properties first.
String sortScopeSessionProp = CarbonProperties.getInstance().getProperty(
CarbonLoadOptionConstants.CARBON_TABLE_LOAD_SORT_SCOPE + getDatabaseName() + "."
+ getTableName());
if (null != sortScopeSessionProp) {
return SortScopeOptions.getSortScope(sortScopeSessionProp);
}

// If SORT_SCOPE is not found in Session Properties,
// then retrieve it from Table.
return SortScopeOptions.getSortScope(CarbonProperties.getInstance()
.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
CarbonProperties.getInstance()
.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, "LOCAL_SORT")));
}
} else {
return SortScopeOptions.getSortScope(sortScope);
Expand Down
1 change: 1 addition & 0 deletions docs/configuration-parameters.md
Expand Up @@ -208,6 +208,7 @@ RESET
| carbon.options.date.format | Specifies the data format of the date columns in the data being loaded |
| carbon.options.timestamp.format | Specifies the timestamp format of the time stamp columns in the data being loaded |
| carbon.options.sort.scope | Specifies how the data in the current load should be sorted. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.sort.scope for detailed information. |
| carbon.table.load.sort.scope | Overrides the SORT_SCOPE provided in CREATE TABLE. |
| carbon.options.global.sort.partitions | |
| carbon.options.serialization.null.format | Default Null value representation in the data being loaded. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.options.serialization.null.format for detailed information. |
| carbon.query.directQueryOnDataMap.enabled | Specifies whether datamap can be queried directly. This is useful for debugging purposes. **NOTE:** Refer to [Query Configuration](#query-configuration) for detailed information. |
Expand Down
9 changes: 8 additions & 1 deletion docs/dml-of-carbondata.md
Expand Up @@ -49,6 +49,7 @@ CarbonData DML statements are documented here,which includes:
| [COMMENTCHAR](#commentchar) | Character used to comment the rows in the input csv file. Those rows will be skipped from processing |
| [HEADER](#header) | Whether the input csv files have header row |
| [FILEHEADER](#fileheader) | If the header is not present in the input csv, the column names to be used for the data read from the input csv |
| [SORT_SCOPE](#sort_scope) | Sort Scope to be used for current load. |
| [MULTILINE](#multiline) | Whether a row data can span across multiple lines. |
| [ESCAPECHAR](#escapechar) | Escape character used to escape the data in the input csv file. For example, \ is a standard escape character |
| [SKIP_EMPTY_LINE](#skip_empty_line) | Whether empty lines in input csv file should be skipped or loaded as null row |
Expand Down Expand Up @@ -106,6 +107,13 @@ CarbonData DML statements are documented here,which includes:
OPTIONS('FILEHEADER'='column1,column2')
```

- ##### SORT_SCOPE:
Sort Scope to be used for the current load. This overrides the Sort Scope of the table.

```
OPTIONS('SORT_SCOPE'='BATCH_SORT')
```

- ##### MULTILINE:

CSV with new line character in quotes.
Expand Down Expand Up @@ -458,4 +466,3 @@ CarbonData DML statements are documented here,which includes:
```
CLEAN FILES FOR TABLE carbon_table
```

Expand Up @@ -35,6 +35,7 @@ import org.apache.carbondata.common.constants.LoggerAction
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.exception.InvalidConfigurationException
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.core.metadata.schema.PartitionInfo
import org.apache.carbondata.core.metadata.schema.partition.PartitionType
Expand Down Expand Up @@ -1201,6 +1202,16 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
}
}

// Validate SORT_SCOPE
if (options.exists(_._1.equalsIgnoreCase("SORT_SCOPE"))) {
val optionValue: String = options.get("sort_scope").get.head._2
if (!CarbonUtil.isValidSortOption(optionValue)) {
throw new InvalidConfigurationException(
s"Passing invalid SORT_SCOPE '$optionValue', valid SORT_SCOPE are 'NO_SORT'," +
s" 'BATCH_SORT', 'LOCAL_SORT' and 'GLOBAL_SORT' ")
}
}

// check for duplicate options
val duplicateOptions = options filter {
case (_, optionList) => optionList.size > 1
Expand Down
Expand Up @@ -135,8 +135,8 @@ public void addInMemoryRawResultIterator(List<RawResultIterator> sortedRawResult
noDicAndComplexColumns, sortParameters, measureDataType);
if (inMemorySortTempChunkHolder.hasNext()) {
inMemorySortTempChunkHolder.readRow();
recordHolderHeapLocal.add(inMemorySortTempChunkHolder);
}
recordHolderHeapLocal.add(inMemorySortTempChunkHolder);
}
}

Expand Down

0 comments on commit 62fc013

Please sign in to comment.