-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Open
Labels
Description
Hi, I'm trying to import a CSV, I'm following the guide here: https://docs.pinot.apache.org/basics/getting-started/pushing-your-data-to-pinot
I run Pinot using docker-compose on M2-Apple chip
docker-compose.yaml
version: '3.7'
services:
zookeeper:
image: zookeeper
hostname: zookeeper
container_name: manual-zookeeper
ports:
- "2181:2181"
environment:
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_TICK_TIME: 2000
pinot-controller:
image: apachepinot/pinot:1.0.0
command: "StartController -zkAddress manual-zookeeper:2181"
container_name: "manual-pinot-controller"
volumes:
- ./config:/config
- ./data:/data
restart: unless-stopped
ports:
- "9000:9000"
depends_on:
- zookeeper
pinot-broker:
image: apachepinot/pinot:1.0.0
command: "StartBroker -zkAddress manual-zookeeper:2181"
restart: unless-stopped
container_name: "manual-pinot-broker"
volumes:
- ./config:/config
- ./data:/data
ports:
- "8099:8099"
depends_on:
- pinot-controller
pinot-server:
image: apachepinot/pinot:1.0.0
command: "StartServer -zkAddress manual-zookeeper:2181"
restart: unless-stopped
container_name: "manual-pinot-server"
volumes:
- ./config:/config
- ./data:/data
depends_on:
- pinot-brokerconfig/schema.json
{
"schemaName": "transcript",
"dimensionFieldSpecs": [
{
"name": "studentID",
"dataType": "INT"
},
{
"name": "firstName",
"dataType": "STRING"
},
{
"name": "lastName",
"dataType": "STRING"
},
{
"name": "gender",
"dataType": "STRING"
},
{
"name": "subject",
"dataType": "STRING"
}
],
"metricFieldSpecs": [
{
"name": "score",
"dataType": "FLOAT"
}
],
"dateTimeFieldSpecs": [
{
"name": "timestampInEpoch",
"dataType": "LONG",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:MILLISECONDS"
}
]
}table.json
{
"tableName": "transcript",
"segmentsConfig": {
"timeColumnName": "timestampInEpoch",
"timeType": "MILLISECONDS",
"replication": "1",
"schemaName": "transcript"
},
"tableIndexConfig": {
"invertedIndexColumns": [],
"loadMode": "MMAP"
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableType": "OFFLINE",
"metadata": {}
}job-spec.yml
executionFrameworkSpec:
name: 'standalone'
segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
jobType: SegmentCreationAndTarPush
inputDirURI: '/data/'
includeFileNamePattern: 'glob:**/*.csv'
outputDirURI: '/tmp/pinot-quick-start/segments/'
overwriteOutput: true
pinotFSSpecs:
- scheme: file
className: org.apache.pinot.spi.filesystem.LocalPinotFS
recordReaderSpec:
dataFormat: 'csv'
className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader'
configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig'
tableSpec:
tableName: 'transcript'
schemaURI: 'http://manual-pinot-controller:9000/tables/transcript/schema'
tableConfigURI: 'http://manual-pinot-controller:9000/tables/transcript'
pinotClusterSpecs:
- controllerURI: 'http://manual-pinot-controller:9000'data/transcript.csv
studentID,firstName,lastName,gender,subject,score,timestampInEpoch
200,Lucy,Smith,Female,Maths,3.8,1570863600000
200,Lucy,Smith,Female,English,3.5,1571036400000
201,Bob,King,Male,Maths,3.2,1571900400000
202,Nick,Young,Male,Physics,3.6,1572418800000I first apply the schema:
docker exec \
-it manual-pinot-controller bin/pinot-admin.sh AddTable \
-tableConfigFile /config/table.json \
-schemaFile /config/schema.json \
-execThen I execute the job-spec
docker exec \
-it manual-pinot-controller bin/pinot-admin.sh LaunchDataIngestionJob \
-jobSpecFile /config/job-spec.ymlThen I get this error message and java stack-trace which we all love 😢:
2023/11/02 23:02:45.260 ERROR [LaunchDataIngestionJobCommand] [main] Got exception to kick off standalone data ingestion job -
java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:152) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:121) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:130) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.tools.Command.call(Command.java:33) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.tools.Command.call(Command.java:29) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine.executeUserObject(CommandLine.java:1953) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine.access$1300(CommandLine.java:145) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at picocli.CommandLine.execute(CommandLine.java:2078) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.tools.admin.PinotAdministrator.execute(PinotAdministrator.java:171) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.tools.admin.PinotAdministrator.main(PinotAdministrator.java:202) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
Caused by: java.nio.file.NoSuchFileException: /tmp/pinot-quick-start/rawdata
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:92) ~[?:?]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111) ~[?:?]
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:116) ~[?:?]
at sun.nio.fs.UnixFileAttributeViews$Basic.readAttributes(UnixFileAttributeViews.java:55) ~[?:?]
at sun.nio.fs.UnixFileSystemProvider.readAttributes(UnixFileSystemProvider.java:149) ~[?:?]
at sun.nio.fs.LinuxFileSystemProvider.readAttributes(LinuxFileSystemProvider.java:99) ~[?:?]
at java.nio.file.Files.readAttributes(Files.java:1764) ~[?:?]
at java.nio.file.FileTreeWalker.getAttributes(FileTreeWalker.java:219) ~[?:?]
at java.nio.file.FileTreeWalker.visit(FileTreeWalker.java:276) ~[?:?]
at java.nio.file.FileTreeWalker.walk(FileTreeWalker.java:322) ~[?:?]
at java.nio.file.FileTreeIterator.<init>(FileTreeIterator.java:71) ~[?:?]
at java.nio.file.Files.walk(Files.java:3825) ~[?:?]
at java.nio.file.Files.walk(Files.java:3879) ~[?:?]
at org.apache.pinot.spi.filesystem.LocalPinotFS.listFiles(LocalPinotFS.java:115) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.spi.filesystem.NoClosePinotFS.listFiles(NoClosePinotFS.java:86) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.common.segment.generation.SegmentGenerationUtils.listMatchedFilesWithRecursiveOption(SegmentGenerationUtils.java:259) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:177) ~[pinot-batch-ingestion-standalone-1.0.0-shaded.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:150) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79]
... 13 more
java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:152)
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:121)
at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:130)
at org.apache.pinot.tools.Command.call(Command.java:33)
at org.apache.pinot.tools.Command.call(Command.java:29)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.pinot.tools.admin.PinotAdministrator.execute(PinotAdministrator.java:171)
at org.apache.pinot.tools.admin.PinotAdministrator.main(PinotAdministrator.java:202)
Caused by: java.nio.file.NoSuchFileException: /tmp/pinot-quick-start/rawdata
at java.base/sun.nio.fs.UnixException.translateToIOException(UnixException.java:92)
at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:116)
at java.base/sun.nio.fs.UnixFileAttributeViews$Basic.readAttributes(UnixFileAttributeViews.java:55)
at java.base/sun.nio.fs.UnixFileSystemProvider.readAttributes(UnixFileSystemProvider.java:149)
at java.base/sun.nio.fs.LinuxFileSystemProvider.readAttributes(LinuxFileSystemProvider.java:99)
at java.base/java.nio.file.Files.readAttributes(Files.java:1764)
at java.base/java.nio.file.FileTreeWalker.getAttributes(FileTreeWalker.java:219)
at java.base/java.nio.file.FileTreeWalker.visit(FileTreeWalker.java:276)
at java.base/java.nio.file.FileTreeWalker.walk(FileTreeWalker.java:322)
at java.base/java.nio.file.FileTreeIterator.<init>(FileTreeIterator.java:71)
at java.base/java.nio.file.Files.walk(Files.java:3825)
at java.base/java.nio.file.Files.walk(Files.java:3879)
at org.apache.pinot.spi.filesystem.LocalPinotFS.listFiles(LocalPinotFS.java:115)
at org.apache.pinot.spi.filesystem.NoClosePinotFS.listFiles(NoClosePinotFS.java:86)
at org.apache.pinot.common.segment.generation.SegmentGenerationUtils.listMatchedFilesWithRecursiveOption(SegmentGenerationUtils.java:259)
at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:177)
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:150)
... 13 more
I followed several guides and tried v1.1.0 but I keep seeing the error
Reactions are currently unavailable