Skip to content
Permalink
Browse files
[GOBBLIN-913] Add MySQL and configurations to cluster
Closes #2781 from Will-Lo/mysql-k8s-init
  • Loading branch information
Will-Lo authored and suvasude committed Nov 7, 2019
1 parent b685271 commit e785b02d9fd59dbeb4282387d1956778df4f6fca
Show file tree
Hide file tree
Showing 12 changed files with 361 additions and 3 deletions.
@@ -17,5 +17,4 @@
#
GOBBLIN_HOME="$(cd `dirname $0`/..; pwd)"

./bin/gobblin.sh service gobblin-as-service start --log-to-stdout $@

./bin/gobblin.sh service gobblin-as-service start --log-to-stdout "$@"
@@ -0,0 +1,4 @@
# Kubernetes Cluster For GaaS

This folder includes the files needed to create and run a GaaS instance, using the FS as the communication mechanism between GaaS and Gobblin Standalone.

@@ -30,7 +30,7 @@ spec:
image: will97/gobblin-as-a-service:latest
volumeMounts:
- name: shared-jobs
mountPath: /tmp/gobblin-service/jobs
mountPath: /tmp/gobblin-as-service/jobs
- name: shared-template-catalogs
mountPath: /tmp/templateCatalog

@@ -0,0 +1,4 @@
# MySQL K8s Cluster

- Adds MySQL as the SpecStore
- Adds configuration files to be loaded as configMaps, allowing configuration changes to GaaS without rebuilding a new image.
@@ -0,0 +1,105 @@
# In the future, build the kubernetes cluster from the official docker account
# Also ensure that proper tagging/versioning is done i.e. remove :latest tag and instead use the digest of the container

# Deployment for Gobblin-as-a-Service (GaaS): a single replica whose pod
# mounts the shared jobs volume and the generated `gaas-config` ConfigMap,
# and receives the MySQL credentials from the `mysql-credentials` Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gaas-deployment
  labels:
    app: gaas-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gaas
  template:
    metadata:
      name: gaas
      labels:
        app: gaas
    spec:
      # dependency on mysql to be initialized before gaas can be initialized
      # NOTE(review): the loop below only waits until the name `mysql`
      # resolves via nslookup; it does not probe port 3306 - confirm that
      # this is a sufficient readiness signal.
      initContainers:
        - name: init-mysql
          image: 'busybox:1.28'
          command:
            - 'sh'
            - '-c'
            - 'until nslookup mysql; do echo waiting for mysql; sleep 2; done;'
      containers:
        - name: gobblin-service
          image: 'will97/gobblin-as-a-service:latest'
          command:
            - './bin/entrypoint.sh'
          # $(MYSQL_USERNAME) and $(MYSQL_PASSWORD) refer to the env entries
          # declared below; the values are handed to the JVM as the
          # mysqlCredentials.* system properties.
          args:
            - '--jvmopts'
            - '-DmysqlCredentials.user=$(MYSQL_USERNAME) -DmysqlCredentials.password=$(MYSQL_PASSWORD)'
          env:
            - name: MYSQL_USERNAME
              valueFrom:
                secretKeyRef:
                  name: mysql-credentials
                  key: username
            - name: MYSQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mysql-credentials
                  key: password
          volumeMounts:
            - name: shared-jobs
              mountPath: /tmp/gobblin-as-service/jobs
            # subPath mounts only the gaas-application.conf key of the
            # ConfigMap, as the single file application.conf.
            - name: gaas-config
              mountPath: /home/gobblin/conf/gobblin-as-service/application.conf
              subPath: gaas-application.conf
      volumes:
        # NOTE(review): no PersistentVolumeClaim named shared-jobs-claim is
        # defined in this manifest - confirm it is created elsewhere.
        - name: shared-jobs
          persistentVolumeClaim:
            claimName: shared-jobs-claim
        - name: gaas-config
          configMap:
            name: gaas-config


---
# Deployment for Gobblin Standalone: a single replica that shares the
# `shared-jobs` volume with GaaS and loads its application.conf from the
# `standalone-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gobblin-standalone-deployment
  labels:
    app: gobblin-standalone-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gobblin-standalone
  template:
    metadata:
      name: gobblin-standalone
      labels:
        app: gobblin-standalone
    spec:
      containers:
        - name: gobblin-standalone
          image: 'will97/gobblin-standalone:latest'
          volumeMounts:
            - name: shared-jobs
              mountPath: /tmp/gobblin-standalone/jobs
            # subPath mounts only the standalone-application.conf key of the
            # ConfigMap, as the single file application.conf.
            - name: standalone-config
              mountPath: /home/gobblin/conf/standalone/application.conf
              subPath: standalone-application.conf
      volumes:
        # Same claim as the GaaS deployment, so both pods see the same jobs
        # directory.
        - name: shared-jobs
          persistentVolumeClaim:
            claimName: shared-jobs-claim
        - name: standalone-config
          configMap:
            name: standalone-config
---
# NodePort Service exposing port 6956 of the pods labelled `app: gaas`
# (6956 is the value of gobblin.service.port in gaas-application.conf).
apiVersion: v1
kind: Service
metadata:
  name: gaas-svc
  labels:
    app: gobblin-service
spec:
  type: NodePort
  selector:
    app: gaas
  ports:
    - protocol: TCP
      port: 6956
      targetPort: 6956
@@ -0,0 +1,73 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Sample configuration properties for the Gobblin Service
# This file is packaged into the gaas-config ConfigMap and mounted into the
# GaaS container as conf/gobblin-as-service/application.conf.

# Topology Catalog and Store
# Root working directory; the store and job directories below are derived
# from it through ${gobblin.service.work.dir}.
gobblin.service.work.dir=/tmp/gobblin-as-service

# TopologySpec Factory
topologySpec.store.dir=${gobblin.service.work.dir}/topologySpecStore
topologySpecFactory.topologyNames=localGobblinCluster
topologySpecFactory.localGobblinCluster.description="StandaloneClusterTopology"
topologySpecFactory.localGobblinCluster.version="1"
topologySpecFactory.localGobblinCluster.uri="gobblinCluster"
topologySpecFactory.localGobblinCluster.specExecutorInstance.class="org.apache.gobblin.runtime.spec_executorInstance.LocalFsSpecExecutor"
topologySpecFactory.localGobblinCluster.specExecInstance.capabilities="source:dest"
# Resolves to /tmp/gobblin-as-service/jobs, the path at which the GaaS
# deployment mounts the shared-jobs volume.
topologySpecFactory.localGobblinCluster.gobblin.cluster.localSpecProducer.dir=${gobblin.service.work.dir}/jobs

# Flow Catalog and Store
flowSpec.store.dir=${gobblin.service.work.dir}/flowSpecStore

# Template Catalog
gobblin.service.templateCatalogs.fullyQualifiedPath="file://"

# JobStatusMonitor
gobblin.service.jobStatusMonitor.enabled=false

# FsJobStatusRetriever
fsJobStatusRetriever.state.store.dir=${gobblin.service.work.dir}/state-store

# DagManager
gobblin.service.dagManager.enabled=true
gobblin.service.dagManager.jobStatusRetriever.class="org.apache.gobblin.service.monitoring.FsJobStatusRetriever"
gobblin.service.dagManager.dagStateStoreClass="org.apache.gobblin.service.modules.orchestration.FSDagStateStore"
gobblin.service.dagManager.dagStateStoreDir=${gobblin.service.work.dir}/dagStateStoreDir

# RestLI
# Same port that the gaas-svc Service targets.
gobblin.service.port=6956

# MySQL State Store
flowSpec.store.class="org.apache.gobblin.runtime.spec_store.MysqlSpecStore"
flowSpec.serde.class="org.apache.gobblin.runtime.spec_serde.GsonFlowSpecSerDe"
state.store.factory.class="org.apache.gobblin.metastore.MysqlJobStatusStateStoreFactory"

mysqlSpecStore.state.store.db.table="flow_spec_store"

# Assuming default namespace. URL of the service takes the form of <service>.<namespace>.svc.cluster.local, see https://github.com/kubernetes/dns/blob/master/docs/specification.md
mysqlSpecStore.state.store.db.url="jdbc:mysql://mysql.default.svc.cluster.local:3306/gaas_db"
# mysqlCredentials.user / mysqlCredentials.password are not defined in this
# file; the GaaS deployment passes them as -DmysqlCredentials.* JVM options.
mysqlSpecStore.state.store.db.user=${mysqlCredentials.user}
mysqlSpecStore.state.store.db.password=${mysqlCredentials.password}

# MySQL Job Status Retriever
# NOTE(review): gobblin.service.dagManager.jobStatusRetriever.class above still
# names FsJobStatusRetriever while this key selects the MySQL retriever -
# confirm that the split is intended.
jobStatusRetriever.class="org.apache.gobblin.service.monitoring.MysqlJobStatusRetriever"
mysqlJobStatusRetriever.state.store.db.table="gaas_job_status"

# Assuming default namespace. URL of the service takes the form of <service>.<namespace>.svc.cluster.local
mysqlJobStatusRetriever.state.store.db.url="jdbc:mysql://mysql.default.svc.cluster.local:3306/gaas_db"
mysqlJobStatusRetriever.state.store.db.user=${mysqlCredentials.user}
mysqlJobStatusRetriever.state.store.db.password=${mysqlCredentials.password}
@@ -0,0 +1,17 @@
# Kustomization for the MySQL-backed GaaS cluster: bundles the manifests
# and generates the ConfigMaps and Secret that those manifests reference.
resources:
  - application.yaml
  - mysql-deployment.yaml
  - mysql-pv.yaml

# Each generated ConfigMap gets one key per listed file.
configMapGenerator:
  - name: gaas-config
    files:
      - ./gaas-application.conf
  - name: standalone-config
    files:
      - ./standalone-application.conf

secretGenerator:
  # this should be replaced with references to files/vars stored securely
  - name: mysql-credentials
    literals:
      - username=default-user
      - password=default-password
@@ -0,0 +1,55 @@
# Service named `mysql`, forwarding TCP 3306 to the pods labelled
# `app: mysql`. No `type` is set on this Service.
apiVersion: v1
kind: Service
metadata:
  name: mysql
spec:
  selector:
    app: mysql
  ports:
    - port: 3306
      targetPort: 3306
      protocol: TCP
---
# MySQL Deployment backing GaaS. The database `gaas_db` and the application
# user are configured through the container's MYSQL_* environment variables;
# data is kept on the `mysql-pv-claim` volume.
# for versions before 1.9.0 use apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql
spec:
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: mysql
  template:
    metadata:
      labels:
        app: mysql
    spec:
      containers:
        - name: mysql
          image: 'mysql:5.6'
          ports:
            - name: mysql
              containerPort: 3306
          env:
            # Kept as the quoted string "yes" so it is not parsed as a
            # boolean.
            - name: MYSQL_RANDOM_ROOT_PASSWORD
              value: "yes"
            - name: MYSQL_DATABASE
              value: gaas_db
            # Same Secret keys that the GaaS deployment reads.
            - name: MYSQL_USER
              valueFrom:
                secretKeyRef:
                  name: mysql-credentials
                  key: username
            - name: MYSQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mysql-credentials
                  key: password
          volumeMounts:
            - name: mysql-persistent-storage
              mountPath: /var/lib/mysql
      volumes:
        - name: mysql-persistent-storage
          persistentVolumeClaim:
            claimName: mysql-pv-claim
@@ -0,0 +1,26 @@
# 1Gi hostPath PersistentVolume in the `manual` storage class, backed by
# /mnt/data.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: mysql-pv-volume
  labels:
    type: local
spec:
  storageClassName: manual
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 1Gi
  hostPath:
    path: /mnt/data
---
# Claim used by the MySQL Deployment; requests 1Gi ReadWriteOnce from the
# `manual` storage class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mysql-pv-claim
spec:
  storageClassName: manual
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
@@ -0,0 +1,75 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Configuration for the Gobblin standalone container. This file is packaged
# into the standalone-config ConfigMap and mounted as
# conf/standalone/application.conf.
# NOTE(review): the ${env:...} references below presumably resolve from the
# container environment - confirm that the image sets GOBBLIN_WORK_DIR and
# GOBBLIN_JOB_CONFIG_DIR.

# Thread pool settings for the task executor
taskexecutor.threadpool.size=2
taskretry.threadpool.coresize=1
taskretry.threadpool.maxsize=2

# File system URIs
# The writer and state store reuse fs.uri, so all three point at the local
# file system.
fs.uri=file:///
writer.fs.uri=${fs.uri}
state.store.fs.uri=${fs.uri}

# Writer related configuration properties
writer.output.format=AVRO
writer.staging.dir=${env:GOBBLIN_WORK_DIR}/task-staging
writer.output.dir=${env:GOBBLIN_WORK_DIR}/task-output

# Data publisher related configuration properties
data.publisher.type=org.apache.gobblin.publisher.BaseDataPublisher
data.publisher.final.dir=${env:GOBBLIN_WORK_DIR}/job-output
data.publisher.replace.final.dir=false

# Directory where job configuration files are stored
jobconf.dir=${env:GOBBLIN_JOB_CONFIG_DIR}
jobconf.fullyQualifiedPath=file://${env:GOBBLIN_JOB_CONFIG_DIR}

# Directory where job/task state files are stored
state.store.dir=${env:GOBBLIN_WORK_DIR}/state-store

# Directory where commit sequences are stored
gobblin.runtime.commit.sequence.store.dir=${env:GOBBLIN_WORK_DIR}/commit-sequence-store

# Directory where error files from the quality checkers are stored
qualitychecker.row.err.file=${env:GOBBLIN_WORK_DIR}/err

# Directory where job locks are stored
job.lock.dir=${env:GOBBLIN_WORK_DIR}/locks

# Directory where metrics log files are stored
metrics.log.dir=${env:GOBBLIN_WORK_DIR}/metrics

# Enable metrics / events
metrics.enabled=true

# UI
#admin.server.enabled=false
admin.server.enabled=true
admin.server.port=9000

rest.server.host=localhost
rest.server.port=9090

# job history store ( WARN [GobblinYarnAppLauncher] NOT starting the admin UI because the job execution info server is NOT enabled )
# NOTE(review): admin.server.enabled=true above, yet the warning quoted here
# says the admin UI is not started while the job execution info server is
# disabled - confirm which behaviour is intended.
job.execinfo.server.enabled=false
job.history.store.enabled=false
task.status.reportintervalinms=5000

# The time gap for Job Detector to detect modification/deletion/creation of jobconfig.
# Unit in milliseconds, configurable.
jobconf.monitor.interval=30000

0 comments on commit e785b02

Please sign in to comment.