From cf3a8ba90030b256e69922533eb4f639b72f5ce3 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Fri, 27 Jun 2025 18:06:15 -0500 Subject: [PATCH 1/7] Add planet-dump-ng for planet file processing --- images/backup-restore/start.sh | 15 ++++++----- images/osm-processor/Dockerfile | 48 ++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/images/backup-restore/start.sh b/images/backup-restore/start.sh index e9fa1240..c23bc2e5 100755 --- a/images/backup-restore/start.sh +++ b/images/backup-restore/start.sh @@ -35,17 +35,18 @@ cloudStorageOps() { } backupDB() { - local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.sql.gz - local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.sql.gz" + local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.dump + local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.dump" if [ "$SET_DATE_AT_NAME" == "true" ]; then local CURRENT_DATE=$(date '+%Y%m%d-%H%M') - LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz" - CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.sql.gz" + LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump" + CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump" fi # Backup database with max compression echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE}" - pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE} + # pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE} + pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -Fc -f ${LOCAL_BACKUP_FILE} ${POSTGRES_DB} # Handle cloud storage based on the provider cloudStorageOps "${LOCAL_BACKUP_FILE}" "${CLOUD_BACKUP_FILE}" @@ -53,7 +54,7 @@ backupDB() { restoreDB() { local CURRENT_DATE=$(date '+%Y%m%d-%H%M') - local RESTORE_FILE="backup.sql.gz" + local RESTORE_FILE="backup.dump" local LOG_RESULT_FILE="restore_results-${CURRENT_DATE}.log" local flag=true @@ -62,7 +63,7 @@ restoreDB() { flag=false wget -O ${RESTORE_FILE} ${RESTORE_URL_FILE} echo "Restoring ${RESTORE_URL_FILE} in ${POSTGRES_DB}" - gunzip -c <${RESTORE_FILE} | psql -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} | tee ${LOG_RESULT_FILE} + pg_restore -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -d ${POSTGRES_DB} --create --no-owner ${RESTORE_FILE} | tee ${LOG_RESULT_FILE} # aws s3 cp ${LOG_RESULT_FILE} s3://${AWS_S3_BUCKET}/${LOG_RESULT_FILE} echo "Import data to ${POSTGRES_DB} has finished ..." 
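# Note on the format switch above: a pg_dump custom-format archive (-Fc) is
# compressed by default and, unlike a gzip'd SQL stream, supports selective and
# parallel restores. A minimal pre-restore sanity check (a sketch built from
# standard pg_restore flags; the -j worker count is an assumption, tune per host):
#   pg_restore --list "${RESTORE_FILE}" | head   # inspect the archive's table of contents
#   pg_restore -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -j 4 --no-owner "${RESTORE_FILE}"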
done diff --git a/images/osm-processor/Dockerfile b/images/osm-processor/Dockerfile index b2392ff3..57b8dd29 100644 --- a/images/osm-processor/Dockerfile +++ b/images/osm-processor/Dockerfile @@ -7,17 +7,45 @@ RUN set -ex \ && apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install \ -y --no-install-recommends \ - "osmosis" \ - "osmium-tool" \ + osmosis \ + osmium-tool \ # Cloud provider CLIs - "awscli" \ - "gsutil" \ - "azure-cli" \ + awscli \ + gsutil \ + azure-cli \ # PostgreSQL client - "postgresql-client" \ + postgresql-client \ # Other useful packages - "rsync" \ - "pyosmium" \ - "tmux" \ - "zsh" \ + rsync \ + pyosmium \ + tmux \ + zsh \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install planet-dump-ng +RUN set -ex \ + && apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + automake \ + autoconf \ + libxml2-dev \ + libboost-dev \ + libboost-program-options-dev \ + libboost-date-time-dev \ + libboost-filesystem-dev \ + libboost-thread-dev \ + libboost-iostreams-dev \ + libosmpbf-dev \ + osmpbf-bin \ + libprotobuf-dev \ + pkg-config \ + git \ + && git clone https://github.com/zerebubuth/planet-dump-ng.git /opt/planet-dump-ng \ + && cd /opt/planet-dump-ng \ + && ./autogen.sh \ + && ./configure \ + && make \ + && ln -s /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng \ && rm -rf /var/lib/apt/lists/* \ No newline at end of file From ee57c16be0cae03522fb30f998ab9714c84d85b5 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 30 Jun 2025 15:04:48 -0500 Subject: [PATCH 2/7] Update scripts and container for planet-dump-ng --- compose/db-backup-restore.yml | 3 +- images/osm-processor/Dockerfile | 5 +- images/planet-dump/start.sh | 114 ++++++++++++++++++-------------- osm-seed/values.yaml | 1 + 4 files changed, 71 insertions(+), 52 deletions(-) diff --git a/compose/db-backup-restore.yml b/compose/db-backup-restore.yml index 982e94ad..31080903 100644 --- a/compose/db-backup-restore.yml +++ b/compose/db-backup-restore.yml @@ -1,10 +1,9 @@ -version: '3' services: ##################################################### ## OSM Database backup and restore section ##################################################### db-backup-restore: - image: osmseed-backup-restore:v1 + image: rub21/osmseed-backup-restore:v1 build: context: ../images/backup-restore dockerfile: Dockerfile diff --git a/images/osm-processor/Dockerfile b/images/osm-processor/Dockerfile index 57b8dd29..62ed99be 100644 --- a/images/osm-processor/Dockerfile +++ b/images/osm-processor/Dockerfile @@ -42,10 +42,11 @@ RUN set -ex \ libprotobuf-dev \ pkg-config \ git \ - && git clone https://github.com/zerebubuth/planet-dump-ng.git /opt/planet-dump-ng \ + && git clone -b timestamp_epoch https://github.com/OpenHistoricalMap/planet-dump-ng.git /opt/planet-dump-ng \ && cd /opt/planet-dump-ng \ && ./autogen.sh \ && ./configure \ && make \ && ln -s /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng \ - && rm -rf /var/lib/apt/lists/* \ No newline at end of file + && rm -rf /var/lib/apt/lists/* + \ No newline at end of file diff --git a/images/planet-dump/start.sh b/images/planet-dump/start.sh index 03e47e4f..7218f206 100755 --- a/images/planet-dump/start.sh +++ b/images/planet-dump/start.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash set -e -export VOLUME_DIR=/mnt/data # osmosis tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then @@
-10,64 +9,83 @@ else echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" >~/.osmosis fi -# Read the DB and create the planet osm file +export VOLUME_DIR=/mnt/data date=$(date '+%y%m%d_%H%M') + local_planetPBFFile=$VOLUME_DIR/planet-${date}.osm.pbf cloud_planetPBFFile=planet/planet-${date}.osm.pbf +stateFile="$VOLUME_DIR/state.txt" -# In case overwrite the file +# If overwrite flag is enabled, use fixed filenames if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then local_planetPBFFile=$VOLUME_DIR/planet-latest.osm.pbf cloud_planetPBFFile=planet/planet-latest.osm.pbf fi -stateFile="$VOLUME_DIR/state.txt" +# =============================== +# Download db .dump file +# =============================== +download_dump_file() { + local_dumpFile="$VOLUME_DIR/input-latest.dump" + echo "Downloading db .dump file from cloud..." -# Creating the replication file -osmosis --read-apidb \ - host=$POSTGRES_HOST \ - database=$POSTGRES_DB \ - user=$POSTGRES_USER \ - password=$POSTGRES_PASSWORD \ - validateSchemaVersion=no \ - --write-pbf \ - file=$local_planetPBFFile + if [ "$CLOUDPROVIDER" == "aws" ]; then + aws s3 cp "$DUMP_CLOUD_URL" "$local_dumpFile" + elif [ "$CLOUDPROVIDER" == "gcp" ]; then + gsutil cp "$DUMP_CLOUD_URL" "$local_dumpFile" + fi +} -# AWS -if [ $CLOUDPROVIDER == "aws" ]; then - # Save the path file - AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/} - echo "$AWS_URL.s3.amazonaws.com/$cloud_planetPBFFile" > $stateFile - # Upload planet.osm.pbf file to s3 - aws s3 cp $local_planetPBFFile $AWS_S3_BUCKET/$cloud_planetPBFFile --acl public-read - # Upload state.txt file to s3 - aws s3 cp $stateFile $AWS_S3_BUCKET/planet/state.txt --acl public-read -fi +# =============================== +# Upload planet + state +# =============================== +upload_planet_file() { + echo "Uploading planet file and updating state.txt..." 
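# state.txt stores the public URL of the newest planet file, so consumers can
# bootstrap without listing the bucket. A hedged consumer-side sketch (the
# bucket name is a placeholder; the objects are uploaded public-read below):
#   planet_url=$(curl -fsSL "https://<bucket>.s3.amazonaws.com/planet/state.txt")
#   curl -fSL -o planet-latest.osm.pbf "$planet_url"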
-# gcp -if [ $CLOUDPROVIDER == "gcp" ]; then - # Save the path file - echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" > $stateFile - # Upload planet.osm.pbf file to cloud storage - gsutil cp -a public-read $local_planetPBFFile $GCP_STORAGE_BUCKET/$cloud_planetPBFFile - # Upload state.txt file to cloud storage - gsutil cp -a public-read $stateFile $GCP_STORAGE_BUCKET/planet/state.txt -fi + if [ "$CLOUDPROVIDER" == "aws" ]; then + AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/} + echo "$AWS_URL.s3.amazonaws.com/$cloud_planetPBFFile" > "$stateFile" + aws s3 cp "$local_planetPBFFile" "$AWS_S3_BUCKET/$cloud_planetPBFFile" --acl public-read + aws s3 cp "$stateFile" "$AWS_S3_BUCKET/planet/state.txt" --acl public-read + + elif [ "$CLOUDPROVIDER" == "gcp" ]; then + echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" > "$stateFile" + gsutil cp -a public-read "$local_planetPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" + gsutil cp -a public-read "$stateFile" "$GCP_STORAGE_BUCKET/planet/state.txt" + fi +} + +# =============================== +# Generate planet file +# =============================== -# Azure -if [ $CLOUDPROVIDER == "azure" ]; then - # Save the path file - echo "https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZURE_CONTAINER_NAME/$cloud_planetPBFFile" > $stateFile - # Upload planet.osm.pbf file to blob storage - az storage blob upload \ - --container-name $AZURE_CONTAINER_NAME \ - --file $local_planetPBFFile \ - --name $cloud_planetPBFFile \ - --output table - # Upload state.txt file to blob storage - az storage blob upload \ - --container-name $AZURE_CONTAINER_NAME \ - --file $stateFile \ - --name planet/state.txt \ - --output table +if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then + download_dump_file + echo "Generating planet file with planet-dump-ng..." + planet-dump-ng \ + --dump-file "$VOLUME_DIR/input-latest.dump" \ + --pbf "$local_planetPBFFile" +elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then + echo "Generating planet file with osmosis..." + if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then + echo JAVACMD_OPTIONS=\"-server\" > ~/.osmosis + else + memory="${MEMORY_JAVACMD_OPTIONS//i/}" + echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" > ~/.osmosis + fi + + osmosis --read-apidb \ + host=$POSTGRES_HOST \ + database=$POSTGRES_DB \ + user=$POSTGRES_USER \ + password=$POSTGRES_PASSWORD \ + validateSchemaVersion=no \ + --write-pbf \ + file=$local_planetPBFFile +else + echo "Error: Unknown PLANET_EXPORT_METHOD value. Use 'planet-dump-ng' or 'osmosis'." 
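# Unlike the osmosis branch above, planet-dump-ng works entirely from the
# downloaded archive, so the export holds no long-running connection against
# the production database. The tool accepts several output flags in a single
# run, so one pass over the dump can emit current and history planets together
# (a sketch; file names are illustrative, and combining the flags this way is
# our reading of the CLI):
#   planet-dump-ng --dump-file input-latest.dump \
#     --pbf planet.osm.pbf --history-pbf planet-history.osm.pbf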
+ exit 1 fi + +# Upload results +upload_planet_file diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml index 36337ade..46de79a6 100644 --- a/osm-seed/values.yaml +++ b/osm-seed/values.yaml @@ -342,6 +342,7 @@ planetDump: schedule: "* * * * *" env: OVERWRITE_PLANET_FILE: false + DUMP_CLOUD_URL : s3://osm-seed/db.dump resources: enabled: false requests: From 1bcf371a2cbc11f9abc368efcff04173ec3db330 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 30 Jun 2025 15:42:37 -0500 Subject: [PATCH 3/7] Update docker base image for planet dump and history --- images/full-history/Dockerfile | 4 +- images/full-history/start.sh | 139 ++++++++++-------- images/planet-dump/Dockerfile | 2 +- images/planet-dump/start.sh | 17 ++- osm-seed/templates/jobs/full-history-job.yaml | 6 + osm-seed/templates/jobs/planet-dump-job.yaml | 6 + osm-seed/values.yaml | 3 + 7 files changed, 107 insertions(+), 70 deletions(-) diff --git a/images/full-history/Dockerfile b/images/full-history/Dockerfile index ef89f12c..e459c334 100644 --- a/images/full-history/Dockerfile +++ b/images/full-history/Dockerfile @@ -1,5 +1,5 @@ -FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574 +FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.962.hee57c16 VOLUME /mnt/data COPY ./start.sh / -CMD /start.sh \ No newline at end of file +CMD /start.sh diff --git a/images/full-history/start.sh b/images/full-history/start.sh index c6636881..533ea641 100755 --- a/images/full-history/start.sh +++ b/images/full-history/start.sh @@ -1,82 +1,97 @@ #!/usr/bin/env bash set -e -export VOLUME_DIR=/mnt/data # osmosis tuning: https://wiki.openstreetmap.org/wiki/Osmosis/Tuning,https://lists.openstreetmap.org/pipermail/talk/2012-October/064771.html if [ -z "$MEMORY_JAVACMD_OPTIONS" ]; then - echo JAVACMD_OPTIONS=\"-server\" >~/.osmosis + echo "JAVACMD_OPTIONS=\"-server\"" >~/.osmosis else memory="${MEMORY_JAVACMD_OPTIONS//i/}" - echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" >~/.osmosis + echo "JAVACMD_OPTIONS=\"-server -Xmx$memory\"" >~/.osmosis fi -# Fixing name for historical file +export VOLUME_DIR=/mnt/data +export PLANET_EPOCH_DATE="${PLANET_EPOCH_DATE:-2004-01-01}" date=$(date '+%y%m%d_%H%M') -local_fullHistoryFile=$VOLUME_DIR/history-${date}.osh.pbf -cloud_fullHistoryFile=planet/full-history/history-${date}.osh.pbf -# In case overwrite the file -if [ "$OVERWRITE_FHISTORY_FILE" == "true" ]; then - local_fullHistoryFile=$VOLUME_DIR/history-latest.osh.pbf - cloud_fullHistoryFile=planet/full-history/history-latest.osh.pbf +local_planetPBFFile=$VOLUME_DIR/planet-history-${date}.osm.pbf +cloud_planetPBFFile=planet/planet-history-${date}.osm.pbf +stateFile="$VOLUME_DIR/state.txt" +dumpFile="$VOLUME_DIR/input-latest.dump" + + +# If overwrite flag is enabled, use fixed filenames +if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then + local_planetPBFFile=$VOLUME_DIR/planet-history-latest.osm.pbf + cloud_planetPBFFile=planet/planet-history-latest.osm.pbf fi -# State file nname -stateFile="$VOLUME_DIR/state.txt" -osm_tmp_file="osm_tmp.osm" +# =============================== +# Download db .dump file +# =============================== +download_dump_file() { + echo "Downloading db .dump file from cloud..."
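# PLANET_EPOCH_DATE above uses plain bash default expansion: the environment
# value wins when set, otherwise the literal fallback applies, e.g.
#   unset PLANET_EPOCH_DATE && echo "${PLANET_EPOCH_DATE:-2004-01-01}"     # -> 2004-01-01
#   PLANET_EPOCH_DATE=1970-01-01; echo "${PLANET_EPOCH_DATE:-2004-01-01}"  # -> 1970-01-01
# The chart default added later in this series is '1970-01-01'.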
+ if [ "$CLOUDPROVIDER" == "aws" ]; then + if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then + temp_txt="$VOLUME_DIR/tmp_dump_url.txt" + aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" + first_line=$(head -n 1 "$temp_txt") + aws s3 cp "$first_line" "$dumpFile" + else + aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" + fi + elif [ "$CLOUDPROVIDER" == "gcp" ]; then + gsutil cp "$DUMP_CLOUD_URL" "$dumpFile" + fi +} -# Creating full history -osmosis --read-apidb-change \ - host=$POSTGRES_HOST \ - database=$POSTGRES_DB \ - user=$POSTGRES_USER \ - password=$POSTGRES_PASSWORD \ - validateSchemaVersion=no \ - readFullHistory=yes \ - --write-xml-change \ - compressionMethod=auto \ - $osm_tmp_file +# =============================== +# Upload planet + state +# =============================== +upload_planet_file() { + echo "Uploading history planet file and updating state.txt..." -# Convert file to PBF file -osmium cat $osm_tmp_file -o $local_fullHistoryFile -osmium fileinfo $local_fullHistoryFile + if [ "$CLOUDPROVIDER" == "aws" ]; then + AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/} + echo "$AWS_URL.s3.amazonaws.com/$cloud_planetPBFFile" > "$stateFile" + aws s3 cp "$local_planetPBFFile" "$AWS_S3_BUCKET/$cloud_planetPBFFile" --acl public-read + aws s3 cp "$stateFile" "$AWS_S3_BUCKET/planet/state.txt" --acl public-read -# Remove full-hitory osm file, keep only history-latest.osh.pbf files -rm $osm_tmp_file + elif [ "$CLOUDPROVIDER" == "gcp" ]; then + echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" > "$stateFile" + gsutil cp -a public-read "$local_planetPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" + gsutil cp -a public-read "$stateFile" "$GCP_STORAGE_BUCKET/planet/state.txt" + fi +} -# AWS -if [ $CLOUDPROVIDER == "aws" ]; then - AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/} - echo "$AWS_URL.s3.amazonaws.com/$cloud_fullHistoryFile" >$stateFile - # Upload history-planet.osm.pbf - aws s3 cp $local_fullHistoryFile $AWS_S3_BUCKET/$cloud_fullHistoryFile --acl public-read - # Upload state.txt - aws s3 cp $stateFile $AWS_S3_BUCKET/planet/full-history/state.txt --acl public-read -fi +# =============================== +# Generate planet file +# =============================== -# Google Storage -if [ $CLOUDPROVIDER == "gcp" ]; then - echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_fullHistoryFile" >$stateFile - # Upload history-planet.osm.pbf - gsutil cp -a public-read $local_fullHistoryFile $GCP_STORAGE_BUCKET/$cloud_fullHistoryFile - # Upload state.txt - gsutil cp -a public-read $stateFile $GCP_STORAGE_BUCKET/planet/full-history/state.txt -fi +if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then + download_dump_file + echo "Generating history planet file with planet-dump-ng..." + export PLANET_EPOCH_DATE="$PLANET_EPOCH_DATE" + planet-dump-ng \ + --dump-file "$dumpFile" \ + --history-pbf "$local_planetPBFFile" -# Azure -if [ $CLOUDPROVIDER == "azure" ]; then - # Save the path file - echo "https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZURE_CONTAINER_NAME/$cloud_fullHistoryFile" >$stateFile - # Upload history-planet.osm.pbf - az storage blob upload \ - --container-name $AZURE_CONTAINER_NAME \ - --file $local_fullHistoryFile \ - --name $cloud_fullHistoryFile \ - --output table - # Upload state.txt - az storage blob upload \ - --container-name $AZURE_CONTAINER_NAME \ - --file $stateFile \ - --name planet/full-history/state.txt \ - --output table +elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then + echo "Generating history planet file with osmosis..." 
+ # Creating full history + osmosis --read-apidb-change \ + host=$POSTGRES_HOST \ + database=$POSTGRES_DB \ + user=$POSTGRES_USER \ + password=$POSTGRES_PASSWORD \ + validateSchemaVersion=no \ + readFullHistory=yes \ + --write-xml-change \ + compressionMethod=auto \ + $local_planetPBFFile +else + echo "Error: Unknown PLANET_EXPORT_METHOD value. Use 'planet-dump-ng' or 'osmosis'." + exit 1 fi + +# Upload results +upload_planet_file diff --git a/images/planet-dump/Dockerfile b/images/planet-dump/Dockerfile index ef89f12c..5abb17ac 100644 --- a/images/planet-dump/Dockerfile +++ b/images/planet-dump/Dockerfile @@ -1,4 +1,4 @@ -FROM developmentseed/osmseed-osm-processor:0.1.0-n802.h0d9f574 +FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.962.hee57c16 VOLUME /mnt/data COPY ./start.sh / diff --git a/images/planet-dump/start.sh b/images/planet-dump/start.sh index 7218f206..498a6d30 100755 --- a/images/planet-dump/start.sh +++ b/images/planet-dump/start.sh @@ -15,6 +15,7 @@ date=$(date '+%y%m%d_%H%M') local_planetPBFFile=$VOLUME_DIR/planet-${date}.osm.pbf cloud_planetPBFFile=planet/planet-${date}.osm.pbf stateFile="$VOLUME_DIR/state.txt" +dumpFile="$VOLUME_DIR/input-latest.dump" # If overwrite flag is enabled, use fixed filenames if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then @@ -26,16 +27,22 @@ fi # Download db .dump file # =============================== download_dump_file() { - local_dumpFile="$VOLUME_DIR/input-latest.dump" echo "Downloading db .dump file from cloud..." - if [ "$CLOUDPROVIDER" == "aws" ]; then - aws s3 cp "$DUMP_CLOUD_URL" "$local_dumpFile" + if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then + temp_txt="$VOLUME_DIR/tmp_dump_url.txt" + aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" + first_line=$(head -n 1 "$temp_txt") + aws s3 cp "$first_line" "$dumpFile" + else + aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" + fi elif [ "$CLOUDPROVIDER" == "gcp" ]; then - gsutil cp "$DUMP_CLOUD_URL" "$local_dumpFile" + gsutil cp "$DUMP_CLOUD_URL" "$dumpFile" fi } + # =============================== # Upload planet + state # =============================== @@ -63,7 +70,7 @@ if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then download_dump_file echo "Generating planet file with planet-dump-ng..." planet-dump-ng \ - --dump-file "$VOLUME_DIR/input-latest.dump" \ + --dump-file "$dumpFile" \ --pbf "$local_planetPBFFile" elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then echo "Generating planet file with osmosis..." 
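# MEMORY_JAVACMD_OPTIONS is expected in Kubernetes resource notation; the
# "${MEMORY_JAVACMD_OPTIONS//i/}" expansion above strips the "i" so the JVM
# receives a valid heap flag, e.g.:
#   MEMORY_JAVACMD_OPTIONS=16Gi  ->  JAVACMD_OPTIONS="-server -Xmx16G"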
diff --git a/osm-seed/templates/jobs/full-history-job.yaml b/osm-seed/templates/jobs/full-history-job.yaml index bc526087..87c7353a 100644 --- a/osm-seed/templates/jobs/full-history-job.yaml +++ b/osm-seed/templates/jobs/full-history-job.yaml @@ -45,6 +45,12 @@ spec: value: {{ .Values.fullHistory.env.OVERWRITE_FHISTORY_FILE | quote}} - name: CLOUDPROVIDER value: {{ .Values.cloudProvider }} + - name: PLANET_EXPORT_METHOD + value: {{ .Values.fullHistory.env.PLANET_EXPORT_METHOD | quote}} + - name: DUMP_CLOUD_URL + value: {{ .Values.fullHistory.env.DUMP_CLOUD_URL | quote}} + - name: PLANET_EPOCH_DATE + value: {{ .Values.fullHistory.env.PLANET_EPOCH_DATE | quote}} # In case cloudProvider=aws {{- if eq .Values.cloudProvider "aws" }} - name: AWS_S3_BUCKET diff --git a/osm-seed/templates/jobs/planet-dump-job.yaml b/osm-seed/templates/jobs/planet-dump-job.yaml index 832eb1b0..e0bef29a 100644 --- a/osm-seed/templates/jobs/planet-dump-job.yaml +++ b/osm-seed/templates/jobs/planet-dump-job.yaml @@ -49,6 +49,12 @@ spec: value: {{ .Values.cloudProvider }} - name: OVERWRITE_PLANET_FILE value: {{ .Values.planetDump.env.OVERWRITE_PLANET_FILE | quote}} + - name: PLANET_EXPORT_METHOD + value: {{ .Values.planetDump.env.PLANET_EXPORT_METHOD | quote}} + - name: DUMP_CLOUD_URL + value: {{ .Values.planetDump.env.DUMP_CLOUD_URL | quote}} + - name: PLANET_EPOCH_DATE + value: {{ .Values.planetDump.env.PLANET_EPOCH_DATE | quote}} # In case cloudProvider=aws {{- if eq .Values.cloudProvider "aws" }} - name: AWS_S3_BUCKET diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml index 46de79a6..ec944474 100644 --- a/osm-seed/values.yaml +++ b/osm-seed/values.yaml @@ -260,6 +260,8 @@ fullHistory: schedule: "* * * * *" env: OVERWRITE_FHISTORY_FILE: false + PLANET_EXPORT_METHOD: osmosis + DUMP_CLOUD_URL: s3://osm-seed/db.dump resources: enabled: false requests: @@ -343,6 +345,7 @@ planetDump: env: OVERWRITE_PLANET_FILE: false DUMP_CLOUD_URL : s3://osm-seed/db.dump + PLANET_EXPORT_METHOD: osmosis resources: enabled: false requests: From 5e9b11bf3f9ea61b48461b257f19470e2888035c Mon Sep 17 00:00:00 2001 From: Rub21 Date: Tue, 1 Jul 2025 11:32:13 -0500 Subject: [PATCH 4/7] Update planet-dump-ng version --- images/full-history/start.sh | 20 +++++----- images/osm-processor/Dockerfile | 65 ++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/images/full-history/start.sh b/images/full-history/start.sh index 533ea641..2a655b10 100755 --- a/images/full-history/start.sh +++ b/images/full-history/start.sh @@ -13,16 +13,16 @@ export VOLUME_DIR=/mnt/data export PLANET_EPOCH_DATE="${PLANET_EPOCH_DATE:-2004-01-01}" date=$(date '+%y%m%d_%H%M') -local_planetPBFFile=$VOLUME_DIR/planet-history-${date}.osm.pbf -cloud_planetPBFFile=planet/planet-history-${date}.osm.pbf +local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-${date}.osm.pbf +cloud_planetHistoryPBFFile=planet/full-history/planet-history-${date}.osm.pbf stateFile="$VOLUME_DIR/state.txt" dumpFile="$VOLUME_DIR/input-latest.dump" # If overwrite flag is enabled, use fixed filenames if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then - local_planetPBFFile=$VOLUME_DIR/planet-history-latest.osm.pbf - cloud_planetPBFFile=planet/planet-history-latest.osm.pbf + local_planetHistoryPBFFile=$VOLUME_DIR/planet-history-latest.osm.pbf + cloud_planetHistoryPBFFile=planet/planet-history-latest.osm.pbf fi # =============================== @@ -52,13 +52,13 @@ upload_planet_file() { if [ "$CLOUDPROVIDER" == "aws" ]; then 
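# The substitution in the next line, ${AWS_S3_BUCKET/s3:\/\//http:\/\/}, is
# bash pattern replacement: it swaps the s3:// scheme for http:// so state.txt
# records a browsable URL, e.g.:
#   AWS_S3_BUCKET=s3://osm-seed  ->  http://osm-seed.s3.amazonaws.com/planet/...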
AWS_URL=${AWS_S3_BUCKET/s3:\/\//http:\/\/} - echo "$AWS_URL.s3.amazonaws.com/$cloud_planetPBFFile" > "$stateFile" - aws s3 cp "$local_planetPBFFile" "$AWS_S3_BUCKET/$cloud_planetPBFFile" --acl public-read + echo "$AWS_URL.s3.amazonaws.com/$cloud_planetHistoryPBFFile" > "$stateFile" + aws s3 cp "$local_planetHistoryPBFFile" "$AWS_S3_BUCKET/$cloud_planetHistoryPBFFile" --acl public-read aws s3 cp "$stateFile" "$AWS_S3_BUCKET/planet/state.txt" --acl public-read elif [ "$CLOUDPROVIDER" == "gcp" ]; then - echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" > "$stateFile" - gsutil cp -a public-read "$local_planetPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetPBFFile" + echo "https://storage.cloud.google.com/$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile" > "$stateFile" + gsutil cp -a public-read "$local_planetHistoryPBFFile" "$GCP_STORAGE_BUCKET/$cloud_planetHistoryPBFFile" gsutil cp -a public-read "$stateFile" "$GCP_STORAGE_BUCKET/planet/state.txt" fi } @@ -73,7 +73,7 @@ if [ "$PLANET_EXPORT_METHOD" == "planet-dump-ng" ]; then export PLANET_EPOCH_DATE="$PLANET_EPOCH_DATE" planet-dump-ng \ --dump-file "$dumpFile" \ - --history-pbf "$local_planetPBFFile" + --history-pbf "$local_planetHistoryPBFFile" elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then echo "Generating history planet file with osmosis..." @@ -87,7 +87,7 @@ elif [ "$PLANET_EXPORT_METHOD" == "osmosis" ]; then readFullHistory=yes \ --write-xml-change \ compressionMethod=auto \ - $local_planetPBFFile + $local_planetHistoryPBFFile else echo "Error: Unknown PLANET_EXPORT_METHOD value. Use 'planet-dump-ng' or 'osmosis'." exit 1 diff --git a/images/osm-processor/Dockerfile b/images/osm-processor/Dockerfile index 62ed99be..041b01be 100644 --- a/images/osm-processor/Dockerfile +++ b/images/osm-processor/Dockerfile @@ -1,32 +1,11 @@ -FROM debian:bookworm-slim -ENV workdir /mnt/data -WORKDIR $workdir - -# Installs osmosis v0.48.3, osmium-tool v1.15.0, and PostgreSQL client -RUN set -ex \ - && apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install \ - -y --no-install-recommends \ - osmosis \ - osmium-tool \ - # Cloud provider CLIs - awscli \ - gsutil \ - azure-cli \ - # PostgreSQL client - postgresql-client \ - # Other useful packages - rsync \ - pyosmium \ - tmux \ - zsh \ - git \ - && rm -rf /var/lib/apt/lists/* +# Stage 1: builder +FROM debian:bookworm-slim AS builder +WORKDIR /opt/planet-dump-ng -# Install planet-dump-ng RUN set -ex \ && apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ build-essential \ automake \ autoconf \ @@ -42,11 +21,39 @@ RUN set -ex \ libprotobuf-dev \ pkg-config \ git \ - && git clone -b timestamp_epoch https://github.com/OpenHistoricalMap/planet-dump-ng.git /opt/planet-dump-ng \ - && cd /opt/planet-dump-ng \ + && git clone -b planet_epoch_date https://github.com/OpenHistoricalMap/planet-dump-ng.git . 
\ && ./autogen.sh \ && ./configure \ && make \ - && ln -s /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng \ + && strip planet-dump-ng + +FROM debian:bookworm-slim +ENV workdir /mnt/data +WORKDIR $workdir + +RUN set -ex \ + && apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + osmosis \ + osmium-tool \ + awscli \ + gsutil \ + azure-cli \ + postgresql-client \ + rsync \ + pyosmium \ + tmux \ + zsh \ + git \ + libxml2 \ + libboost-filesystem1.74.0 \ + libboost-program-options1.74.0 \ + libboost-thread1.74.0 \ + libboost-iostreams1.74.0 \ + libboost-date-time1.74.0 \ + libprotobuf32 \ + libprotobuf-lite32 \ + libosmpbf1 \ && rm -rf /var/lib/apt/lists/* - \ No newline at end of file + +COPY --from=builder /opt/planet-dump-ng/planet-dump-ng /usr/local/bin/planet-dump-ng \ No newline at end of file From 13993b8361f979e5192d5ea0e4578b9f938e8fc7 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Tue, 1 Jul 2025 12:34:47 -0500 Subject: [PATCH 5/7] Update docker base image for planet dump --- images/full-history/Dockerfile | 2 +- images/planet-dump/Dockerfile | 2 +- osm-seed/values.yaml | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/images/full-history/Dockerfile b/images/full-history/Dockerfile index e459c334..7f2875c6 100644 --- a/images/full-history/Dockerfile +++ b/images/full-history/Dockerfile @@ -1,4 +1,4 @@ -FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.962.hee57c16 +FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b VOLUME /mnt/data COPY ./start.sh / diff --git a/images/planet-dump/Dockerfile b/images/planet-dump/Dockerfile index 5abb17ac..7b6c91aa 100644 --- a/images/planet-dump/Dockerfile +++ b/images/planet-dump/Dockerfile @@ -1,4 +1,4 @@ -FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.962.hee57c16 +FROM developmentseed/osmseed-osm-processor:0.1.0-0.dev.git.964.h5e9b11b VOLUME /mnt/data COPY ./start.sh / diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml index ec944474..37a447d2 100644 --- a/osm-seed/values.yaml +++ b/osm-seed/values.yaml @@ -262,6 +262,7 @@ fullHistory: OVERWRITE_FHISTORY_FILE: false PLANET_EXPORT_METHOD: osmosis DUMP_CLOUD_URL: s3://osm-seed/db.dump + PLANET_EPOCH_DATE: '1970-01-01' resources: enabled: false requests: From 7eb533b3754480236f9d91b304d9327b20d04d70 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 7 Jul 2025 17:43:30 -0500 Subject: [PATCH 6/7] Compress dump files to reduce the size of the files --- images/backup-restore/start.sh | 27 ++++++++--------- images/planet-dump/start.sh | 53 +++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/images/backup-restore/start.sh b/images/backup-restore/start.sh index c23bc2e5..9d749ddd 100755 --- a/images/backup-restore/start.sh +++ b/images/backup-restore/start.sh @@ -35,21 +35,22 @@ cloudStorageOps() { } backupDB() { - local LOCAL_BACKUP_FILE=${BACKUP_CLOUD_FILE}.dump - local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.dump" - if [ "$SET_DATE_AT_NAME" == "true" ]; then - local CURRENT_DATE=$(date '+%Y%m%d-%H%M') - LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump" - CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump" - fi + local LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}.dump" + local LOCAL_BACKUP_FILE_GZIP="${BACKUP_CLOUD_FILE}.dump.gz" + local CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}.dump.gz" - # Backup database with max compression - echo "Backing up DB 
${POSTGRES_DB} into ${LOCAL_BACKUP_FILE}" - # pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} ${POSTGRES_DB} | gzip -9 >${LOCAL_BACKUP_FILE} - pg_dump -h ${POSTGRES_HOST} -U ${POSTGRES_USER} -Fc -f ${LOCAL_BACKUP_FILE} ${POSTGRES_DB} + if [ "$SET_DATE_AT_NAME" == "true" ]; then + local CURRENT_DATE + CURRENT_DATE=$(date '+%Y%m%d-%H%M') + LOCAL_BACKUP_FILE="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump" + LOCAL_BACKUP_FILE_GZIP="${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump.gz" + CLOUD_BACKUP_FILE="${BACKUP_CLOUD_FOLDER}/${BACKUP_CLOUD_FILE}-${CURRENT_DATE}.dump.gz" + fi - # Handle cloud storage based on the provider - cloudStorageOps "${LOCAL_BACKUP_FILE}" "${CLOUD_BACKUP_FILE}" + # Backup database with pg_dump custom format (-Fc) + gzip + echo "Backing up DB ${POSTGRES_DB} into ${LOCAL_BACKUP_FILE_GZIP}" + pg_dump -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}" -Fc "${POSTGRES_DB}" | gzip -9 > "${LOCAL_BACKUP_FILE}.gz" + cloudStorageOps "${LOCAL_BACKUP_FILE_GZIP}" "${CLOUD_BACKUP_FILE}" } restoreDB() { diff --git a/images/planet-dump/start.sh b/images/planet-dump/start.sh index 498a6d30..2142ffd4 100755 --- a/images/planet-dump/start.sh +++ b/images/planet-dump/start.sh @@ -23,23 +23,50 @@ if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then cloud_planetPBFFile=planet/planet-latest.osm.pbf fi +# =============================== +# Download db .dump file +# =============================== # =============================== # Download db .dump file # =============================== download_dump_file() { - echo "Downloading db .dump file from cloud..." - if [ "$CLOUDPROVIDER" == "aws" ]; then - if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then - temp_txt="$VOLUME_DIR/tmp_dump_url.txt" - aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" - first_line=$(head -n 1 "$temp_txt") - aws s3 cp "$first_line" "$dumpFile" - else - aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" - fi - elif [ "$CLOUDPROVIDER" == "gcp" ]; then - gsutil cp "$DUMP_CLOUD_URL" "$dumpFile" - fi + echo "Downloading db .dump file from cloud..." + if [ "$CLOUDPROVIDER" == "aws" ]; then + if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then + # Download the .txt file containing the URL + temp_txt="$VOLUME_DIR/tmp_dump_url.txt" + aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" + + # Get the first line (S3 URL to the .dump or .dump.gz file) + first_line=$(head -n 1 "$temp_txt") + echo "Found dump URL in txt: $first_line" + + aws s3 cp "$first_line" "$dumpFile" + + # Check if it's compressed (.gz) and decompress + if [[ "$first_line" == *.gz ]]; then + echo "Decompressing gzip file..." + gunzip -f "$dumpFile" + dumpFile="${dumpFile%.gz}" + fi + else + aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" + # If it's compressed, decompress + if [[ "$DUMP_CLOUD_URL" == *.gz ]]; then + echo "Decompressing gzip file..." 
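# Note: gunzip only accepts input with a recognized suffix, so a gzip'd archive
# downloaded straight to "$dumpFile" (input-latest.dump) cannot be decompressed
# in place; the object has to be saved with its .gz suffix first, e.g.:
#   aws s3 cp "$DUMP_CLOUD_URL" "${dumpFile}.gz"
#   gunzip -f "${dumpFile}.gz"   # leaves the decompressed archive at "$dumpFile"
# PATCH 7/7 below reworks both download scripts along these lines.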
+ gunzip -f "$dumpFile" + dumpFile="${dumpFile%.gz}" + fi + fi + + elif [ "$CLOUDPROVIDER" == "gcp" ]; then + gsutil cp "$DUMP_CLOUD_URL" "$dumpFile" + else + echo "Unsupported CLOUDPROVIDER: $CLOUDPROVIDER" + exit 1 + fi + + echo "Dump file ready at: $dumpFile" } From 0a631897f34e87414d0839a9747e3529a78c27bb Mon Sep 17 00:00:00 2001 From: Rub21 Date: Mon, 7 Jul 2025 18:33:43 -0500 Subject: [PATCH 7/7] Fix download dump function to support gz files --- images/full-history/start.sh | 59 ++++++++++++++++++++++++++++-------- images/planet-dump/start.sh | 24 +++++++++------ 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/images/full-history/start.sh b/images/full-history/start.sh index 2a655b10..acd82174 100755 --- a/images/full-history/start.sh +++ b/images/full-history/start.sh @@ -25,23 +25,56 @@ if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then cloud_planetHistoryPBFFile=planet/planet-history-latest.osm.pbf fi + # =============================== # Download db .dump file # =============================== download_dump_file() { - echo "Downloading db .dump file from cloud..." - if [ "$CLOUDPROVIDER" == "aws" ]; then - if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then - temp_txt="$VOLUME_DIR/tmp_dump_url.txt" - aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" - first_line=$(head -n 1 "$temp_txt") - aws s3 cp "$first_line" "$dumpFile" - else - aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" - fi - elif [ "$CLOUDPROVIDER" == "gcp" ]; then - gsutil cp "$DUMP_CLOUD_URL" "$dumpFile" - fi + echo "Downloading db .dump file from cloud..." + + if [ "$CLOUDPROVIDER" == "aws" ]; then + if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then + temp_txt="$VOLUME_DIR/tmp_dump_url.txt" + aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" + + # Get the first line (S3 URL to the .dump or .dump.gz file) + first_line=$(head -n 1 "$temp_txt") + echo "Found dump URL in txt: $first_line" + + # Set dump file name based on extension + if [[ "$first_line" == *.gz ]]; then + dumpFile="${dumpFile}.gz" + fi + + aws s3 cp "$first_line" "$dumpFile" + if [[ "$dumpFile" == *.gz ]]; then + echo "Decompressing gzip file..." + gunzip -f "$dumpFile" + dumpFile="${dumpFile%.gz}" + fi + rm -f "$temp_txt" + + else + # Set dump file name based on extension + if [[ "$DUMP_CLOUD_URL" == *.gz ]]; then + dumpFile="${dumpFile}.gz" + fi + aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" + if [[ "$dumpFile" == *.gz ]]; then + echo "Decompressing gzip file..." + gunzip -f "$dumpFile" + dumpFile="${dumpFile%.gz}" + fi + fi + + elif [ "$CLOUDPROVIDER" == "gcp" ]; then + gsutil cp "$DUMP_CLOUD_URL" "$dumpFile" + else + echo "Unsupported CLOUDPROVIDER: $CLOUDPROVIDER" + exit 1 + fi + + echo "Dump file ready at: $dumpFile" } # =============================== diff --git a/images/planet-dump/start.sh b/images/planet-dump/start.sh index 2142ffd4..f48085d8 100755 --- a/images/planet-dump/start.sh +++ b/images/planet-dump/start.sh @@ -23,17 +23,15 @@ if [ "$OVERWRITE_PLANET_FILE" == "true" ]; then cloud_planetPBFFile=planet/planet-latest.osm.pbf fi -# =============================== -# Download db .dump file -# =============================== + # =============================== # Download db .dump file # =============================== download_dump_file() { echo "Downloading db .dump file from cloud..." 
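# A cheap pre-flight before kicking off a long export: confirm the binary is on
# PATH and that the decompressed file really is a pg_dump custom-format archive
# (a sketch; both checks use standard tools):
#   command -v planet-dump-ng
#   pg_restore --list "$dumpFile" >/dev/null && echo "archive looks valid"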
+ if [ "$CLOUDPROVIDER" == "aws" ]; then if [[ "$DUMP_CLOUD_URL" == *.txt ]]; then - # Download the .txt file containing the URL temp_txt="$VOLUME_DIR/tmp_dump_url.txt" aws s3 cp "$DUMP_CLOUD_URL" "$temp_txt" @@ -41,18 +39,26 @@ download_dump_file() { first_line=$(head -n 1 "$temp_txt") echo "Found dump URL in txt: $first_line" - aws s3 cp "$first_line" "$dumpFile" - - # Check if it's compressed (.gz) and decompress + # Set dump file name based on extension if [[ "$first_line" == *.gz ]]; then + dumpFile="${dumpFile}.gz" + fi + + aws s3 cp "$first_line" "$dumpFile" + if [[ "$dumpFile" == *.gz ]]; then echo "Decompressing gzip file..." gunzip -f "$dumpFile" dumpFile="${dumpFile%.gz}" fi + rm -f "$temp_txt" + else - aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" - # If it's compressed, decompress + # Set dump file name based on extension if [[ "$DUMP_CLOUD_URL" == *.gz ]]; then + dumpFile="${dumpFile}.gz" + fi + aws s3 cp "$DUMP_CLOUD_URL" "$dumpFile" + if [[ "$dumpFile" == *.gz ]]; then echo "Decompressing gzip file..." gunzip -f "$dumpFile" dumpFile="${dumpFile%.gz}"