From b41d7d2d7daa0b604876fe60f6c28f4cbf8b1f29 Mon Sep 17 00:00:00 2001 From: dataroaring <98214048+dataroaring@users.noreply.github.com> Date: Mon, 25 Apr 2022 17:01:01 +0800 Subject: [PATCH] =?UTF-8?q?avoiding=20a=20corrupt=20image=20file=20when=20?= =?UTF-8?q?there=20is=20image.ckpt=20with=20non-zero=20=E2=80=A6=20(#9180)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * avoiding a corrupt image file when there is image.ckpt with non-zero size For now, saveImage writes data to image.ckpt via an append FileOutputStream, when there is a non-zero size file named image.ckpt, a disaster would happen due to a corrupt image file. Even worse, fe only keeps the lastest image file and removes others. BTW, image file should be synced to disk. It is dangerous to only keep the latest image file, because an image file is validated when generating the next image file. Then we keep an non validated image file but remove validated ones. So I will issue a pr which keeps at least 2 image file. * append other data after MetaHeader * use channel.force instead of sync --- .../src/main/java/org/apache/doris/catalog/Catalog.java | 9 +++++++-- .../main/java/org/apache/doris/common/MetaFooter.java | 1 + .../main/java/org/apache/doris/common/MetaHeader.java | 5 +++++ .../main/java/org/apache/doris/common/MetaWriter.java | 8 +++++--- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index a670dd2aebf67b..86d6763b241d61 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -1966,8 +1966,13 @@ public void saveImage() throws IOException { } public void saveImage(File curFile, long replayedJournalId) throws IOException { - if (!curFile.exists()) { - curFile.createNewFile(); + if (curFile.exists()) { + if (!curFile.delete()) { + throw new IOException(curFile.getName() + " can not be deleted."); + } + } + if (!curFile.createNewFile()) { + throw new IOException(curFile.getName() + " can not be created."); } MetaWriter.write(curFile, this); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java index 9df82aa9b79394..426cd7b3d8215f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java @@ -98,6 +98,7 @@ public static void write(File imageFile, List metaIndices, long check long endIndex = raf.length(); raf.writeLong(endIndex - startIndex); MetaMagicNumber.write(raf); + raf.getChannel.force(true); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java index 5617a854483174..ba91b04b5df8fa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java @@ -75,10 +75,15 @@ public static MetaHeader read(File imageFile) throws IOException { } public static long write(File imageFile) throws IOException { + if (imageFile.length() != 0) { + throw new IOException("Meta header has to be written to an empty file."); + } + try (RandomAccessFile raf = new RandomAccessFile(imageFile, "rw")) { raf.seek(0); MetaMagicNumber.write(raf); MetaJsonHeader.write(raf); + raf.getChannel.force(true); return raf.getFilePointer(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java index 6b9d9aa79106ea..387a50be3dd201 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java @@ -94,14 +94,15 @@ public long doWork(String name, WriteMethod method) throws IOException { public static void write(File imageFile, Catalog catalog) throws IOException { // save image does not need any lock. because only checkpoint thread will call this method. - LOG.info("start save image to {}. is ckpt: {}", imageFile.getAbsolutePath(), Catalog.isCheckpointThread()); - + LOG.info("start to save image to {}. is ckpt: {}", imageFile.getAbsolutePath(), Catalog.isCheckpointThread()); final Reference checksum = new Reference<>(0L); long saveImageStartTime = System.currentTimeMillis(); + // MetaHeader should use output stream in the future. long startPosition = MetaHeader.write(imageFile); List metaIndices = Lists.newArrayList(); + FileOutputStream imageFileOut = new FileOutputStream(imageFile, true); try (CountingDataOutputStream dos = new CountingDataOutputStream(new BufferedOutputStream( - new FileOutputStream(imageFile, true)), startPosition)) { + imageFileOut), startPosition)) { writer.setDelegate(dos, metaIndices); long replayedJournalId = catalog.getReplayedJournalId(); checksum.setRef(writer.doWork("header", () -> catalog.saveHeader(dos, replayedJournalId, checksum.getRef()))); @@ -128,6 +129,7 @@ public static void write(File imageFile, Catalog catalog) throws IOException { checksum.setRef(writer.doWork("plugins", () -> catalog.savePlugins(dos, checksum.getRef()))); checksum.setRef(writer.doWork("deleteHandler", () -> catalog.saveDeleteHandler(dos, checksum.getRef()))); checksum.setRef(writer.doWork("sqlBlockRule", () -> catalog.saveSqlBlockRule(dos, checksum.getRef()))); + imageFileOut.getChannel().force(true); } MetaFooter.write(imageFile, metaIndices, checksum.getRef());