From ff86d145513999b4c689d41b01d7f687c04b58c1 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 7 Mar 2023 22:28:29 +0100 Subject: [PATCH] chimera: drop nlink ref count for directory tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: To remove unused directory tags chimera keeps ref count (nlink) of tags. This approach creates a 'hot' record that serializes all updates to given top level tag. However, a simple query by tagId can be used to find out are there any other references. Modification: Use conditional DELETE with query on t_tags. Drop nlink ref count tracing. Result: Better directory creation throughout (x3). 64 threads creating sub-directories in different directories (to avoid hot parent inode) that inherite the same tag. Benchmark (ref) Mode Cnt Score Error Units CreateBenchmark.benchmarkCreateDir nlink thrpt 21 220.353 ± 41.578 ops/s CreateBenchmark.benchmarkCreateDir new code thrpt 25 653.666 ± 36.616 ops/s Acked-by: Paul Millar Target: master Require-book: no Require-notes: yes --- .../java/org/dcache/chimera/FsSqlDriver.java | 25 ++------- .../org/dcache/chimera/H2FsSqlDriver.java | 20 ------- .../org/dcache/chimera/HsqlDBFsSqlDriver.java | 44 ---------------- .../dcache/chimera/PgSQL95FsSqlDriver.java | 48 ----------------- .../java/org/dcache/chimera/JdbcFsTest.java | 52 +++++++++++++++++-- 5 files changed, 52 insertions(+), 137 deletions(-) diff --git a/modules/chimera/src/main/java/org/dcache/chimera/FsSqlDriver.java b/modules/chimera/src/main/java/org/dcache/chimera/FsSqlDriver.java index 40fd70d821d..5a7085aaa9c 100644 --- a/modules/chimera/src/main/java/org/dcache/chimera/FsSqlDriver.java +++ b/modules/chimera/src/main/java/org/dcache/chimera/FsSqlDriver.java @@ -1291,23 +1291,14 @@ int pushTag(FsInode dir, String tagName) throws FileNotFoundChimeraFsException { _jdbc.update("INSERT INTO t_tags (inumber,itagid,isorign,itagname) VALUES (?, ?, 0, ?)", id, tagid, tagName); }); - - _jdbc.update("UPDATE t_tags_inodes SET inlink = inlink + ? WHERE itagid=?", subtrees.size(), - tagid); return subtrees.size(); } - void incTagNlink(long tagId) { - _jdbc.update("UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid=?", tagId); - } - void decTagNlinkOrRemove(long tagId) { - // shortcut: delete right away, if there is only one reference left - int n = _jdbc.update("DELETE FROM t_tags_inodes WHERE itagid=? AND inlink = 1", tagId); - // if delete didn't happen, then just indicate that one reference in gone - if (n == 0) { - _jdbc.update("UPDATE t_tags_inodes SET inlink = inlink - 1 WHERE itagid=?", tagId); - } + // delete tag record, if not referenced anymore + _jdbc.update( + "DELETE FROM t_tags_inodes WHERE itagid = ? AND NOT EXISTS (SELECT inumber FROM t_tags WHERE itagid=? LIMIT 1)", + tagId, tagId); } void removeTag(FsInode dir, String tag) { @@ -1443,15 +1434,9 @@ void createTags(FsInode inode, int uid, int gid, int mode, Map t * @param destination */ void copyTags(FsInode orign, FsInode destination) { - int n = _jdbc.update( + _jdbc.update( "INSERT INTO t_tags (inumber,itagid,isorign,itagname) (SELECT ?,itagid,0,itagname from t_tags WHERE inumber=?)", destination.ino(), orign.ino()); - if (n > 0) { - // if tags was copied, then bump the reference counts. - _jdbc.update( - "UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid IN (SELECT itagid from t_tags where inumber=?)", - destination.ino()); - } } void setTagOwner(FsInode_TAG tagInode, int newOwner) throws FileNotFoundChimeraFsException { diff --git a/modules/chimera/src/main/java/org/dcache/chimera/H2FsSqlDriver.java b/modules/chimera/src/main/java/org/dcache/chimera/H2FsSqlDriver.java index c1ed2575120..826923dfcba 100644 --- a/modules/chimera/src/main/java/org/dcache/chimera/H2FsSqlDriver.java +++ b/modules/chimera/src/main/java/org/dcache/chimera/H2FsSqlDriver.java @@ -151,26 +151,6 @@ long createTagInode(int uid, int gid, int mode, byte[] value) { return (Long) keyHolder.getKey(); } - /** - * copy all directory tags from origin directory to destination. New copy marked as inherited. - * - * @param orign - * @param destination - */ - @Override - void copyTags(FsInode orign, FsInode destination) { - int n = _jdbc.update( - "INSERT INTO t_tags (inumber,itagid,isorign,itagname) (SELECT " + destination.ino() - + ",itagid,0,itagname from t_tags WHERE inumber=?)", - orign.ino()); - if (n > 0) { - // if tags was copied, then bump the reference counts. - _jdbc.update( - "UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid IN (SELECT itagid from t_tags where inumber=?)", - destination.ino()); - } - } - @Override void copyAcl(FsInode source, FsInode inode, RsType type, EnumSet mask, EnumSet flags) { diff --git a/modules/chimera/src/main/java/org/dcache/chimera/HsqlDBFsSqlDriver.java b/modules/chimera/src/main/java/org/dcache/chimera/HsqlDBFsSqlDriver.java index 8e2eb938165..7c066df3c16 100644 --- a/modules/chimera/src/main/java/org/dcache/chimera/HsqlDBFsSqlDriver.java +++ b/modules/chimera/src/main/java/org/dcache/chimera/HsqlDBFsSqlDriver.java @@ -17,7 +17,6 @@ package org.dcache.chimera; import java.util.EnumSet; -import java.util.List; import javax.sql.DataSource; import org.dcache.acl.enums.AceFlags; import org.dcache.acl.enums.RsType; @@ -28,49 +27,6 @@ public HsqlDBFsSqlDriver(DataSource dataSource) throws ChimeraFsException { super(dataSource); } - @Override - void removeTag(FsInode dir) { - /* Get the tag IDs of the tag links to be removed. - */ - List ids = _jdbc.queryForList("SELECT itagid FROM t_tags WHERE inumber=?", - String.class, dir.ino()); - if (!ids.isEmpty()) { - /* Remove the links. - */ - _jdbc.update("DELETE FROM t_tags WHERE inumber=?", dir.ino()); - - /* Remove any tag inode of of the tag links removed above, which are - * not referenced by any other links either. - * - * We ought to maintain the link count in the inode, but Chimera has - * not done so in the past. In the interest of avoiding costly schema - * corrections in patch level releases, the current solution queries - * for the existence of other links instead. - * - * The statement below relies on concurrent transactions not deleting - * other links to affected tag inodes. Otherwise we could come into a - * situation in which two concurrent transactions remove two links to - * the same inode, yet none of them realize that the inode is left - * without links (as there is another link). - * - * One way to ensure this would be to use repeatable read transaction - * isolation, but PostgreSQL doesn't support changing the isolation level - * in the middle of a transaction. Always running any operation that - * might call this method with repeatable read was deemed unacceptable. - * Another solution would be to lock the tag inode at the beginning of - * this method using SELECT FOR UPDATE. This would be fairly expensive - * way of solving this race. - * - * For now we decide to ignore the race: It seems unlikely to run into - * and even if one does, the consequence is merely an orphaned inode. - */ - _jdbc.batchUpdate("DELETE FROM t_tags_inodes i WHERE itagid = ? " + - "AND NOT EXISTS (SELECT 1 FROM t_tags t WHERE t.itagid=i.itagid LIMIT 1)", - ids, ids.size(), - (ps, tagid) -> ps.setString(1, tagid)); - } - } - @Override void copyAcl(FsInode source, FsInode inode, RsType type, EnumSet mask, EnumSet flags) { diff --git a/modules/chimera/src/main/java/org/dcache/chimera/PgSQL95FsSqlDriver.java b/modules/chimera/src/main/java/org/dcache/chimera/PgSQL95FsSqlDriver.java index 8b14cc94dab..bab3e0fecff 100644 --- a/modules/chimera/src/main/java/org/dcache/chimera/PgSQL95FsSqlDriver.java +++ b/modules/chimera/src/main/java/org/dcache/chimera/PgSQL95FsSqlDriver.java @@ -402,54 +402,6 @@ void addLabel(FsInode inode, String labelname) throws ChimeraFsException { } - @Override - void copyTags(FsInode orign, FsInode destination) { - _jdbc.queryForList( - "INSERT INTO t_tags (inumber,itagid,isorign,itagname) (SELECT ?,itagid,0,itagname FROM t_tags WHERE inumber=?) RETURNING itagid", - Long.class, destination.ino(), orign.ino()). - forEach(tagId -> { - _jdbc.update("UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid=?", - tagId); - }); - } - - @Override - void removeTag(FsInode dir) { - _jdbc.queryForList("DELETE FROM t_tags WHERE inumber=? RETURNING itagid", Long.class, - dir.ino()) - .forEach(tagId -> { - // shortcut: delete right away, if there is only one reference left - int n = _jdbc.update("DELETE FROM t_tags_inodes WHERE itagid=? AND inlink = 1", - tagId); - // if delete didn't happen, then just indicate that one reference in gone - if (n == 0) { - _jdbc.update("UPDATE t_tags_inodes SET inlink = inlink - 1 WHERE itagid=?", - tagId); - } - }); - } - - @Override - void removeTag(FsInode dir, String tag) { - - Long tagId = _jdbc.query("DELETE FROM t_tags WHERE inumber=? AND itagname=? RETURNING *", - ps -> { - ps.setLong(1, dir.ino()); - ps.setString(2, tag); - }, - (ResultSet rs) -> rs.next() ? rs.getLong("itagid") : null); - - // TODO: explore a possibility to perform DELETE+UPDATE with single query - if (tagId != null) { - // shortcut: delete right away, if there is only one reference left - int n = _jdbc.update("DELETE FROM t_tags_inodes WHERE itagid=? AND inlink = 1", tagId); - // if delete didn't happen, then just indicate that one reference in gone - if (n == 0) { - _jdbc.update("UPDATE t_tags_inodes SET inlink = inlink - 1 WHERE itagid=?", tagId); - } - } - } - @Override void setStorageInfo(FsInode inode, InodeStorageInformation storageInfo) { _jdbc.update("INSERT INTO t_storageinfo VALUES (?,?,?,?) " + diff --git a/modules/chimera/src/test/java/org/dcache/chimera/JdbcFsTest.java b/modules/chimera/src/test/java/org/dcache/chimera/JdbcFsTest.java index 6c52171fdec..96d26a66268 100644 --- a/modules/chimera/src/test/java/org/dcache/chimera/JdbcFsTest.java +++ b/modules/chimera/src/test/java/org/dcache/chimera/JdbcFsTest.java @@ -967,6 +967,53 @@ public void testNameTooMoveLong() throws Exception { _fs.rename(inode, _rootInode, "testNameTooMoveLong", _rootInode, tooLongName); } + @Test + public void testTagPropagation() throws ChimeraFsException { + + var tagName = "aTag"; + + _fs.createTag(_rootInode, tagName); + FsInode tagInode = new FsInode_TAG(_fs, _rootInode.ino(), tagName); + byte[] data = "data".getBytes(UTF_8); + tagInode.write(0, data, 0, data.length); + + var dir = _fs.mkdir(_rootInode, "dir.0", 0, 0, 0755); + _fs.statTag(dir, tagName); + } + + @Test(expected = FileNotFoundChimeraFsException.class) + public void testStatMissingTag() throws ChimeraFsException { + var dir = _fs.mkdir(_rootInode, "dir.0", 0, 0, 0755); + _fs.statTag(dir, "aTag"); + } + + @Test + public void testTagDisposal() throws ChimeraFsException, SQLException { + + var tagName = "aTag"; + + var dir = _fs.mkdir(_rootInode, "dir.0", 0, 0, 0755); + _fs.createTag(dir, tagName); + + FsInode tagInode = new FsInode_TAG(_fs, dir.ino(), tagName); + byte[] data = "data".getBytes(UTF_8); + tagInode.write(0, data, 0, data.length); + + long tagId = tagInode.stat().getIno(); + + try (var conn = _dataSource.getConnection()) { + var found = conn.createStatement().executeQuery("SELECT * FROM t_tags_inodes WHERE itagid="+tagId).next(); + assertTrue("tag inodes is not populated with a new entry", found); + } + + _fs.remove(_rootInode, "dir.0", dir); + + try (var conn = _dataSource.getConnection()) { + var found = conn.createStatement().executeQuery("SELECT * FROM t_tags_inodes WHERE itagid="+tagId).next(); + assertFalse("tag is not disposed on last reference removal", found); + } + } + @Test public void testChangeTagOwner() throws Exception { @@ -1229,12 +1276,7 @@ public void testTagDeletionOnDirectoryRemove() throws ChimeraFsException, SQLExc assertEquals("Tag ref count mismatch", 1, tagInode.stat().getNlink()); FsInode sub = top.mkdir("sub"); - assertEquals("Tag ref count not incremented on create", 2, tagInode.stat().getNlink()); - _fs.remove(top, "sub", sub); - assertEquals("Tag ref count decremented on remove", 1, tagInode.stat().getNlink()); - - // remove last reference _fs.remove(_rootInode, "top", top); // as we don't have a way to access tags, use direct SQL