Skip to content

Commit

Permalink
chimera: drop nlink ref count for directory tags
Browse files Browse the repository at this point in the history
Motivation:
To remove unused directory tags chimera keeps ref count (nlink) of tags.
This approach creates a 'hot' record that serializes all updates to
given top level tag. However, a simple query by tagId can be used to
find out are there any other references.

Modification:
Use conditional DELETE with query on t_tags. Drop nlink ref count
tracing.

Result:
Better directory creation throughout (x3).

64 threads creating sub-directories in different directories (to avoid
hot parent inode) that inherite the same tag.

Benchmark                                (ref)   Mode  Cnt    Score    Error  Units
CreateBenchmark.benchmarkCreateDir       nlink  thrpt   21  220.353 ± 41.578  ops/s
CreateBenchmark.benchmarkCreateDir    new code  thrpt   25  653.666 ± 36.616  ops/s

Acked-by: Paul Millar
Target: master
Require-book: no
Require-notes: yes
  • Loading branch information
kofemann committed Aug 3, 2023
1 parent 2acfaf8 commit ff86d14
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 137 deletions.
25 changes: 5 additions & 20 deletions modules/chimera/src/main/java/org/dcache/chimera/FsSqlDriver.java
Expand Up @@ -1291,23 +1291,14 @@ int pushTag(FsInode dir, String tagName) throws FileNotFoundChimeraFsException {
_jdbc.update("INSERT INTO t_tags (inumber,itagid,isorign,itagname) VALUES (?, ?, 0, ?)",
id, tagid, tagName);
});

_jdbc.update("UPDATE t_tags_inodes SET inlink = inlink + ? WHERE itagid=?", subtrees.size(),
tagid);
return subtrees.size();
}

void incTagNlink(long tagId) {
_jdbc.update("UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid=?", tagId);
}

void decTagNlinkOrRemove(long tagId) {
// shortcut: delete right away, if there is only one reference left
int n = _jdbc.update("DELETE FROM t_tags_inodes WHERE itagid=? AND inlink = 1", tagId);
// if delete didn't happen, then just indicate that one reference in gone
if (n == 0) {
_jdbc.update("UPDATE t_tags_inodes SET inlink = inlink - 1 WHERE itagid=?", tagId);
}
// delete tag record, if not referenced anymore
_jdbc.update(
"DELETE FROM t_tags_inodes WHERE itagid = ? AND NOT EXISTS (SELECT inumber FROM t_tags WHERE itagid=? LIMIT 1)",
tagId, tagId);
}

void removeTag(FsInode dir, String tag) {
Expand Down Expand Up @@ -1443,15 +1434,9 @@ void createTags(FsInode inode, int uid, int gid, int mode, Map<String, byte[]> t
* @param destination
*/
void copyTags(FsInode orign, FsInode destination) {
int n = _jdbc.update(
_jdbc.update(
"INSERT INTO t_tags (inumber,itagid,isorign,itagname) (SELECT ?,itagid,0,itagname from t_tags WHERE inumber=?)",
destination.ino(), orign.ino());
if (n > 0) {
// if tags was copied, then bump the reference counts.
_jdbc.update(
"UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid IN (SELECT itagid from t_tags where inumber=?)",
destination.ino());
}
}

void setTagOwner(FsInode_TAG tagInode, int newOwner) throws FileNotFoundChimeraFsException {
Expand Down
Expand Up @@ -151,26 +151,6 @@ long createTagInode(int uid, int gid, int mode, byte[] value) {
return (Long) keyHolder.getKey();
}

/**
* copy all directory tags from origin directory to destination. New copy marked as inherited.
*
* @param orign
* @param destination
*/
@Override
void copyTags(FsInode orign, FsInode destination) {
int n = _jdbc.update(
"INSERT INTO t_tags (inumber,itagid,isorign,itagname) (SELECT " + destination.ino()
+ ",itagid,0,itagname from t_tags WHERE inumber=?)",
orign.ino());
if (n > 0) {
// if tags was copied, then bump the reference counts.
_jdbc.update(
"UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid IN (SELECT itagid from t_tags where inumber=?)",
destination.ino());
}
}

@Override
void copyAcl(FsInode source, FsInode inode, RsType type, EnumSet<AceFlags> mask,
EnumSet<AceFlags> flags) {
Expand Down
Expand Up @@ -17,7 +17,6 @@
package org.dcache.chimera;

import java.util.EnumSet;
import java.util.List;
import javax.sql.DataSource;
import org.dcache.acl.enums.AceFlags;
import org.dcache.acl.enums.RsType;
Expand All @@ -28,49 +27,6 @@ public HsqlDBFsSqlDriver(DataSource dataSource) throws ChimeraFsException {
super(dataSource);
}

@Override
void removeTag(FsInode dir) {
/* Get the tag IDs of the tag links to be removed.
*/
List<String> ids = _jdbc.queryForList("SELECT itagid FROM t_tags WHERE inumber=?",
String.class, dir.ino());
if (!ids.isEmpty()) {
/* Remove the links.
*/
_jdbc.update("DELETE FROM t_tags WHERE inumber=?", dir.ino());

/* Remove any tag inode of of the tag links removed above, which are
* not referenced by any other links either.
*
* We ought to maintain the link count in the inode, but Chimera has
* not done so in the past. In the interest of avoiding costly schema
* corrections in patch level releases, the current solution queries
* for the existence of other links instead.
*
* The statement below relies on concurrent transactions not deleting
* other links to affected tag inodes. Otherwise we could come into a
* situation in which two concurrent transactions remove two links to
* the same inode, yet none of them realize that the inode is left
* without links (as there is another link).
*
* One way to ensure this would be to use repeatable read transaction
* isolation, but PostgreSQL doesn't support changing the isolation level
* in the middle of a transaction. Always running any operation that
* might call this method with repeatable read was deemed unacceptable.
* Another solution would be to lock the tag inode at the beginning of
* this method using SELECT FOR UPDATE. This would be fairly expensive
* way of solving this race.
*
* For now we decide to ignore the race: It seems unlikely to run into
* and even if one does, the consequence is merely an orphaned inode.
*/
_jdbc.batchUpdate("DELETE FROM t_tags_inodes i WHERE itagid = ? " +
"AND NOT EXISTS (SELECT 1 FROM t_tags t WHERE t.itagid=i.itagid LIMIT 1)",
ids, ids.size(),
(ps, tagid) -> ps.setString(1, tagid));
}
}

@Override
void copyAcl(FsInode source, FsInode inode, RsType type, EnumSet<AceFlags> mask,
EnumSet<AceFlags> flags) {
Expand Down
Expand Up @@ -402,54 +402,6 @@ void addLabel(FsInode inode, String labelname) throws ChimeraFsException {

}

@Override
void copyTags(FsInode orign, FsInode destination) {
_jdbc.queryForList(
"INSERT INTO t_tags (inumber,itagid,isorign,itagname) (SELECT ?,itagid,0,itagname FROM t_tags WHERE inumber=?) RETURNING itagid",
Long.class, destination.ino(), orign.ino()).
forEach(tagId -> {
_jdbc.update("UPDATE t_tags_inodes SET inlink = inlink + 1 WHERE itagid=?",
tagId);
});
}

@Override
void removeTag(FsInode dir) {
_jdbc.queryForList("DELETE FROM t_tags WHERE inumber=? RETURNING itagid", Long.class,
dir.ino())
.forEach(tagId -> {
// shortcut: delete right away, if there is only one reference left
int n = _jdbc.update("DELETE FROM t_tags_inodes WHERE itagid=? AND inlink = 1",
tagId);
// if delete didn't happen, then just indicate that one reference in gone
if (n == 0) {
_jdbc.update("UPDATE t_tags_inodes SET inlink = inlink - 1 WHERE itagid=?",
tagId);
}
});
}

@Override
void removeTag(FsInode dir, String tag) {

Long tagId = _jdbc.query("DELETE FROM t_tags WHERE inumber=? AND itagname=? RETURNING *",
ps -> {
ps.setLong(1, dir.ino());
ps.setString(2, tag);
},
(ResultSet rs) -> rs.next() ? rs.getLong("itagid") : null);

// TODO: explore a possibility to perform DELETE+UPDATE with single query
if (tagId != null) {
// shortcut: delete right away, if there is only one reference left
int n = _jdbc.update("DELETE FROM t_tags_inodes WHERE itagid=? AND inlink = 1", tagId);
// if delete didn't happen, then just indicate that one reference in gone
if (n == 0) {
_jdbc.update("UPDATE t_tags_inodes SET inlink = inlink - 1 WHERE itagid=?", tagId);
}
}
}

@Override
void setStorageInfo(FsInode inode, InodeStorageInformation storageInfo) {
_jdbc.update("INSERT INTO t_storageinfo VALUES (?,?,?,?) " +
Expand Down
52 changes: 47 additions & 5 deletions modules/chimera/src/test/java/org/dcache/chimera/JdbcFsTest.java
Expand Up @@ -967,6 +967,53 @@ public void testNameTooMoveLong() throws Exception {
_fs.rename(inode, _rootInode, "testNameTooMoveLong", _rootInode, tooLongName);
}

@Test
public void testTagPropagation() throws ChimeraFsException {

var tagName = "aTag";

_fs.createTag(_rootInode, tagName);
FsInode tagInode = new FsInode_TAG(_fs, _rootInode.ino(), tagName);
byte[] data = "data".getBytes(UTF_8);
tagInode.write(0, data, 0, data.length);

var dir = _fs.mkdir(_rootInode, "dir.0", 0, 0, 0755);
_fs.statTag(dir, tagName);
}

@Test(expected = FileNotFoundChimeraFsException.class)
public void testStatMissingTag() throws ChimeraFsException {
var dir = _fs.mkdir(_rootInode, "dir.0", 0, 0, 0755);
_fs.statTag(dir, "aTag");
}

@Test
public void testTagDisposal() throws ChimeraFsException, SQLException {

var tagName = "aTag";

var dir = _fs.mkdir(_rootInode, "dir.0", 0, 0, 0755);
_fs.createTag(dir, tagName);

FsInode tagInode = new FsInode_TAG(_fs, dir.ino(), tagName);
byte[] data = "data".getBytes(UTF_8);
tagInode.write(0, data, 0, data.length);

long tagId = tagInode.stat().getIno();

try (var conn = _dataSource.getConnection()) {
var found = conn.createStatement().executeQuery("SELECT * FROM t_tags_inodes WHERE itagid="+tagId).next();
assertTrue("tag inodes is not populated with a new entry", found);
}

_fs.remove(_rootInode, "dir.0", dir);

try (var conn = _dataSource.getConnection()) {
var found = conn.createStatement().executeQuery("SELECT * FROM t_tags_inodes WHERE itagid="+tagId).next();
assertFalse("tag is not disposed on last reference removal", found);
}
}

@Test
public void testChangeTagOwner() throws Exception {

Expand Down Expand Up @@ -1229,12 +1276,7 @@ public void testTagDeletionOnDirectoryRemove() throws ChimeraFsException, SQLExc
assertEquals("Tag ref count mismatch", 1, tagInode.stat().getNlink());

FsInode sub = top.mkdir("sub");
assertEquals("Tag ref count not incremented on create", 2, tagInode.stat().getNlink());

_fs.remove(top, "sub", sub);
assertEquals("Tag ref count decremented on remove", 1, tagInode.stat().getNlink());

// remove last reference
_fs.remove(_rootInode, "top", top);

// as we don't have a way to access tags, use direct SQL
Expand Down

0 comments on commit ff86d14

Please sign in to comment.