Skip to content

Commit

Permalink
pool: Refactor handling of replica size in the repository
Browse files Browse the repository at this point in the history
Motivation:

The MetaDataRecord implementations cache the replica file size in memory. The
cached value is filled during pool startup. During upload the replica file size
changes and thus the write handler sets a file size repeatedly. This breaks
isolation as the write handler assumes the replica is backed by an actual file.

Modification:

Make the replica size a truly cached value that cannot be updated from the
outside of the meta data store. The meta data record implementations use the
entry state to determine whether the replica file size may change or not.
Upon moving the entry to a non-uploading state, the size is cached.

This affects the replica file size reported by `rep ls` during upload -
previously the pre-allocated size was reported, while now the actual size on
disk is reported.  Previously, the reported size could be larger than the final
file size as pre-allocation may overallocate.

Renamed the getSize method to getReplicaSize to make it clear that this isn't
the logical file size as stored in Chimera.

Result:

Reported file size during uploads now reflects actual file size rather than
preallocated file size.

Target: trunk
Require-notes: no
Require-book: no
Acked-by: Paul Millar <paul.millar@desy.de>
Acked-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>

Reviewed at https://rb.dcache.org/r/9447/
  • Loading branch information
gbehrmann committed Jun 28, 2016
1 parent 4a7a21d commit 79138be
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 89 deletions.
Expand Up @@ -188,7 +188,7 @@ private boolean isBroken(MetaDataRecord entry) throws CacheException
FileAttributes attributes = entry.getFileAttributes();
EntryState state = entry.getState();
return !attributes.isDefined(FileAttribute.SIZE) ||
attributes.getSize() != entry.getSize() ||
attributes.getSize() != entry.getReplicaSize() ||
state != EntryState.CACHED && state != EntryState.PRECIOUS;
}

Expand All @@ -208,7 +208,7 @@ private MetaDataRecord rebuildEntry(MetaDataRecord entry)
* as it is a lot cheaper than the checksum check and we
* may thus save some time for incomplete files.
*/
long length = entry.getDataFile().length();
long length = entry.getReplicaSize();
if (attributesInNameSpace.isDefined(FileAttribute.SIZE) && attributesInNameSpace.getSize() != length) {
String message = String.format(BAD_SIZE_MSG,
id,
Expand Down
Expand Up @@ -2,13 +2,25 @@

/**
 * Life-cycle states of a repository entry. Each state records whether
 * the replica file may still be written to (mutable) or whether its
 * size is final (immutable). The meta data store uses this to decide
 * whether to report the live on-disk size or a cached size.
 */
public enum EntryState
{
    NEW(true),          // entry is being created; size may still change
    FROM_CLIENT(true),  // incoming transfer; size may still change
    FROM_POOL(true),    // incoming transfer; size may still change
    FROM_STORE(true),   // incoming transfer; size may still change
    BROKEN(false),      // size is final
    CACHED(false),      // size is final
    PRECIOUS(false),    // size is final
    REMOVED(false),     // size is final
    DESTROYED(false);   // size is final

    /* private final: the flag is fixed per constant and must not be
     * visible or mutable outside this enum. */
    private final boolean isMutable;

    EntryState(boolean isMutable)
    {
        this.isMutable = isMutable;
    }

    /**
     * Returns true if the replica file may still change while the entry
     * is in this state, i.e. its size is not yet final.
     */
    public boolean isMutable()
    {
        return isMutable;
    }
}
Expand Up @@ -171,24 +171,10 @@ public PnfsId getPnfsId()
}

@Override
public void setSize(long size)
public long getReplicaSize()
{
try {
_record.setSize(size);
} catch (IllegalArgumentException e) {
throw e;
} catch (RuntimeException e) {
_faultListener.faultOccurred(
new FaultEvent("repository", FaultAction.DEAD, "Internal repository error", e));
throw e;
}
}

@Override
public long getSize()
{
try {
return _record.getSize();
return _record.getReplicaSize();
} catch (RuntimeException e) {
_faultListener.faultOccurred(
new FaultEvent("repository", FaultAction.DEAD, "Internal repository error", e));
Expand Down
Expand Up @@ -22,25 +22,10 @@ public interface MetaDataRecord
PnfsId getPnfsId();

/**
* Set the size of this entry. An entry has a size which normally
* corresponds to the size of the file on disk. While the file is
* created there may be a mismatch between the entry size and the
* physical size.
*
* For a healthy entry and complete, the entry size will match the
* file size stored in PNFS. For broken entries or while the file
* is created, the two may not match.
*
* The size stored in the entries StorageInfo record is a cached
* copy of the size stored in PNFS.
*/
void setSize(long size);

/**
* Get the size of this entry. May be different from the size of
* the on-disk file.
* Get the size of this replica. May be different from the size
* registered in Chimera.
*/
long getSize();
long getReplicaSize();

FileAttributes getFileAttributes() throws CacheException;

Expand Down
Expand Up @@ -31,6 +31,7 @@

import static com.google.common.collect.Iterables.elementsEqual;
import static com.google.common.collect.Iterables.filter;
import static org.dcache.pool.repository.EntryState.*;

/**
* Berkeley DB aware implementation of CacheRepositoryEntry interface.
Expand Down Expand Up @@ -66,7 +67,7 @@ public CacheRepositoryEntryImpl(BerkeleyDBMetaDataRepository repository, PnfsId
{
_repository = repository;
_pnfsId = pnfsId;
_state = EntryState.NEW;
_state = NEW;
_sticky = ImmutableList.of();
_lastAccess = _creationTime;
}
Expand Down Expand Up @@ -101,7 +102,7 @@ public synchronized int decrementLinkCount()
public synchronized int incrementLinkCount()
{
EntryState state = getState();
if (state == EntryState.REMOVED || state == EntryState.DESTROYED) {
if (state == REMOVED || state == DESTROYED) {
throw new IllegalStateException("Entry is marked as removed");
}
_linkCount++;
Expand Down Expand Up @@ -142,18 +143,9 @@ public synchronized void setLastAccessTime(long time) throws CacheException
}

@Override
public synchronized void setSize(long size)
public synchronized long getReplicaSize()
{
if (size < 0) {
throw new IllegalArgumentException("Negative entry size is not allowed");
}
_size = size;
}

@Override
public synchronized long getSize()
{
return _size;
return _state.isMutable() ? getDataFile().length() : _size;
}

private synchronized StorageInfo getStorageInfo()
Expand Down Expand Up @@ -317,7 +309,7 @@ static CacheRepositoryEntryImpl load(BerkeleyDBMetaDataRepository repository, Pn
}
_log.warn(e.toString());
}
return new CacheRepositoryEntryImpl(repository, pnfsId, EntryState.BROKEN, ImmutableList.of(), attributes);
return new CacheRepositoryEntryImpl(repository, pnfsId, BROKEN, ImmutableList.of(), attributes);
}

private class UpdatableRecordImpl implements UpdatableRecord
Expand All @@ -327,7 +319,7 @@ private class UpdatableRecordImpl implements UpdatableRecord
@Override
public boolean setSticky(String owner, long expire, boolean overwrite) throws CacheException
{
if (_state == EntryState.REMOVED) {
if (_state == REMOVED) {
throw new CacheException("Entry in removed state");
}
Predicate<StickyRecord> subsumes =
Expand All @@ -347,6 +339,9 @@ public boolean setSticky(String owner, long expire, boolean overwrite) throws Ca
public Void setState(EntryState state) throws CacheException
{
if (_state != state) {
if (_state.isMutable() && !state.isMutable()) {
_size = getDataFile().length();
}
_state = state;
_stateModified = true;
}
Expand Down
Expand Up @@ -137,16 +137,9 @@ public synchronized PnfsId getPnfsId() {
}

@Override
public synchronized void setSize(long size) {
if (size < 0) {
throw new IllegalArgumentException("Negative entry size is not allowed");
}
_size = size;
}

@Override
public synchronized long getSize() {
return _size;
public synchronized long getReplicaSize()
{
return _state.getState().isMutable() ? getDataFile().length() : _size;
}

@Override
Expand All @@ -165,6 +158,9 @@ public synchronized Void setState(EntryState state)
throws CacheException
{
try {
if (_state.getState().isMutable() && !state.isMutable()) {
_size = getDataFile().length();
}
_state.setState(state);
} catch (IOException e) {
throw new DiskErrorCacheException(e.getMessage(), e);
Expand Down
Expand Up @@ -26,7 +26,7 @@ public class CacheEntryImpl implements CacheEntry
public CacheEntryImpl(MetaDataRecord entry) throws CacheException
{
synchronized (entry) {
_size = entry.getSize();
_size = entry.getReplicaSize();
_created_at = entry.getCreationTime();
_accessed_at = entry.getLastAccessTime();
_linkCount = entry.getLinkCount();
Expand Down
Expand Up @@ -202,7 +202,6 @@ public void allocate(long size)

synchronized (this) {
_allocated += size;
_entry.setSize(_allocated);
}
}

Expand Down Expand Up @@ -238,7 +237,6 @@ public boolean allocateNow(long size)
if (isAllocated) {
synchronized (this) {
_allocated += size;
_entry.setSize(_allocated);
}
}
return isAllocated;
Expand Down Expand Up @@ -270,7 +268,6 @@ private synchronized void adjustReservation(long length)
_allocator.free(_allocated - length);
}
_allocated = length;
_entry.setSize(length);
} catch (InterruptedException e) {
/* Space allocation is broken now. The entry size
* matches up with what was actually allocated,
Expand Down Expand Up @@ -324,7 +321,7 @@ public synchronized void commit()
try {
_entry.setLastAccessTime((_atime == null) ? System.currentTimeMillis() : _atime);

long length = getFile().length();
long length = _entry.getReplicaSize();
adjustReservation(length);
verifyFileSize(length);
_fileAttributes.setSize(length);
Expand Down Expand Up @@ -372,7 +369,7 @@ public synchronized void commit()
*/
private synchronized void fail()
{
long length = getFile().length();
long length = _entry.getReplicaSize();
try {
adjustReservation(length);
} catch (InterruptedException e) {
Expand Down
Expand Up @@ -103,13 +103,7 @@ public synchronized int getLinkCount()
}

@Override
public synchronized void setSize(long size)
{
_size = size;
}

@Override
public synchronized long getSize()
public synchronized long getReplicaSize()
{
return _size;
}
Expand Down Expand Up @@ -197,12 +191,12 @@ public FileAttributes getFileAttributes()
/**
 * Human-readable summary of the entry: pnfs id, state, link count,
 * replica size and storage class.
 */
@Override
public synchronized String toString()
{
    // Bug fix: the ternary was inverted — it printed "<unknown>" when
    // STORAGECLASS *was* defined and called getStorageClass() when it
    // was not.
    String storageClass = _fileAttributes.isDefined(FileAttribute.STORAGECLASS)
            ? _fileAttributes.getStorageClass()
            : "<unknown>";
    return _pnfsId.toString() +
           " <" + _state.toString() + "-" +
           "(0)" +
           "[" + getLinkCount() + "]> " +
           getReplicaSize() +
           " si={" + storageClass + "}";
}

}
Expand Down

0 comments on commit 79138be

Please sign in to comment.