Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Implements EUCA-3583. Available space checks for Walrus bukkits directory

Added a new fault for troubleshooting Walrus bukkits directory running low on space.
Modified the implementation for disk space checks. Periodic checks can be carried out on file locations.
A fault is logged when the disk runs low on space. After recording a fault on a particular location,
subsequent faults for the same location are not logged until the state is reset.
A state reset occurs when the location passes the space check.
  • Loading branch information...
commit 8f7b796974303c6ca37c92a502ea257361d9e3c3 1 parent cbc8455
@euca-nightfury euca-nightfury authored
View
2  clc/modules/module-inc.order
@@ -76,8 +76,8 @@ cluster-manager
notifications
storage-common
reporting
-walrus
troubleshooting
+walrus
storage-controller
dns
www
View
10 ...s/troubleshooting/src/main/java/com/eucalyptus/bootstrap/TroubleshootingBootstrapper.java
@@ -83,6 +83,8 @@
import com.eucalyptus.troubleshooting.fault.FaultSubsystem;
import com.eucalyptus.troubleshooting.resourcefaults.DBResourceCheck;
import com.eucalyptus.troubleshooting.resourcefaults.DiskResourceCheck;
+import com.eucalyptus.troubleshooting.resourcefaults.DiskResourceCheck.Checker;
+import com.eucalyptus.troubleshooting.resourcefaults.DiskResourceCheck.LocationInfo;
import com.eucalyptus.troubleshooting.resourcefaults.MXBeanMemoryResourceCheck;
import com.eucalyptus.troubleshooting.resourcefaults.SimpleMemoryResourceCheck;
@Provides(Empyrean.class)
@@ -100,11 +102,13 @@ public boolean load( ) throws Exception {
@Override
public boolean start( ) throws Exception {
LOG.info( "Starting troubleshooting interface." );
- DiskResourceCheck check = new DiskResourceCheck();
+ //DiskResourceCheck check = new DiskResourceCheck();
// TODO: we should use a property, but for now use 2% of the log directory
File logFileDir = BaseDirectory.LOG.getFile();
- check.addLocationInfo(logFileDir, (long) (0.02 * logFileDir.getTotalSpace()));
- check.start();
+ //check.addLocationInfo(logFileDir, (long) (0.02 * logFileDir.getTotalSpace()));
+ //check.start();
+ LocationInfo location = new LocationInfo(logFileDir, (long) (0.02 * logFileDir.getTotalSpace()));
+ DiskResourceCheck.start(new Checker(location));
new DBResourceCheck().start();
// new SimpleMemoryResourceCheck(1).start(512 * 1024).start(); // 512K left, also arbitrary
//new MXBeanMemoryResourceCheck().start(); // 512K left, also arbitrary
View
203 ...ooting/src/main/java/com/eucalyptus/troubleshooting/resourcefaults/DiskResourceCheck.java
@@ -1,66 +1,201 @@
package com.eucalyptus.troubleshooting.resourcefaults;
import java.io.File;
-import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
+import com.eucalyptus.component.ComponentId;
import com.eucalyptus.component.id.Eucalyptus;
import com.eucalyptus.troubleshooting.fault.FaultSubsystem;
-public class DiskResourceCheck extends Thread {
+/**
+ * <p>
+ * DiskResourceCheck can be used by any eucalyptus component (walrus, SC, NC etc...) to perform periodic checks on disk space and warn the user when the system
+ * runs low on space. This class provides a static method to {@link #start(Checker) start} the disk resource check for a particular location at a specified
+ * interval.
+ * </p>
+ * <p>
+ * {@link ScheduledExecutorService} is used for scheduling the disk space checks at configurable intervals. The thread pool size is limited to 1
+ * </p>
+ * <p>
+ * If the system is running low on disk space a fault is recorded in the log file for the specified component. Subsequent faults for the same location are not
+ * logged until the state is reset for that location. A state reset occurs when the file location has enough free space
+ * </p>
+ */
+public class DiskResourceCheck {
private final static Logger LOG = Logger.getLogger(DiskResourceCheck.class);
- private final static long POLL_TIME = 5 * 1000;
+
+ private static final ScheduledExecutorService pool = Executors.newSingleThreadScheduledExecutor();
private static final int OUT_OF_DISK_SPACE_FAULT_ID = 1003;
- private boolean started = false;
- private Set<LocationInfo> alreadyFaulted = new HashSet<LocationInfo>();
- public class LocationInfo {
+ private final static long DEFAULT_POLL_INTERVAL = 5 * 1000;
+ private static final ComponentId DEFAULT_COMPONENT_ID = Eucalyptus.INSTANCE;
+
+ /**
+ * Marking the constructor private on purpose, so that no code can instantiate an object of this class
+ */
+ private DiskResourceCheck() {
+
+ }
+
+ /**
+ * <p>
+ * Kicks off an infinite series of disk resource checks with a delay in between consecutive checks. {@link ScheduledExecutorService#scheduleWithFixedDelay
+ * Executor service framework} is used for scheduling the worker thread, {@link Checker checker}, at regular intervals. The time delay, file location, logic
+ * for disk space check and other configuration is provided by checker
+ * </p>
+ *
+ * <p>
+ * This method returns a {@link ScheduledFuture} object that can be used by the caller to cancel the execution. Thread execution can also be cancelled by
+ * shutting down the executor service
+ * </p>
+ *
+ * @param checker
+ * @return ScheduledFuture
+ */
+ public static ScheduledFuture<?> start(Checker checker) {
+ return pool.scheduleWithFixedDelay(checker, 0, checker.pollInterval, TimeUnit.MILLISECONDS);
+ }
+
+ // Nothing calls this yet. It should probably be invoked from the service shutdown hooks, although that could get complicated
+ // when multiple services share the same executor.
+ public static void shutdown() {
+ pool.shutdownNow();
+ }
+
+ public static class LocationInfo {
private File file;
- private long minimumFreeSpace;
+ private Long minimumFreeSpace;
+ private Double percentFreeSpace;
+
public File getFile() {
return file;
}
- public long getMinimumFreeSpace() {
- return minimumFreeSpace;
+
+ public Long getMinimumFreeSpace() {
+ if (null != this.minimumFreeSpace) {
+ return this.minimumFreeSpace;
+ } else {
+ return (long) (this.file.getTotalSpace() * this.percentFreeSpace / 100);
+ }
}
- public LocationInfo(File file, long minimumFreeSpace) {
+
+ /**
+ * Constructor to be used when free space is an absolute quantity in bytes
+ *
+ * @param file
+ * @param minimumFreeSpace
+ */
+ public LocationInfo(File file, Long minimumFreeSpace) {
super();
this.file = file;
this.minimumFreeSpace = minimumFreeSpace;
}
- }
- private List<LocationInfo> locations = new ArrayList<LocationInfo>();
- // TODO: consolidate locations
- public void addLocationInfo(File location, long minimumFreeSpace) {
- if (started) {
- throw new IllegalStateException("Can not add location info after thread has started");
+ /**
+ * Constructor to be used when free space is a percentage of the total space available
+ *
+ * @param file
+ * @param percentFreeSpace
+ */
+ public LocationInfo(File file, Double percentFreeSpace) {
+ super();
+ this.file = file;
+ this.percentFreeSpace = percentFreeSpace;
+ }
+
+ // Added hashCode() and equals() since we do Set related operations
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((file == null) ? 0 : file.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ LocationInfo other = (LocationInfo) obj;
+ if (file == null) {
+ if (other.file != null)
+ return false;
+ } else if (!file.equals(other.file))
+ return false;
+ return true;
}
- locations.add(new LocationInfo(location, minimumFreeSpace));
}
- @Override
- public void run() {
- this.started = true;
- while (true) {
- for (LocationInfo location: locations) {
- if (!alreadyFaulted.contains(location)) {
- long usableSpace = location.getFile().getUsableSpace();
- LOG.debug("Checking disk space for " + location.getFile() + " usableSpace = " + usableSpace + ", minimumFreeSpace = " + location.getMinimumFreeSpace());
- if (usableSpace < location.getMinimumFreeSpace()) {
- FaultSubsystem.forComponent(Eucalyptus.INSTANCE).havingId(OUT_OF_DISK_SPACE_FAULT_ID).withVar("component", "eucalyptus").withVar("file", location.getFile().getAbsolutePath()).log();
- alreadyFaulted.add(location);
+
+ /**
+ * Worker task that holds the logic for disk space checks and all the relevant information required. An instance of this class is passed to the
+ * {@link ScheduledExecutorService#scheduleWithFixedDelay} method.
+ *
+ */
+ public static class Checker implements Runnable {
+
+ private Set<LocationInfo> locations = new HashSet<LocationInfo>();
+ private long pollInterval;
+ private ComponentId componentId;
+
+ private Set<LocationInfo> alreadyFaulted = new HashSet<LocationInfo>();
+
+ public Checker(LocationInfo locationInfo) {
+ this.locations.add(locationInfo);
+ this.pollInterval = DEFAULT_POLL_INTERVAL;
+ this.componentId = DEFAULT_COMPONENT_ID;
+ }
+
+ public Checker(LocationInfo locationInfo, ComponentId componentId, long pollTime) {
+ this.locations.add(locationInfo);
+ this.componentId = componentId;
+ this.pollInterval = pollTime;
+ }
+
+ public Checker(List<LocationInfo> locations, ComponentId componentId, long pollTime) {
+ this.locations.addAll(locations);
+ this.componentId = componentId;
+ this.pollInterval = pollTime;
+ }
+
+ @Override
+ public void run() {
+ if (null != locations) {
+ for (LocationInfo location : this.locations) {
+ // Enclose everything between try catch because nothing should throw an exception to the executor upstream or it may halt subsequent tasks
+ try {
+ long usableSpace = location.getFile().getUsableSpace();
+ if (usableSpace < location.getMinimumFreeSpace()) {
+ if (!this.alreadyFaulted.contains(location)) {
+ FaultSubsystem.forComponent(this.componentId).havingId(OUT_OF_DISK_SPACE_FAULT_ID)
+ .withVar("component", this.componentId.getName()).withVar("file", location.getFile().getAbsolutePath()).log();
+ this.alreadyFaulted.add(location);
+ } else {
+ // fault has already been logged. do nothing
+ }
+ } else {
+ // Remove this location from the already faulted set. If the location is not in the set, this call will simply return false. no harm
+ // done. another if condition is just one unnecessary step
+ this.alreadyFaulted.remove(location);
+ }
+ } catch (Exception ex) {
+ // what to do when an exception is caught? should we remove the location off the list?
+ LOG.error("Disk resource check failed for " + location.getFile().getAbsolutePath(), ex);
}
}
- }
- try {
- Thread.sleep(POLL_TIME);
- } catch (InterruptedException ex) {
- LOG.warn("Polling thread interrupted");
+ } else {
+ // nothing to check
}
}
}
-
}
View
14 clc/modules/walrus/src/main/java/edu/ucsb/eucalyptus/cloud/ws/WalrusControl.java
@@ -62,8 +62,11 @@
package edu.ucsb.eucalyptus.cloud.ws;
+import java.io.File;
import java.util.ArrayList;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ScheduledFuture;
+
import org.apache.log4j.Logger;
import com.eucalyptus.component.ComponentIds;
import com.eucalyptus.component.Components;
@@ -73,6 +76,9 @@
import com.eucalyptus.configurable.ConfigurableClass;
import com.eucalyptus.configurable.ConfigurableProperty;
import com.eucalyptus.configurable.PropertyDirectory;
+import com.eucalyptus.troubleshooting.resourcefaults.DiskResourceCheck;
+import com.eucalyptus.troubleshooting.resourcefaults.DiskResourceCheck.Checker;
+import com.eucalyptus.troubleshooting.resourcefaults.DiskResourceCheck.LocationInfo;
import com.eucalyptus.util.EucalyptusCloudException;
import com.eucalyptus.util.WalrusProperties;
import edu.ucsb.eucalyptus.cloud.AccessDeniedException;
@@ -206,6 +212,14 @@ public static void configure() {
} catch(EucalyptusCloudException ex) {
LOG.error("Error starting storage backend: " + ex);
}
+
+ // Implementation for EUCA-3583. Check for available space in Walrus bukkits directory and throw a fault when less than 10% of total space is available
+ try {
+ ScheduledFuture<?> future = DiskResourceCheck.start(new Checker(
+ new LocationInfo(new File(WalrusInfo.getWalrusInfo().getStorageDir()), (double) 10), new Walrus(), (long) 300000));
+ } catch (Exception ex) {
+ LOG.error("Error starting disk space check for Walrus storage directory.", ex);
+ }
}
public WalrusControl() {}
Please sign in to comment.
Something went wrong with that request. Please try again.