Permalink
Browse files

Disambiguate ssd errors and disk errors on IO completion. On IO

completion for the !WRITEBACK case, issue an uncached disk IO in the
case of an SSD error.
Patch submitted by Mohit Saxena.

Summary:

Test Plan:

Reviewers:

CC:

Task ID: #

Blame Rev:
  • Loading branch information...
1 parent c4bddf6 commit 9cb6e3a8f2f7900185b6884ebe56cd35f3a3b202 Mohan Srinivasan committed Nov 3, 2012
Showing with 19 additions and 5 deletions.
  1. +4 −0 src/flashcache_conf.c
  2. +15 −5 src/flashcache_main.c
View
@@ -1251,10 +1251,12 @@ flashcache_dtr_stats_print(struct cache_c *dmc)
/* All modes */
DMINFO("\tdisk reads(%lu), disk writes(%lu) ssd reads(%lu) ssd writes(%lu)\n" \
"\tuncached reads(%lu), uncached writes(%lu), uncached IO requeue(%lu)\n" \
+ "\tdisk read errors(%d), disk write errors(%d) ssd read errors(%d) ssd write errors(%d)\n" \
"\tuncached sequential reads(%lu), uncached sequential writes(%lu)\n" \
"\tpid_adds(%lu), pid_dels(%lu), pid_drops(%lu) pid_expiry(%lu)",
stats->disk_reads, stats->disk_writes, stats->ssd_reads, stats->ssd_writes,
stats->uncached_reads, stats->uncached_writes, stats->uncached_io_requeue,
+ dmc->flashcache_errors.disk_read_errors, dmc->flashcache_errors.disk_write_errors, dmc->flashcache_errors.ssd_read_errors, dmc->flashcache_errors.ssd_write_errors,
stats->uncached_sequential_reads, stats->uncached_sequential_writes,
stats->pid_adds, stats->pid_dels, stats->pid_drops, stats->expiry);
if (dmc->size > 0) {
@@ -1431,10 +1433,12 @@ flashcache_status_info(struct cache_c *dmc, status_type_t type,
/* All modes */
DMEMIT("\tdisk reads(%lu), disk writes(%lu) ssd reads(%lu) ssd writes(%lu)\n" \
"\tuncached reads(%lu), uncached writes(%lu), uncached IO requeue(%lu)\n" \
+ "\tdisk read errors(%d), disk write errors(%d) ssd read errors(%d) ssd write errors(%d)\n" \
"\tuncached sequential reads(%lu), uncached sequential writes(%lu)\n" \
"\tpid_adds(%lu), pid_dels(%lu), pid_drops(%lu) pid_expiry(%lu)",
stats->disk_reads, stats->disk_writes, stats->ssd_reads, stats->ssd_writes,
stats->uncached_reads, stats->uncached_writes, stats->uncached_io_requeue,
+ dmc->flashcache_errors.disk_read_errors, dmc->flashcache_errors.disk_write_errors, dmc->flashcache_errors.ssd_read_errors, dmc->flashcache_errors.ssd_write_errors,
stats->uncached_sequential_reads, stats->uncached_sequential_writes,
stats->pid_adds, stats->pid_dels, stats->pid_drops, stats->expiry);
if (dmc->sysctl_io_latency_hist) {
View
@@ -180,6 +180,7 @@ flashcache_io_callback(unsigned long error, void *context)
unsigned long flags;
int index = job->index;
struct cacheblock *cacheblk = &dmc->cache[index];
+ unsigned long disk_error = 0;
VERIFY(index != -1);
bio = job->bio;
@@ -212,8 +213,10 @@ flashcache_io_callback(unsigned long error, void *context)
push_io(job);
schedule_work(&_kcached_wq);
return;
- } else
+ } else {
+ disk_error = -EIO;
dmc->flashcache_errors.disk_read_errors++;
+ }
break;
case READCACHE:
DPRINTK("flashcache_io_callback: READCACHE %d",
@@ -296,6 +299,7 @@ flashcache_io_callback(unsigned long error, void *context)
* the IO to succeed as long as the disk write succeeded.
* and invalidate the cache block.
*/
+ disk_error = -EIO;
dmc->flashcache_errors.disk_write_errors++;
}
break;
@@ -308,8 +312,13 @@ flashcache_io_callback(unsigned long error, void *context)
* work. (a) we cannot fall back to disk when a ssd read of a dirty
* cacheblock fails (b) we'd need to handle ssd metadata write
* failures as well and fall back to disk in those cases as well.
+ *
+ * We track disk errors separately. If we get a disk error (in
+ * writethrough or writearound modes), end the IO right here.
*/
- if (likely(error == 0) || (dmc->cache_mode == FLASHCACHE_WRITE_BACK)) {
+ if (likely(error == 0) ||
+ (dmc->cache_mode == FLASHCACHE_WRITE_BACK) ||
+ disk_error != 0) {
flashcache_bio_endio(bio, error, dmc, &job->io_start_time);
job->bio = NULL;
}
@@ -354,8 +363,9 @@ flashcache_free_pending_jobs(struct cache_c *dmc, struct cacheblock *cacheblk,
/*
* Common error handling for everything.
* 1) If the block isn't dirty, invalidate it.
- * 2) Error all pending IOs that totally or partly overlap this block.
- * 3) Free the job.
+ * 2) De-link all pending IOs that totally or partly overlap this block.
+ * 3) If it was an SSD error (bio != NULL), issue the invalidated block IO and other de-linked pending IOs uncached to disk.
+ * 4) Free the job.
*/
static void
flashcache_do_pending_error(struct kcached_job *job)
@@ -383,7 +393,7 @@ flashcache_do_pending_error(struct kcached_job *job)
VERIFY(dmc->cache_mode == FLASHCACHE_WRITE_BACK);
cacheblk->cache_state &= ~(BLOCK_IO_INPROG);
/*
- * In case of an error in writeback or writearound modes, if there
+ * In case of an error in writethrough or writearound modes, if there
* are pending jobs, de-link them from the cacheblock so we can issue disk
* IOs below.
*/

0 comments on commit 9cb6e3a

Please sign in to comment.