Skip to content

Commit

Permalink
Added batch processing for importing comics [#852]
Browse files Browse the repository at this point in the history
 * Added the comixed-batch submodule.
 * Removed the process comics task and encoder.
 * Changed SonarCloud to exclude classes named "*Configuration"
 * Renamed ComicStateMachineConfig => ComicStateMachineConfiguration
 * Changed the comic state machine to add more granular processing steps.
  • Loading branch information
mcpierce committed Sep 6, 2021
1 parent 7644d98 commit 56756f0
Show file tree
Hide file tree
Showing 75 changed files with 2,806 additions and 923 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Expand Up @@ -143,6 +143,7 @@ The application is composed of several modules:
* **comixed-services**: the business logic layer,
* **comixed-scrapers**: the library for building comic data scrapers,
* **comixed-state**: state management layer,
* **comixed-batch**: batch processing layer,
* **comixed-tasks**: the set of worker tasks that run on the server,
* **comixed-auth**: the authentication layer,
* **comixed-rest**: the REST controllers,
Expand Down
Expand Up @@ -155,10 +155,9 @@ public void testFilename() {
assertEquals(TEST_FILENAME, this.comic.getFilename());
}

@Test
public void testFilenameCanBeNull() {
@Test(expected = NullPointerException.class)
public void testFilenameCannotNull() {
this.comic.setFilename(null);
assertNull(this.comic.getFilename());
}

@Test
Expand Down
5 changes: 5 additions & 0 deletions comixed-app/pom.xml
Expand Up @@ -23,6 +23,11 @@
<artifactId>comixed-plugins</artifactId>
<version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.comixedproject</groupId>
<artifactId>comixed-batch</artifactId>
<version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.comixedproject</groupId>
<artifactId>comixed-rest</artifactId>
Expand Down
Expand Up @@ -18,9 +18,13 @@

package org.comixedproject;

import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;

/**
 * <code>ComiXedConfiguration</code> is the application-level Spring configuration. It declares no
 * beans of its own; it only switches on annotation-driven transaction management, Spring Batch
 * processing and scheduled task execution for the application.
 */
@Configuration
@EnableTransactionManagement
@EnableBatchProcessing
@EnableScheduling
public class ComiXedConfiguration {}
4 changes: 4 additions & 0 deletions comixed-app/src/main/resources/application.properties
Expand Up @@ -31,6 +31,10 @@ spring.datasource.hikari.pool-name=CX-Conn-Pool
spring.session.store-type=none
# spring.session.jdbc.initialize-schema=always

# Batch processing
spring.batch.initialize-schema=always
batch.chunk-size=10

# Liquibase changelog
spring.liquibase.change-log=classpath:db/liquibase-changelog.xml

Expand Down
42 changes: 42 additions & 0 deletions comixed-batch/pom.xml
@@ -0,0 +1,42 @@
<?xml version="1.0"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
         xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.comixedproject</groupId>
    <artifactId>comixed</artifactId>
    <version>0.10.0-SNAPSHOT</version>
    <relativePath>..</relativePath>
  </parent>
  <groupId>org.comixedproject</groupId>
  <artifactId>comixed-batch</artifactId>
  <version>0.10.0-SNAPSHOT</version>
  <name>comixed-batch</name>
  <url>http://maven.apache.org</url>
  <dependencies>
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-batch</artifactId>
    </dependency>
    <dependency>
      <!-- Test support only: keep it off the compile/runtime classpath of dependent modules. -->
      <groupId>org.springframework.batch</groupId>
      <artifactId>spring-batch-test</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.comixedproject</groupId>
      <artifactId>comixed-model</artifactId>
      <version>0.10.0-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>org.comixedproject</groupId>
      <artifactId>comixed-adaptors</artifactId>
      <version>0.10.0-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>org.comixedproject</groupId>
      <artifactId>comixed-services</artifactId>
      <version>0.10.0-SNAPSHOT</version>
    </dependency>
  </dependencies>
</project>
@@ -0,0 +1,156 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2021, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.batch.comicbooks;

import lombok.extern.log4j.Log4j2;
import org.comixedproject.batch.comicbooks.processors.ContentsProcessedProcessor;
import org.comixedproject.batch.comicbooks.processors.LoadFileContentsProcessor;
import org.comixedproject.batch.comicbooks.processors.LoadFileDetailsProcessor;
import org.comixedproject.batch.comicbooks.processors.MarkBlockedPagesProcessor;
import org.comixedproject.batch.comicbooks.readers.ContentsProcessedReader;
import org.comixedproject.batch.comicbooks.readers.LoadFileContentsReader;
import org.comixedproject.batch.comicbooks.readers.LoadFileDetailsReader;
import org.comixedproject.batch.comicbooks.readers.MarkBlockedPagesReader;
import org.comixedproject.batch.comicbooks.writers.ContentsProcessedWriter;
import org.comixedproject.batch.comicbooks.writers.LoadFileContentsWriter;
import org.comixedproject.batch.comicbooks.writers.LoadFileDetailsWriter;
import org.comixedproject.batch.comicbooks.writers.MarkBlockedPagesWriter;
import org.comixedproject.model.comic.Comic;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.JobParametersInvalidException;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException;
import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException;
import org.springframework.batch.core.repository.JobRestartException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.Scheduled;

/**
 * <code>ProcessComicsConfiguration</code> defines the batch process for importing comics.
 *
 * <p>The import job is built from four chunk-oriented steps that run in sequence: load file
 * contents, mark blocked pages, load file details, and contents processed. Each step is wired from
 * its own injected reader, processor and writer. The job is launched on a fixed schedule by {@link
 * #performJob()}.
 *
 * @author Darryl L. Pierce
 */
@Configuration
@Log4j2
public class ProcessComicsConfiguration {
  /** Name of the job parameter that carries the launch timestamp. */
  private static final String KEY_STARTED = "job.started";

  @Autowired public JobBuilderFactory jobBuilderFactory;
  @Autowired public StepBuilderFactory stepBuilderFactory;
  @Autowired private JobLauncher jobLauncher;

  @Autowired private LoadFileContentsReader loadFileContentsReader;
  @Autowired private LoadFileContentsProcessor loadFileContentsProcessor;
  @Autowired private LoadFileContentsWriter loadFileContentsWriter;
  @Autowired private MarkBlockedPagesReader markBlockedPagesReader;
  @Autowired private MarkBlockedPagesProcessor markBlockedPagesProcessor;
  @Autowired private MarkBlockedPagesWriter markBlockedPagesWriter;
  @Autowired private LoadFileDetailsReader loadFileDetailsReader;
  @Autowired private LoadFileDetailsProcessor loadFileDetailsProcessor;
  @Autowired private LoadFileDetailsWriter loadFileDetailsWriter;
  @Autowired private ContentsProcessedReader contentsProcessedReader;
  @Autowired private ContentsProcessedProcessor contentsProcessedProcessor;
  @Autowired private ContentsProcessedWriter contentsProcessedWriter;

  // Number of comics handled per chunk. The ":10" placeholder default keeps the context
  // startable when batch.chunk-size is not configured; the field initializer alone cannot act as
  // a fallback because an unresolvable placeholder fails during injection.
  @Value("${batch.chunk-size:10}")
  private int batchChunkSize = 10;

  /**
   * Builds the comic import job, chaining the four processing steps in order.
   *
   * @return the import job
   */
  @Bean
  public Job importComicsJob() {
    return this.jobBuilderFactory
        .get("importComicsJob")
        .incrementer(new RunIdIncrementer())
        .start(loadFileContentsStep())
        .next(markBlockedPagesStep())
        .next(loadFileDetailsStep())
        .next(contentsProcessedStep())
        .build();
  }

  /**
   * Builds the first step, which runs comics through the load-file-contents reader, processor and
   * writer.
   *
   * @return the step
   */
  @Bean
  public Step loadFileContentsStep() {
    return this.stepBuilderFactory
        .get("loadFileContentsStep")
        .<Comic, Comic>chunk(this.batchChunkSize)
        .reader(loadFileContentsReader)
        .processor(loadFileContentsProcessor)
        .writer(loadFileContentsWriter)
        .build();
  }

  /**
   * Builds the second step, which runs comics through the mark-blocked-pages reader, processor and
   * writer.
   *
   * @return the step
   */
  @Bean
  public Step markBlockedPagesStep() {
    return this.stepBuilderFactory
        .get("markBlockedPagesStep")
        .<Comic, Comic>chunk(this.batchChunkSize)
        .reader(markBlockedPagesReader)
        .processor(markBlockedPagesProcessor)
        .writer(markBlockedPagesWriter)
        .build();
  }

  /**
   * Builds the third step, which runs comics through the load-file-details reader, processor and
   * writer.
   *
   * @return the step
   */
  @Bean
  public Step loadFileDetailsStep() {
    return this.stepBuilderFactory
        .get("loadFileDetailsStep")
        .<Comic, Comic>chunk(this.batchChunkSize)
        .reader(loadFileDetailsReader)
        .processor(loadFileDetailsProcessor)
        .writer(loadFileDetailsWriter)
        .build();
  }

  /**
   * Builds the final step, which runs comics through the contents-processed reader, processor and
   * writer.
   *
   * @return the step
   */
  @Bean
  public Step contentsProcessedStep() {
    return this.stepBuilderFactory
        .get("contentsProcessedStep")
        .<Comic, Comic>chunk(this.batchChunkSize)
        .reader(contentsProcessedReader)
        .processor(contentsProcessedProcessor)
        .writer(contentsProcessedWriter)
        .build();
  }

  /**
   * Runs the comic processing job.
   *
   * <p>Scheduled with a fixed delay of 1000 ms. The current time is passed as the {@value
   * #KEY_STARTED} job parameter, so every launch uses a distinct set of job parameters.
   *
   * @throws JobInstanceAlreadyCompleteException if a job with the same parameters has already
   *     completed successfully
   * @throws JobExecutionAlreadyRunningException if an execution of the same job instance is
   *     already running
   * @throws JobParametersInvalidException if the job rejects the supplied parameters
   * @throws JobRestartException if the job was run before and cannot be restarted
   */
  @Scheduled(fixedDelay = 1000)
  public void performJob()
      throws JobInstanceAlreadyCompleteException, JobExecutionAlreadyRunningException,
          JobParametersInvalidException, JobRestartException {
    this.jobLauncher.run(
        importComicsJob(),
        new JobParametersBuilder()
            .addLong(KEY_STARTED, System.currentTimeMillis())
            .toJobParameters());
  }
}
@@ -0,0 +1,40 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2021, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.batch.comicbooks.processors;

import lombok.extern.log4j.Log4j2;
import org.comixedproject.model.comic.Comic;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.stereotype.Component;

/**
 * <code>ContentsProcessedProcessor</code> is the hook for any wrap-up work on a comic once its
 * contents have been processed. At present it does nothing beyond logging and hands the comic back
 * unchanged.
 *
 * @author Darryl L. Pierce
 */
@Log4j2
@Component
public class ContentsProcessedProcessor implements ItemProcessor<Comic, Comic> {
  /**
   * Passes the comic through untouched.
   *
   * @param comic the comic being processed
   * @return the same comic instance
   */
  @Override
  public Comic process(final Comic comic) {
    log.trace("Nothing to do");
    return comic;
  }
}
@@ -0,0 +1,45 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2021, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.batch.comicbooks.processors;

import lombok.extern.log4j.Log4j2;
import org.comixedproject.handlers.ComicFileHandler;
import org.comixedproject.model.comic.Comic;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

/**
 * <code>LoadFileContentsProcessor</code> hands each comic to the {@link ComicFileHandler} so that
 * the contents of its file are loaded into the comic.
 *
 * @author Darryl L. Pierce
 */
@Log4j2
@Component
public class LoadFileContentsProcessor implements ItemProcessor<Comic, Comic> {
  @Autowired private ComicFileHandler comicFileHandler;

  /**
   * Loads the comic through the file handler and returns it.
   *
   * @param comic the comic to load
   * @return the same comic instance, after the handler has been applied to it
   * @throws Exception if the file handler fails to load the comic
   */
  @Override
  public Comic process(final Comic comic) throws Exception {
    log.trace("Loading comic file contents");
    comicFileHandler.loadComic(comic);

    log.trace("Returning updated comic");
    return comic;
  }
}
@@ -0,0 +1,49 @@
/*
* ComiXed - A digital comic book library management application.
* Copyright (C) 2021, The ComiXed Project
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses>
*/

package org.comixedproject.batch.comicbooks.processors;

import java.io.FileInputStream;
import lombok.extern.log4j.Log4j2;
import org.comixedproject.model.comic.Comic;
import org.comixedproject.model.comic.ComicFileDetails;
import org.comixedproject.utils.Utils;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

/**
 * <code>LoadFileDetailsProcessor</code> loads the file details for a comic.
 *
 * <p>For each comic it creates a new {@link ComicFileDetails}, attaches it to the comic and stores
 * in it the hash computed from the comic's file.
 *
 * @author Darryl L. Pierce
 */
@Component
@Log4j2
public class LoadFileDetailsProcessor implements ItemProcessor<Comic, Comic> {
  @Autowired private Utils utils;

  /**
   * Creates the file details for the comic and records the hash of its file.
   *
   * @param comic the comic being processed
   * @return the same comic instance, with its file details attached
   * @throws Exception if the comic's file cannot be opened or hashed
   */
  @Override
  public Comic process(final Comic comic) throws Exception {
    log.trace("Creating file details container");
    final ComicFileDetails fileDetails = new ComicFileDetails(comic);
    comic.setFileDetails(fileDetails);
    log.trace("Getting comic file hash");
    // try-with-resources guarantees the file handle is released even when hashing fails;
    // otherwise every processed comic would leave an open stream behind
    try (final FileInputStream input = new FileInputStream(comic.getFilename())) {
      fileDetails.setHash(this.utils.createHash(input));
    }
    return comic;
  }
}

0 comments on commit 56756f0

Please sign in to comment.