Skip to content

Commit

Permalink
✨ Provide a CSV output for duplicates #14
Browse files Browse the repository at this point in the history
  • Loading branch information
evrignaud committed Oct 27, 2017
1 parent 9221aca commit aa77944
Show file tree
Hide file tree
Showing 13 changed files with 419 additions and 18 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Expand Up @@ -26,6 +26,7 @@
<guava.version>23.0</guava.version>
<commons-lang3.version>3.6</commons-lang3.version>
<commons-cli.version>1.4</commons-cli.version>
<commons-csv.version>1.5</commons-csv.version>
<commons-io.version>2.6</commons-io.version>
<bds-common-framework.version>1.6.7</bds-common-framework.version>
<jackson-databind.version>2.9.2</jackson-databind.version>
Expand Down Expand Up @@ -121,6 +122,11 @@
<artifactId>commons-cli</artifactId>
<version>${commons-cli.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>${commons-csv.version}</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
Expand Down
29 changes: 24 additions & 5 deletions src/main/java/org/fim/Fim.java
Expand Up @@ -48,8 +48,9 @@
import org.fim.model.Command.FimReposConstraint;
import org.fim.model.Context;
import org.fim.model.FilePattern;
import org.fim.model.SortMethod;
import org.fim.model.Ignored;
import org.fim.model.OutputType;
import org.fim.model.SortMethod;
import org.fim.util.Logger;

import java.io.PrintWriter;
Expand Down Expand Up @@ -136,13 +137,17 @@ private Options buildOptions() {
opts.addOption(buildOption("v", "version", "Prints the Fim version").build());
opts.addOption(buildOption("y", "always-yes", "Always yes to every questions").build());
opts.addOption(buildOption(null, "sort", "How to sort duplicate results.\n" +
"You can sort on (default value is 'wasted'):\n" +
"- wasted: wasted size\n" +
"You can sort on:\n" +
"- wasted: wasted size (default)\n" +
"- number: number of files in the duplicated set\n" +
"- size: size of duplicated file").hasArg().build());
opts.addOption(buildOption(null, "order", "Sort order of duplicate results. Default is 'desc'. Can be 'asc' or 'desc'").hasArg().build());
opts.addOption(buildOption(null, "include", "Include some directories/filetype while searching for duplicates. Separated by ':'").hasArg().build());
opts.addOption(buildOption(null, "exclude", "Exclude some directories/filetype while searching for duplicates. Separated by ':'").hasArg().build());
opts.addOption(buildOption(null, "output-type", "Output type used by 'fdup' to display duplicates. Supported types are:\n" +
"- human: display duplicates in human readable messages (default)\n" +
"- csv: display duplicates in CSV format\n" +
"- json: display duplicates in JSON format").hasArg().build());
return opts;
}

Expand Down Expand Up @@ -221,7 +226,7 @@ protected void run(String[] args, Context context) throws Exception {
if (commandLine.hasOption("sort")) {
String sort = commandLine.getOptionValue("sort");
try {
context.setSortMethod(SortMethod.valueOf(sort));
context.setSortMethod(SortMethod.valueOf(sort.toLowerCase()));
} catch (IllegalArgumentException ex) {
Logger.error(String.format("Unsupported sort method '%s'", sort));
throw new BadFimUsageException();
Expand All @@ -230,7 +235,7 @@ protected void run(String[] args, Context context) throws Exception {

if (commandLine.hasOption("order")) {
String order = commandLine.getOptionValue("order");
switch (order) {
switch (order.toLowerCase()) {
case "asc":
context.setSortAscending(true);
break;
Expand Down Expand Up @@ -261,6 +266,20 @@ protected void run(String[] args, Context context) throws Exception {
context.setExcludePatterns(excludePatterns);
}

if (commandLine.hasOption("output-type")) {
String outputType = commandLine.getOptionValue("output-type");
try {
context.setOutputType(OutputType.valueOf(outputType.toLowerCase()));
} catch (IllegalArgumentException ex) {
Logger.error(String.format("Unsupported output type '%s'", outputType));
throw new BadFimUsageException();
}
if (context.getOutputType() != OutputType.human) {
context.setVerbose(false);
Logger.level = Logger.Level.warning.ordinal();
}
}

if (commandLine.hasOption('h')) {
command = new HelpCommand(this);
} else if (commandLine.hasOption('v')) {
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/org/fim/command/FindDuplicatesCommand.java
Expand Up @@ -18,12 +18,14 @@
*/
package org.fim.command;

import org.fim.command.exception.BadFimUsageException;
import org.fim.command.exception.DontWantToContinueException;
import org.fim.internal.DuplicateFinder;
import org.fim.internal.StateGenerator;
import org.fim.internal.StateManager;
import org.fim.model.Context;
import org.fim.model.DuplicateResult;
import org.fim.model.OutputType;
import org.fim.model.State;
import org.fim.util.Logger;

Expand All @@ -49,6 +51,11 @@ public Object execute(Context context) throws Exception {

fileContentHashingMandatory(context);

if (context.getOutputType() != OutputType.human && context.isRemoveDuplicates()) {
Logger.error("You cannot display duplicates in a non human format and remove them");
throw new BadFimUsageException();
}

if (context.isRemoveDuplicates() && context.isAlwaysYes() && !context.isCalledFromTest()) {
explicitlyConfirmAutomaticRemoval(context);
}
Expand Down
117 changes: 117 additions & 0 deletions src/main/java/org/fim/internal/DuplicateOutputGenerator.java
@@ -0,0 +1,117 @@
/*
* This file is part of Fim - File Integrity Manager
*
* Copyright (C) 2017 Etienne Vrignaud
*
* Fim is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fim is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Fim. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fim.internal;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.fim.model.Context;
import org.fim.model.DuplicateResult;
import org.fim.model.DuplicateSet;
import org.fim.model.FileState;
import org.fim.model.output.DuplicatedFile;
import org.fim.model.output.DuplicatedFiles;
import org.fim.util.JsonIO;
import org.fim.util.Logger;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Renders the duplicates held by a {@link DuplicateResult} in a machine readable
 * format and writes them to {@code Logger.out}.
 *
 * <p>The format is selected by {@link Context#getOutputType()}: {@code csv} and
 * {@code json} are handled here, any other output type produces no output.
 */
public class DuplicateOutputGenerator {
    /** Column names of the CSV header, in the order the record values are printed. */
    private static final String[] CSV_HEADER = {
        "SetIndex", "FileIndex", "WastedSpace", "FilePath", "FileName", "FileLength", "FileType"
    };

    private final Context context;

    /**
     * @param context the execution context; read for the output type and for the
     *                "display stack trace" flag used when reporting errors
     */
    public DuplicateOutputGenerator(Context context) {
        this.context = context;
    }

    /**
     * Converts the duplicate sets of {@code duplicateResult} into output objects and
     * prints them in the format configured in the context.
     *
     * @param duplicateResult the duplicates to display
     */
    public void generate(DuplicateResult duplicateResult) {
        List<DuplicatedFiles> duplicates = generateDuplicatedFiles(duplicateResult);
        switch (context.getOutputType()) {
            case csv:
                generateCSV(duplicates);
                break;

            case json:
                generateJson(duplicates);
                break;

            default:
                // Every other output type is not rendered by this class.
                break;
        }
    }

    /**
     * Builds one {@link DuplicatedFiles} per duplicate set, each holding one
     * {@link DuplicatedFile} per file of the set.
     */
    private List<DuplicatedFiles> generateDuplicatedFiles(DuplicateResult duplicateResult) {
        List<DuplicatedFiles> duplicateList = new ArrayList<>();
        for (DuplicateSet duplicateSet : duplicateResult.getDuplicateSets()) {
            DuplicatedFiles duplicatedFiles = new DuplicatedFiles();
            duplicatedFiles.setWastedSpace(duplicateSet.getWastedSpace());
            for (FileState fileState : duplicateSet.getDuplicatedFiles()) {
                String fileName = fileState.getFileName();
                DuplicatedFile file = new DuplicatedFile();
                file.setName(fileName);
                file.setLength(fileState.getFileLength());
                file.setPath(getPath(fileName));
                file.setType(getExtension(fileName));
                duplicatedFiles.getFileList().add(file);
            }
            duplicateList.add(duplicatedFiles);
        }
        return duplicateList;
    }

    /**
     * Returns everything before the last {@code '/'} of {@code fileName}, or an empty
     * string when {@code fileName} contains no {@code '/'}.
     */
    private String getPath(String fileName) {
        int index = fileName.lastIndexOf('/');
        if (index == -1) {
            return "";
        }
        return fileName.substring(0, index);
    }

    /**
     * Returns the lower-cased text after the last {@code '.'} of the last path segment
     * of {@code fileName}, or an empty string when that segment contains no {@code '.'}.
     */
    private String getExtension(String fileName) {
        int dotIndex = fileName.lastIndexOf('.');
        // A dot located before the last '/' belongs to a directory name (e.g. "dir.d/file"),
        // not to the file itself, so it must not be reported as an extension.
        if (dotIndex == -1 || dotIndex < fileName.lastIndexOf('/')) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the JVM default locale.
        return fileName.substring(dotIndex + 1).toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Prints a header line followed by one CSV record per duplicated file.
     * Set and file indexes are 1-based; the file index restarts for every set.
     */
    private void generateCSV(List<DuplicatedFiles> duplicates) {
        CSVFormat format = CSVFormat.DEFAULT.withHeader(CSV_HEADER);
        try {
            // Deliberately not a try-with-resources: closing the printer would also close
            // Logger.out, a static stream that is still used after this method returns.
            // Flushing is enough to push the records out.
            CSVPrinter csvPrinter = new CSVPrinter(Logger.out, format);
            int setIndex = 0;
            for (DuplicatedFiles files : duplicates) {
                setIndex++;
                int fileIndex = 0;
                for (DuplicatedFile file : files.getFileList()) {
                    fileIndex++;
                    csvPrinter.printRecord(setIndex, fileIndex, files.getWastedSpace(),
                        file.getPath(), file.getName(), file.getLength(), file.getType());
                }
            }
            csvPrinter.flush();
        } catch (IOException ex) {
            Logger.error("Error displaying duplicates in CSV format", ex, context.isDisplayStackTrace());
        }
    }

    /**
     * Serializes the whole duplicate list as JSON, followed by an empty
     * {@code println} to terminate the output line.
     */
    private void generateJson(List<DuplicatedFiles> duplicates) {
        JsonIO jsonIO = new JsonIO();
        try {
            // NOTE(review): depending on how JsonIO configures its writer, writeValue() may
            // close Logger.out when it completes - confirm that auto-close is disabled there.
            jsonIO.getObjectWriter().writeValue(Logger.out, duplicates);
            Logger.out.println("");
        } catch (IOException ex) {
            Logger.error("Error displaying duplicates in JSON format", ex, context.isDisplayStackTrace());
        }
    }
}
10 changes: 10 additions & 0 deletions src/main/java/org/fim/model/Context.java
Expand Up @@ -53,6 +53,7 @@ public class Context {
private SortMethod sortMethod;
private ArrayList<FilePattern> includePatterns;
private ArrayList<FilePattern> excludePatterns;
private OutputType outputType;

public Context() {
setInvokedFromSubDirectory(false);
Expand All @@ -73,6 +74,7 @@ public Context() {
setCalledFromTest(false);
setSortAscending(false);
setSortMethod(SortMethod.wasted);
setOutputType(OutputType.human);
}

public boolean isInvokedFromSubDirectory() {
Expand Down Expand Up @@ -263,6 +265,14 @@ public ArrayList<FilePattern> getExcludePatterns() {
return excludePatterns;
}

/**
 * Stores the output type in this context.
 *
 * @param outputType the output type to store
 */
public void setOutputType(OutputType outputType) {
this.outputType = outputType;
}

/**
 * Returns the output type stored in this context.
 *
 * @return the value last passed to {@code setOutputType}
 */
public OutputType getOutputType() {
return outputType;
}

@Override
public Context clone() {
return CLONER.deepClone(this);
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/org/fim/model/DuplicateResult.java
Expand Up @@ -18,6 +18,7 @@
*/
package org.fim.model;

import org.fim.internal.DuplicateOutputGenerator;
import org.fim.util.Logger;

import java.util.ArrayList;
Expand Down Expand Up @@ -79,6 +80,12 @@ private Comparator<DuplicateSet> createDuplicateSetComparator() {
}

public DuplicateResult displayAndRemoveDuplicates() {
if (context.getOutputType() != OutputType.human) {
DuplicateOutputGenerator generator = new DuplicateOutputGenerator(context);
generator.generate(this);
return this;
}

if (context.isVerbose() || context.isRemoveDuplicates()) {
for (DuplicateSet duplicateSet : duplicateSets) {
manageDuplicateSet(duplicateSet);
Expand Down
25 changes: 25 additions & 0 deletions src/main/java/org/fim/model/OutputType.java
@@ -0,0 +1,25 @@
/*
* This file is part of Fim - File Integrity Manager
*
* Copyright (C) 2017 Etienne Vrignaud
*
* Fim is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fim is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Fim. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fim.model;

/**
 * Formats in which results can be displayed.
 *
 * <p>The constant names are intentionally lower case: they are resolved with
 * {@link #valueOf(String)}, which is case-sensitive, so renaming a constant
 * changes the text that selects it.
 */
public enum OutputType {
    /** Human readable messages. */
    human,

    /** CSV format. */
    csv,

    /** JSON format. */
    json
}
58 changes: 58 additions & 0 deletions src/main/java/org/fim/model/output/DuplicatedFile.java
@@ -0,0 +1,58 @@
/*
* This file is part of Fim - File Integrity Manager
*
* Copyright (C) 2017 Etienne Vrignaud
*
* Fim is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fim is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Fim. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fim.model.output;

/**
 * Mutable value holder describing a single duplicated file.
 *
 * <p>All properties are plain read/write bean properties. Until a setter is
 * called, the string properties are {@code null} and the length is {@code 0}.
 */
public class DuplicatedFile {
    private String path;
    private String name;
    private long length;
    private String type;

    /**
     * @return the path of the file, or {@code null} when not set
     */
    public String getPath() {
        return path;
    }

    /**
     * @param path the path of the file
     */
    public void setPath(String path) {
        this.path = path;
    }

    /**
     * @return the name of the file, or {@code null} when not set
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the name of the file
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the length of the file, {@code 0} when not set
     */
    public long getLength() {
        return length;
    }

    /**
     * @param length the length of the file
     */
    public void setLength(long length) {
        this.length = length;
    }

    /**
     * @return the type of the file, or {@code null} when not set
     */
    public String getType() {
        return type;
    }

    /**
     * @param type the type of the file
     */
    public void setType(String type) {
        this.type = type;
    }
}

0 comments on commit aa77944

Please sign in to comment.