Skip to content

Commit

Permalink
improves to new version see Readme
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Aug 18, 2014
1 parent 2094d05 commit 643248a
Show file tree
Hide file tree
Showing 11 changed files with 225 additions and 44 deletions.
18 changes: 17 additions & 1 deletion README.md
Expand Up @@ -78,4 +78,20 @@ PDF text from the University of Nottingham about how to publish journals using
## Version history
0.0.3 - 2013 : first published version
0.0.4 - 2013 : adds text extract feature
0.0.5 - 2014-05-31: fixes template - fixes this readme - allows positional command line arguments
0.0.5 - 2014-05-31: fixes template - fixes this README - allows positional command line arguments
0.0.6 - 2014-08-18: fixes bug - adds Apache License to README - adds github as maven repository

## Copyright
Copyright 2013-2014 BITPlan GmbH

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
9 changes: 8 additions & 1 deletion dependency-reduced-pom.xml
Expand Up @@ -4,7 +4,7 @@
<groupId>com.bitplan.pdfindex</groupId>
<artifactId>com.bitplan.pdfindex</artifactId>
<name>com.bitplan.pdfindex</name>
<version>0.0.5</version>
<version>0.0.6</version>
<description>pdf index creator</description>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
Expand Down Expand Up @@ -72,5 +72,12 @@
</exclusions>
</dependency>
</dependencies>
<distributionManagement>
<repository>
<id>com.bitplan.pdfindexer.repo</id>
<name>Temporary Staging Repository for pdfindexer</name>
<url>file://${project.build.directory}/mvn-repo</url>
</repository>
</distributionManagement>
</project>

54 changes: 52 additions & 2 deletions pom.xml
@@ -1,11 +1,22 @@
<!--
Copyright 2013-2014 BITPlan GmbH
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.bitplan.pdfindex</groupId>
<artifactId>com.bitplan.pdfindex</artifactId>
<version>0.0.5</version>
<version>0.0.6</version>
<name>com.bitplan.pdfindex</name>
<description>pdf index creator</description>
<!-- see http://stackoverflow.com/questions/14013644/hosting-a-maven-repository-on-github
on
how you can access this project via maven -->
<properties>
<!-- github server corresponds to entry in ~/.m2/settings.xml -->
<github.global.server>github</github.global.server>
</properties>

<dependencies>
<!--apache pdfbox with lucene -->
<dependency>
Expand Down Expand Up @@ -44,14 +55,21 @@
<version>4.8.1</version>
<scope>test</scope>
</dependency>
<!-- HtmlCleaner during test -->
<!-- HtmlCleaner during test -->
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>2.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<distributionManagement>
<repository>
<id>com.bitplan.pdfindexer.repo</id>
<name>Temporary Staging Repository for pdfindexer</name>
<url>file://${project.build.directory}/mvn-repo</url>
</repository>
</distributionManagement>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<plugins>
Expand Down Expand Up @@ -94,6 +112,38 @@
</execution>
</executions>
</plugin>
<!-- maven deployment -->
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.1</version>
<configuration>
<altDeploymentRepository>internal.repo::default::file://${project.build.directory}/mvn-repo</altDeploymentRepository>
</configuration>
</plugin>
<!-- github maven deployment -->
<plugin>
<groupId>com.github.github</groupId>
<artifactId>site-maven-plugin</artifactId>
<version>0.9</version>
<configuration>
<message>Maven artifacts for ${project.version}</message> <!-- git commit message -->
<noJekyll>true</noJekyll> <!-- disable webpage processing -->
<outputDirectory>${project.build.directory}/mvn-repo</outputDirectory> <!-- matches distribution management repository url above -->
<branch>refs/heads/mvn-repo</branch> <!-- remote branch name -->
<includes><include>**/*</include></includes>
<repositoryName>pdfindexer</repositoryName> <!-- github repo name -->
<repositoryOwner>WolfgangFahl</repositoryOwner> <!-- github username -->
</configuration>
<executions>
<!-- run site-maven-plugin's 'site' target as part of the build's normal 'deploy' phase -->
<execution>
<goals>
<goal>site</goal>
</goals>
<phase>deploy</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
36 changes: 24 additions & 12 deletions src/main/java/com/bitplan/pdfindex/PageComparator.java
@@ -1,11 +1,22 @@
/**
* Copyright (C) 2013 BITPlan GmbH
* Copyright (C) 2013-2014 BITPlan GmbH
*
* Pater-Delp-Str. 1
* D-47877 Willich-Schiefbahn
*
* http://www.bitplan.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.bitplan.pdfindex;
Expand All @@ -16,25 +27,26 @@

/**
* compare Lucene Documents by their SOURCE field and then by page number
*
* @author wf
*
*
*/
public class PageComparator implements Comparator<Document> {

@Override
public int compare(Document doc1, Document doc2) {
String source1=doc1.get("SOURCE");
String source2=doc2.get("SOURCE");
int cmp=source1.compareTo(source2);
if (cmp!=0)
String source1 = doc1.get("SOURCE");
String source2 = doc2.get("SOURCE");
int cmp = source1.compareTo(source2);
if (cmp != 0)
return cmp;
String page1s=doc1.get("pagenumber");
String page2s=doc2.get("pagenumber");
int page1=Integer.parseInt(page1s.trim());
int page2=Integer.parseInt(page2s.trim());
if (page1>page2)
String page1s = doc1.get("pagenumber");
String page2s = doc2.get("pagenumber");
int page1 = Integer.parseInt(page1s.trim());
int page2 = Integer.parseInt(page2s.trim());
if (page1 > page2)
return 1;
else if (page1<page2)
else if (page1 < page2)
return -1;
else
return 0;
Expand Down
32 changes: 24 additions & 8 deletions src/main/java/com/bitplan/pdfindex/Pdfindexer.java
@@ -1,11 +1,22 @@
/**
* Copyright (C) 2013 BITPlan GmbH
* Copyright (C) 2013-2014 BITPlan GmbH
*
* Pater-Delp-Str. 1
* D-47877 Willich-Schiefbahn
*
* http://www.bitplan.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bitplan.pdfindex;

Expand Down Expand Up @@ -324,7 +335,7 @@ public IndexWriter getIndexWriter() throws Exception {
}

/**
* allow different Document Sources
* allow different Document Sources e.g. from file or from uri
*
* @author wf
*
Expand All @@ -334,7 +345,7 @@ public class DocumentSource {
URL uri;

/**
* create DocumentSource from pFile
* create DocumentSource from the given File pFile
*
* @param pFile
*/
Expand All @@ -343,7 +354,7 @@ public DocumentSource(File pFile) {
}

/**
* create a DocumentSource from an URL
* create a DocumentSource from the given URL pUrl
*
* @param pUrl
*/
Expand All @@ -352,9 +363,9 @@ public DocumentSource(URL pUrl) {
}

/**
* get the Document
* get the PDDocument for this DocumentSource
*
* @return
* @return the PDDocument for my source or null if there is none
* @throws IOException
*/
public PDDocument getDocument() throws IOException {
Expand Down Expand Up @@ -422,9 +433,14 @@ private void addToIndex(DocumentSource source) throws Exception {
pddDocument.close();
}

/**
* optimize the index and close the IndexWriter
* @throws Exception
*/
private void close() throws Exception {
getIndexWriter().optimize();
getIndexWriter().close();
IndexWriter indexWriter = getIndexWriter();
indexWriter.optimize();
indexWriter.close();
}

/**
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/com/bitplan/pdfindex/SearchResult.java
@@ -1,11 +1,22 @@
/**
* Copyright (C) 2013 BITPlan GmbH
* Copyright (C) 2013-2014 BITPlan GmbH
*
* Pater-Delp-Str. 1
* D-47877 Willich-Schiefbahn
*
* http://www.bitplan.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bitplan.pdfindex;

Expand Down
@@ -1,11 +1,22 @@
/**
* Copyright (C) 2012 BITPlan GmbH
* Copyright (C) 2012-2014 BITPlan GmbH
*
* Pater-Delp-Str. 1
* D-47877 Willich-Schiefbahn
*
* http://www.bitplan.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bitplan.rest.freemarker;

Expand Down
13 changes: 12 additions & 1 deletion src/main/java/com/bitplan/rest/freemarker/TemplateClass.java
@@ -1,11 +1,22 @@
/**
* Copyright (C) 2012 BITPlan GmbH
* Copyright (C) 2012-2014 BITPlan GmbH
*
* Pater-Delp-Str. 1
* D-47877 Willich-Schiefbahn
*
* http://www.bitplan.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bitplan.rest.freemarker;

Expand Down
13 changes: 12 additions & 1 deletion src/test/java/com/bitplan/pdfindex/TestFreemarker.java
@@ -1,11 +1,22 @@
/**
* Copyright (C) 2012 BITPlan GmbH
* Copyright (C) 2012-2014 BITPlan GmbH
*
* Pater-Delp-Str. 1
* D-47877 Willich-Schiefbahn
*
* http://www.bitplan.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bitplan.pdfindex;

Expand Down

0 comments on commit 643248a

Please sign in to comment.