Skip to content

Commit

Permalink
licences and stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
thorstenv committed May 5, 2013
1 parent 565e5c8 commit a380ef5
Show file tree
Hide file tree
Showing 83 changed files with 4,105 additions and 379 deletions.
1 change: 1 addition & 0 deletions AXIS2/nbproject/project.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
is.autoload=true
javac.compilerargs=-Xlint -Xlint:-serial
javac.source=1.7
license.file=../lizenzen/Apache License, Version 2.0.htm
nbm.homepage=http://axis.apache.org/axis2/java/core/
nbm.module.author=Apache Team
nbm.needs.restart=true
Expand Down
3 changes: 2 additions & 1 deletion Actions/nbproject/project.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
javac.source=1.7
javac.compilerargs=-Xlint -Xlint:-serial
nbm.homepage=http://ocr.cis.uni-muenchen.de/
license.file=../LICENSE.md
nbm.homepage=http://thorstenv.github.io/PoCoTo
nbm.module.author=Thorsten Vobl (thorsten.vobl@googlemail.com)
nbm.needs.restart=true
spec.version.base=1.0.1
75 changes: 33 additions & 42 deletions Actions/src/jav/gui/actions/OCRErrorKonkordance.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,54 +10,45 @@
import org.openide.util.Exceptions;

/**
*Copyright (c) 2012, IMPACT working group at the Centrum für Informations- und Sprachverarbeitung, University of Munich.
*All rights reserved.
*Redistribution and use in source and binary forms, with or without
*modification, are permitted provided that the following conditions are met:
*Redistributions of source code must retain the above copyright
*notice, this list of conditions and the following disclaimer.
*Redistributions in binary form must reproduce the above copyright
*notice, this list of conditions and the following disclaimer in the
*documentation and/or other materials provided with the distribution.
*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
*IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
*PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
*HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
*SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
*LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
*DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
*THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
*(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
*OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This file is part of the ocr-postcorrection tool developed
* by the IMPACT working group at the Centrum für Informations- und Sprachverarbeitung, University of Munich.
* For further information and contacts visit http://ocr.cis.uni-muenchen.de/
*
* Copyright (c) 2012, IMPACT working group at the Centrum für Informations- und
* Sprachverarbeitung, University of Munich. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This file is part of the ocr-postcorrection tool developed by the IMPACT
* working group at the Centrum für Informations- und Sprachverarbeitung,
* University of Munich. For further information and contacts visit
* http://ocr.cis.uni-muenchen.de/
*
* @author thorsten (thorsten.vobl@googlemail.com)
*/
public final class OCRErrorKonkordance implements ActionListener {

@Override
public void actionPerformed(ActionEvent e) {
if( MainController.findInstance().getDocument() != null) {
try {
File tempFile = File.createTempFile("document", ".ocrcxml");
tempFile.deleteOnExit();

MainController.findInstance().getDocument().exportAsDocXML(tempFile.getCanonicalPath(), false);

MainController.findInstance().getCorrectionSystem().clearCandidates();
// new OCRXMLImporter().importCandidates(MainController.findInstance().getDocument(), "C:\\Users\\Scampi_Joe\\AppData\\Local\\Temp\\document38524588653394750.ocrcxml");
// LevDistance_1_Filter f = new LevDistance_1_Filter("OCRErrors");
// MainController.findInstance().applyFilter(f);
} catch (IOException ex) {
Exceptions.printStackTrace(ex);
}
if (MainController.findInstance().getDocument() != null) {
new OCRXMLImporter().importCandidates(MainController.findInstance().getDocument(), "C:\\Users\\Scampi_Joe\\AppData\\Local\\Temp\\document38524588653394750.ocrcxml");
LevDistance_1_Filter f = new LevDistance_1_Filter("OCRErrors");
MainController.findInstance().applyFilter(f);
}
}
}
9 changes: 4 additions & 5 deletions Actions/src/jav/gui/actions/layer.xml
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,16 @@
<attr name="originalFile" stringvalue="Actions/File/jav-gui-actions-MRUFilesAction.instance"/>
<attr name="position" intvalue="700"/>
</file>
<folder name="newproj">
<attr name="displayName" bundlevalue="jav.gui.actions.Bundle#newproj"/>
<!-- <folder name="newproj">-->
<file name="jav-gui-wizard-newProject-NewProjectWizardAction.shadow">
<attr name="originalFile" stringvalue="Actions/File/jav-gui-wizard-newProject-NewProjectWizardAction.instance"/>
<attr name="position" intvalue="0"/>
</file>
<file name="jav-gui-wizard-importDocument-ImportDocumentWizardAction.shadow">
<!-- <file name="jav-gui-wizard-importDocument-ImportDocumentWizardAction.shadow">
<attr name="originalFile" stringvalue="Actions/File/jav-gui-wizard-importDocument-ImportDocumentWizardAction.instance"/>
<attr name="position" intvalue="0"/>
</file>
</folder>
</file>-->
<!-- </folder>-->
<folder name="Export">
<attr name="position" intvalue="1150"/>
<file name="jav-gui-actions-ExportAsDocXML.shadow">
Expand Down
3 changes: 2 additions & 1 deletion CompleteImageView/nbproject/project.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
javac.source=1.7
javac.compilerargs=-Xlint -Xlint:-serial
nbm.homepage=http://ocr.cis.uni-muenchen.de/
license.file=../LICENSE.md
nbm.homepage=http://thorstenv.github.io/PoCoTo
nbm.module.author=Thorsten Vobl (thorsten.vobl@googlemail.com)
nbm.needs.restart=true
spec.version.base=1.0.1
1 change: 0 additions & 1 deletion Concordance/manifest.mf
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
Manifest-Version: 1.0
OpenIDE-Module: jav.concordance
OpenIDE-Module-Implementation-Version: 02052013
OpenIDE-Module-Localizing-Bundle: jav/concordance/Bundle.properties
Expand Down
6 changes: 5 additions & 1 deletion Concordance/nbproject/project.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
javac.source=1.7
javac.compilerargs=-Xlint -Xlint:-serial
spec.version.base=1.0
license.file=../LICENSE.md
nbm.homepage=http://thorstenv.github.io/PoCoTo
nbm.module.author=Thorsten Vobl (thorsten.vobl@googlemail.com)
nbm.needs.restart=true
spec.version.base=1.0.1
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public void init(ArrayList<Token> t, String n) {
} else {
temp.setCandidateString("");
}
tokens.put(tok.getIndexInDocument(), temp);
tokens.put(tok.getID(), temp);
}

instance = this;
Expand Down Expand Up @@ -307,7 +307,7 @@ public void itemStateChanged(ItemEvent ie) {
if (!cce.isCorrected() && !cce.isDisabled() && cce.isSelected()) {

if(!tvRegistry.contains(tok)) {
// if (tok.getIndexInDocument() < actualPage * tokensPerPage || tok.getIndexInDocument() > actualPage * tokensPerPage + tokensInPage) {
// if (tok.getID() < actualPage * tokensPerPage || tok.getID() > actualPage * tokensPerPage + tokensInPage) {
removeSelected(1);
}

Expand Down
3 changes: 2 additions & 1 deletion Cookies/nbproject/project.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
javac.source=1.7
javac.compilerargs=-Xlint -Xlint:-serial
nbm.homepage=http://ocr.cis.uni-muenchen.de/
license.file=../LICENSE.md
nbm.homepage=http://thorstenv.github.io/PoCoTo
nbm.module.author=Thorsten Vobl (thorsten.vobl@googlemail.com)
nbm.needs.restart=true
spec.version.base=1.0.1
5 changes: 4 additions & 1 deletion CorrectionBackend/nbproject/project.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
javac.source=1.7
javac.compilerargs=-Xlint -Xlint:-serial
license.file=../../lizenzen/jade_display/LICENSE.txt
license.file=../LICENSE.md
nbm.homepage=http://thorstenv.github.io/PoCoTo
nbm.module.author=Thorsten Vobl (thorsten.vobl@googlemail.com)
nbm.needs.restart=true
spec.version.base=1.0.1
14 changes: 2 additions & 12 deletions CorrectionBackend/src/jav/correctionBackend/CorrectionSystem.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ private int newDocDatabase(String dbPath) {
// s.execute("CREATE TABLE token( tokenID IDENTITY(0), indexInDocument BIGINT, orig_id INTEGER, wOCR VARCHAR(60), wCorr VARCHAR(60), isNormal BOOLEAN, isCorrected BOOLEAN, numCands SMALLINT, cleft SMALLINT, cright SMALLINT, ctop SMALLINT, cbottom SMALLINT, special_seq VARCHAR(20), imageFile VARCHAR(200), isSuspicious BOOLEAN, pageIndex SMALLINT, topSuggestion VARCHAR(50), topCandDLev SMALLINT)");
s.execute("CREATE TABLE token( tokenID BIGINT GENERATED BY DEFAULT AS IDENTITY (START WITH 0, INCREMENT BY 1) PRIMARY KEY, indexInDocument INTEGER, orig_id INTEGER, wOCR VARCHAR(60), wCorr VARCHAR(60), isNormal BOOLEAN, isCorrected BOOLEAN, numCands SMALLINT, cleft SMALLINT, cright SMALLINT, ctop SMALLINT, cbottom SMALLINT, special_seq VARCHAR(20), imageFile VARCHAR(200), isSuspicious BOOLEAN, pageIndex SMALLINT, topSuggestion VARCHAR(50), topCandDLev SMALLINT)");
// s.execute("CREATE TABLE page( index SMALLINT GENERATED BY DEFAULT AS IDENTITY (START WITH 0, INCREMENT BY 1) PRIMARY KEY, token_index_from INTEGER, token_index_to INTEGER, imageFile VARCHAR(200))");
s.execute("CREATE TABLE candidate( tokenID BIGINT, rank SMALLINT, suggestion VARCHAR(50), interpretation VARCHAR(200), voteweight REAL, dlev TINYINT, PRIMARY KEY (tokenID, rank))");
s.execute("CREATE TABLE candidate( tokenID INTEGER, rank SMALLINT, suggestion VARCHAR(50), interpretation VARCHAR(200), voteweight REAL, dlev TINYINT, PRIMARY KEY (tokenID, rank))");
s.execute("CREATE TABLE pattern (patternID INTEGER GENERATED BY DEFAULT AS IDENTITY (START WITH 0, INCREMENT BY 1) PRIMARY KEY, leftpart VARCHAR(5), rightpart VARCHAR(5), freq INTEGER, corrected INTEGER)");
s.execute("CREATE TABLE patternoccurrence (patternID INTEGER, part INTEGER, PRIMARY KEY (patternID, part), wocr_lc VARCHAR(50), wsuggestion VARCHAR(50), freq INTEGER, corrected INTEGER)");
s.execute("CREATE TABLE undoredo( operation_id SMALLINT, part SMALLINT, type VARCHAR(10), PRIMARY KEY(operation_id, part, type), edit_type VARCHAR(20), sql_command VARCHAR(100))");
Expand All @@ -102,17 +102,7 @@ private int newDocDatabase(String dbPath) {
ex.printStackTrace();
}
return retval;
}

/**
 * Removes every row from the {@code candidate} table of the document
 * database.
 *
 * <p>The statement is released by try-with-resources on every path, so it
 * is no longer leaked when the TRUNCATE fails. A failure is logged at
 * SEVERE level and not propagated to the caller.
 *
 * <p>NOTE(review): the connection returned by {@code jcp.getConnection()}
 * is deliberately left open, exactly as before; whether it should be
 * closed (returned to a pool) here depends on what {@code jcp} is — confirm.
 */
public void clearCandidates() {
    try (Statement s = jcp.getConnection().createStatement()) {
        s.execute("TRUNCATE TABLE candidate");
    } catch (SQLException ex) {
        Logger.getLogger(CorrectionSystem.class.getName()).log(Level.SEVERE, null, ex);
    }
}
}

public int newDocumentFromOCRCXML( String dbPath, String ocrcxmlfile, String imagedir, ProgressHandle ph ) {
int retval = -1;
Expand Down
18 changes: 10 additions & 8 deletions CorrectionBackend/src/jav/correctionBackend/DefaultDocument.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,29 +136,31 @@ protected void loadNumberOfTokensFromDB() {
}

@Override
public ArrayList<Integer> deleteToken(int iDFrom, int iDAfterTo) throws SQLException {
public ArrayList<Integer> deleteToken(int iDFrom, int iDTo) throws SQLException {

Connection conn = null;
PreparedStatement setIndex = null;
PreparedStatement moveIndex = null;
PreparedStatement undo_redo = null;

Token from = this.getTokenByID(iDFrom);
Token to = this.getTokenByID(iDTo);

int indexFrom = this.getTokenByID(iDFrom).getIndexInDocument();
int indexAfterTo = this.getTokenByID(iDAfterTo).getIndexInDocument();
int indexFrom = from.getIndexInDocument();
int indexTo = to.getIndexInDocument();

try {
ArrayList<Integer> retval = new ArrayList<>();
if (indexAfterTo < indexFrom) {
if (indexTo < indexFrom) {
return null;
// throw new OCRCException("JAV.DOCUMENT.DELETETOKEN invalid range");
}

if (indexFrom == indexAfterTo) {
if (indexFrom == indexTo) {
return null;
}

int thisPageIndex = this.getTokenByIndex(indexFrom).getPageIndex();
if (thisPageIndex != this.getTokenByIndex(indexAfterTo - 1).getPageIndex()) {
if ( from.getPageIndex() != to.getPageIndex()) {
return null;
// throw new OCRCException("JAV.DOCUMENT.DELETETOKEN: cannot erase across page borders");
}
Expand All @@ -172,7 +174,7 @@ public ArrayList<Integer> deleteToken(int iDFrom, int iDAfterTo) throws SQLExcep
undo_redo = conn.prepareStatement("INSERT INTO undoredo VALUES( ?,?,?,?,? )");

int i;
for (i = indexFrom; i < indexAfterTo; i++) {
for (i = indexFrom; i <= indexTo; i++) {
Token temp = this.getTokenByIndex(i);
retval.add(temp.getID());

Expand Down
36 changes: 26 additions & 10 deletions CorrectionBackend/src/jav/correctionBackend/Document.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ protected void addCandidate(Candidate c) {
Connection conn = null;
try {
conn = jcp.getConnection();
PreparedStatement prep = conn.prepareStatement("INSERT INTO CANDIDATE VALUES( ?,?,?,?,?,? )");
PreparedStatement prep = conn.prepareStatement("INSERT INTO candidate VALUES( ?,?,?,?,?,? )");
prep.setInt(1, c.getTokenID());
prep.setInt(2, c.getRank());
prep.setString(3, c.getSuggestion());
Expand All @@ -102,6 +102,8 @@ protected void addCandidate(Candidate c) {

prep.addBatch();
prep.executeBatch();
prep.close();
conn.close();
} catch (SQLException ex) {
}
}
Expand All @@ -110,7 +112,7 @@ protected void addPattern(Pattern p) {
Connection conn = null;
try {
conn = jcp.getConnection();
PreparedStatement prep = conn.prepareStatement("INSERT INTO PATTERN VALUES( null, ?, ?, ?, ? )");
PreparedStatement prep = conn.prepareStatement("INSERT INTO pattern VALUES( null, ?, ?, ?, ? )");
prep.setString(1, p.getLeft());
prep.setString(2, p.getRight());
prep.setInt(3, p.getOccurencesN());
Expand All @@ -128,7 +130,7 @@ protected void addPatternOccurrence(PatternOccurrence po) {
Connection conn = null;
try {
conn = jcp.getConnection();
PreparedStatement prep = conn.prepareStatement("INSERT INTO PATTERNOCCURRENCE VALUES( ?, ?, ?, ?, ?, ? )");
PreparedStatement prep = conn.prepareStatement("INSERT INTO patternoccurrence VALUES( ?, ?, ?, ?, ?, ? )");
prep.setInt(1, po.getPatternID());
prep.setInt(2, po.getPart());
prep.setString(3, po.getWOCR_LC());
Expand All @@ -143,8 +145,21 @@ protected void addPatternOccurrence(PatternOccurrence po) {
} catch (SQLException ex) {
}
}

/**
 * Removes every row from the {@code pattern} and {@code patternoccurrence}
 * tables of the document database.
 *
 * <p>The connection obtained from {@code jcp} and the statement created on
 * it are released by try-with-resources on every path, including when one
 * of the TRUNCATE statements fails. A failure is logged at SEVERE level
 * instead of being silently discarded; it is not propagated to the caller.
 */
public void clearPatterns() {
    try (Connection conn = jcp.getConnection();
            Statement s = conn.createStatement()) {
        s.executeUpdate("TRUNCATE TABLE pattern");
        // Only reached when the first TRUNCATE succeeded.
        s.executeUpdate("TRUNCATE TABLE patternoccurrence");
    } catch (SQLException ex) {
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, ex);
    }
}

public void truncateCandidates() {
public void clearCandidates() {
Connection conn = null;
try {
conn = jcp.getConnection();
Expand Down Expand Up @@ -1516,14 +1531,15 @@ public ArrayList<Integer> mergeRightward(int iD) throws SQLException {
if (next == null) {
return null;
}

boolean skipSpace = false;
// decide if immediate neighbour should be skipped,
// e.g. if it contains just whitespace
if (next.getWDisplay().equals(" ")) {
end = this.getNextToken(next.getID());
if (end == null) {
try {
// delete whitespace at end of document (token after whitespace == null)
this.deleteToken(next.getID());
} catch (SQLException ex) {
}
Expand All @@ -1535,7 +1551,7 @@ public ArrayList<Integer> mergeRightward(int iD) throws SQLException {
return this.mergeRightward(iD, (skipSpace ? 2 : 1));
}

public abstract ArrayList<Integer> mergeRightward(int iD, int numTok) throws SQLException;
public abstract ArrayList<Integer> mergeRightward(int iD, int numToMerge) throws SQLException;

public void setSuspicious(int tokenID, String val) {
try {
Expand Down Expand Up @@ -1779,7 +1795,7 @@ public boolean setCorrected(ArrayList<Integer> tokenIDs, boolean b) throws SQLEx
}

public HashMap<String, OCRErrorInfo> computeErrorFreqList() {
HashMap<String, OCRErrorInfo> freqList = new HashMap<String, OCRErrorInfo>();
HashMap<String, OCRErrorInfo> freqList = new HashMap<>();
Iterator<Token> it = this.tokenIterator();
while (it.hasNext()) {
Token tok = it.next();
Expand Down Expand Up @@ -1967,9 +1983,9 @@ protected CandidateIterator(Connection c, int tokenID) {
try {
conn = c;
s = conn.createStatement();
rs = s.executeQuery("SELECT * FROM CANDIDATE WHERE tokenID =" + tokenID + "ORDER BY rank ASC");
rs = s.executeQuery("SELECT * FROM candidate WHERE tokenID=" + tokenID + " ORDER BY rank ASC");
} catch (SQLException ex) {
Logger.getLogger(TokenIterator.class.getName()).log(Level.SEVERE, null, ex);
ex.printStackTrace();
}
}

Expand Down Expand Up @@ -2167,7 +2183,7 @@ protected PatternOccurrenceIterator(Connection c, int patternID) {
try {
conn = c;
s = conn.createStatement();
rs = s.executeQuery("SELECT * FROM PATTERNOCCURRENCE WHERE patternID =" + patternID + " ORDER BY freq ASC");
rs = s.executeQuery("SELECT * FROM PATTERNOCCURRENCE WHERE patternID=" + patternID + " ORDER BY freq ASC");
} catch (SQLException ex) {
Logger.getLogger(TokenIterator.class.getName()).log(Level.SEVERE, null, ex);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public void parse(Document d, String f) {

if (d != null) {
this.doc = d;
this.doc.truncateCandidates();
this.doc.clearCandidates();
XMLReader xr;
try {
xr = XMLReaderFactory.createXMLReader();
Expand Down Expand Up @@ -169,6 +169,7 @@ public void endElement(String uri, String nname, String qname) {
if (matcher.matches()) {
rank++;
tempcand = new Candidate(tokenID, rank, matcher.group(1), matcher.group(2), Double.parseDouble(matcher.group(3)), Integer.parseInt(matcher.group(4)));
System.out.println("Adding candidate: " + tokenID + " " + rank + " " + matcher.group(1) + " " + matcher.group(2));
doc.addCandidate(tempcand);
if (rank == 1) {
doc.setTopCandDLev(tokenID, Integer.parseInt(matcher.group(4)));
Expand Down
Loading

0 comments on commit a380ef5

Please sign in to comment.