Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
490 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
src/main/java/de/grundid/twiki/jpa/HibernateJpaVendorAdapter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package de.grundid.twiki.jpa; | ||
|
||
import java.util.Map; | ||
|
||
import org.hibernate.cfg.ImprovedNamingStrategy; | ||
|
||
public class HibernateJpaVendorAdapter extends org.springframework.orm.jpa.vendor.HibernateJpaVendorAdapter { | ||
|
||
@Override | ||
public Map<String, Object> getJpaPropertyMap() { | ||
Map<String, Object> map = super.getJpaPropertyMap(); | ||
map.put("hibernate.ejb.naming_strategy", ImprovedNamingStrategy.class.getName()); | ||
return map; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package de.grundid.twiki.jpa; | ||
|
||
import javax.persistence.Entity; | ||
import javax.persistence.GeneratedValue; | ||
import javax.persistence.Id; | ||
|
||
@Entity | ||
public class WikiEntry { | ||
|
||
@Id | ||
@GeneratedValue | ||
private Integer wikiEntryId; | ||
private String source; | ||
private String title; | ||
private String category; | ||
private String entry; | ||
|
||
public Integer getWikiEntryId() { | ||
return wikiEntryId; | ||
} | ||
|
||
public void setWikiEntryId(Integer wikiEntryId) { | ||
this.wikiEntryId = wikiEntryId; | ||
} | ||
|
||
public String getSource() { | ||
return source; | ||
} | ||
|
||
public void setSource(String source) { | ||
this.source = source; | ||
} | ||
|
||
public String getTitle() { | ||
return title; | ||
} | ||
|
||
public void setTitle(String title) { | ||
this.title = title; | ||
} | ||
|
||
public String getCategory() { | ||
return category; | ||
} | ||
|
||
public void setCategory(String category) { | ||
this.category = category; | ||
} | ||
|
||
public String getEntry() { | ||
return entry; | ||
} | ||
|
||
public void setEntry(String entry) { | ||
this.entry = entry; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package de.grundid.twiki.jpa; | ||
|
||
import org.springframework.data.jpa.repository.JpaRepository; | ||
|
||
public interface WikiEntryRepository extends JpaRepository<WikiEntry, Integer> { | ||
|
||
} |
46 changes: 0 additions & 46 deletions
46
src/main/java/de/grundid/twiki/parser/DbWriterConsumer.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
package de.grundid.twiki.parser; | ||
|
||
import java.util.HashSet; | ||
import java.util.Set; | ||
|
||
/**
 * Static lookup data for processing Wiktionary pages: part-of-speech names
 * and the title prefixes that mark a page as something other than a plain
 * word entry.
 */
public class WiktionaryData {

    /** The shorter list of part-of-speech names. */
    public static String[] partsOfSpeech = { "Substantiv", "KonjugierteForm", "DeklinierteForm", "PartizipI",
            "Adjektiv", "Verb", "Abkürzung", "Redewendung", "Adverb", "PartizipII", "Wortverbindung",
            "ErweiterterInfinitiv", "Numerale", "Toponym", "Nachname", "Personalpronomen" };

    /**
     * The extended list of part-of-speech names. It starts with the same
     * entries as {@link #partsOfSpeech} and continues with additional ones.
     */
    public static String[] partsOfSpeechAll = { "Substantiv", "KonjugierteForm", "DeklinierteForm", "PartizipI",
            "Adjektiv", "Verb", "Abkürzung", "Redewendung", "Adverb", "PartizipII", "Wortverbindung",
            "ErweiterterInfinitiv", "Numerale", "Toponym", "Nachname", "Personalpronomen", "Interjektion",
            "Präposition", "Sprichwort", "Konjunktion", "GebundenesLexem", "Präfix", "Gerundium", "Eigenname",
            "Suffix", "Vorname", "Ortsnamen-Grundwort", "Hiragana", "Indefinitpronomen", "Grußformel", "Pronomen",
            "Demonstrativpronomen", "Zahlzeichen", "Possessivpronomen", "Artikel", "Partikel", "Buchstabe",
            "Interrogativpronomen", "Kontraktion", "Subjunktion", "Symbol", "Reflexivpronomen", "Komparativ",
            "Onomatopoetikum", "Umschrift", "Hilfsverb", "Interrogativadverb", "Relativpronomen", "Zahl",
            "Gradpartikel", "Pronominaladverb", "Merkspruch", "Affix", "Antwortpartikel", "Konjunktionaladverb",
            "Superlativ", "Modalpartikel", "Negationspartikel", "Postposition", "Präfixoid",
            "ReflexivesPersonalpronomen", "Satzzeichen", "Fokuspartikel", "Hanzi", "Katakana",
            "KontraktionausPräpositionundArtikel", "ReflexivesPossessivpronomen", "Reziprokpronomen",
            "Singularetantum", "Suffixoid", "Zahladjektiv", "Zahlklassifikator" };

    /**
     * Title prefixes recognised by {@link #getPrefixIfAny(String)}. Matching
     * is case-sensitive and done with {@link String#startsWith(String)}.
     */
    public static Set<String> ignorePrefixes = new HashSet<String>();
    static {
        ignorePrefixes.add("MediaWiki:");
        ignorePrefixes.add("Wiktionary:");

        ignorePrefixes.add("Datei:");
        ignorePrefixes.add("Hilfe:");
        ignorePrefixes.add("Vorlage:");
        ignorePrefixes.add("Kategorie:");
        ignorePrefixes.add("Thesaurus:");
        ignorePrefixes.add("Verzeichnis:");

        ignorePrefixes.add("Appendix:");
        ignorePrefixes.add("Category:");
        ignorePrefixes.add("Help:");
        ignorePrefixes.add("Template:");
        ignorePrefixes.add("Rhymes:");
        // NOTE(review): misspelled variant listed next to "Rhymes:"; presumably
        // intentional to catch mistyped page titles - confirm before removing.
        ignorePrefixes.add("Rhmyes:");
        ignorePrefixes.add("Sign gloss:");
        ignorePrefixes.add("Summary:");
        ignorePrefixes.add("Thread:");
        ignorePrefixes.add("Transwiki:");
        ignorePrefixes.add("Unsupported titles/");
        ignorePrefixes.add("Wikisaurus:");
        ignorePrefixes.add("Citations:");
        ignorePrefixes.add("Concordance:");
        ignorePrefixes.add("File:");
        ignorePrefixes.add("Glossary:");
        ignorePrefixes.add("Index:");
        ignorePrefixes.add("Wiktionary talk:");
    }

    /**
     * Tells whether the title starts with one of the {@link #ignorePrefixes}.
     *
     * @param title page title; may be {@code null}
     * @return {@code true} if a known prefix matches, {@code false} otherwise
     *         (including for a {@code null} title)
     */
    public static boolean isPrefixed(String title) {
        return getPrefixIfAny(title) != null;
    }

    /**
     * Finds the entry of {@link #ignorePrefixes} that the title starts with.
     *
     * @param title page title; may be {@code null}
     * @return the matching prefix, or {@code null} if the title is
     *         {@code null} or starts with none of the known prefixes
     */
    public static String getPrefixIfAny(String title) {
        // A missing title has no prefix; report that instead of throwing a NullPointerException.
        if (title == null) {
            return null;
        }
        for (String prefix : WiktionaryData.ignorePrefixes) {
            if (title.startsWith(prefix)) {
                return prefix;
            }
        }
        return null;
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
...ava/de/grundid/twiki/parser/Consumer.java → ...undid/twiki/parser/consumer/Consumer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
src/main/java/de/grundid/twiki/parser/consumer/DbWriterConsumer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package de.grundid.twiki.parser.consumer; | ||
|
||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.stereotype.Component; | ||
|
||
import de.grundid.twiki.jpa.WikiEntry; | ||
import de.grundid.twiki.jpa.WikiEntryRepository; | ||
import de.grundid.twiki.parser.WiktionaryData; | ||
import de.grundid.twiki.parser.WiktionaryEntry; | ||
|
||
@Component | ||
public class DbWriterConsumer extends Consumer<WiktionaryEntry> { | ||
|
||
@Autowired | ||
private WikiEntryRepository wikiEntryRepository; | ||
|
||
private String currentSource; | ||
|
||
@Override | ||
protected void consume(WiktionaryEntry element) { | ||
|
||
WikiEntry wikiEntry = new WikiEntry(); | ||
wikiEntry.setSource(currentSource); | ||
wikiEntry.setTitle(element.getTitle()); | ||
wikiEntry.setEntry(element.getText()); | ||
wikiEntry.setCategory("_word_"); | ||
|
||
for (String prefix : WiktionaryData.ignorePrefixes) { | ||
if (element.getTitle().startsWith(prefix)) { | ||
wikiEntry.setCategory(prefix); | ||
break; | ||
} | ||
} | ||
|
||
wikiEntryRepository.save(wikiEntry); | ||
|
||
} | ||
|
||
public void setCurrentSource(String currentSource) { | ||
this.currentSource = currentSource; | ||
} | ||
} |
Oops, something went wrong.