Permalink
Browse files

Add auxiliary json files for spacy

  • Loading branch information...
1 parent cb4c2c3 commit 58b898dec52320025cbb4197d166e934aac9b276 Matthew Honnibal committed Mar 8, 2016
Showing with 233 additions and 0 deletions.
  1. +194 −0 spacy_data/vocab/gazetteer.json
  2. +38 −0 spacy_data/vocab/lemma_rules.json
  3. +1 −0 spacy_data/vocab/oov_prob
@@ -0,0 +1,194 @@
+{
+ "Reddit": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "reddit"}]
+ ]
+ ],
+ "SeptemberElevenAttacks": [
+ "EVENT",
+ {},
+ [
+ [
+ {"orth": "9/11"}
+ ],
+ [
+ {"lower": "september"},
+ {"orth": "11"}
+ ]
+ ]
+ ],
+ "Linux": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "linux"}]
+ ]
+ ],
+ "Haskell": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "haskell"}]
+ ]
+ ],
+ "HaskellCurry": [
+ "PERSON",
+ {},
+ [
+ [
+ {"lower": "haskell"},
+ {"lower": "curry"}
+ ]
+ ]
+ ],
+ "Javascript": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "javascript"}]
+ ]
+ ],
+ "CSS": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "css"}],
+ [{"lower": "css3"}]
+ ]
+ ],
+ "displaCy": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "displacy"}]
+ ]
+ ],
+ "spaCy": [
+ "PRODUCT",
+ {},
+ [
+ [{"orth": "spaCy"}]
+ ]
+ ],
+
+ "HTML": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "html"}],
+ [{"lower": "html5"}]
+ ]
+ ],
+ "Python": [
+ "PRODUCT",
+ {},
+ [
+ [{"orth": "Python"}]
+ ]
+ ],
+ "Ruby": [
+ "PRODUCT",
+ {},
+ [
+ [{"orth": "Ruby"}]
+ ]
+ ],
+ "Digg": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "digg"}]
+ ]
+ ],
+ "FoxNews": [
+ "ORG",
+ {},
+ [
+ [{"orth": "Fox"},
+ {"orth": "News"}]
+ ]
+ ],
+ "Google": [
+ "ORG",
+ {},
+ [
+ [{"lower": "google"}]
+ ]
+ ],
+ "Mac": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "mac"}]
+ ]
+ ],
+ "Wikipedia": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "wikipedia"}]
+ ]
+ ],
+ "Windows": [
+ "PRODUCT",
+ {},
+ [
+ [{"orth": "Windows"}]
+ ]
+ ],
+ "Dell": [
+ "ORG",
+ {},
+ [
+ [{"lower": "dell"}]
+ ]
+ ],
+ "Facebook": [
+ "ORG",
+ {},
+ [
+ [{"lower": "facebook"}]
+ ]
+ ],
+ "Blizzard": [
+ "ORG",
+ {},
+ [
+ [{"orth": "Blizzard"}]
+ ]
+ ],
+ "Ubuntu": [
+ "ORG",
+ {},
+ [
+ [{"orth": "Ubuntu"}]
+ ]
+ ],
+ "Youtube": [
+ "PRODUCT",
+ {},
+ [
+ [{"lower": "youtube"}]
+ ]
+ ],
+ "false_positives": [
+ null,
+ {},
+ [
+ [{"orth": "Shit"}],
+ [{"orth": "Weed"}],
+ [{"orth": "Cool"}],
+ [{"orth": "Btw"}],
+ [{"orth": "Bah"}],
+ [{"orth": "Bullshit"}],
+ [{"orth": "Lol"}],
+ [{"orth": "Yo"}, {"lower": "dawg"}],
+ [{"orth": "Yay"}],
+ [{"orth": "Ahh"}],
+ [{"orth": "Yea"}],
+ [{"orth": "Bah"}]
+ ]
+ ]
+}
@@ -0,0 +1,38 @@
+{
+ "noun": [
+ ["s", ""],
+ ["ses", "s"],
+ ["ves", "f"],
+ ["xes", "x"],
+ ["zes", "z"],
+ ["ches", "ch"],
+ ["shes", "sh"],
+ ["men", "man"],
+ ["ies", "y"]
+ ],
+
+ "verb": [
+ ["s", ""],
+ ["ies", "y"],
+ ["es", "e"],
+ ["es", ""],
+ ["ed", "e"],
+ ["ed", ""],
+ ["ing", "e"],
+ ["ing", ""]
+ ],
+
+ "adj": [
+ ["er", ""],
+ ["est", ""],
+ ["er", "e"],
+ ["est", "e"]
+ ],
+
+ "punct": [
+ ["\u201c", "\""],
+ ["\u201d", "\""],
+ ["\u2018", "'"],
+ ["\u2019", "'"]
+ ]
+}
@@ -0,0 +1 @@
+-19.502029

0 comments on commit 58b898d

Please sign in to comment.