Browse files

Initial commit

  • Loading branch information...
0 parents commit 671edd31661aba78b33c1f9cca7069ead9864b95 @dakrone committed Sep 2, 2010
4 .gitignore
@@ -0,0 +1,4 @@
+pom.xml
+*jar
+lib
+classes
27 README.markdown
@@ -0,0 +1,27 @@
+# syndicate
+
+Fun with NLP and synonyms
+
+## Usage
+
+Replace the value of the `*apikey*` var in src/syndicate/core.clj with your API key from http://words.bighugelabs.com/api.php
+
+ lein deps
+ lein repl
+
+ (use 'syndicate.core)
+ (replace-text "Once she got that second acceptance, she began to show little ways that her identification with Helga wasn't as complete as she said. She felt free, bit by bit, to acustom me to a personality that wasn't Helga's, but her own.")
+
+ "Once she got that second toleration , she began to render little structure that her personal identity with Helga was n't as complete as she said . She matte free , bit by public presentation , to acustom me to a attribute that was n't Helga 's , but her own ."
+
+This is probably not the best example sentence, but the source is short enough that it is easy to figure out how it works.
+
+## Installation
+
+Why on earth would you want to install this? It's just for fun.
+
+## License
+
+Copyright (C) 2010 Lee Hinman
+
+Distributed under the BSD license.
BIN models/EnglishSD.bin.gz
Binary file not shown.
BIN models/EnglishTok.bin.gz
Binary file not shown.
BIN models/tag.bin.gz
Binary file not shown.
7 project.clj
@@ -0,0 +1,7 @@
+;; Leiningen project definition for syndicate (built with `lein deps`, see README).
+(defproject syndicate "1.0.0-SNAPSHOT"
+ :description "Fun with synonyms."
+ ;; clj-http, clojure-json and opennlp back the namespaces required by
+ ;; src/syndicate/core.clj (clj-http.client, org.danlarkin.json, opennlp.nlp).
+ ;; NOTE(review): clojure-contrib is not referenced by core.clj as shown here;
+ ;; confirm whether it is still needed.
+ :dependencies [[org.clojure/clojure "1.2.0"]
+ [org.clojure/clojure-contrib "1.2.0"]
+ [clj-http "0.1.1"]
+ [org.danlarkin/clojure-json "1.1"]
+ [org.clojars.thnetos/opennlp "0.0.5"]])
77 src/syndicate/core.clj
@@ -0,0 +1,77 @@
+(ns syndicate.core
+ (:require [org.danlarkin.json :as json])
+ (:require [clj-http.client :as http])
+ (:require [opennlp.nlp :as nlp])
+ (:require [opennlp.tools.filters :as filters]))
+
+
+;; API key for words.bighugelabs.com; ships as a placeholder value.
+(def *apikey* "YOUR-API-KEY")
+
+;; At load time, remind the user if the placeholder key is still in place.
+(when (= *apikey* "YOUR-API-KEY")
+  (println "Replace 'YOUR-API-KEY' in src/syndicate/core.clj with your api key. You can get an API key here: http://words.bighugelabs.com/api.php")
+  (flush))
+
+;; Base URL for lookups: the API root followed by the key and a trailing slash.
+(def *bighuge* (str "http://words.bighugelabs.com/api/2/" *apikey* "/"))
+
+;; When true, the functions in this namespace print their intermediate results.
+(def *debug* false)
+
+(defn get-words
+  "Fetches the thesaurus entry for `word` from the URL built as
+  *bighuge* + word + \"/json\" and returns the decoded JSON body.
+
+  Returns nil when `word` is nil, or when the HTTP request or the JSON
+  decoding throws. Lookups are best-effort, so failures never propagate to
+  the caller; they are reported on stdout only when *debug* is true."
+  [word]
+  (when-not (nil? word)
+    (try
+      ;; NOTE(review): `word` is concatenated into the URL as-is, without
+      ;; URL-encoding; confirm callers only pass plain tokens.
+      (let [url (str *bighuge* word "/json")
+            response (http/get url)]
+        (json/decode-from-str (:body response)))
+      (catch Exception e
+        ;; Keep the nil result, but do not hide the cause completely: a bad
+        ;; API key or a network failure otherwise looks like "no synonyms".
+        (when *debug*
+          (println "lookup failed for" word "-" (.getMessage e)))
+        nil))))
+
+
+(defn replace-noun
+  "Returns a randomly chosen entry from the :syn list under :noun in the
+  result of (get-words word), or nil when nothing is found there.
+  Prints the swap when *debug* is true."
+  [word]
+  (let [synonyms (-> word get-words :noun :syn)
+        pick (rand-int (count synonyms))
+        replacement (get synonyms pick)]
+    (when *debug*
+      (println "swapping" word "for" replacement))
+    replacement))
+
+
+(defn replace-verb
+  "Returns a randomly chosen entry from the :syn list under :verb in the
+  result of (get-words word), or nil when nothing is found there.
+  Prints the swap when *debug* is true."
+  [word]
+  (let [synonyms (-> word get-words :verb :syn)
+        pick (rand-int (count synonyms))
+        replacement (get synonyms pick)]
+    (when *debug*
+      (println "swapping" word "for" replacement))
+    replacement))
+
+
+(defn replace-word
+  "Returns a substitute for `word` based on its part-of-speech `tag`.
+
+  Words whose string form has two characters or fewer are returned as-is.
+  Tags matching N.* go through replace-noun, tags matching V.* through
+  replace-verb, and any other tag leaves the word alone. If the chosen
+  replacer yields nothing, the original word is returned."
+  [word tag]
+  (let [text (str word)]
+    (if (<= (count text) 2)
+      word
+      (let [pos (str tag)]
+        (or (cond
+              (re-matches #"N.*" pos) (replace-noun text)
+              (re-matches #"V.*" pos) (replace-verb text)
+              :else word)
+            word)))))
+
+
+(defn replace-sentence
+  "Takes a sequence of entries whose first item is a word and whose second
+  item is its tag, and returns a lazy sequence of the results of calling
+  replace-word on each entry."
+  [sentence]
+  (for [pair sentence]
+    (replace-word (first pair) (second pair))))
+
+
+(defn replace-text
+  "Splits `text` into sentences, tokenizes and POS-tags each sentence, runs
+  every tagged sentence through replace-sentence, and returns the resulting
+  tokens joined by single spaces as one string.
+
+  The three model files are given as paths relative to the working
+  directory and are loaded on every call. Intermediate results are printed
+  when *debug* is true. Returns \"\" when no tokens are produced."
+  [text]
+  (let [get-sentences (nlp/make-sentence-detector "models/EnglishSD.bin.gz")
+        tokenize (nlp/make-tokenizer "models/EnglishTok.bin.gz")
+        pos-tag (nlp/make-pos-tagger "models/tag.bin.gz")
+        sentences (get-sentences text)
+        _ (when *debug* (println sentences))
+        tokens (map tokenize sentences)
+        _ (when *debug* (println tokens))
+        taggedwords (map pos-tag tokens)
+        _ (when *debug* (println taggedwords))
+        newwords (map replace-sentence taggedwords)
+        _ (when *debug* (println newwords))]
+    ;; interpose + apply str handles an empty token list (yielding "") and
+    ;; avoids rebuilding the whole string for every token. The previous
+    ;; reduce without an initial value called its two-argument fn with no
+    ;; arguments on empty input and threw.
+    (apply str (interpose " " (flatten newwords)))))
+
6 test/syndicate/test/core.clj
@@ -0,0 +1,6 @@
+(ns syndicate.test.core
+ (:use [syndicate.core] :reload)
+ (:use [clojure.test]))
+
+(deftest replace-me
+  ;; Every case below takes a path through replace-word that returns the
+  ;; input word without calling replace-noun/replace-verb, so the results
+  ;; are deterministic and no thesaurus lookup is attempted.
+  (testing "words of two characters or fewer are returned untouched"
+    (is (= "to" (replace-word "to" "TO")))
+    (is (= "is" (replace-word "is" "VBZ"))))
+  (testing "tags that match neither N.* nor V.* leave the word alone"
+    (is (= "little" (replace-word "little" "JJ"))))
+  (testing "replace-sentence applies replace-word to each [word tag] pair"
+    (is (= ["a" "of"] (replace-sentence [["a" "DT"] ["of" "IN"]])))
+    (is (empty? (replace-sentence [])))))

0 comments on commit 671edd3

Please sign in to comment.