diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..380d2e2
Binary files /dev/null and b/.DS_Store differ
diff --git a/ML-Crawler/.DS_Store b/ML-Crawler/.DS_Store
new file mode 100644
index 0000000..1ff656f
Binary files /dev/null and b/ML-Crawler/.DS_Store differ
diff --git a/ML-Crawler/ML-Crawler.jar b/ML-Crawler/ML-Crawler.jar
new file mode 100644
index 0000000..b3873f2
Binary files /dev/null and b/ML-Crawler/ML-Crawler.jar differ
diff --git a/ML-Crawler/configurations/.DS_Store b/ML-Crawler/configurations/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/ML-Crawler/configurations/.DS_Store differ
diff --git a/ML-Crawler/configurations/sample-setup.xml b/ML-Crawler/configurations/sample-setup.xml
new file mode 100644
index 0000000..41b0b23
--- /dev/null
+++ b/ML-Crawler/configurations/sample-setup.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+ http://topsy.com/s?q=Nyquil
+ //a[starts-with(., 'next')]/@href
+ //span[@class="twitter-post-text translatable language-en"]
+ 10
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+ {normalize-space($desc)}
+ {normalize-space($user)}
+
+ ]]>
+
+
+
+ ]]>
+
+
+
\ No newline at end of file
diff --git a/ML-Crawler/readme.txt b/ML-Crawler/readme.txt
new file mode 100644
index 0000000..0eec362
--- /dev/null
+++ b/ML-Crawler/readme.txt
@@ -0,0 +1,61 @@
+================== ML Crawler ==================
+This crawler wraps the functionality provided by an open-source crawler.
+It takes crawling instructions in XML format and returns the
+crawling results in XML format for ML ingestion.
+ *** For internal use only ***
+
+Steps:
+1. Unzip the folder
+2. Folder Structure:
+ a) ML-Crawler.jar: Jar file which wraps the functionality provided
+ by the open-source crawler.
+ b) configurations:
+ setup files which should not be touched
+ i) functions.xml
+ ii) xquery.xml
+ iii) crawler.xml
+ Along with the setup files mentioned above, this folder is also the
+ place-holder for the crawling instructions (e.g. sample-setup.xml).
+ The crawler requires the following items to be configured before it is executed. To work out the values for these items, do a 'view-source' on the HTML page you want to crawl.
+
+
+
+
+
+
+
+
+ http://topsy.com/s?q=Nyquil
+ //a[starts-with(., 'next')]/@href
+ //span[@class="twitter-post-text translatable language-en"]
+ 10
+
+
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+ let $desc := data($item//*[@class='twitter-post-text translatable language-en'])
+ let $user := data($item//*[@class='author-name'])
+ return
+
+ {normalize-space($desc)}
+ {normalize-space($user)}
+
+ ]]>
+
+
+
+ ]]>
+
+
\ No newline at end of file
diff --git a/ML-Crawler/sample/.DS_Store b/ML-Crawler/sample/.DS_Store
new file mode 100644
index 0000000..7316d3b
Binary files /dev/null and b/ML-Crawler/sample/.DS_Store differ
diff --git a/ML-Crawler/sample/configurations/nyquil.xml b/ML-Crawler/sample/configurations/nyquil.xml
new file mode 100644
index 0000000..13ce730
--- /dev/null
+++ b/ML-Crawler/sample/configurations/nyquil.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+ http://topsy.com/s?q=Nyquil
+ //a[starts-with(., 'next')]/@href
+ //span[@class="twitter-post-text translatable language-en"]
+ 10
+
+
+
+
+
+ ]]>
+
+
+
+
+
+
+ {normalize-space($desc)}
+ {normalize-space($user)}
+
+ ]]>
+
+
+
+ ]]>
+
+
+
\ No newline at end of file
diff --git a/ML-Crawler/setup/crawler.xml b/ML-Crawler/setup/crawler.xml
new file mode 100644
index 0000000..430b22d
--- /dev/null
+++ b/ML-Crawler/setup/crawler.xml
@@ -0,0 +1,86 @@
+
+
+
+
+
+ http://web-harvest.sourceforge.net/index.php
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ML-Crawler/setup/functions.xml b/ML-Crawler/setup/functions.xml
new file mode 100644
index 0000000..4c53097
--- /dev/null
+++ b/ML-Crawler/setup/functions.xml
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${sys.fullUrl(pageUrl.toString(), nextLinkUrl.toString())}
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ML-Crawler/setup/xquery.xml b/ML-Crawler/setup/xquery.xml
new file mode 100644
index 0000000..207b2a3
--- /dev/null
+++ b/ML-Crawler/setup/xquery.xml
@@ -0,0 +1,80 @@
+
+
+
+
+
+
+ true
+ 12
+
+ 3
+ 7
+ 14
+ 18
+ 27
+
+ ABCDEFGH123456
+
+
+ one |
+ two |
+ tree |
+
+
+ four |
+ five |
+ six |
+
+
+ ]]>
+
+
+ { math:sqrt($num) + 1 }
+
+ };
+
+ (: resulting XML :)
+
+
+ { if ($logicvalue) then 1 else 2 },
+ { $logicvalue eq ($numbervalue gt 15) }
+
+
+ { $numbervalue * 2 + 10 },
+ { $numbervalue instance of xs:float },
+ { round($numbervalue) }
+
+
+ { concat($stringvalue, $logicvalue, $numbervalue) }
+
+
+ { fn:myFunc($intseq) }
+ { concat($intseq[1], "mama") }
+
+
+ { for $td in $doc//td return $td }
+
+
+ ]]>
+
+
+
+
\ No newline at end of file