forked from yql/yql-tables
/
deepdapper.xml
65 lines (65 loc) · 3.65 KB
/
deepdapper.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<?xml version="1.0" encoding="UTF-8"?>
<table xmlns="http://query.yahooapis.com/v1/schema/table.xsd">
<meta>
<author>Vic Mortelmans</author>
<description>The table extracts data from multiple linked webpages using user-provided dapper queries, where url="the starting page", listdapper="the name of a dapper that parses hyperlinks to datapages", nextdapper="the name of a dapper that parses the link to the next page", nextcount="the maximum number of pages that are accessed" and datadapper="the name of a dapper that parses the data that will be returned by the query". Providing nextdapper, nextcount and listdapper is optional.</description>
<sampleQuery>use 'http://github.com/vicmortelmans/yql-tables/raw/master/data/deepdapper.xml' as deepdapper;
select * from deepdapper where url = "http://developer.yahoo.net/forum/?s=50b9662690d907227e122678bdf36b1c&showforum=41&cookiecheckonly=1"
and datadapper = "ydnforumdata"
and nextdapper = "ydnforumnext"
and listdapper = "ydnforumlist"
and nextcount = 2</sampleQuery>
<documentationURL>http://docs.google.com/View?id=ddq89pzk_199f54jdwcb</documentationURL>
</meta>
<bindings>
<select produces="XML" itemPath="">
<urls>
<url></url>
</urls>
<inputs>
<!-- Query keys; the execute script of this select reads each of them by its id. -->
<!-- url: the starting page of the crawl. -->
<key id="url" type="xs:string" paramType="variable" required="true"/>
<!-- datadapper: name of the dapper that parses the data returned by the query. -->
<key id="datadapper" type="xs:string" paramType="variable" required="true"/>
<!-- nextdapper: name of the dapper that parses the link to the next page.
     When omitted, only the starting page is processed. -->
<key id="nextdapper" type="xs:string" paramType="variable" required="false"/>
<!-- nextcount: maximum number of pages that are accessed.
     When omitted (or 0) the script applies no page limit. -->
<key id="nextcount" type="xs:integer" paramType="variable" required="false"/>
<!-- listdapper: name of the dapper that parses hyperlinks to data pages.
     When omitted, datadapper is applied to each visited page itself. -->
<key id="listdapper" type="xs:string" paramType="variable" required="false"/>
</inputs>
<execute><![CDATA[
var datacalls = [];
var next = <><item href={url}/></>; // XML list
nextcount = nextcount?nextcount:Number.MAX_VALUE;
while (next.length() > 0 && nextcount-- > 0) {
if (listdapper) {
var listquery = 'select href,content from xml where itemPath="//item" and url="http://www.dapper.net/RunDapp?dappName=$dapper&v=1&applyToUrl=$url"';
listquery = listquery.replace("$dapper",escape(listdapper));
listquery = listquery.replace("$url",escape(next[0].@href));
var list = y.query(listquery).results.*; // XML list
for each (var linkitem in list) {
var dataquery = 'select * from xml where itemPath="//item" and url="http://www.dapper.net/RunDapp?dappName=$dapper&v=1&applyToUrl=$url"';
dataquery = dataquery.replace("$dapper",escape(datadapper));
dataquery = dataquery.replace("$url",escape(linkitem.@href));
datacalls.push(y.query(dataquery));
}
} else {
var dataquery = 'select * from xml where itemPath="//item" and url="http://www.dapper.net/RunDapp?dappName=$dapper&v=1&applyToUrl=$url"';
dataquery = dataquery.replace("$dapper",escape(datadapper));
dataquery = dataquery.replace("$url",escape(next[0].@href));
datacalls.push(y.query(dataquery));
}
if (nextdapper) {
var nextquery = 'select href,content from xml where itemPath="//item" and url="http://www.dapper.net/RunDapp?dappName=$dapper&v=1&applyToUrl=$url"';
nextquery = nextquery.replace("$dapper",escape(nextdapper));
nextquery = nextquery.replace("$url",escape(next[0].@href));
next = y.query(nextquery).results.*; // XML list
} else {
next = <></>;
}
}
var data = <data/>; // XML
for each (var datacall in datacalls) {
data.appendChild(datacall.results.*);
}
response.object = data;
]]></execute>
</select>
</bindings>
</table>