Skip to content

Commit

Permalink
Added popularity to html tags
Browse files Browse the repository at this point in the history
  • Loading branch information
atg committed Apr 26, 2012
1 parent 71f7792 commit c172902
Show file tree
Hide file tree
Showing 10 changed files with 1,316 additions and 11 deletions.
2 changes: 2 additions & 0 deletions build.sh
Expand Up @@ -8,3 +8,5 @@ cd docs && python compile.py && cd ..
cd completions && python compile.py && cd ..
cd SPLDB && python compiler.py && cd ..

cd extended-completions && python parseattributes.py >html.min.json && cd ..
cd extended-completions && python cssgen.py >css.min.json && cd ..
2 changes: 1 addition & 1 deletion docs/docs.json
@@ -1 +1 @@
{"scheme":[{"home":"http://docs.racket-lang.org/","google":"http://docs.racket-lang.org/reference","name":"Racket"}],"objc":[{"google":"developer.apple.com/library/mac","name":"Objective-C (Mac)"},{"google":"developer.apple.com/library/ios","name":"Objective-C (iOS)"}],"haskell":[{"home":"http://haskell.org/ghc/docs/7.0-latest/html/libraries/index.html","search":"http://www.haskell.org/hoogle/?hoogle=%%","name":"Hoogle"}],"io":[{"home":"http://www.iolanguage.com/scm/io/docs/IoGuide.html","google":"iolanguage.com/scm/io/docs/reference","name":"Io"}],"go":[{"google":"golang.org","name":"Go"}],"haml":[{"home":"haml-lang.com/docs/yardoc/file.HAML_REFERENCE.html","name":"Haml"}],"smalltalk":[{"google":"http://www.oldenbuettel.de/squeak-doku","name":"Squeak"}],"java":[{"google":"http://download.oracle.com/javase/6/docs/api/","name":"Java"}],"fortran":[{"google":"h21007.www2.hp.com/portal/download/files/unprot/Fortran/docs/","name":"Fortran"}],"mathematica":[{"search":"http://reference.wolfram.com/search.html?query=%%","name":"Mathematica"}],"perl":[{"home":"http://perldoc.perl.org/","search":"http://perldoc.perl.org/search.html?q=%%","name":"Perl"}],"lua":[{"home":"http://www.lua.org/manual/5.1/manual.html","name":"Lua"}],"html":[{"home":"https://developer.mozilla.org/en/HTML","google":"developer.mozilla.org/en/HTML","name":"HTML"}],"factor":[{"home":"docs.factorcode.org","search":"http://docs.factorcode.org/search?search=%%","name":"Factor"}],"erlang":[{"google":"erlang.org/doc","name":"Erlang"}],"css":[{"home":"https://developer.mozilla.org/en/CSS","google":"developer.mozilla.org/en/CSS","name":"CSS"}],"shell":[{"home":"http://www.gnu.org/software/bash/manual/bashref.html","name":"Bash"}],"python":[{"home":"http://docs.python.org","google":"docs.python.org","hide":[".related",".sphinxsidebar"],"name":"Python","css":".bodywrapper { margin: inherit !important; 
}"},{"home":"http://docs.djangoproject.com","google":"docs.djangoproject.com","hide":["#header","#billboard"],"name":"Django"}],"js":[{"home":"https://developer.mozilla.org/en/JavaScript","google":"developer.mozilla.org/en/JavaScript","name":"JavaScript"}],"sql":[{"home":"http://www.sqlite.org/lang.html","google":"http://www.sqlite.org/lang.html","name":"SQLite"},{"home":"http://dev.mysql.com/doc/refman/5.6/en/","google":"http://dev.mysql.com/doc/refman/5.6/en/","name":"MySQL"},{"home":"http://www.postgresql.org/docs/9.1/interactive/sql.html","google":"http://www.postgresql.org/docs/9.1/interactive/","name":"PostgreSQL"}],"php":[{"home":"http://www.php.net/manual/en/","search":"http://www.php.net/manual-lookup.php?pattern=%%&lang=en","hide":["#headnav","#headsearch",".manualnavbar.manualnavbar_top","#leftbar"],"name":"PHP","css":"#layout_2 #content, #layout_3 #content { margin-left: 0 !important; }"}],"ruby":[{"home":"http://ruby-doc.org/","google":"ruby-doc.org","name":"Ruby"},{"search":"http://www.google.com/search?q=%%+site%3Aguides.rubyonrails.org+OR+site%3Aapi.rubyonrails.org","name":"Rails"}],"groovy":[{"google":"groovy.codehaus.org/","name":"Groovy"}],"c":[{"google":["pubs.opengroup.org/onlinepubs"],"name":"C"}],"dylan":[{"google":"opendylan.org/books/drm","name":"Dylan"}],"d":[{"google":"d-programming-language.org","name":"D"}],"lisp":[{"google":"lispworks.com/documentation/HyperSpec","name":"Common Lisp"}],"ml":[{"google":"http://www.standardml.org/Basis/","name":"ML 
Basis"}],"scala":[{"home":"http://www.scala-lang.org/api/current/index.html","search":"http://scalex.org/?q=%%","name":"Scala"}],"ocaml":[{"home":"http://caml.inria.fr/pub/docs/manual-ocaml/","google":"caml.inria.fr/pub/docs/manual-ocaml/","name":"OCaml"}],"c++":[{"google":"cplusplus.com/reference","name":"C++"}],"r":[{"google":"stat.ethz.ch/R-manual/R-patched","name":"R"}],"oz":[{"google":"mozart-oz.org/documentation/","name":"OCaml"}],"tcl":[{"google":"tcl.tk/man/tcl8.6/","name":"Tcl"}],"clojure":[{"google":"clojure.org","name":"Clojure"}],"prolog":[{"google":"http://www.swi-prolog.org/pldoc","name":"SWI Prolog"}]}
{"scheme":[{"home":"http://docs.racket-lang.org/","google":"http://docs.racket-lang.org/reference","name":"Racket"}],"objc":[{"google":"developer.apple.com/library/mac","name":"Objective-C (Mac)"},{"google":"developer.apple.com/library/ios","name":"Objective-C (iOS)"}],"haskell":[{"home":"http://haskell.org/ghc/docs/7.0-latest/html/libraries/index.html","search":"http://www.haskell.org/hoogle/?hoogle=%%","name":"Hoogle"}],"io":[{"home":"http://www.iolanguage.com/scm/io/docs/IoGuide.html","google":"iolanguage.com/scm/io/docs/reference","name":"Io"}],"go":[{"google":"golang.org","name":"Go"}],"haml":[{"home":"haml-lang.com/docs/yardoc/file.HAML_REFERENCE.html","name":"Haml"}],"smalltalk":[{"google":"http://www.oldenbuettel.de/squeak-doku","name":"Squeak"}],"java":[{"google":"http://download.oracle.com/javase/6/docs/api/","name":"Java"}],"arduino":{"home":"http://arduino.cc/en/Reference","google":"arduino.cc/en/Reference","hide":["#ssotoolbar","#pageheader","#pagenav","#pagefooter",".addthis_container"],"name":"Arduino","css":"#page { width: 100% !important; } h2 { margin-top: 17px !important; } p:first-child { display: none; 
}"},"mathematica":[{"search":"http://reference.wolfram.com/search.html?query=%%","name":"Mathematica"}],"perl":[{"home":"http://perldoc.perl.org/","search":"http://perldoc.perl.org/search.html?q=%%","name":"Perl"}],"lua":[{"home":"http://www.lua.org/manual/5.1/manual.html","name":"Lua"}],"html":[{"home":"https://developer.mozilla.org/en/HTML","google":"developer.mozilla.org/en/HTML","name":"HTML"}],"factor":[{"home":"docs.factorcode.org","search":"http://docs.factorcode.org/search?search=%%","name":"Factor"}],"erlang":[{"google":"erlang.org/doc","name":"Erlang"}],"css":[{"home":"https://developer.mozilla.org/en/CSS","google":"developer.mozilla.org/en/CSS","name":"CSS"}],"oz":[{"google":"mozart-oz.org/documentation/","name":"OCaml"}],"shell":[{"home":"http://www.gnu.org/software/bash/manual/bashref.html","name":"Bash"}],"python":[{"home":"http://docs.python.org","google":"docs.python.org","hide":[".related",".sphinxsidebar"],"name":"Python","css":".bodywrapper { margin: inherit !important; }"},{"home":"http://docs.djangoproject.com","google":"docs.djangoproject.com","hide":["#header","#billboard"],"name":"Django"}],"js":[{"home":"https://developer.mozilla.org/en/JavaScript","google":"developer.mozilla.org/en/JavaScript","name":"JavaScript"}],"sql":[{"home":"http://www.sqlite.org/lang.html","google":"http://www.sqlite.org/lang.html","name":"SQLite"},{"home":"http://dev.mysql.com/doc/refman/5.6/en/","google":"http://dev.mysql.com/doc/refman/5.6/en/","name":"MySQL"},{"home":"http://www.postgresql.org/docs/9.1/interactive/sql.html","google":"http://www.postgresql.org/docs/9.1/interactive/","name":"PostgreSQL"}],"php":[{"home":"http://www.php.net/manual/en/","search":"http://www.php.net/manual-lookup.php?pattern=%%&lang=en","hide":["#headnav","#headsearch",".manualnavbar.manualnavbar_top","#leftbar"],"name":"PHP","css":"#layout_2 #content, #layout_3 #content { margin-left: 0 !important; 
}"}],"ruby":[{"home":"http://ruby-doc.org/","google":"ruby-doc.org","name":"Ruby"},{"search":"http://www.google.com/search?q=%%+site%3Aguides.rubyonrails.org+OR+site%3Aapi.rubyonrails.org","name":"Rails"}],"groovy":[{"google":"groovy.codehaus.org/","name":"Groovy"}],"c":[{"google":["pubs.opengroup.org/onlinepubs"],"name":"C"}],"dylan":[{"google":"opendylan.org/books/drm","name":"Dylan"}],"d":[{"google":"d-programming-language.org","name":"D"}],"lisp":[{"google":"lispworks.com/documentation/HyperSpec","name":"Common Lisp"}],"ml":[{"google":"http://www.standardml.org/Basis/","name":"ML Basis"}],"scala":[{"home":"http://www.scala-lang.org/api/current/index.html","search":"http://scalex.org/?q=%%","name":"Scala"}],"ocaml":[{"home":"http://caml.inria.fr/pub/docs/manual-ocaml/","google":"caml.inria.fr/pub/docs/manual-ocaml/","name":"OCaml"}],"c++":[{"google":"cplusplus.com/reference","name":"C++"}],"r":[{"google":"stat.ethz.ch/R-manual/R-patched","name":"R"}],"fortran":[{"google":"h21007.www2.hp.com/portal/download/files/unprot/Fortran/docs/","name":"Fortran"}],"tcl":[{"google":"tcl.tk/man/tcl8.6/","name":"Tcl"}],"clojure":[{"google":"clojure.org","name":"Clojure"}],"prolog":[{"google":"http://www.swi-prolog.org/pldoc","name":"SWI Prolog"}]}
3 changes: 3 additions & 0 deletions extended-completions/.gitignore
@@ -0,0 +1,3 @@
topsitehtml
topsitehtml_toobig

7 changes: 5 additions & 2 deletions extended-completions/cssgen.py
Expand Up @@ -47,8 +47,11 @@ def uniq(input):
# print ''

# keyvals = allvals & allkeys

#for v in sorted(list(keyvals)):
# allvals = set()
# for k in d:
# for v in d[k]['values']:
# allvals.add(v)
# for v in sorted(list(allvals)):
# print v


Expand Down
2 changes: 1 addition & 1 deletion extended-completions/html.min.json

Large diffs are not rendered by default.

22 changes: 15 additions & 7 deletions extended-completions/parseattributes.py
Expand Up @@ -9,6 +9,8 @@
reader = csv.reader(d, delimiter="\t")
reader2 = csv.reader(d2, delimiter="\t")

tophtml = json.loads(open("tophtmltags.json", "r").read())

tags = {}
for row in reader:
# print row
Expand Down Expand Up @@ -153,32 +155,38 @@


for t in sorted(list(tags)):
if t in tophtml:
tags[t]['popularity'] = tophtml[t]
else:
tags[t]['popularity'] = 0.0

if 'children' in tags[t]:
# print t
ch = tags[t]['children']
if t not in ['li', 'th', 'blockquote'] and ('flow' in ch or 'flow*' in ch or t in ['colgroup', 'hgroup', 'html', 'head', 'body', 'ol', 'ul', 'dl', 'style', 'select', 'optgroup', 'script', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'div', 'datalist']):
if t not in ['li', 'th', 'blockquote', 'td', 'caption'] and ('flow' in ch or 'flow*' in ch or t in ['colgroup', 'hgroup', 'html', 'head', 'body', 'ol', 'ul', 'dl', 'style', 'select', 'optgroup', 'script', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'th', 'div', 'datalist']):
block_tags.append(t)
tags[t]['kind'] = 'block'
elif 'empty' in ch:
empty_tags.append(t)
tags[t]['kind'] = 'empty'
elif 'phrasing' in ch or 'phrasing*' in ch or 'transparent*' in ch or 'transparent' in ch or t in ['iframe', 'noscript', 'textarea', 'option', 'title']:
elif 'phrasing' in ch or 'phrasing*' in ch or 'transparent*' in ch or 'transparent' in ch or t in ['iframe', 'noscript', 'textarea', 'option', 'title', 'td', 'th', 'blockquote', 'li', 'caption']:
inline_tags.append(t)
tags[t]['kind'] = 'inline'
else:
# print t
# print ' ' + ', '.join(ch)

unknown_tags.append(t)
tags[t]['kind'] = 'unknown'
tags[t]['kind'] = 'inline'

if t in snippets:
tags[t]['snippet'] = snippets[t]

# print empty_tags
# print block_tags
# print inline_tags
# print unknown_tags
#print empty_tags
#print block_tags
#print inline_tags
#print ''
#print unknown_tags

# pprint.pprint(tags)
if isminified:
Expand Down
200 changes: 200 additions & 0 deletions extended-completions/tophtmltags.json
@@ -0,0 +1,200 @@
{
"a": 4688.547442,
"div": 4175.680366,
"li": 3736.905426,
"span": 3117.129253,
"img": 2992.744109,
"script": 2683.061392,
"ul": 2347.367797,
"p": 2171.181506,
"meta": 1846.920736,
"input": 1810.743184,
"link": 1544.851049,
"br": 1471.810789,
"td": 1130.503384,
"strong": 1063.412554,
"option": 1022.997391,
"form": 894.500362,
"tr": 891.519880,
"b": 749.521491,
"html": 716.982053,
"body": 690.564028,
"table": 669.525871,
"head": 668.612898,
"label": 668.361684,
"title": 664.320484,
"dd": 618.570115,
"noscript": 602.454551,
"iframe": 567.498423,
"style": 552.289229,
"em": 530.728318,
"dt": 509.229789,
"dl": 484.681492,
"select": 358.591746,
"i": 327.199535,
"font": 250.641901,
"param": 239.591269,
"button": 231.185757,
"ol": 217.370667,
"area": 210.077840,
"fieldset": 178.704765,
"tbody": 150.835005,
"hr": 138.939927,
"th": 136.664808,
"object": 112.618163,
"map": 104.834802,
"small": 96.959655,
"embed": 94.285534,
"sup": 92.811945,
"center": 87.138017,
"section": 84.792739,
"textarea": 70.291547,
"base": 67.470843,
"legend": 62.663364,
"nav": 56.760235,
"s": 56.272927,
"header": 55.892221,
"cite": 52.534577,
"footer": 43.934610,
"u": 42.382867,
"article": 40.895867,
"nobr": 37.991948,
"abbr": 37.063107,
"del": 37.013583,
"thead": 30.630039,
"aside": 21.342737,
"code": 20.345533,
"blockquote": 19.550978,
"optgroup": 17.658312,
"address": 16.347850,
"noindex": 15.813406,
"col": 14.950262,
"figure": 13.826332,
"time": 12.278579,
"var": 12.052339,
"dfn": 12.014234,
"ins": 10.461502,
"hgroup": 9.862666,
"marquee": 8.841014,
"strike": 7.973844,
"q": 7.878534,
"big": 7.377759,
"mark": 6.854355,
"tfoot": 6.643790,
"caption": 6.356108,
"colgroup": 6.238325,
"tt": 6.234411,
"acronym": 5.950643,
"frame": 5.375278,
"canvas": 5.075174,
"figcaption": 4.564348,
"text": 4.317488,
"stringtable": 4.317488,
"pre": 4.189655,
"samp": 4.127134,
"item": 4.094345,
"flags": 4.094345,
"iconposition": 4.094345,
"answer": 4.094345,
"sohuadcode": 3.828641,
"source": 3.465736,
"frameset": 3.465736,
"url": 3.401197,
"month": 3.178054,
"date": 3.178054,
"event": 3.091042,
"spantemp": 2.944439,
"menu": 2.890372,
"fragmentinstance": 2.833213,
"noframes": 2.772589,
"variant": 2.772589,
"wbr": 2.639057,
"year": 2.564949,
"spacer": 2.484907,
"venue": 2.484907,
"layer": 2.484907,
"number": 2.397895,
"banner": 2.197225,
"name": 2.197225,
"xscript": 2.197225,
"value": 2.197225,
"sitename": 2.197225,
"parameter": 2.197225,
"pageattributes": 2.079442,
"video": 2.079442,
"new": 2.079442,
"top": 2.079442,
"question": 2.079442,
"username": 1.791759,
"target": 1.791759,
"gametitle": 1.609438,
"module": 1.609438,
"datasources": 1.609438,
"parameters": 1.609438,
"datasource": 1.609438,
"blink": 1.609438,
"user": 1.609438,
"blur": 1.386294,
"noembed": 1.386294,
"list": 1.386294,
"ilayer": 1.386294,
"z": 1.386294,
"smoothie": 1.098612,
"artist": 1.098612,
"keeper": 1.098612,
"attribute": 1.098612,
"description": 1.098612,
"bgsound": 0.693147,
"redbullvid": 0.693147,
"scrollingarrow": 0.693147,
"eapid": 0.693147,
"maselecnoticia": 0.693147,
"config": 0.693147,
"captions": 0.693147,
"children": 0.693147,
"userdata": 0.693147,
"maeltosiguiente": 0.693147,
"background": 0.693147,
"incurl": 0.693147,
"h": 0.693147,
"f": 0.693147,
"plugins": 0.693147,
"maincontent": 0.693147,
"transnav": 0.693147,
"pagemap": 0.693147,
"lego": 0.693147,
"timeline": 0.693147,
"webengage": 0.693147,
"xsltransform": 0.693147,
"urls": 0.693147,
"bcolumnabove": 0.693147,
"comment": 0.693147,
"controls": 0.693147,
"guid": 0.693147,
"fjtignoreurl": 0.693147,
"editmode": 0.693147,
"charactercreation": 0.693147,
"maeltoanterior": 0.693147,
"nolayer": 0.693147,
"snbuttons": 0.693147,
"rss": 0.693147,
"tpid": 0.693147,
"addurl": 0.693147,
"n": 0.693147,
"plaintext": 0.693147,
"modules": 0.693147,
"texts": 0.693147,
"dataobject": 0.693147,
"min": 0.693147,
"localnav": 0.693147,
"channel": 0.693147,
"links": 0.693147,
"d": 0.693147,
"preform": 0.693147,
"ssinfo": 0.693147,
"sub": 0.693147,
"m": 0.693147,
"elem": 0.693147,
"audio": 0.693147,
"left": 0.693147
}
17 changes: 17 additions & 0 deletions extended-completions/topsites.py
@@ -0,0 +1,17 @@
import csv, pprint, json, re, urllib2

# Download the front page of every site listed in topsites.txt and save the
# raw response body to topsitehtml/<site>.
#
# topsites.txt is read as tab-separated rows; the host name is taken from the
# second column. For each site the "www." host is tried first and the bare
# host second; a site is skipped (with the error printed) only when both fail.
#
# NOTE(review): the topsitehtml/ directory is assumed to exist already -
# nothing here creates it.


def _fetch(url):
    # Return the response body for `url`, always closing the connection.
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


with open("topsites.txt", "r") as f:
    reader = csv.reader(f, delimiter="\t")

    for row in reader:
        # Blank or malformed lines have no second column; skip them rather
        # than aborting the whole crawl with an IndexError.
        if len(row) < 2:
            continue

        site = row[1].strip()
        # Single-argument print(...) behaves the same under the Python 2
        # print statement, so the progress output is unchanged.
        print(site)

        try:
            content = _fetch('http://www.' + site)
        except Exception:
            # `except Exception` (not a bare `except:`) so that Ctrl-C and
            # SystemExit stop the crawl instead of triggering the fallback.
            try:
                content = _fetch('http://' + site)
            except Exception as e:
                print(' ' + repr(e))
                continue

        # Close each output file promptly instead of leaking one handle per
        # downloaded site.
        with open('topsitehtml/' + site, 'w') as out:
            out.write(content)

0 comments on commit c172902

Please sign in to comment.