In [1]:
import sys
import os.path
# Make the shared helper modules in ../../common importable (presumably where
# the evaluate / open / order modules imported in the next cell live).
# The path is resolved to an absolute one so the entry keeps pointing at the
# same directory even if the working directory changes later, and the guard
# keeps re-runs of this cell from piling up duplicate sys.path entries.
COMMON_DIR = os.path.abspath(os.path.join(os.pardir, os.pardir, 'common'))
if COMMON_DIR not in sys.path:
    sys.path.append(COMMON_DIR)

In [2]:
import urllib

from evaluate import load_detected_regions, load_groundtruth_regions, save_results, print_stats, RegionInspector
from open import build_local_url
from order import get_test_lists

# Data Loading and Preparation

In [3]:
# Detector output for the jQuery pages (TSV, read by load_detected_regions).
DETECTED_FILE = os.path.join('regions', 'detected', 'jquery.tsv')
# Ground-truth regions (read by load_groundtruth_regions with a ',,,' delimiter).
# NOTE(review): the file name hard-codes "50"; presumably it has to stay in
# sync with VALIDATION_SIZE -- confirm before raising the validation size.
TRUTH_FILE = os.path.join('regions', 'extracted', 'jquery_validation_50.txt')

For now, we limit ourselves to the first 50 cross-validation results (`VALIDATION_SIZE` below). Eventually we should be able to boost this number to around 100.

In [4]:
# Number of pages, taken from the head of the validation list, to evaluate on.
VALIDATION_SIZE = 50

In [5]:
# Take the first VALIDATION_SIZE pages of the jQuery validation list and turn
# each one into a percent-encoded local URL ('/' and ':' are left unescaped).
wget_pages = get_test_lists('jquery')['validation'][:VALIDATION_SIZE]
validation_urls = []
for p in wget_pages:
    validation_urls.append(urllib.quote(build_local_url(p), safe='/:'))

Load the detections from the stored TSV file, then the ground-truth regions, both restricted to the validation URLs.

In [6]:
# Read the detected regions from DETECTED_FILE. validation_urls is passed along,
# presumably so the loader keeps only regions on the validation pages -- confirm
# in evaluate.load_detected_regions.
detected_regions = load_detected_regions(DETECTED_FILE, validation_urls)

In [7]:
# Read the ground-truth regions from TRUTH_FILE, using ',,,' as the delimiter
# and the same validation URLs as the detections.
truth_regions = load_groundtruth_regions(TRUTH_FILE, delimiter=',,,', valid_urls=validation_urls)

## Verify that the regions have been properly loaded

In [8]:
# Distinct URLs that appear among the detections and among the ground truth.
# Both sets are fed straight from generator expressions, so no throwaway list
# is built along the way.
detected_urls = set(r.url for r in detected_regions)
# Iterating the mapping yields its keys; the first element of each key is
# presumably the page URL -- confirm against evaluate.load_groundtruth_regions.
truth_urls = set(key[0] for key in truth_regions)

In [9]:
# Sanity check: number of distinct URLs with detections vs. with ground truth.
print len(detected_urls), len(truth_urls)

23 45


In [10]:
# Spot-check the first detected region.
# NOTE(review): the captured output is only the default object repr, so Region
# apparently defines no __str__/__repr__; this check confirms little beyond
# "the list is non-empty".
print detected_regions[0]

<evaluate.Region object at 0x10dc61f10>


In [11]:
# Spot-check one ground-truth region. Indexing .values() relies on it returning
# a list, which is Python 2 dict behavior.
print truth_regions.values()[0][0]

<evaluate.Region object at 0x10ded6d10>


# Compute Precision and Recall

In [16]:
# print_stats prints the precision/recall summary shown in the output and
# returns three values; false_regions and missing_regions feed the saving and
# inspection cells further down.
true_regions, false_regions, missing_regions = print_stats(detected_regions, truth_regions)

Precision: 0.8025 (191/238), Recall 0.4526 (191/422)
Per-page Precision: 0.7693, Recall 0.3969


## Save Results for posterity

In [14]:
# Directory passed to save_results in the next cell.
history_dir = os.path.join('regions', 'detected', 'history')

In [15]:
# Record this run's false and missing regions under history_dir
# (presumably written to disk by save_results -- confirm in evaluate).
save_results(history_dir, false_regions, missing_regions)

## Examine Detection Faults

In [13]:
# Helper used below to step through individual regions. The WebDriverException
# in later outputs suggests it drives a browser through WebDriver.
inspector = RegionInspector()

In [14]:
# Step through the missed detections (missing_regions from print_stats);
# the inspector prompts before opening each example.
inspector.open_missing_detections(missing_regions)


?? Open next example of missed detection? 


In [15]:
# Step through the false detections (false_regions from print_stats).
# NOTE(review): the prompt in the captured output still reads "missed
# detection" for this call; the message in RegionInspector looks copy-pasted --
# confirm and fix in the evaluate module.
inspector.open_false_detections(false_regions)


?? Open next example of missed detection? 
 
===== REGION 0 =====
{Text: iframe URL: http://127.0.0.1:8000/pages/jquery/iframe%20javascript%20jquery%20tutorial/6/www.startutorial.com/articles/view/jquery-file-posting-using-iframe.html, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > PRE:nth-of-type(2), Offsets: (258, 263)}
* Text:  iframe
* HTML:  <pre class="prettyprint">&lt;header&gt;

***other codes ****


&lt;!-- document javascripts --&gt;	 
&lt;script type="text/javascript"&gt;
$(document).ready(function () {
  $('#submitButton').click(function(){
     if($('iframe[name=iframeTarget]').length&lt;1){
		    var iframe=document.createElement('iframe');
			$(iframe).css('display','none');
			$(iframe).attr('src','#');
				
			$(iframe).attr('name','iframeTarget');
			$('body').append(iframe);
				
			$(this).attr('target','iframeTarget');
			}          
     });
    });
&lt;/script&gt;



***other codes ****



WebDriverException: Message: Argument 1 of Range.selectNode is not an object.
Stacktrace:
    at anonymous (http://127.0.0.1:8000/pages/jquery/css%20jquery%20jquery-ui%20tutorial/8/code.tutsplus.com/tutorials/a-massive-guide-to-custom-theming-jquery-ui-widgets--net-22714.html line 69 > Function:3)
    at handleEvaluateEvent (http://127.0.0.1:8000/pages/jquery/css%20jquery%20jquery-ui%20tutorial/8/code.tutsplus.com/tutorials/a-massive-guide-to-custom-theming-jquery-ui-widgets--net-22714.html:69)
    at nrWrapper (http://127.0.0.1:8000/pages/jquery/css%20jquery%20jquery-ui%20tutorial/8/code.tutsplus.com/tutorials/a-massive-guide-to-custom-theming-jquery-ui-widgets--net-22714.html:3)

In [19]:
# Resume at region 4; the previous run ended in a WebDriverException.
inspector.open_false_detections(false_regions, start_index=4)


?? Open next example of missed detection? 
 
===== REGION 4 =====
{Text: #tabs-2 URL: http://127.0.0.1:8000/pages/jquery/css%20jquery%20jquery-ui%20tutorial/8/code.tutsplus.com/tutorials/a-massive-guide-to-custom-theming-jquery-ui-widgets--net-22714.html, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(2) > MAIN:nth-of-type(1) > ARTICLE:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(6) > DIV:nth-of-type(1) > TABLE:nth-of-type(1) > TBODY:nth-of-type(1) > TR:nth-of-type(1) > TD:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(10) > CODE:nth-of-type(8), Offsets: (1, 7)}
* Text:  #tabs-2
* HTML:  <code class="html string">"#tabs-2"</code>


===== REGION 5 =====
{Text: #tabs-3 URL: http://127.0.0.1:8000/pages/jquery/css%20jquery%20jquery-ui%20tutorial/8/code.tutsplus.com/tutorials/a-massive-guide-to-custom-theming-jquery-ui-widgets--net-22714.html, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(2) > MAI

WebDriverException: Message: Argument 1 of Range.selectNode is not an object.
Stacktrace:
    at anonymous (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/ line 69 > Function:3)
    at handleEvaluateEvent (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/:69)

In [20]:
# Skip ahead to region 36; the previous run also ended in a WebDriverException.
inspector.open_false_detections(false_regions, start_index=36)


?? Open next example of missed detection? 
 
===== REGION 36 =====
{Text: style URL: http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(4) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(6) > PRE:nth-of-type(1) > CODE:nth-of-type(1), Offsets: (216, 220)}


WebDriverException: Message: Argument 1 of Range.selectNode is not an object.
Stacktrace:
    at anonymous (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/ line 69 > Function:3)
    at handleEvaluateEvent (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/:69)

In [21]:
# Region 36 raised a WebDriverException; continue from region 37.
inspector.open_false_detections(false_regions, start_index=37)


?? Open next example of missed detection? 
 
===== REGION 37 =====
{Text: script URL: http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(4) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(6) > PRE:nth-of-type(1) > CODE:nth-of-type(1), Offsets: (290, 295)}


WebDriverException: Message: Argument 1 of Range.selectNode is not an object.
Stacktrace:
    at anonymous (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/ line 69 > Function:3)
    at handleEvaluateEvent (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/:69)

In [22]:
# Region 37 raised a WebDriverException; continue from region 38.
inspector.open_false_detections(false_regions, start_index=38)


?? Open next example of missed detection? 
 
===== REGION 38 =====
{Text: #jp_demo URL: http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(4) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(8) > PRE:nth-of-type(1) > CODE:nth-of-type(1), Offsets: (46, 53)}


WebDriverException: Message: Argument 1 of Range.selectNode is not an object.
Stacktrace:
    at anonymous (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/ line 69 > Function:3)
    at handleEvaluateEvent (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/:69)

In [23]:
# Region 38 raised a WebDriverException; continue from region 39.
inspector.open_false_detections(false_regions, start_index=39)


?? Open next example of missed detection? 
 
===== REGION 39 =====
{Text: .popup_window URL: http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(4) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(8) > PRE:nth-of-type(1) > CODE:nth-of-type(1), Offsets: (83, 95)}


WebDriverException: Message: Argument 1 of Range.selectNode is not an object.
Stacktrace:
    at anonymous (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/ line 69 > Function:3)
    at handleEvaluateEvent (http://127.0.0.1:8000/pages/jquery/jquery%20php%20wordpress%20tutorial/1/www.inkthemes.com/how-to-implement-custom-jquery-and-css-in-wordpress-plugin/:69)

In [24]:
# Region 39 raised a WebDriverException; continue from region 40.
inspector.open_false_detections(false_regions, start_index=40)


?? Open next example of missed detection? 
 
===== REGION 40 =====
{Text: title URL: http://127.0.0.1:8000/pages/jquery/javascript%20jquery%20json%20tutorial/1/iviewsource.com/codingtutorials/getting-started-with-javascript-object-notation-json-for-absolute-beginners/, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > OL:nth-of-type(1) > LI:nth-of-type(20) > ARTICLE:nth-of-type(1) > SECTION:nth-of-type(1) > PRE:nth-of-type(1) > CODE:nth-of-type(1), Offsets: (42, 46)}
* Text:  title
* HTML:  <code class="objectivec">    var mTitle== document<span class="variable">.getElementById</span>(<span class="string">"title"</span>)<span class="variable">.value</span>;
    var mWriter = document<span class="variable">.getElementById</span>(<span class="string">"writer"</span>)<span class="variable">.value</span>;
    var mArticle = document<span class="variable">.getElementById</span>(<span class="string"