In [1]:
import sys
import os.path
# Extend the module search path with ../../common. The path is relative, so it
# resolves against the directory the notebook kernel was started in.
# NOTE(review): presumably this is where `evaluate`, `open` and `order` live — confirm.
sys.path.append(os.path.join(os.pardir, os.pardir, 'common'))

In [2]:
import urllib

from evaluate import load_detected_regions, load_groundtruth_regions, save_results, print_stats, RegionInspector
from open import build_local_url
from order import get_test_lists

# Data Loading and Preparation

In [3]:
# Detector output that is evaluated in this notebook (TSV, per the extension).
DETECTED_FILE = os.path.join('regions', 'detected', 'jquery.tsv')
# Ground-truth regions the detections are scored against.
# NOTE(review): the "_44" suffix does not match VALIDATION_SIZE (45) — confirm
# this is the intended file.
TRUTH_FILE = os.path.join('regions', 'extracted', 'jquery_validation_44.txt')

For now, we limit ourselves to considering only the first 45 pages of the validation list (see `VALIDATION_SIZE` below). Eventually we should be able to boost this number up to around 100.

In [4]:
# Number of pages taken from the front of the jQuery validation list.
VALIDATION_SIZE = 45

In [5]:
# Take the leading VALIDATION_SIZE entries of the jQuery validation list.
test_lists = get_test_lists('jquery')
wget_pages = test_lists['validation'][:VALIDATION_SIZE]

# Turn each page into its local URL and percent-encode it, leaving '/' and ':'
# untouched so the scheme and path separators survive.
validation_urls = [
    urllib.quote(build_local_url(page), safe='/:')
    for page in wget_pages
]

Load the detected regions from the stored TSV file, followed by the ground-truth regions.

In [6]:
# Read the detector's regions from DETECTED_FILE. The validation URLs are passed
# along, presumably so that only regions on those pages are kept — confirm in
# load_detected_regions. The result is indexed and iterated like a sequence below.
detected_regions = load_detected_regions(DETECTED_FILE, validation_urls)

In [7]:
# Read the ground-truth regions from TRUTH_FILE for the same set of URLs.
# The result is used as a dict below: keys are indexable with the URL at
# position 0, and each value is an indexable collection of regions.
truth_regions = load_groundtruth_regions(TRUTH_FILE, valid_urls=validation_urls)

## Verify that the regions have been properly loaded

In [8]:
# Distinct page URLs on each side. Both sets are built from generator
# expressions, so no throwaway list is created and (on Python 2) no loop
# variable is left behind in the notebook namespace.
detected_urls = set(region.url for region in detected_regions)
# Keys of truth_regions carry the URL at position 0; iterating the dict yields
# its keys directly.
truth_urls = set(key[0] for key in truth_regions)

In [9]:
# Sanity check: number of distinct page URLs that have at least one detection
# versus the number that have ground-truth regions.
print len(detected_urls), len(truth_urls)

23 43


In [10]:
# Spot check: show the first detected region to confirm the data loaded sensibly.
print detected_regions[0]

{Text: #submitButton URL: http://127.0.0.1:8000/pages/jquery/iframe%20javascript%20jquery%20tutorial/6/www.startutorial.com/articles/view/jquery-file-posting-using-iframe.html, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > PRE:nth-of-type(2), Offsets: (133, 145)}


In [11]:
# Spot check: show the first region of one ground-truth entry.
# Relies on Python 2, where dict.values() returns a list; which entry comes
# first is arbitrary.
print truth_regions.values()[0][0]

{Text: None URL: http://127.0.0.1:8000/pages/jquery/javascript%20jquery%20jquery-plugins%20tutorial/5/www.smashingmagazine.com/2011/10/essential-jquery-plugin-patterns/, Element: HTML > BODY:nth-of-type(1) > MAIN:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(3) > ARTICLE:nth-of-type(1) > PRE:nth-of-type(13) > CODE:nth-of-type(1) > SPAN:nth-of-type(14), Offsets: (1, 7)}


# Compute Accuracy!

In [12]:
# Score the detections against the ground truth. The call reports precision and
# recall and returns three collections, unpacked here as correctly detected,
# falsely detected, and missed regions (order taken from the names used here —
# confirm against print_stats).
true_regions, false_regions, missing_regions = print_stats(detected_regions, truth_regions)

Precision: 0.6891 (164/238), Recall 0.4781 (164/343)
Per-page Precision: 0.6173, Recall 0.4087


## Save Results for posterity

In [13]:
# Directory handed to save_results for this run's false and missing regions.
history_dir = os.path.join('regions', 'detected', 'history')

In [14]:
# Persist the false and missing regions under history_dir. The file naming and
# layout are decided inside save_results.
save_results(history_dir, false_regions, missing_regions)

## Examine Detection Faults

In [15]:
# Helper used below to step through individual regions.
# NOTE(review): appears to drive a browser through Selenium (the recorded
# traceback mentions fxdriver) — confirm before running headless.
inspector = RegionInspector()

In [17]:
# Interactive walkthrough of the missed detections; it prompts before opening
# each example. Closing the browser window while this runs appears to surface
# as NoSuchWindowException, so keep the window open until the walkthrough ends.
inspector.open_missing_detections(missing_regions)


?? Open next example of missed detection? 
 
===== REGION 0 =====
{Text: None URL: http://127.0.0.1:8000/pages/jquery/javascript%20jquery%20jquery-plugins%20tutorial/5/www.smashingmagazine.com/2011/10/essential-jquery-plugin-patterns/, Element: HTML > BODY:nth-of-type(1) > MAIN:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(3) > ARTICLE:nth-of-type(1) > PRE:nth-of-type(13) > CODE:nth-of-type(1) > SPAN:nth-of-type(14), Offsets: (1, 7)}
* Text:  .item-b
* HTML:  <span class="token string">'.item-b'</span>


NoSuchWindowException: Message: Window not found. The browser window may have been closed.
Stacktrace:
    at nsCommandProcessor.prototype.execute (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/command-processor.js:12711)
    at Dispatcher.executeAs/< (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/driver-component.js:9456)
    at Resource.prototype.handle (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/driver-component.js:9603)
    at Dispatcher.prototype.dispatch (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/driver-component.js:9550)
    at WebDriverServer/<.handle (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/driver-component.js:12497)
    at createHandlerFunc/< (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/httpd.js:2054)
    at ServerHandler.prototype.handleResponse (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/httpd.js:2387)
    at Connection.prototype.process (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/httpd.js:1223)
    at RequestReader.prototype._handleResponse (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/httpd.js:1677)
    at RequestReader.prototype._processBody (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/httpd.js:1525)
    at RequestReader.prototype.onInputStreamReady (file:///var/folders/05/6w_fy5m96dj6w_wdd89v0y0m0000gp/T/tmpHxH4qH/extensions/fxdriver@googlecode.com/components/httpd.js:1393)

In [None]:
# Walk through the false detections with the same inspector.
# NOTE(review): presumably interactive like the missed-detection walkthrough — confirm.
inspector.open_false_detections(false_regions)