In [24]:
import sys
import os.path
# Make the shared 'common' directory two levels up importable; the
# project-local imports in the next cell are presumably resolved from it
# (TODO confirm).  The entry is relative, so it is resolved against the
# current working directory at import time.
# Guarded so that re-running this cell does not stack duplicate entries.
_common_dir = os.path.join(os.pardir, os.pardir, 'common')
if _common_dir not in sys.path:
    sys.path.append(_common_dir)

In [25]:
import urllib

from evaluate import load_detected_regions, load_groundtruth_regions, save_results, print_stats, RegionInspector
from open import build_local_url
from order import get_test_lists

# Data Loading and Preparation

In [36]:
# Input files, given relative to the working directory.
REGIONS_DIR = 'regions'
# Detections file, later passed to load_detected_regions.
DETECTED_FILE = os.path.join(REGIONS_DIR, 'detected', 'jquery.tsv')
# Ground-truth file, later passed to load_groundtruth_regions.
TRUTH_FILE = os.path.join(REGIONS_DIR, 'extracted', 'test_longer.txt')

For now, we consider only the first 50 pages of the validation list. Eventually we should be able to raise this number to around 100.

In [37]:
# Number of pages kept from the front of the validation list (used as a slice bound).
VALIDATION_SIZE = 50

In [38]:
# Keep only the first VALIDATION_SIZE pages of the 'validation' list for jquery.
validation_pages = get_test_lists('jquery')['validation']
wget_pages = validation_pages[:VALIDATION_SIZE]

# Percent-encode each page's local URL, leaving '/' and ':' unescaped.
# These encoded URLs are what the region loaders are later filtered by.
validation_urls = []
for page in wget_pages:
    local_url = build_local_url(page)
    validation_urls.append(urllib.quote(local_url, safe='/:'))

Load the detections from the stored TSV file, then the ground-truth regions, keeping only the validation URLs.

In [39]:
# Load detections from DETECTED_FILE, passing the validation URLs along
# (presumably as a filter -- confirm in evaluate.load_detected_regions).
# The result is indexable and its items expose a .url attribute (both used below).
detected_regions = load_detected_regions(DETECTED_FILE, validation_urls)

In [40]:
# Load ground-truth regions from TRUTH_FILE, whose fields are separated by ',,,',
# restricted to the validation URLs.  The result is used below as a dict whose
# keys are indexable (key[0] is read as the URL) and whose values are sequences.
truth_regions = load_groundtruth_regions(TRUTH_FILE, delimiter=',,,', valid_urls=validation_urls)

## Verify that the regions have been properly loaded

In [41]:
# Distinct URLs that have at least one detected region.
detected_urls = set(region.url for region in detected_regions)
# Distinct URLs in the ground truth; the first element of each key is
# taken as the URL (presumably keys are (url, ...) tuples -- confirm).
truth_urls = set(region_key[0] for region_key in truth_regions)

In [42]:
# Number of distinct URLs with detections vs. number of distinct ground-truth URLs.
print len(detected_urls), len(truth_urls)

23 48


In [43]:
# Spot-check the first detection.  Region prints as a bare
# '<evaluate.Region object at ...>', so this only confirms that an object was loaded.
print detected_regions[0]

<evaluate.Region object at 0x106b6e250>


In [44]:
# Spot-check the first region of an arbitrary ground-truth entry.
# Indexing .values() directly relies on Python 2, where it returns a list.
print truth_regions.values()[0][0]

<evaluate.Region object at 0x1072c8510>


# Compute Precision and Recall

In [45]:
# print_stats prints the precision/recall summary and returns three collections,
# unpacked here (presumably matched, spurious and missed regions -- confirm in evaluate).
# false_regions and missing_regions are reused by the cells below.
true_regions, false_regions, missing_regions = print_stats(detected_regions, truth_regions)

Precision: 0.8025 (191/238), Recall 0.4099 (191/466)
Per-page Precision: 0.7693, Recall 0.3721


## Save Results for posterity

In [46]:
# Directory handed to save_results below; sits next to the detections file
# under regions/detected.
history_dir = os.path.join('regions', 'detected', 'history')

In [47]:
# Hand the false and missing regions to save_results, which presumably
# writes them under history_dir (confirm in evaluate.save_results).
save_results(history_dir, false_regions, missing_regions)

## Examine Detection Faults

In [15]:
# Helper imported from evaluate; used below to step through individual faulty regions.
inspector = RegionInspector()

In [14]:
# Step through the regions that were missed; per the saved output this
# prompts before opening each example, so the cell is interactive.
# NOTE(review): the saved execution counts show this cell (In [14]) ran before
# the cell that creates `inspector` (In [15]); on a fresh kernel run top to bottom.
inspector.open_missing_detections(missing_regions)


?? Open next example of missed detection? 


In [16]:
# Step through the false detections; per the saved output this prompts
# and then prints each region's text and surrounding HTML.
# NOTE(review): the prompt in the saved output says "missed detection" although
# this call is for false detections -- check the prompt text in RegionInspector.
inspector.open_false_detections(false_regions)


?? Open next example of missed detection? 
 
===== REGION 0 =====
{Text: iframe URL: http://127.0.0.1:8000/pages/jquery/iframe%20javascript%20jquery%20tutorial/6/www.startutorial.com/articles/view/jquery-file-posting-using-iframe.html, Element: HTML > BODY:nth-of-type(1) > DIV:nth-of-type(2) > DIV:nth-of-type(1) > DIV:nth-of-type(1) > DIV:nth-of-type(2) > PRE:nth-of-type(2), Offsets: (258, 263)}
* Text:  iframe
* HTML:  <pre class="prettyprint">&lt;header&gt;

***other codes ****


&lt;!-- document javascripts --&gt;	 
&lt;script type="text/javascript"&gt;
$(document).ready(function () {
  $('#submitButton').click(function(){
     if($('iframe[name=iframeTarget]').length&lt;1){
		    var iframe=document.createElement('iframe');
			$(iframe).css('display','none');
			$(iframe).attr('src','#');
				
			$(iframe).attr('name','iframeTarget');
			$('body').append(iframe);
				
			$(this).attr('target','iframeTarget');
			}          
     });
    });
&lt;/script&gt;



***other codes ****

