Browse files

Offline crawler added. Browser is now using it

  • Loading branch information...
1 parent 1222e07 commit 9bb58e294f07075eb899f2e2f8aa01fce5cc362b @jmg committed Oct 16, 2011
Showing with 93 additions and 152 deletions.
  1. +42 −0 .code_swarm/project.config
  2. +44 −0 crawley/crawlers/offline.py
  3. +4 −5 crawley/web_browser/browser.py
  4. +0 −145 crawley/web_browser/browser.ui
  5. +3 −2 setup.py
View
42 .code_swarm/project.config
@@ -0,0 +1,42 @@
+# This is a sample configuration file for code_swarm
+
+# Input file
+InputFile=log.xml
+
+# Color assignment rules
+# Keep in order, do not skip numbers. Numbers start
+# at 1.
+#
+# Pattern: "Label", "regex", R,G,B, R,G,B
+# Label is optional. If it is omitted, the regex
+# will be used.
+#
+ColorAssign1="Tests",".*test.*", 90,225,90, 110,200,90
+ColorAssign2="Localizations","(.*(\.mo|\.po))|(.*\.lproj.*)", 90,225,225, 110,200,200
+ColorAssign3="Source Code",".*(\.py|\.rb|\.erb|\.hs|\.sql|\.c|\.cpp|\.h|\.m|\.d|\.js|\.pl|\.sh|\.java|\.lhs|\.hi)", 225,90,90, 200,90,110
+ColorAssign4="Documents/Images",".*(\.txt|\.html|\.tex|\.tmpl|\.css|\.xml|\.yml|\.json|\.png|\.jpg|\.gif|\.jpeg|README|COPYING|LICENSE|AUTHORS)", 90,90,225, 90,110,200
+
+
+# Save each frame to an image?
+TakeSnapshots=false
+
+# Where to save each frame
+SnapshotLocation=code_swarm_frames/#####.png
+
+
+
+# Is the input XML sorted by date? Processing is faster and uses much less memory if it is.
+IsInputSorted=true
+# - All of the given scripts for producing repository xml files produce sorted data
+# - Please file a bug if one doesn't
+
+
+# Uncomment to not use avatars
+#AvatarFetcher=NoAvatar
+
+# To use local avatars, uncomment this line:
+#AvatarFetcher=LocalAvatar
+
+# then place png files named after the usernames of committers in
+# the data/local_avatars directory
+# data/local_avatars/default.png is used by default
View
44 crawley/crawlers/offline.py
@@ -0,0 +1,44 @@
+from base import BaseCrawler
+from lxml import etree
+from crawley.extractors import XPathExtractor
+from StringIO import StringIO
+from lxml import etree
+
class OffLineCrawler(BaseCrawler):
    """Crawler that post-processes every fetched page with ``HTMLFixer``.

    The page is fetched by ``BaseCrawler._get_data``; the result is then
    handed to ``HTMLFixer`` so that ``link``/``img`` URLs are rewritten
    before the HTML is returned to the caller.
    """

    def __init__(self, *args, **kwargs):
        """Forward all positional and keyword arguments to ``BaseCrawler``."""
        BaseCrawler.__init__(self, *args, **kwargs)

    def _get_data(self, url, data=None):
        """Fetch ``url`` and return its HTML with resource URLs fixed.

        :param url: address of the page to fetch; also passed to
            ``HTMLFixer`` as the base for the rewritten URLs.
        :param data: forwarded untouched to ``BaseCrawler._get_data``.
        :returns: whatever ``HTMLFixer.get_fixed_html`` produces for the
            fetched document.
        """
        # NOTE(review): ``self._url_regex`` is not set in this class --
        # presumably provided by BaseCrawler; confirm.
        fetched = BaseCrawler._get_data(self, url, data)
        return HTMLFixer(self._url_regex, url, fetched).get_fixed_html()
+
+
class HTMLFixer(object):
    """Rewrite relative ``link``/``img`` URLs of an HTML page as absolute ones.

    Attribute values that already match ``url_regex`` are left untouched;
    every other value is resolved against the URL the page came from.
    """

    def __init__(self, url_regex, url, html):
        """Parse ``html`` and remember how to recognise and resolve URLs.

        :param url_regex: compiled pattern; values for which
            ``url_regex.match(value)`` succeeds are not rewritten.
            Presumably it matches absolute URLs -- confirm with the caller.
        :param url: URL of the page, used as the base for resolution.
        :param html: document source, parsed with
            ``XPathExtractor().get_object``.
        """
        self._url_regex = url_regex
        self.url = url
        self.html_tree = XPathExtractor().get_object(html)

    def get_fixed_html(self):
        """Fix ``link[href]`` and ``img[src]`` and return the serialized HTML.

        :returns: the result of ``etree.tostring`` on the tree root,
            pretty-printed with ``method="html"``.
        """
        self._fix_tags("link", "href")
        self._fix_tags("img", "src")

        return etree.tostring(
            self.html_tree.getroot(), pretty_print=True, method="html"
        )

    def _fix_tags(self, tag, attrib):
        """Make the ``attrib`` value of every ``tag`` element absolute.

        Elements that lack the attribute (or carry an empty value) are
        skipped instead of raising ``KeyError``.

        :param tag: element name to look up, e.g. ``"img"``.
        :param attrib: attribute holding the URL, e.g. ``"src"``.
        """
        # Imported here so the block works on Python 2 and Python 3 without
        # touching the module-level imports.
        try:
            from urlparse import urljoin
        except ImportError:
            from urllib.parse import urljoin

        for element in self.html_tree.xpath("//%s" % tag):
            value = element.attrib.get(attrib)
            if not value:
                # Nothing to resolve (e.g. <img> without a src).
                continue

            if self._url_regex.match(value):
                continue

            # urljoin resolves the reference the way a browser would:
            # "a.css" is relative to the page's directory and "/a.css" to
            # the site root, rather than being appended to the page URL.
            element.attrib[attrib] = urljoin(self.url, value)
View
9 crawley/web_browser/browser.py
@@ -3,7 +3,7 @@
from PyQt4 import QtCore, QtWebKit
from baseBrowser import BaseBrowser, BaseBrowserTab
from config import DEFAULTS, SELECTED_CLASS
-from crawley.crawlers.fast import FastCrawler
+from crawley.crawlers.offline import OffLineCrawler
from crawley.extractors import PyQueryExtractor
from crawley.manager.commands.startproject import StartProjectCommand
@@ -79,7 +79,7 @@ class BrowserTab(BaseBrowserTab):
def __init__(self, parent):
BaseBrowserTab.__init__(self, parent)
self.url = None
- self.crawler = FastCrawler()
+ self.crawler = OffLineCrawler()
def load_bar(self, value):
""" Load the progress bar """
@@ -100,7 +100,7 @@ def load_url(self, url):
""" Load the requested url in the webwiew """
self.url = str(url)
- html = self.crawler._get_data(self.url)
+ html = self.crawler._get_data(self.url)
with open(get_full_template_path("html_template"), "r") as f:
template = f.read()
@@ -156,8 +156,7 @@ def generate(self):
f.write(stream.replace("<br/>", "\r\n"))
os.sys.path.insert(0, project_name)
-
- #self.html.setHtml(stream)
+
self.html.show()
def run(self):
View
145 crawley/web_browser/browser.ui
@@ -1,145 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<ui version="4.0">
- <class>MainWindow</class>
- <widget class="QMainWindow" name="MainWindow">
- <property name="geometry">
- <rect>
- <x>0</x>
- <y>0</y>
- <width>859</width>
- <height>623</height>
- </rect>
- </property>
- <property name="sizePolicy">
- <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
- <horstretch>0</horstretch>
- <verstretch>0</verstretch>
- </sizepolicy>
- </property>
- <property name="windowTitle">
- <string>SimpleWebBrowser</string>
- </property>
- <widget class="QWidget" name="centralwidget">
- <property name="font">
- <font>
- <family>Ubuntu</family>
- <weight>50</weight>
- <italic>false</italic>
- <bold>false</bold>
- </font>
- </property>
- <property name="autoFillBackground">
- <bool>true</bool>
- </property>
- <layout class="QHBoxLayout" name="horizontalLayout_3">
- <property name="spacing">
- <number>0</number>
- </property>
- <property name="margin">
- <number>1</number>
- </property>
- <item>
- <widget class="QFrame" name="frame">
- <property name="frameShape">
- <enum>QFrame::StyledPanel</enum>
- </property>
- <property name="frameShadow">
- <enum>QFrame::Raised</enum>
- </property>
- <layout class="QGridLayout" name="gridLayout">
- <property name="margin">
- <number>0</number>
- </property>
- <property name="spacing">
- <number>0</number>
- </property>
- <item row="1" column="0">
- <layout class="QHBoxLayout" name="horizontalLayout_5">
- <property name="spacing">
- <number>0</number>
- </property>
- <item>
- <widget class="QPushButton" name="bt_back">
- <property name="text">
- <string/>
- </property>
- <property name="shortcut">
- <string>Alt+Left</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QPushButton" name="bt_ahead">
- <property name="text">
- <string/>
- </property>
- <property name="shortcut">
- <string>Alt+Right</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QPushButton" name="bt_reload">
- <property name="text">
- <string/>
- </property>
- <property name="shortcut">
- <string>F5</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QPushButton" name="bt_generate">
- <property name="text">
- <string>Generate Template</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QLineEdit" name="tb_url">
- <property name="sizePolicy">
- <sizepolicy hsizetype="Expanding" vsizetype="Preferred">
- <horstretch>0</horstretch>
- <verstretch>0</verstretch>
- </sizepolicy>
- </property>
- <property name="sizeIncrement">
- <size>
- <width>0</width>
- <height>0</height>
- </size>
- </property>
- </widget>
- </item>
- </layout>
- </item>
- <item row="5" column="0">
- <widget class="QTabWidget" name="tab_pages">
- <property name="currentIndex">
- <number>-1</number>
- </property>
- <property name="tabsClosable">
- <bool>true</bool>
- </property>
- <property name="movable">
- <bool>true</bool>
- </property>
- </widget>
- </item>
- </layout>
- </widget>
- </item>
- </layout>
- </widget>
- <action name="actionReload">
- <property name="text">
- <string>reload</string>
- </property>
- <property name="shortcut">
- <string>F5</string>
- </property>
- </action>
- </widget>
- <resources/>
- <connections/>
-</ui>
View
5 setup.py
@@ -4,8 +4,9 @@
import os
PATH = os.path.dirname(os.path.abspath(__file__))
-templates_dir = os.path.join(PATH, "crawley", "conf", "templates")
-templates_files = [os.path.join(templates_dir, file) for file in os.listdir(templates_dir)]
+templates_dir = os.path.join("crawley", "conf", "templates")
+templates_local_dir = os.path.join(PATH, templates_dir)
+templates_files = [os.path.join(templates_dir, file) for file in os.listdir(templates_local_dir)]
setup(
name="crawley",

0 comments on commit 9bb58e2

Please sign in to comment.