Skip to content

Commit

Permalink
inital commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Hannes Fuchs committed Jan 21, 2015
0 parents commit c3583fc
Show file tree
Hide file tree
Showing 15 changed files with 1,162 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*.swp
3 changes: 3 additions & 0 deletions crawler/.gitignore
@@ -0,0 +1,3 @@
*.pyc
*.log
testare
18 changes: 18 additions & 0 deletions crawler/config-sample.cfg
@@ -0,0 +1,18 @@
[pgsql]
host = localhost
db = test
user = test
pass = test

[proxy]
host = localhost
port = 8118

[logging]
level = INFO
file = wansview.log

[daemon]
queue_size = 10
worker_threads = 5
pid = wansview.pid
116 changes: 116 additions & 0 deletions crawler/lib/DBHelper.py
@@ -0,0 +1,116 @@
#!/usr/bin/env python
import logging
import pgdb
import threading

"""
DBHelper Class
"""
class DBHelper():
    """Serialise access to one PostgreSQL connection shared between threads.

    Every public method holds ``self.lock`` while it talks to the single
    connection/cursor pair, so one instance can be handed to several worker
    threads.  All values are passed to the driver as DB-API parameters
    instead of being pasted into the SQL text.
    """

    def __init__(self, config):
        """Connect using the ``[pgsql]`` section of *config*.

        config -- object offering ``get(section, option)``; the options
                  ``host``, ``db``, ``user`` and ``pass`` are read.
        """
        self.lock = threading.Lock()
        self.con = pgdb.connect(
            dsn=config.get('pgsql', 'host') + ':' + config.get('pgsql', 'db'),
            user=config.get('pgsql', 'user'),
            password=config.get('pgsql', 'pass')
        )
        self.cur = self.con.cursor()

    def _rollback(self):
        """Abort the current transaction so the connection stays usable."""
        try:
            self.con.rollback()
        except Exception:
            # Never let a failing rollback hide the error that triggered it.
            logging.getLogger(self.__class__.__name__).exception(
                'Rollback failed')

    def _fetch(self, sql, params=None, single=False):
        """Run a read-only statement under the lock and return its rows.

        sql    -- statement text, using ``%s`` placeholders for *params*
        params -- sequence of values bound by the driver, or None
        single -- return ``fetchone()`` instead of ``fetchall()``
        """
        with self.lock:
            try:
                if params is None:
                    self.cur.execute(sql)
                else:
                    self.cur.execute(sql, params)
                if single:
                    return self.cur.fetchone()
                return self.cur.fetchall()
            except Exception:
                self._rollback()
                raise

    def get_random_ua(self):
        """Return one random ``ua`` value from ``uas``, or None if it is empty."""
        row = self._fetch(
            "SELECT ua FROM uas ORDER BY RANDOM() LIMIT 1", single=True)
        if row is None:
            return None
        return row[0]

    def get_credentials(self):
        """Return every username/password combination, best priority first.

        Each row is ``(username, uprio, password, pprio, prio)`` where
        ``prio`` is the sum of both priorities; rows with equal ``prio``
        come back in random order.
        """
        return self._fetch("""SELECT
                u.username,
                u.priority as uprio,
                p.password,
                p.priority as pprio,
                u.priority + p.priority as prio
            FROM
                usernames u,
                passwords p
            ORDER BY prio, RANDOM()""")

    def update_status(self, host, status):
        """Set ``status`` and refresh ``updated`` for *host* in ``ip_cam_hosts``."""
        with self.lock:
            try:
                self.cur.execute("""UPDATE
                        ip_cam_hosts
                    SET
                        status = %s,
                        updated = NOW()
                    WHERE
                        hostname = %s""", (status, host))
                self.con.commit()
            except Exception:
                self._rollback()
                raise

    def save_image(self, host, username, password, image_data, country):
        """Insert or update the ``ip_cam_images`` row for *host*.

        The row is inserted when no row with that hostname exists and
        updated otherwise; afterwards ``ip_cam_hosts.updated`` is refreshed.
        All statements are committed together or rolled back together.
        """
        # NOTE(review): relies on pgdb.Binary being adapted to bytea by the
        # installed PyGreSQL version -- confirm against the deployed driver.
        image = pgdb.Binary(image_data)
        with self.lock:
            try:
                self.cur.execute("""SELECT
                        count(hostname)
                    FROM
                        ip_cam_images
                    WHERE
                        hostname = %s""", (host,))
                if int(self.cur.fetchone()[0]) == 0:
                    self.cur.execute("""INSERT INTO
                            ip_cam_images
                            (hostname, username, password, image, country)
                        VALUES
                            (%s, %s, %s, %s, %s)""",
                        (host, username, password, image, country))
                else:
                    self.cur.execute("""UPDATE
                            ip_cam_images
                        SET
                            username = %s,
                            password = %s,
                            image = %s,
                            country = %s
                        WHERE
                            hostname = %s""",
                        (username, password, image, country, host))

                self.cur.execute("""UPDATE
                        ip_cam_hosts
                    SET
                        updated = NOW()
                    WHERE
                        hostname = %s""", (host,))
                self.con.commit()
            except Exception:
                self._rollback()
                raise

    def get_unchecked_hosts(self, count):
        """Return up to *count* random hostnames whose status is 'unchecked'.

        The result is a list of one-column rows.
        """
        return self._fetch("""SELECT
                hostname
            FROM
                ip_cam_hosts
            WHERE
                status = 'unchecked'
            GROUP BY
                hostname
            ORDER BY RANDOM() LIMIT %s""", (int(count),))

    def get_unchecked_host_count(self):
        """Return the number of ``ip_cam_hosts`` rows with status 'unchecked'."""
        row = self._fetch(
            "SELECT count(*) FROM ip_cam_hosts WHERE status = 'unchecked'",
            single=True)
        return row[0]

    def close(self):
        """Close the cursor and the connection once no query is in flight."""
        with self.lock:
            self.cur.close()
            self.con.close()
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
114 changes: 114 additions & 0 deletions crawler/lib/HTTPClient.py
@@ -0,0 +1,114 @@
#!/usr/bin/env python
import logging
import urllib2
import base64
import re

"""
HTTPClient Class
"""
class HTTPClient():
    """Small urllib2 wrapper that sends requests through the configured proxy.

    The client remembers the current URL, an optional User-Agent and optional
    basic-auth credentials, plus the response object and status code of the
    most recent request.
    """

    # Upper bound on 301 responses followed by one openurl() call.
    MAX_REDIRECTS = 5

    def __init__(self, config, url=None, ua=None, auth=None):
        """Install the proxy opener and optionally fetch *url* right away.

        config -- object offering ``get(section, option)``; ``host`` and
                  ``port`` of the ``[proxy]`` section are read
        url    -- URL to request immediately, or None to defer
        ua     -- User-Agent header value, or None
        auth   -- ``(username, password)`` tuple for basic auth, or None
        """
        self.proxy_host = config.get('proxy', 'host')
        self.proxy_port = config.get('proxy', 'port')
        self.url = url
        self.ua = ua
        self.auth = auth

        self.response = None
        self.redirectc = 0
        self.r_code = None

        self.ip_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')

        self.logger = logging.getLogger(self.__class__.__name__)

        proxy_address = self.proxy_host + ':' + self.proxy_port
        proxy = urllib2.ProxyHandler(
            {
                'http': proxy_address,
                'https': proxy_address
            })
        opener = urllib2.build_opener(proxy)
        # install_opener() replaces the process-wide default opener, so this
        # affects every later urllib2.urlopen() call, not only this instance.
        urllib2.install_opener(opener)

        if self.url is not None:
            self.openurl()

    def _build_request(self):
        """Create the Request for the current URL with UA and auth headers."""
        request = urllib2.Request(self.url)
        self.logger.debug('Requesting: %s' % self.url)

        if self.ua is not None:
            self.logger.debug('Using UA: %s' % self.ua)
            request.add_header('User-agent', self.ua)

        if self.auth is not None:
            credentials = '%s:%s' % self.auth
            # b64encode() emits no line breaks, unlike encodestring().
            base64string = base64.b64encode(credentials)
            self.logger.debug('Using Authdata: %s' % credentials)
            request.add_header("Authorization", "Basic %s" % base64string)

        return request

    def openurl(self):
        """Request the current URL and return the response on HTTP 200.

        A 301 response is followed up to MAX_REDIRECTS times.  Returns None
        for every other outcome; ``get_r_code()`` then reports the HTTP
        status if the server answered at all, otherwise None.
        """
        # Start clean so a failed request cannot report the status or the
        # response object left behind by an earlier one.
        self.response = None
        self.r_code = None
        try:
            while self.redirectc < self.MAX_REDIRECTS:
                self.response = urllib2.urlopen(self._build_request())
                self.r_code = self.response.getcode()
                final_url = self.response.geturl()

                if self.r_code == 200:
                    # Remember where we actually ended up.
                    self.url = final_url
                    return self.response

                if self.r_code != 301:
                    self.logger.error(
                        'Unexceptet return code: %i' % self.r_code)
                    return None

                self.logger.debug(
                    'Got an redirect to %s folowing ...' % final_url)
                self.redirectc += 1
                self.url = final_url

            # stop on too many redirects
            self.logger.warning('Too many redirects!')
        except urllib2.HTTPError as e:
            self.r_code = e.code
            if self.r_code == 401:
                self.logger.debug('Authentication failed: %s' % e)
            elif self.r_code == 503:
                self.logger.debug('Forwarding failed: %s' % e)
            elif self.r_code == 404:
                self.logger.debug('Not found %s' % e)
            else:
                self.logger.error('Unknown HTTPError: %s' % e)
        except Exception:
            # Connection-level failures land here; keep the traceback in the
            # log and let KeyboardInterrupt/SystemExit propagate.
            self.logger.exception('Unexpected ERROR!')
        finally:
            # Each call gets a fresh redirect budget, whatever the outcome.
            self.redirectc = 0
        return None

    def set_url(self, url):
        """Replace the URL used by the next request."""
        self.url = url

    def get_url(self):
        """Return the current URL (updated after redirects)."""
        return self.url

    def append_url(self, s):
        """Append *s* to the current URL."""
        self.url += s

    def set_ua(self, ua):
        """Set the User-Agent header value for later requests."""
        self.ua = ua

    def set_auth(self, auth):
        """Set the ``(username, password)`` tuple used for basic auth."""
        self.auth = auth

    def get_response(self):
        """Return the response object of the last request, or None."""
        return self.response

    def get_r_code(self):
        """Return the HTTP status code of the last request, or None."""
        return self.r_code

    def get_ip(self):
        """Return the first dotted-quad found in the current URL, or None."""
        if self.url is None:
            return None
        match = self.ip_pattern.search(self.url)
        if match is None:
            return None
        return match.group(0)


# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
48 changes: 48 additions & 0 deletions crawler/lib/SocketThread.py
@@ -0,0 +1,48 @@
#!/usr/bin/env python
import logging
import socket
import threading

"""
SocketThread Class
"""
class SocketThread(threading.Thread):
    """Thread that accepts one TCP client at a time and pushes data to it.

    The listening socket is bound in the constructor; ``run()`` waits for the
    first client and ``send()`` waits for a replacement client whenever the
    current connection breaks.
    """

    def __init__(self, bind_host, bind_port):
        """Bind and listen on ``bind_host:bind_port``.

        bind_host -- address to bind to
        bind_port -- TCP port number (int)
        """
        threading.Thread.__init__(self)
        self.logger = logging.getLogger(self.__class__.__name__)
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        self.logger.debug('Binding to %s:%i' % (bind_host, bind_port))
        self.s.bind((bind_host, bind_port))
        self.s.listen(1)

        self.con = None
        self.addr = None
        # Kept under a name of its own: an instance attribute called ``stop``
        # would shadow the stop() method and make it uncallable.
        self._shutdown_requested = False

    def _accept(self):
        """Wait for a client; leave ``con`` as None if that is not possible."""
        if self._shutdown_requested:
            return
        try:
            self.con, self.addr = self.s.accept()
        except socket.error as e:
            self.con = None
            self.addr = None
            # accept() is expected to fail once stop() closed the socket.
            if not self._shutdown_requested:
                self.logger.error('Accept failed: %s' % e)

    def run(self):
        """Thread body: wait for the first client unless already stopped."""
        self._accept()

    def send(self, data):
        """Send *data* to the connected client; do nothing without one.

        When sending fails the broken connection is closed and, unless
        stop() was called, this call blocks until a new client connects.
        The data of the failed call is not resent.
        """
        if self.con is None:
            return
        try:
            self.logger.debug('Sending data')
            self.con.sendall(data)
        except socket.error:
            self.logger.error('Lost connection')
            self.con.close()
            self.con = None
            self.addr = None
            if not self._shutdown_requested:
                self.logger.debug('Allowing new connection')
                self._accept()

    def close(self):
        """Close the current client connection, if there is one."""
        if self.con is not None:
            self.con.close()

    def stop(self):
        """Stop accepting clients and close the listening socket."""
        self._shutdown_requested = True
        self.logger.info('Closing socket')
        try:
            self.s.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # Some platforms refuse shutdown() on a socket that is only
            # listening; closing it below is what matters.
            pass
        self.s.close()

# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4

0 comments on commit c3583fc

Please sign in to comment.