Skip to content

Commit

Permalink
update project
Browse files Browse the repository at this point in the history
  • Loading branch information
giangm9 committed Feb 25, 2016
1 parent 3f56aa2 commit fb3cbc1
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 21 deletions.
27 changes: 14 additions & 13 deletions CrawlerAndValidator/CymonCrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,31 @@


def crawl(days, tags, adrtypes):
for day in days:
for tag in tags:
for adrtype in adrtypes:
getCymonPage(str(day), tag, adrtype, '10', '0')
pass
print 'Crawling from cymon.io...'
print 'j'
for day in days:
for tag in tags:
for adrtype in adrtypes:
getCymonPage(str(day), tag, adrtype, '100000', '0')


def getCymonPage( day, tag, adrtype, limit, offset):
print 'Crawling from cymon.io...'
def getCymonPage( day, tag, adrtype, limit, offset):
url = 'https://cymon.io/api/nexus/v1/blacklist/'
url += adrtype + '/' + tag
url += '/?days=' + day
url += '&limit=' + limit
url += '&offset=' + offset

print url
print 'URL: ', url
r = requests.get(url)
raw = json.loads(r.text)
result = raw['results']
print 'Count : ', len(result)
result = raw['results']
for item in result:
print '.'

ShowProgress.show(result.index(item) + 1, len(result))
if not ('addr' in item.keys()):
item['addr'] = item['name']
DBHelper.updateAddress({'address': item['addr'],
'address_type': adrtype})

print '\b'

4 changes: 1 addition & 3 deletions CrawlerAndValidator/DBHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
db = client['CymonAV']
collection = db['addresses']

def updateAddress(item):
print 'Updating item ', item['address']
def updateAddress(item):
row = collection.find_one({'address': item['address']})


Expand All @@ -41,7 +40,6 @@ def updateAddress(item):
noDup = {v['value']:v for v in item['detections']}.values()
collection.update({'address' : item['address']},
{"$set":{'detections': noDup}})
print 'New size : ', len(item['detections'])

def getAddresses():
result = []
Expand Down
16 changes: 12 additions & 4 deletions CrawlerAndValidator/DailyCrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,20 @@


def crawl(delta_time=1):
    """Run the full daily pipeline: crawl cymon.io, crawl AlienVault, validate.

    Args:
        delta_time: number of days to look back when crawling (default 1).
    """
    crawlCymon(delta_time)
    crawAlienVault()
    validateAll()


def crawlCymon(delta_time):
    """Crawl malware-tagged IPs and domains from cymon.io.

    Args:
        delta_time: requested look-back in days; the window is capped so the
            crawler never queries more than a few days back.
    """
    # Cap the look-back window. The original computed this value but then
    # passed range(1,) -- i.e. just [0] -- to crawl, leaving it unused and
    # querying a zero-day window.
    cymondelta = min(4, 1 + delta_time)
    # NOTE(review): assumes days 1..cymondelta-1 were the intended window
    # (1 day for the default delta_time=1) -- confirm against the API usage.
    CymonCrawler.crawl(range(1, cymondelta), ['malware'], ['ip', 'domain'])

def crawAlienVault():
    """Kick off the AlienVault crawler for the current date (YYYY-MM-DD)."""
    # date.today().isoformat() yields the same 'YYYY-MM-DD' string as the
    # original datetime.today().strftime("%Y-%m-%d").
    today = datetime.date.today().isoformat()
    AVcrawler.crawl(today)

VirusTotalValidater.validateAll()
def validateAll():
    # Thin wrapper: delegate validation of stored addresses to the
    # VirusTotal-based validator module.
    VirusTotalValidater.validateAll()

crawl()
2 changes: 1 addition & 1 deletion setup.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from setuptools import setup

setup(name='Crawler and Validator',
setup(name='CrawlerAndValidator',
version='1.0',
description='Crawler and Validator to work with informations from AllienVault and Cymon',
author='BigForce',
Expand Down

0 comments on commit fb3cbc1

Please sign in to comment.