Skip to content

Commit

Permalink
new: [statistics] add an optional statistic option in the server to have
Browse files Browse the repository at this point in the history
a sorted set of hashes matching and non-matching.
  • Loading branch information
adulau committed Aug 13, 2021
1 parent fc11f3d commit 72b462b
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 10 deletions.
2 changes: 1 addition & 1 deletion bin/import-poc/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"url": "https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/RDS_legacy.iso"
}
},
"local_path": "/home/koenv/nsrl/",
"local_path": "/home/adulau/",
"import": {
"max_value": 500000000,
"mod_lines": 2500
Expand Down
22 changes: 18 additions & 4 deletions bin/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from flask import Flask, url_for, send_from_directory, render_template, make_response, request
from flask_restx import Resource, Api, reqparse
import redis
import configparser

config = configparser.ConfigParser()
config.read('../etc/server.conf')
stats = config['global'].getboolean('stats')
app = Flask(__name__)
app.url_map.strict_slashes = False
api = Api(app, version=version, title='hashlookup CIRCL API', description='![](https://www.circl.lu/assets/images/circl-logo.png)\n[CIRCL hash lookup](https://hashlookup.circl.lu/) is a public API to lookup hash values against known database of files. NSRL RDS database is included. More database will be included in the future. The API is accessible via HTTP ReST API and the API is also [described as an OpenAPI](https://hashlookup.circl.lu/swagger.json). A [documentation is available with](https://www.circl.lu/services/hashlookup/) some sample queries. The API can be tested live in the interface below.', doc='/', license='CC-BY', contact='info@circl.lu', ordered=True)
Expand All @@ -24,9 +28,14 @@ def get(self, md5):
return {'message': 'Expecting a MD5 hex value'}, 400
if not is_hex(md5):
return {'message': 'MD5 is not in hex format'}, 400
if not rdb.exists("l:{}".format(md5.upper())):
k = md5.upper()
score = 1
if not rdb.exists("l:{}".format(k)):
rdb.zincrby("s:nx:md5", score, k)
return {'message': 'Non existing MD5', 'query': md5}, 404
sha1 = rdb.get("l:{}".format(md5.upper()))
if stats:
rdb.zincrby("s:exist:md5", score, k)
sha1 = rdb.get("l:{}".format(k))
h = rdb.hgetall("h:{}".format(sha1))
if "OpSystemCode" in h:
if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])):
Expand All @@ -44,9 +53,14 @@ def get(self, sha1):
return {'message': 'Expecting a SHA-1 hex value'}, 400
if not is_hex(sha1):
return {'message': 'SHA-1 is not in hex format'}, 400
if not rdb.exists("h:{}".format(sha1.upper())):
k = sha1.upper()
score = 1
if not rdb.exists("h:{}".format(k)):
rdb.zincrby("s:nx:sha1", score, k)
return {'message': 'Non existing SHA-1', 'query': sha1}, 404
h = rdb.hgetall("h:{}".format(sha1.upper()))
if stats:
rdb.zincrby("s:exist:sha1", score, k)
h = rdb.hgetall("h:{}".format(k))
if "OpSystemCode" in h:
if rdb.exists("h-OpSystemCode:{}".format(h['OpSystemCode'])):
h['OpSystemCode'] = rdb.hgetall("h-OpSystemCode:{}".format(h['OpSystemCode']))
Expand Down
8 changes: 8 additions & 0 deletions doc/DATABASE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Database structure of hashlookup

# Statistics

- `s:nx:md5` sorted set of MD5 non-existing hashes looked up
- `s:nx:sha1` sorted set of SHA1 non-existing hashes looked up
- `s:exist:md5` sorted set of MD5 existing hashes looked up
- `s:exist:sha1` sorted set of SHA1 existing hashes looked up
13 changes: 8 additions & 5 deletions etc/kvrocks.conf
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ db-name nsrl
#
# The DB will be written inside this directory
# Note that you must specify a directory here, not a file name.
dir /home/adulau/nsrl/db
dir /home/adulau/git/hashlookup-server/db

# The logs of server will be stored in this directory. If you don't specify
# one directory, by default, we store logs in the working directory that set
Expand All @@ -77,7 +77,7 @@ dir /home/adulau/nsrl/db
# When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by
# default. You can specify a custom pid file location here.
# pidfile /var/run/kvrocks.pid
pidfile /home/adulau/nsrl/db/kvrocks.pid
pidfile /home/adulau/git/hashlookup-server/db/kvrocks.pid

# You can configure a slave instance to accept writes or not. Writing against
# a slave instance may be useful to store some ephemeral data (because data
Expand Down Expand Up @@ -379,7 +379,7 @@ rocksdb.wal_size_limit_mb 16384
# compression is enabled.
#
# Default: 4KB
rocksdb.block_size 16384
rocksdb.block_size 2048

# Indicating if we'd put index/filter blocks to the block cache
#
Expand Down Expand Up @@ -439,5 +439,8 @@ rocksdb.disable_auto_compactions no
################################ NAMESPACE #####################################
# namespace.test change.me

backup-dir /home/adulau/nsrl/db/backup
log-dir /home/adulau/nsrl/db
backup-dir /home/adulau/git/hashlookup-server/db/backup
log-dir /home/adulau/git/hashlookup-server/db

auto-resize-block-and-sst yes
cluster-enabled no
2 changes: 2 additions & 0 deletions etc/server.conf.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[global]
stats = yes

0 comments on commit 72b462b

Please sign in to comment.