Skip to content

Commit

Permalink
Updates to LevelDB record parsing (#38)
Browse files Browse the repository at this point in the history
* Updates

* Update docs

* Update version
  • Loading branch information
sydp committed Apr 17, 2024
1 parent 3fedb1b commit 4ce45c2
Show file tree
Hide file tree
Showing 8 changed files with 313 additions and 49 deletions.
64 changes: 63 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,61 @@ installation:

```
$ dfindexeddb -h
usage: dfindexeddb [-h] -s SOURCE [-o {json,jsonl,repr}]
usage: dfindexeddb [-h] {db,ldb,log} ...
A cli tool for parsing indexeddb files
positional arguments:
{db,ldb,log}
db Parse a directory as indexeddb.
ldb Parse a ldb file as indexeddb.
log Parse a log file as indexeddb.
options:
-h, --help show this help message and exit
```

To parse IndexedDB records from a LevelDB folder, use the following command:

```
dfindexeddb db -h
usage: dfindexeddb db [-h] -s SOURCE [--use_manifest] [-o {json,jsonl,repr}]
options:
-h, --help show this help message and exit
-s SOURCE, --source SOURCE
The source leveldb folder
--use_manifest Use manifest file to determine active/deleted records.
-o {json,jsonl,repr}, --output {json,jsonl,repr}
Output format. Default is json
```

To parse IndexedDB records from a LevelDB ldb (.ldb) file, use the following
command:

```
dfindexeddb ldb -h
usage: dfindexeddb ldb [-h] -s SOURCE [-o {json,jsonl,repr}]
options:
-h, --help show this help message and exit
-s SOURCE, --source SOURCE
The source .ldb file.
-o {json,jsonl,repr}, --output {json,jsonl,repr}
Output format. Default is json
```

To parse IndexedDB records from a LevelDB log (.log) file, use the following
command:

```
dfindexeddb log -h
usage: dfindexeddb log [-h] -s SOURCE [-o {json,jsonl,repr}]
options:
-h, --help show this help message and exit
-s SOURCE, --source SOURCE
The source .log file.
-o {json,jsonl,repr}, --output {json,jsonl,repr}
Output format. Default is json
```
Expand All @@ -92,6 +139,21 @@ options:
-h, --help show this help message and exit
```

To parse records from a LevelDB folder, use the following command:

```
dfindexeddb db -h
usage: dfindexeddb db [-h] -s SOURCE [--use_manifest] [-o {json,jsonl,repr}]
options:
-h, --help show this help message and exit
-s SOURCE, --source SOURCE
The source leveldb folder
--use_manifest Use manifest file to determine active/deleted records.
-o {json,jsonl,repr}, --output {json,jsonl,repr}
Output format. Default is json
```

To parse records from a LevelDB log (.log) file, use the following command:

```
Expand Down
32 changes: 20 additions & 12 deletions dfindexeddb/indexeddb/chromium/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
from dfindexeddb import errors
from dfindexeddb.indexeddb.chromium import blink
from dfindexeddb.indexeddb.chromium import definitions
from dfindexeddb.leveldb import ldb
from dfindexeddb.leveldb import log
from dfindexeddb.leveldb import record
from dfindexeddb.leveldb import utils


Expand Down Expand Up @@ -1337,24 +1336,33 @@ class IndexedDBRecord:
value: the value of the record.
sequence_number: if available, the sequence number of the record.
type: the type of the record.
level: the leveldb level, None indicates the record came from a log file.
recovered: True if the record is a recovered record.
"""
path: str
offset: int
key: Any
value: Any
sequence_number: int
sequence_number: Optional[int]
type: int
level: Optional[int]
recovered: Optional[bool]

@classmethod
def FromLevelDBRecord(
cls, record: Union[ldb.KeyValueRecord, log.ParsedInternalKey]
cls, db_record: record.LevelDBRecord
) -> IndexedDBRecord:
"""Returns an IndexedDBRecord from a ParsedInternalKey."""
idb_key = IndexedDbKey.FromBytes(record.key, base_offset=record.offset)
idb_value = idb_key.ParseValue(record.value)
idb_key = IndexedDbKey.FromBytes(
db_record.record.key, base_offset=db_record.record.offset)
idb_value = idb_key.ParseValue(db_record.record.value)
return cls(
offset=record.offset,
key=idb_key,
value=idb_value,
sequence_number=record.sequence_number if hasattr(
record, 'sequence_number') else None,
type=record.record_type)
path=db_record.path,
offset=db_record.record.offset,
key=idb_key,
value=idb_value,
sequence_number=db_record.record.sequence_number if hasattr(
db_record.record, 'sequence_number') else None,
type=db_record.record.record_type,
level=db_record.level,
recovered=db_record.recovered)
118 changes: 107 additions & 11 deletions dfindexeddb/indexeddb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,78 @@ def _Output(structure, output):
print(structure)


def IndexeddbCommand(args):
"""The CLI for processing a log/ldb file as indexeddb."""
for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
def DbCommand(args):
"""The CLI for processing a directory as indexeddb."""
if args.use_manifest:
for db_record in leveldb_record.LevelDBRecord.FromManifest(args.source):
record = db_record.record
try:
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print((
f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
continue
_Output(idb_record, output=args.output)
else:
for db_record in leveldb_record.LevelDBRecord.FromDir(args.source):
record = db_record.record
try:
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print((
f'Error parsing Indexeddb record {record.__class__.__name__}: {err}'
f' at offset {record.offset} in {db_record.path}'), file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
continue
_Output(idb_record, output=args.output)


def LdbCommand(args):
"""The CLI for processing a leveldb table (.ldb) file as indexeddb."""
for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
record = db_record.record
try:
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print(
(f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
continue
_Output(idb_record, output=args.output)


def LogCommand(args):
"""The CLI for processing a leveldb log file as indexeddb."""
for db_record in leveldb_record.LevelDBRecord.FromFile(args.source):
record = db_record.record
try:
db_record.record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
record)
idb_record = chromium_record.IndexedDBRecord.FromLevelDBRecord(
db_record)
except(
errors.ParserError,
errors.DecoderError,
NotImplementedError) as err:
print(
(f'Error parsing blink value: {err} for {record.__class__.__name__} '
(f'Error parsing Indexeddb record {record.__class__.__name__}: {err} '
f'at offset {record.offset} in {db_record.path}'), file=sys.stderr)
print(f'Traceback: {traceback.format_exc()}', file=sys.stderr)
print(f'Record: {record}', file=sys.stderr)
_Output(db_record, output=args.output)
continue
_Output(idb_record, output=args.output)


def App():
Expand All @@ -98,10 +153,51 @@ def App():
prog='dfindexeddb',
description='A cli tool for parsing indexeddb files',
epilog=f'Version {version.GetVersion()}')
parser.add_argument(

subparsers = parser.add_subparsers()

parser_db = subparsers.add_parser(
'db', help='Parse a directory as indexeddb.')
parser_db.add_argument(
'-s', '--source', required=True, type=pathlib.Path,
help='The source leveldb folder')
parser.add_argument(
parser_db.add_argument(
'--use_manifest',
action='store_true',
help='Use manifest file to determine active/deleted records.')
parser_db.add_argument(
'-o',
'--output',
choices=[
'json',
'jsonl',
'repr'],
default='json',
help='Output format. Default is json')
parser_db.set_defaults(func=DbCommand)

parser_ldb = subparsers.add_parser(
'ldb', help='Parse a ldb file as indexeddb.')
parser_ldb.add_argument(
'-s', '--source', required=True, type=pathlib.Path,
help='The source .ldb file.')
parser_ldb.add_argument(
'-o',
'--output',
choices=[
'json',
'jsonl',
'repr'],
default='json',
help='Output format. Default is json')
parser_ldb.set_defaults(func=LdbCommand)

parser_log = subparsers.add_parser(
'log', help='Parse a log file as indexeddb.')
parser_log.add_argument(
'-s', '--source', required=True, type=pathlib.Path,
help='The source .log file.')
parser_log.add_argument(
'-o',
'--output',
choices=[
Expand All @@ -110,7 +206,7 @@ def App():
'repr'],
default='json',
help='Output format. Default is json')
parser.set_defaults(func=IndexeddbCommand)
parser_log.set_defaults(func=LogCommand)

args = parser.parse_args()
args.func(args)
12 changes: 10 additions & 2 deletions dfindexeddb/leveldb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,12 @@ def _Output(structure, output):

def DbCommand(args):
"""The CLI for processing leveldb folders."""
for rec in record.LevelDBRecord.FromDir(args.source):
_Output(rec, output=args.output)
if args.use_manifest:
for rec in record.LevelDBRecord.FromManifest(args.source):
_Output(rec, output=args.output)
else:
for rec in record.LevelDBRecord.FromDir(args.source):
_Output(rec, output=args.output)


def LdbCommand(args):
Expand Down Expand Up @@ -159,6 +163,10 @@ def App():
required=True,
type=pathlib.Path,
help='The source leveldb directory')
parser_db.add_argument(
'--use_manifest',
action='store_true',
help='Use manifest file to determine active/deleted records.')
parser_db.add_argument(
'-o',
'--output',
Expand Down
2 changes: 2 additions & 0 deletions dfindexeddb/leveldb/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
SEQUENCE_LENGTH = 7
TYPE_LENGTH = 1

MANIFEST_FILENAME_PATTERN = r'MANIFEST-[0-9]{6}'


class BlockCompressionType(enum.IntEnum):
"""Block compression types."""
Expand Down
Loading

0 comments on commit 4ce45c2

Please sign in to comment.