Skip to content

Commit

Permalink
Python Unpack Tutorial Script (uses "testctx.bin" file)
Browse files Browse the repository at this point in the history
  • Loading branch information
cheeky4n6monkey committed Aug 21, 2017
1 parent 65ab712 commit a689c0c
Showing 1 changed file with 91 additions and 0 deletions.
91 changes: 91 additions & 0 deletions python-tutorials/unpack-tute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#! /usr/bin/env python

"""
Author: Adrian Leong (cheeky4n6monkey@gmail.com)
Tutorial Python v2/v3 script to print (fictional) Contact records from test file.
Fictional Contact Record format:
Record Entry Marker field "ctx!" = x63x74x78x21 in Hex
Index Number field (2 byte LE unsigned int)
Name Length field (1 byte unsigned int)
Name String field (UTF16LE encoded string)
Phone Length field (1 byte unsigned int)
Phone String field (UTF8 encoded string)
Unix Timestamp field (4 byte LE unsigned int)
"""

import struct
import datetime

version_string = "unpack-tute.py v2017-08-19"
print("Running " + version_string + "\n")

filename = "testctx.bin"
try:
fb = open(filename, "rb") # read-only in binary mode
except:
print("Error - Input file failed to open!")
exit(-1)

# Read file into one variable (file should not/cannot be too large)
filecontent = fb.read()

# Search the filecontent variable for the hex equivalent of "ctx!" which appears in each record.
# Doing it this way allows us to change the searchstring variable to search for different raw hex patterns eg "\x11\x22\x00"
searchstring = "\x63\x74\x78\x21" # "ctx!" in ASCII/UTF8

# For Python2, we could just use: nexthit = filecontent.find(searchstring, 0)
# However, Python3 compatibility requires we call the searchstring.encode() to encode searchstring to UTF8 before calling "find".
nexthit = filecontent.find(searchstring.encode('utf-8'), 0) # returns -1 if substring not found otherwise returns offset where hit found
hitlist = []

# loop iterates thru filecontent looking for hits and
# bailsout the first time it does not find the searchstring
while nexthit >= 0:
hitlist.append(nexthit)
nexthit = filecontent.find(searchstring.encode(), nexthit + 1)

# hitlist should now contain a list of offsets where "ctx!" was found
for hit in hitlist:
print("\nHit found at offset: " + str(hit) + " decimal = " + hex(hit) + " hex")
# 1st field ("Index Number") occurs 4 bytes AFTER beginning of "ctx!" and uses 2 LE bytes
indexnum_offset = hit + 4
# "indexnum_offset:(indexnum_offset+2)" means indexnum_offset, indexnum_offset+1 ONLY. It does not include (indexnum_offset+2) byte
indexnum = struct.unpack("<H", filecontent[indexnum_offset:(indexnum_offset+2)])[0] # LE 2 byte unsigned integer
print("indexnum = " + str(indexnum))
# 2nd field ("Name Length") starts after 2 byte "Index Number" field
namelength_offset = indexnum_offset + 2
print("namelength_offset = " + str(namelength_offset))
namelength = struct.unpack("B", filecontent[namelength_offset:(namelength_offset+1)])[0] # 1 byte contains size of name string (in bytes)
print("namelength = " + str(namelength))
# 3rd field ("Name String") starts after 1 byte "Name Length" field
namestring_offset = namelength_offset + 1
# decode string slice as UTF16LE
namestring = filecontent[namestring_offset:(namestring_offset+namelength)].decode('utf-16-le')
print("namestring = " + namestring)
# 4th field ("Phone Length") starts after "Name String" field ends ("Name Length" bytes)
phonelength_offset = namestring_offset + namelength
phonelength = struct.unpack("B", filecontent[phonelength_offset:(phonelength_offset+1)])[0] # 1 byte contains size of phone string (in bytes)
print("phonelength = " + str(phonelength))
# 5th field ("Phone String") starts after 1 byte "Phone Length" field
phonestring_offset = phonelength_offset + 1
print("phonestring_offset = " + str(phonestring_offset))
# decode string slice as UTF8 (could also just print slice without the decode call)
phonestring = filecontent[phonestring_offset:(phonestring_offset+phonelength)].decode('utf-8')
print("phonestring = " + phonestring)
# 6th field ("Unix Timestamp") starts after "Phone String" field ends
timestamp_offset = phonestring_offset + phonelength
print("timestamp_offset = " + str(timestamp_offset))
# Unix timestamp is LE unsigned 4 byte representing seconds since 1JAN1970
# eg LE x56xDBxCDx26 = 1457245478 decimal = Sun, 06 March 2016 06:24:38 UTC
timestamp = struct.unpack("<I", filecontent[timestamp_offset:(timestamp_offset+4)])[0]
print("raw timestamp decimal value = " + str(timestamp))
# Now we call the datetime.utcfromtimestamp function to return a human readable ISO formatted string from the raw timestamp value
timestring = datetime.datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%dT%H:%M:%S")
print("timestring = " + timestring)

fb.close()

print("\nProcessed " + str(len(hitlist)) + " ctx! hits. Exiting ...\n")

0 comments on commit a689c0c

Please sign in to comment.