# magic.py

#!/usr/bin/env python

# Magic Module

#    Magic - Python module to classify like the 'file' command using a 'magic' file
#    See: 'man 4 magic' and 'man file'
#
#    Copyright (C) 2002 Thomas Mangin
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import re
import string
import sys

# TODO:
# - Keep a checksum of the cache and of the source file so that a stale cache
#   can be rebuilt when the object is created.
# - Could use circle safe_pickle (check the performance impact first).
# - This program takes some input files; we should check the permissions on those files.
# - Some code cleanup and better error handling are needed.
# - Implement the missing parts of the magic file definition.

# ------------------------------------------------------------------------------
# convert
# ------------------------------------------------------------------------------

# Digit alphabets of the supported bases. __hex[:base] is also used to obtain
# the digits valid for a base, so the lowercase letters must come first.
__dec = '0123456789'
__oct = __dec[:8]
__hex = __dec + 'abcdef' + 'ABCDEF'

# Number of prefix characters in front of the digits, keyed by base
# (none for decimal, one for octal, two for hexadecimal).
__size = {8: 1, 10: 0, 16: 2}

# Assume that the string have the appropriate length for tests

def __is_cross(char):
	"""Return True when char is the 'x' or 'X' marker of a hexadecimal prefix."""
	markers = "xX"
	return char in markers

def __is_digit_start(char):
	"""Return True when char can open an octal or hexadecimal number: '0' or a backslash."""
	openers = "0\\"
	return char in openers

def __is_oct_digit(char):
	"""Return True when char is found in the octal digit alphabet (__oct)."""
	alphabet = __oct
	return char in alphabet

def __is_dec_digit(char):
	"""Return True when char is found in the decimal digit alphabet (__dec)."""
	alphabet = __dec
	return char in alphabet

def __is_hex_digit(char):
	"""Return True when char is found in the hexadecimal digit alphabet (__hex, both cases)."""
	alphabet = __hex
	return char in alphabet


def __is_oct_start(text):
	"""Return True when text starts with '0' or a backslash followed by an octal digit.

	The length of text is not checked: too short a text raises IndexError.
	"""
	if not __is_digit_start(text[0]):
		return False
	return __is_oct_digit(text[1])

def __is_dec_start(text):
	"""Return True when the first character of text is a decimal digit.

	An empty text raises IndexError.
	"""
	first = text[0]
	return __is_dec_digit(first)

def __is_hex_start(text):
	"""Return True when text starts with '0' or a backslash, then 'x' or 'X', then a hexadecimal digit.

	The length of text is not checked: too short a text raises IndexError.
	"""
	if not __is_digit_start(text[0]):
		return False
	if not __is_cross(text[1]):
		return False
	return __is_hex_digit(text[2])


def __is_number_start(text):
	"""Return True when text starts with a decimal, octal or hexadecimal number.

	Raises IndexError when text is too short for the test being tried.
	"""
	# The order of the tests is important as they can raise exceptions
	for starts_number in (__is_dec_start, __is_oct_start, __is_hex_start):
		if starts_number(text):
			return True
	return False

# End of Assume

def base10 (text,base):
	"""Return the integer value of the digits in text, read in the given base.

	text holds the digits only (no "0x" or "\\" prefix); letters are accepted
	in either case. An empty text gives 0.

	Raises ValueError when a character is not a digit of the base (it used to
	be silently accepted as long as it was a hexadecimal digit).
	"""
	alphabet = '0123456789abcdef'
	result = 0
	for digit in str(text).lower():
		value = alphabet.find(digit)
		if value < 0 or value >= base:
			raise ValueError("invalid digit %r for base %d" % (digit, base))
		result = result * base + value
	return result


def which_base(text):
	"""Return the base (8, 10 or 16) of the number at the start of text, or 0 if there is none.

	Hexadecimal is tried first, then octal, then decimal, each test being
	guarded by the minimal length it needs.
	"""
	length = len(text)

	# No case folding is needed here: the prefix tests accept both cases
	# (the former text.lower() call discarded its result anyway).
	if length > 2 and __is_hex_start(text):
		return 16

	if length > 1 and __is_oct_start(text):
		return 8

	if length > 0 and __is_dec_start(text):
		return 10

	return 0


def start_base(text):
	"""Return True when text starts with a number recognised by which_base."""
	base = which_base(text)
	return base != 0

def _size_base(base):
	"""Return the length of the prefix of a number written in base (looked up in __size).

	Raises KeyError when base is not a key of __size.
	"""
	prefix_length = __size[base]
	return prefix_length

def _size_number(text):
	"""Return how many characters, prefix included, the number at the start of text uses.

	Returns 0 when text does not start with a number.
	"""
	base = which_base(text)

	if base == 0:
		return 0

	length = len(text)
	# which_base already checked the prefix and the first digit
	end = _size_base(base) + 1
	digits = __hex[:base]

	# Fold the case before the lookup: __hex[:16] only holds the lowercase
	# letters, so "0xFF" used to be cut after its first digit.
	while end < length and text[end].lower() in digits:
		end += 1
	return end


def index_number(text):
	"""Return the index of the first number found in text, or -1 when there is none."""
	index = 0
	try:
		while not __is_number_start(text[index:]):
			index += 1
	except IndexError:
		# The start tests index past the end of the remaining text once
		# there is no number left to find.
		return -1

	return index

def convert (text):
	"""Return the value of the number at the start of text; what follows the number is ignored.

	Raises KeyError (from _size_base) when text does not start with a number,
	as which_base then returns 0.
	"""
	base = which_base(text)
	digits = text[_size_base(base):_size_number(text)]
	return base10(digits, base)


# Special function to extract numbers from strings
# Should not be really be here !

def is_final_dash (text):
	"""Return True when text ends with a single backslash (not preceded by another one).

	An empty text gives False instead of raising IndexError.
	"""
	return text.endswith('\\') and not text.endswith('\\\\')

def is_c_escape (text):
	"""Return 1 when text starts with a backslash followed by 'n', 'r', 'b' or '0', else 0."""
	# I am probably missing some but do not have C book nearby
	if len(text) >= 2 and text[0] == '\\' and text[1] in "nrb0":
		return 1
	return 0

# End special function

def little2 (number):
	"""Return the 16 bit value stored least significant byte first in the first two characters of number."""
	first, second = ord(number[0]), ord(number[1])
	return first + (second << 8)
	
def little4 (number):
	"""Return the 32 bit value stored least significant byte first in the first four characters of number."""
	lower_half = long(little2(number))
	upper_half = long(little2(number[2:]))
	return lower_half + (upper_half << 16)

def big2 (number):
	"""Return the 16 bit value stored most significant byte first in the first two characters of number."""
	second = ord(number[1])
	first = ord(number[0])
	return second + (first << 8)

def big4 (number):
	"""Return the 32 bit value stored most significant byte first in the first four characters of number."""
	lower_half = long(big2(number[2:]))
	upper_half = long(big2(number))
	return lower_half + (upper_half << 16)

def local2 (number):
	"""Return the 16 bit value in number, decoded with the byte order reported by sys.byteorder."""
	if sys.byteorder != 'big':
		return little2(number)
	return big2(number)

def local4 (number):
	"""Return the 32 bit value in number, decoded with the byte order reported by sys.byteorder."""
	if sys.byteorder != 'big':
		return little4(number)
	return big4(number)

def test_convert():
	"""Print the expected and the obtained value for sample calls to the conversion helpers.

	Nothing is asserted: the output is meant to be checked by eye. Each line
	shows the call that was really made (the former labels did not always
	match the arguments, and size_number did not exist under that name).
	"""
	def report(cases):
		# cases is a list of (function, arguments, expected value)
		sys.stdout.write("---\n")
		for function, args, expected in cases:
			call = "%s(%s)" % (function.__name__, ", ".join([repr(arg) for arg in args]))
			sys.stdout.write("%s = %s\tgot %s\n" % (call, expected, function(*args)))

	report([
		(base10, ("FF", 16), 255),
		(base10, ("77", 8), 63),
	])

	report([
		(convert, ("0xFF",), 255),
		(convert, ("\\xFF",), 255),
		(convert, ("077",), 63),
		(convert, ("\\77",), 63),
		# The E is not used
		(convert, ("\\177E",), 127),
	])

	report([
		(_size_number, ("100qwerty",), 3),
		(_size_number, ("\\77FF",), 3),
		# Two prefix characters and two digits
		(_size_number, ("\\XFFG",), 4),
	])

	report([
		(index_number, ("0XF",), 0),
		(index_number, ("\\XF",), 0),
		(index_number, ("FF\\FFGG",), -1),
		(index_number, ("FF\\7",), 2),
		(index_number, ("FFF\\XFFGG",), 3),
		(index_number, ("FF\\XFFGG",), 2),
	])

	# 0000 0001 -->     1
	# 0001 0000 -->    16
	# 0001 1000 -->    24
	# 1000 0001 -->   129
	# 0000 0001 1000 0000 -->   384
	# 1000 0000 0000 0001 --> 32769
	# 0000 0000 0000 0001 1000 0000 0000 0000 --> 98304
	# 0000 0000 1000 0000 0000 0001 0000 0000 --> 8388864
	# 1000 0000 0000 0000 0000 0000 0000 0001 --> 2147483649

	report([
		(little2, (chr(1)+chr(0),), 1),
		(little2, (chr(16)+chr(0),), 16),
	])

	report([
		(big2, (chr(0)+chr(1),), 1),
		(big2, (chr(0)+chr(16),), 16),
	])

	report([
		(little4, (chr(1)+chr(0)+chr(0)+chr(128),), 2147483649),
		(big4, (chr(128)+chr(0)+chr(0)+chr(1),), 2147483649),
	])

# ------------------------------------------------------------------------------
# magic
# ------------------------------------------------------------------------------

class Failed (Exception):
	"""Raised while classifying a file to signal that the magic test being run did not match."""


class Magic:
	"""Classify files like the 'file' command, using the rules read from a 'magic' file."""

	# Number of bytes to read from the file for each kind of value
	data_size = { 'byte':1, 'short':2, 'long':4, 'string':1, 'pstring':1, 'date': 4, 'ldate': 4 }
	# Number of bytes of each indirect offset type (byte, short, long): a long
	# is four bytes whatever its case
	type_size = { 'b':1, 'B':1, 's':2, 'S':2, 'l':4, 'L':4 }


	# Indirect offset: "(<number>[.<type>])"
	se_offset_abs=r"^\(([0\\][xX][\dA-Fa-f]+|[0\\][0-7]*|\d+)(\.[bslBSL])*\)"
	# Indirect offset with a displacement: "(<number>[.<type>]<+ or -><number>)"
	se_offset_add=r"^\(([0\\][xX][\dA-Fa-f]+|[0\\][0-7]*|\d+)(\.[bslBSL])*([-+])([0\\][xX][\dA-Fa-f]+|[0\\][0-7]*|\d+)\)"


	def __init__ (self,filename,cachename):
		"""Load the magic rules from the cache file cachename.

		When cachename is not an existing file, the magic file filename is
		parsed first and the result written to cachename.
		"""
		import os

		self.entries = 0

		# One dictionary per field of a rule, each keyed by the rule index
		for field in (
			'_level', '_direct', '_offset_relatif', '_offset_type',
			'_offset_delta', '_endian', '_kind', '_oper', '_mask',
			'_test', '_data', '_length', '_mime',
		):
			setattr(self, field, {})

		cache_missing = not os.path.isfile(cachename)
		if cache_missing:
			self.read_magic(filename)
			self.write_cache(cachename)

		self.read_cache(cachename)
		

	# read_magic subfunction

	def __split (self,line):
		"""Split a magic line on whitespace, joining back the fields cut at a backslash-escaped space.

		Returns the list of fields. The backslash is kept in a joined field.
		"""
		result = ''
		split = line.split()

		again = 1

		# Make sure the split function did not split too much
		# (one pass joins a field with the next one only, so loop until a
		# pass joins nothing)
		while again:
			again = 0
			pos = 0
			part = []
			top = len(split)
			while pos < top:
				# A field ending with a single backslash may have been cut at an escaped space
				if is_final_dash(split[pos]):
					result = split[pos] + ' '
					index = line.find(result)
					if index != -1:
						# NOTE(review): no bound check, IndexError if the line ends right after the space
						char = line[index+len(result)]
						# Exactly one space followed the backslash: the next field belongs to this one
						if char != ' ' and char != '\t':
							pos += 1
							result += split[pos]
							again = 1
					# NOTE(review): when the join does not happen the field keeps the space appended above
				else:
					result = split[pos]

				part.append(result)
				pos += 1
			split = part

		return part


	def __level(self, text):
		return text.count('>')


	def __strip_start (self,char,text):
		if text[0] == char:
			return text[1:]
		return text


	def __direct_offset(self,text):
		if text[0] == '(' and text[-1] == ')':
			return 0
		return 1


	def __offset (self,text):
		"""Parse the offset field of a rule.

		Returns the tuple (direct, offset_type, offset_delta, offset_relatif):
		- direct: 1 for a plain number, 0 for an offset in parentheses;
		- offset_type: the letter following the '.' of an indirect offset,
		  'l' when there is none;
		- offset_delta: the value of a direct offset, or the signed number
		  following the '+' or '-' of an indirect one (0 when absent);
		- offset_relatif: the first number of an indirect offset (0 for a
		  direct one).

		An indirect offset matching neither pattern keeps the default values.
		"""
		direct = self.__direct_offset(text)
		offset_type = 'l'
		offset_delta = 0
		offset_relatif = 0

		# Get the offset information
		if direct:
			offset_delta = convert(text)
		else:
			match_abs = re.match(self.se_offset_abs, text)
			match_add = re.match(self.se_offset_add, text)
			match = match_abs or match_add

			if match:
				offset_relatif = convert(match.group(1))

				if match.group(2) is not None:
					offset_type = match.group(2)[1]

			if match_add and not match_abs:
				# The number has to be converted before being negated: the
				# matched text used to be subtracted as is (TypeError).
				offset_delta = convert(match_add.group(4))
				if match_add.group(3) == '-':
					offset_delta = -offset_delta

		return (direct,offset_type,offset_delta,offset_relatif)


	def __oper_mask (self,text):
		type_mask_and = text.split('&')
		type_mask_or = text.split('^')

		if len(type_mask_and) > 1:
			oper = '&'
			mask = convert(type_mask_and[1])
			rest = type_mask_and[0]
			return (oper,mask,rest)
		elif len(type_mask_or) > 1:
			oper = '^'
			mask = convert(type_mask_or[1])
			rest = type_mask_or[0]
			return (oper,mask,rest)
		else:
			return ('',0L,text)


	def __endian (self,full_type):
		if full_type.startswith('be'):
			return 'big'
		elif full_type.startswith('le'):
			return 'little'
		return 'local'


	def __kind (self,full_type,endian):
		if endian == 'local':
			kind = full_type
		else:
			kind = full_type[2:]

		# XXX: string case and white space compaction option not implemented
		# XXX: Not very used ...
		if kind.startswith("string/"):
			NOT_DONE_YET=kind[7:]
			kind="string"

		# XXX: No idea what are those extension
		if kind.startswith("ldate-"):
			NOT_DONE_YET=kind[6:]
			kind="ldate"
	
		return kind


	def __test_result (self,test_result):
		if test_result[0] in "=><&!^":
			test   = test_result[0]
			result = test_result[1:]
			return (test,result)
		elif test_result == 'x':
			test = 'x'
			result = 'x'
			return (test,result)
		else:
			test = '='
			result = test_result
			return (test,result)


	def __string (self,list):
		r = []
		for s in list:
			if type(s) is str:
				if s == "\\0":
					r.append(chr(0))
				else:
					r.append(s)
			elif s <10:
				r.append(ord(str(s)))
			else:
				r.append(s)
		return r


	def __data (self,kind,result):
		"""Convert the value field of a rule into the list of items to compare with the file.

		The returned list holds numbers (character codes or parsed numbers)
		and, for the escapes recognised by is_c_escape, strings.
		"""
		pos = 0
		data = list('')
		prev = ''

		while pos < len(result):
			if is_c_escape(result[pos:]):
				# \0 is not a number it is the null string
				# (stored as the backslash character followed by the number 0)
				if result[pos+1] == '0':
					data.append(result[pos])
					data.append(0L)
				# the other escapes are kept as a two character string
				else:
					data.append(result[pos:pos+2])
				pos +=2
			# In a string, letters and digits stand for themselves (never the start of a number)
			elif kind == "string" and (result[pos] in string.ascii_letters or result[pos] in string.digits):
				data.append(ord(result[pos])*1L)
				pos +=1
			else:
				base = which_base(result[pos:])

				if base == 0:
					# Not a number: keep the code of the character
					data.append(ord(result[pos])*1L)
					pos += 1
				else:
					# A number: store its value and skip its prefix and digits
					size_base = _size_base(base)
					size_number = _size_number(result[pos:])
					start = pos + size_base
					end = pos + size_number
					nb = base10(result[start:end],base)
					pos += size_number
					data.append(nb*1L)

		return data
		
	def __length (self, kind, data):
		# Calculate the size of the data to read in the file
		if kind == "string":
			replace = ""
			for i in data:
				# except: Too lazy to handle the '\r' and co otherwise
				try: replace+=chr(i)
				except: replace+='*'

			# This is for "\0"
			replace=replace.replace('*\0','*')
			# This is for two "\"
			replace=replace.replace('\\\\','*')
			# This is for the remaining "\{whatever}"
			replace=replace.replace('\\','')

			length = len(replace)
		else:
			length = self.data_size[kind]
					
		return length


	def __mime (self,list):
		mime=''
		for name in list:
			mime += name + " "

		mime = mime.rstrip()

		mime = mime.replace("\\a","\a")
		mime = mime.replace("\\b","\b")
		mime = mime.replace("\\f","\f")
		mime = mime.replace("\\n","\n")
		mime = mime.replace("\\r","\r")
		mime = mime.replace("\\t","\t")
		mime = mime.replace("\\v","\v")
		mime = mime.replace("\\0","\0")

		return mime


	def read_magic (self,magic_file):
		"""Parse the magic file magic_file into the per rule dictionaries.

		Every line that is neither empty nor a comment becomes one rule,
		stored under the same index in each of the self._* dictionaries.
		self.entries is set to the number of rules read.

		Raises StandardError when magic_file cannot be opened.
		"""
		self.magic = []

		try:
			f = open(magic_file,'rb')
		except (IOError, OSError, TypeError):
			raise StandardError("No valid magic file called \"" + str(magic_file) + "\"")

		index = 0
		try:
			for line in f:
				line = line.strip()

				if not line or line.startswith('#'):
					continue

				part = self.__split(line)

				# If the line is missing a text string assume it is '\b'
				while len(part) < 4:
					part.append('\b')

				# Get the level of the test
				level = self.__level(part[0])

				# XXX: What does the & is used for in ">>&2" as we do not know skip it
				offset_string = self.__strip_start('&',part[0][level:])

				# offset such as (<number>[.[bslBSL]][+-][<number>]) are indirect offset
				(direct,offset_type,offset_delta,offset_relatif) = self.__offset(offset_string)

				# The type can be associated to a netmask
				(oper,mask,rest) = self.__oper_mask(part[1])

				# No idea what this 'u' is so skip it
				full_type = self.__strip_start('u',rest)

				endian = self.__endian(full_type)
				kind = self.__kind(full_type,endian)

				# Get the comparison test and result
				(test,result) = self.__test_result(part[2])

				# Get the value to check against
				data = self.__data(kind,result)

				# Get the length of the data
				length = self.__length(kind,data)

				# Special characters
				mime = self.__mime(part[3:])

				# Append the line to the list
				self._level[index] = level
				self._direct[index] = direct
				self._offset_type[index] = offset_type
				self._offset_delta[index] = offset_delta
				self._offset_relatif[index] = offset_relatif
				self._endian[index] = endian
				self._kind[index] = kind
				self._oper[index] = oper
				self._mask[index] = mask
				self._test[index] = test
				self._data[index] = data
				self._length[index] = length
				self._mime[index] = mime

				index += 1
		finally:
			# Release the file even when a line cannot be parsed
			f.close()

		# The number of rules, as read_cache computes it (this used to be the
		# index of the last rule, one short of the count)
		self.entries = index


	def write_cache (self,name):
		"""Pickle the rule dictionaries, one after the other, into the file name.

		read_cache loads them back in the same order.
		"""
		import cPickle

		fields = (
			self._level, self._direct, self._offset_relatif,
			self._offset_type, self._offset_delta, self._endian,
			self._kind, self._oper, self._mask, self._test,
			self._data, self._length, self._mime,
		)

		f = open (name,'wb')
		try:
			for field in fields:
				cPickle.dump(field,f,1)
		finally:
			# Do not leak the file when a dump fails
			f.close()


	def read_cache (self,name):
		"""Load the rule dictionaries pickled by write_cache from the file name.

		self.entries is set to the number of rules loaded.
		"""
		import cPickle

		# Same order as in write_cache
		fields = (
			'_level', '_direct', '_offset_relatif', '_offset_type',
			'_offset_delta', '_endian', '_kind', '_oper', '_mask',
			'_test', '_data', '_length', '_mime',
		)

		# NOTE: unpickling can run arbitrary code, the cache file must come
		# from a trusted location.
		f = open (name,'rb')
		try:
			for field in fields:
				setattr(self, field, cPickle.load(f))
		finally:
			# Do not leak the file when the cache is truncated or corrupted
			f.close()

		self.entries = len(self._level)


	# classify subfuntions

	def __indirect_offset (self,file,type,offset):
		"""Return offset plus the value stored at offset in file.

		type is the letter giving the size and byte order of the stored value:
		'l'/'L' four bytes little/big endian, 's'/'S' two bytes little/big
		endian, 'b'/'B' one byte.

		Raises IOError (from __read) when the file is too short and Failed
		when type is not one of those letters.
		"""
		readers = {
			'l': (4, little4),
			'L': (4, big4),
			's': (2, little2),
			'S': (2, big2),
			'b': (1, ord),
			'B': (1, ord),
		}

		if type not in readers:
			raise Failed()
		size, decode = readers[type]

		# The file to read is the one received as argument (an undefined
		# name was used here before, so every call failed)
		file.seek(offset)
		delta = decode(self.__read(file,size))

		# NOTE(review): magic(5) seems to use the stored value itself as the
		# new offset rather than adding it to offset - confirm before changing
		return offset + delta

	def __read (self,file,number):
		# This may retun IOError
		data = file.read(number)
		if not data:
			raise IOError, "out of file access"
		return data


	def __convert (self,kind,endian,data):
		# Can raise StandardError and IOError
		value = 0

		# Convert the data from the file
		if kind == 'byte':
			if len(data) < 1:
				raise StandardError, "Should never happen, not enough data"
			value= ord(data[0])
		
		elif kind == 'short':
			if len(data) < 2:
				raise StandardError, "Should never happen, not enough data"
			if endian == 'local':
				value= local2(data)
			elif endian == 'little':
				value= little2(data)
			elif endian == 'big':
				value= big2(data)
			else:
				raise StandardError, "Endian type unknown"
			
		elif kind == 'long':
			if len(data) < 4:
				raise StandardError, "Should never happen, not enough data"
			if endian == 'local':
				value= local4(data)
			elif endian == 'little':
				value= little4(data)
			elif endian == 'big':
				value= big4(data)
			else:
				raise StandardError, "Endian type unknown"
			
		elif kind == 'date':
			# XXX: Not done yet
			pass
		elif kind == 'ldate':
			# XXX: Not done yet
			pass
		elif kind == 'string':
			# Nothing to do
			pass
		elif kind == 'pstring':
			# Not done here anymore
			pass
			#	#Convert it to be like a string
			#	size=ord(data[0])
			#	# Not sure the test is right (at one byte)
			#	if file_length < offset + size:
			#		value= self.__read(f,size)
			#		leng = size
			#		kind = "string"
		else:
			raise StandardError, "Type " + str(kind) + " not recognised"

		return value


	def __binary_mask(self,oper,value,mask):

		if oper == '&':
			value &= mask
		elif oper == '^':
			value ^= mask
		elif oper == '':
			pass
		else:
			raise StandardError, "Binary operator unknown " + str(oper) 

		return value


	def __read_string (self,file):
		# This may retun IOError
		limit=0
		result = "" 
		while limit < 100:
			char = self.__read(file,1)
			# chr(0) == '\x00'
			if char == '\x00' or char == '\n':
				break
			result += char
			limit += 1

		if limit == 100:
			raise Failed()

		return result


	def __is_null_string(self,data):
		return len(data) == 2 and data[0] == '\\' and data[1] == 0L
		

	def classify(self,name):
		"""Return the description given by the first matching set of magic rules, or None.

		name is either the path of the file to inspect or an open, seekable
		file object. A file opened here is always closed again; a file object
		supplied by the caller is left open so that it can be reused (for
		instance with another rule database).

		Raises StandardError when no rule has been loaded.
		"""
		if not self.entries:
			raise StandardError("Not initialised properly")

		opened_here = isinstance(name, basestring)
		if opened_here:
			f = open(name,'rb')
		else:
			f = name

		try:
			result = self.__apply_rules(f)
		finally:
			if opened_here:
				f.close()

		if result is None:
			# XXX: API Change this was previously returning "unknown"
			return None

		# The magic file use "backspace" to concatenate what is normally separated with a space
		return result.strip().replace(' \x08','')


	def __apply_rules (self,f):
		"""Run the rules against the open file f; return the text collected, or None when no rule matched."""
		# Are we still looking for the ruleset to apply or are we in a rule
		found_rule = 0

		# If we failed part of the rule there is no point looking for higher level subrule
		allow_next = 0

		# Strings provided by the successful tests
		result = ""

		f.seek(0,2)
		file_length = f.tell()

		for i in range(self.entries):
			level = self._level[i]

			# Optimisation: skip the sub-rules until a top level rule matched
			if not found_rule and level > 0:
				continue

			# We are currently working a rule
			if found_rule:
				# Finished: the next top level rule ends the matched ruleset
				if level == 0:
					break

				# No need to check a level if the previous one failed
				if level > allow_next:
					continue

			# The full magic rule
			# NOTE(review): offset_relatif is never used, an indirect offset
			# is read at offset_delta - confirm against magic(5)
			direct = self._direct[i]
			offset_type = self._offset_type[i]
			offset_delta = self._offset_delta[i]
			endian = self._endian[i]
			kind = self._kind[i]
			oper = self._oper[i]
			mask = self._mask[i]
			test = self._test[i]
			data = self._data[i]
			leng = self._length[i]
			mime = self._mime[i]

			# Does the magic line checked match the content of the file ?
			success = 0

			# The content of the file that may be used for substitution with %s
			replace = None

			try:
				# Get the offset of the information to read
				if direct == 1:
					offset = offset_delta
				else:
					# The open file has to be given (the builtin named
					# "file" was passed before)
					offset = self.__indirect_offset(f,offset_type,offset_delta)

				# If it is out of the file then the test fails.
				if file_length < offset:
					raise Failed()

				# Make sure we can read the data at the offset position
				f.seek(offset)
				extract=self.__read(f,leng)
				if not extract:
					raise Failed()

				# Convert the little/big endian value from the file
				value = self.__convert(kind,endian,extract)

				# If the value is masked, remove the unwanted bits
				value = self.__binary_mask(oper,value,mask)

				# Perform the test
				if test == '=':
					# If we are comparing string the string is already read
					if kind == 'string':
						# The string \0 seems special and it seems to be what to do
						if self.__is_null_string(data):
							success = 1
						# Other string perform a byte to byte comparison
						elif len(data) == len(extract):
							success=1
							for index in range(len(data)):
								# XXX: Does this fail for '\r' test
								if ord(extract[index]) != data[index]:
									success = 0
					elif kind == 'pstring':
						raise Failed("pstring not implemented")
					else:
						success = (data[0] == value)
						replace = value

				elif test == '>':
					# If we are > a string, we have to read it from the file
					if kind == 'string':
						if self.__is_null_string(data):
							if ord(extract[0]) != 0:
								replace = extract + self.__read_string(f)
								success = 1
						else:
							raise Failed(">[^0] Not implemented")
					elif kind == 'pstring':
						raise Failed("pstring not implemented")
					else:
						success = (value > data[0])
						replace = value

				elif test == '<':
					if kind == 'string':
						success = 1

						minimum = min(len(data),len(extract))
						if len(extract) > minimum:
							success = 0
						else:
							# NOTE(review): data holds numbers and extract
							# characters, this comparison looks unreliable
							for index in range(minimum):
								if data[index] > extract[index]:
									success = 0
									break
					elif kind == 'pstring':
						raise Failed("pstring not implemented")
					else:
						success = (value < data[0])
						replace = value

				elif test == '&':
					success = ((value & data[0]) == data[0])
					replace = value

				elif test == '^':
					# XXX: To be tested with a known file
					success = ((value ^ data[0]) == 0)
					replace = value

				elif test == '!':
					# XXX: To be tested with a known file
					# XXX: Wrong so must be a binary inversion test
					# success = (value != data[0])
					success = 0
					replace = value

				elif test == 'x':
					# XXX: copy from the code in test == '>', should create a function
					if kind == 'string':
						limit=0
						# Start from an empty string: appending to None
						# raised a TypeError which made this test always fail
						replace = ''
						while 1:
							if ord(extract[0]) == 0 or limit > 100:
								break
							replace += extract
							extract = self.__read(f,1)
							limit += 1
						if limit <= 100:
							success = 1
					elif kind == 'pstring':
						raise Failed("pstring not implemented")
					else:
						success = 1
						replace = value

				else:
					raise StandardError("test used '"+test+"' is not defined")

				if not success:
					raise Failed()

				found_rule = 1
				allow_next = level+1

				if replace is not None:
					try:
						mime = mime % replace
					except (TypeError, ValueError):
						# The text takes no (or another kind of) substitution
						pass

				result += mime
				result += ' '
			except (Failed, IOError):
				# Both have to be listed in a tuple: "except Failed, IOError"
				# only caught Failed. The sub-rules of a failed test are skipped.
				allow_next = level
			except Exception:
				# The code must not raise any exception when it fails.
				pass

		if found_rule == 0:
			return None

		return result

def main(argv=None):
    """Classify the file named on the command line and print the result.

    argv is [program, file to classify, magic file, cache file] and defaults
    to sys.argv (the parameter used to be ignored). A missing file to
    classify falls back to the program itself; a missing magic or cache file
    falls back to the names under "data/".
    """
    import sys

    if argv is None:
        argv = sys.argv
    print(argv)

    binname = argv[0]
    filename = "data/magic.linux" # not portable @/@
    cachename = "data/magic.linux.cache.delete-me"

    if len(argv) > 1:
        binname = argv[1]
    if len(argv) > 2:
        filename = argv[2]
    if len(argv) > 3:
        cachename = argv[3]

    magic = Magic(filename, cachename)
    print(magic)
    description = magic.classify(binname)

    print(description)

    if description:
        print(binname + ": " + description)
    else:
        print(binname + ": Can not recognise file type")


# ------------------------------------------------------------------------------
# our public domain kode ;p
# ------------------------------------------------------------------------------

# List of Magic instances, one per datastore; None until
# initialise_magic_datastores has run
_magic_db = None

# Names of the magic files loaded by default from the "data" directory
DEFAULT_STORES = ['magic.zope', 'magic.mime']

def initialise_magic_datastores(stores=DEFAULT_STORES):
    """Load one Magic database per name in stores into the module level _magic_db.

    The files are looked up in the "data" directory next to this module, the
    cache of a store being "<store>.cache". Nothing is done when the
    databases are already loaded.
    """

    global _magic_db

    if _magic_db is not None:
        return

    from os.path import abspath, dirname, join

    data_dir = join(dirname(abspath(__file__)), 'data')

    # Build the list aside and publish it only once every store is loaded: a
    # store failing to load used to leave a partial list behind, which later
    # calls took for a complete initialisation.
    databases = []
    for store in stores:
        filename = join(data_dir, store)
        cachename = join(data_dir, store + '.cache')
        databases.append(Magic(filename, cachename))

    _magic_db = databases

def classify(file):
    """
    Return the mimetype of the given file.

    The ``file`` can either be a file path or a consumable file-like object.
    The datastores are loaded on first use and tried in order; the first
    non-empty answer wins.

    Returns an appropriate mimetype if available or None.

    """

    if _magic_db is None:
        initialise_magic_datastores()

    for db in _magic_db:
        mimetype = db.classify(file)
        if not mimetype:
            continue
        return mimetype.lstrip(' \x08') # some kontrol chars ??

    return None