diff --git a/spatialmedia/gui.py b/spatialmedia/gui.py index ba6b3bd..d36653b 100755 --- a/spatialmedia/gui.py +++ b/spatialmedia/gui.py @@ -60,13 +60,20 @@ def action_open(self): self.set_message("Opened file: %s\n" % ntpath.basename(self.in_file)) console = Console() - metadata = spherical.parse_metadata(self.in_file, console.append) + parsedMetadata = spherical.parse_metadata(self.in_file, console.append) + + metadata = None + audio_metadata = None + if parsedMetadata: + metadata = parsedMetadata.video + audio_metadata = parsedMetadata.audio for line in console.log: if "Error" in line: self.set_error("Failed to load file %s" % ntpath.basename(self.in_file)) self.var_spherical.set(0) + self.var_spatial_audio.set(0) self.disable_state() self.button_open.configure(state="normal") self.button_quit.configure(state="normal") @@ -75,10 +82,18 @@ def action_open(self): self.enable_state() self.checkbox_spherical.configure(state="normal") + infile = os.path.abspath(self.in_file) + file_extension = os.path.splitext(infile)[1].lower() + self.enable_spatial_audio =\ + True if (file_extension == ".mp4") else False + if not metadata: self.var_spherical.set(0) self.var_3d.set(0) + if not audio_metadata: + self.var_spatial_audio.set(0) + if metadata: metadata = metadata.itervalues().next() self.var_spherical.set(1) @@ -93,6 +108,12 @@ def action_open(self): else: self.var_3d.set(0) + if audio_metadata: + self.var_spatial_audio.set(1) + self.options_ambisonics["text"] =\ + audio_metadata.get_metadata_string() + + self.update_state() def action_inject_delay(self): @@ -103,8 +124,19 @@ def action_inject_delay(self): xml = spherical.generate_spherical_xml(stereo=stereo) console = Console() + c = console.append + + metadata = spherical.Metadata() + metadata.video = xml + + if self.var_spatial_audio.get(): + # Default ambisonics audio metadata + audio_metadata = {'ambisonic_order': 1, + 'ambisonic_type': 'periphonic'} + metadata.audio = audio_metadata + spherical.inject_metadata( - 
self.in_file, self.save_file, xml, console.append) + self.in_file, self.save_file, metadata, console.append) self.set_message("Successfully saved file to %s\n" % ntpath.basename(self.save_file)) self.button_open.configure(state="normal") @@ -131,6 +163,9 @@ def action_inject(self): def action_set_spherical(self): self.update_state() + def action_set_spatial_audio(self): + self.update_state() + def action_set_3d(self): self.update_state() @@ -140,8 +175,10 @@ def enable_state(self): def disable_state(self): self.checkbox_spherical.configure(state="disabled") + self.checkbox_spatial_audio.configure(state="disabled") self.checkbox_3D.configure(state="disabled") self.options_projection.configure(state="disabled") + self.options_ambisonics.configure(state="disabled") self.button_inject.configure(state="disabled") self.button_open.configure(state="disabled") self.button_quit.configure(state="disabled") @@ -152,10 +189,22 @@ def update_state(self): self.checkbox_3D.configure(state="normal") self.options_projection.configure(state="normal") self.button_inject.configure(state="normal") + if self.enable_spatial_audio: + self.checkbox_spatial_audio.configure(state="normal") + if self.var_spatial_audio.get(): + self.options_ambisonics.configure(state="normal") + else: + self.options_ambisonics.configure(state="disable") else: self.checkbox_3D.configure(state="disabled") self.options_projection.configure(state="disabled") self.button_inject.configure(state="disabled") + self.checkbox_spatial_audio.configure(state="disabled") + if self.var_spatial_audio.get(): + self.options_ambisonics.configure(state="normal") + else: + self.options_ambisonics.configure(state="disable") + self.options_ambisonics.configure(state="disabled") def set_error(self, text): self.label_message["text"] = text @@ -185,6 +234,20 @@ def create_widgets(self): self.checkbox_spherical["command"] = self.action_set_spherical self.checkbox_spherical.grid(row=row, column=column, padx=14, pady=2) + # Spatial Audio 
Checkbox + row += 1 + column = 0 + self.label_spatial_audio = Label(self) + self.label_spatial_audio["text"] = "Spatial Audio" + self.label_spatial_audio.grid(row=row, column=column) + + column += 1 + self.var_spatial_audio = IntVar() + self.checkbox_spatial_audio = \ + Checkbutton(self, variable=self.var_spatial_audio) + self.checkbox_spatial_audio["command"] = self.action_set_spatial_audio + self.checkbox_spatial_audio.grid(row=row, column=column, padx=0, pady=0) + # 3D column = 0 row = row + 1 @@ -210,6 +273,18 @@ def create_widgets(self): self.options_projection["text"] = "Equirectangular" self.options_projection.grid(row=row, column=column, padx=14, pady=2) + # Ambisonics Type + column = 0 + row = row + 1 + self.label_ambisonics = Label(self) + self.label_ambisonics["text"] = "Ambisonics Type" + self.label_ambisonics.grid(row=row, column=column, padx=14, pady=2) + column += 1 + + self.options_ambisonics = Label(self) + self.options_ambisonics["text"] = "1st Order, ACN, SN3D, Periphonic" + self.options_ambisonics.grid(row=row, column=column, padx=14, pady=2) + # Message Box row = row + 1 column = 0 @@ -261,7 +336,7 @@ def __init__(self, master=None): master.attributes("-topmost", True) master.focus_force() self.after(50, lambda: master.attributes("-topmost", False)) - + self.enable_spatial_audio = False def main(): root = Tk() diff --git a/spatialmedia/mpeg/__init__.py b/spatialmedia/mpeg/__init__.py index 6442984..d5475cb 100644 --- a/spatialmedia/mpeg/__init__.py +++ b/spatialmedia/mpeg/__init__.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import spatialmedia.mpeg.sa3d import spatialmedia.mpeg.box import spatialmedia.mpeg.constants import spatialmedia.mpeg.container @@ -23,7 +24,8 @@ load = mpeg4_container.load Box = box.Box +SA3DBox = sa3d.SA3DBox Container = container.Container Mpeg4Container = mpeg4_container.Mpeg4Container -__all__ = ["box", "mpeg4", "container", "constants"] +__all__ = ["box", "mpeg4", "container", "constants", "sa3d"] diff --git a/spatialmedia/mpeg/constants.py b/spatialmedia/mpeg/constants.py index c36149e..78bc59e 100755 --- a/spatialmedia/mpeg/constants.py +++ b/spatialmedia/mpeg/constants.py @@ -30,6 +30,8 @@ TAG_XML = "xml " TAG_HDLR = "hdlr" TAG_FTYP = "ftyp" +TAG_ESDS = "esds" +TAG_SOUN = "soun" # Container types. TAG_MOOV = "moov" @@ -37,15 +39,20 @@ TAG_META = "meta" TAG_TRAK = "trak" TAG_MDIA = "mdia" +TAG_MP4A = "mp4a" TAG_MINF = "minf" TAG_STBL = "stbl" +TAG_STSD = "stsd" TAG_UUID = "uuid" +TAG_SA3D = "SA3D" CONTAINERS_LIST = [ TAG_MDIA, TAG_MINF, + TAG_MP4A, TAG_MOOV, TAG_STBL, + TAG_STSD, TAG_TRAK, TAG_UDTA, ] diff --git a/spatialmedia/mpeg/container.py b/spatialmedia/mpeg/container.py index 77915fc..0e9bf37 100755 --- a/spatialmedia/mpeg/container.py +++ b/spatialmedia/mpeg/container.py @@ -25,7 +25,7 @@ from spatialmedia.mpeg import box from spatialmedia.mpeg import constants - +from spatialmedia.mpeg import sa3d def load(fh, position, end): if position is None: @@ -37,7 +37,10 @@ def load(fh, position, end): name = fh.read(4) if name not in constants.CONTAINERS_LIST: - return box.load(fh, position, end) + if name == constants.TAG_SA3D: + return sa3d.load(fh, position, end) + else: + return box.load(fh, position, end) if size == 1: size = struct.unpack(">Q", fh.read(8))[0] @@ -51,13 +54,30 @@ def load(fh, position, end): print "Error: Container box size exceeds bounds." 
return None + padding = 0 + stsd_version = 0 + if (name == constants.TAG_STSD): + padding = 8 + + if (name == constants.TAG_MP4A): + current_pos = fh.tell() + fh.seek(current_pos + 8) + sample_description_version = struct.unpack(">h", fh.read(2))[0] + fh.seek(current_pos) + + if sample_description_version == 1: + padding = 28+16 # Mov + else: + padding = 28 # Mp4 + new_box = Container() new_box.name = name new_box.position = position new_box.header_size = header_size new_box.content_size = size - header_size + new_box.padding = padding new_box.contents = load_multiple( - fh, position + header_size, position + size) + fh, position + header_size + padding, position + size) if new_box.contents is None: return None @@ -81,16 +101,17 @@ def load_multiple(fh, position=None, end=None): class Container(box.Box): """MPEG4 container box contents / behaviour.""" - def __init__(self): + def __init__(self, padding=0): self.name = "" self.position = 0 self.header_size = 0 self.content_size = 0 self.contents = list() + self.padding = padding def resize(self): """Recomputes the box size and recurses on contents.""" - self.content_size = 0 + self.content_size = self.padding for element in self.contents: if isinstance(element, Container): element.resize() @@ -176,5 +197,9 @@ def save(self, in_fh, out_fh, delta): out_fh.write(struct.pack(">I", self.size())) out_fh.write(self.name) + if self.padding > 0: + in_fh.seek(self.content_start()) + box.tag_copy(in_fh, out_fh, self.padding) + for element in self.contents: element.save(in_fh, out_fh, delta) diff --git a/spatialmedia/mpeg/mpeg4_container.py b/spatialmedia/mpeg/mpeg4_container.py index 3201ad6..464c031 100755 --- a/spatialmedia/mpeg/mpeg4_container.py +++ b/spatialmedia/mpeg/mpeg4_container.py @@ -94,6 +94,7 @@ def __init__(self): self.first_mdat_box = None self.ftyp_box = None self.first_mdat_position = None + self.padding = 0 def merge(self, element): """Mpeg4 containers do not support merging.""" diff --git 
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MPEG SA3D box processing classes.

    Enables the injection of an SA3D MPEG-4 box. The SA3D box specification
    conforms to that outlined in docs/spatial-audio-rfc.md
"""

from spatialmedia.mpeg import box
from spatialmedia.mpeg import constants

import struct

# Bytes occupied by the fixed SA3D fields that precede the channel map:
# version (1) + ambisonic type (1) + ambisonic order (4) +
# channel ordering (1) + normalization type (1) + channel count (4).
_SA3D_FIXED_FIELDS_FORMAT = ">BBIBBI"
_SA3D_FIXED_FIELDS_SIZE = 12


def load(fh, position=None, end=None):
    """ Loads the SA3D box located at position in an mp4 file.

    Args:
      fh: file handle, input file handle.
      position: int or None, offset of the box header. The handle's current
          position is used when it is None.
      end: int or None, offset the box must not extend past. The bounds
          check is skipped when it is None.

    Returns:
      new_box: box, SA3D box loaded from the file location or None. None is
          returned, after printing an "Error: ..." line, when the box name
          is not SA3D, when the box would extend past end, or when the
          declared channel map does not fit inside the box.
    """
    if position is None:
        position = fh.tell()

    fh.seek(position)
    new_box = SA3DBox()
    new_box.position = position
    size = struct.unpack(">I", fh.read(4))[0]
    name = fh.read(4)

    if name != constants.TAG_SA3D:
        print("Error: atom is not an SA3D atom.")
        return None

    # Comparing against a None bound would reject every box on Python 2
    # (and raise on Python 3), so the check only runs for a real bound.
    if end is not None and position + size > end:
        print("Error: SA3D atom size exceeds bounds.")
        return None

    new_box.content_size = size - new_box.header_size
    (new_box.version,
     new_box.ambisonic_type,
     new_box.ambisonic_order,
     new_box.ambisonic_channel_ordering,
     new_box.normalization_type,
     new_box.num_channels) = struct.unpack(
         _SA3D_FIXED_FIELDS_FORMAT, fh.read(_SA3D_FIXED_FIELDS_SIZE))

    # The channel count comes straight from the file; refuse to read a
    # channel map that cannot fit in the payload the header declared.
    channel_map_size = 4 * new_box.num_channels
    if _SA3D_FIXED_FIELDS_SIZE + channel_map_size > new_box.content_size:
        print("Error: SA3D atom channel map exceeds atom size.")
        return None

    new_box.channel_map.extend(
        struct.unpack(">%dI" % new_box.num_channels,
                      fh.read(channel_map_size)))
    return new_box


class SA3DBox(box.Box):
    """Spatial audio (SA3D) box: ambisonic layout plus a channel map."""

    # Name -> on-disk code tables for the enumerated SA3D fields.
    ambisonic_types = {'periphonic': 0, 'horizontal': 1}
    ambisonic_ordering = {'ACN': 0}
    ambisonic_normalization = {'SN3D': 0}

    def __init__(self):
        box.Box.__init__(self)
        self.name = constants.TAG_SA3D
        self.header_size = 8
        self.version = 0
        self.ambisonic_type = 0
        self.ambisonic_order = 0
        self.ambisonic_channel_ordering = 0
        self.normalization_type = 0
        self.num_channels = 0
        self.channel_map = list()

    @staticmethod
    def create(num_channels, audio_metadata):
        """Builds an SA3D box for num_channels audio channels.

        Args:
          num_channels: int, number of audio channels to describe.
          audio_metadata: dictionary with keys 'ambisonic_type' (a key of
              SA3DBox.ambisonic_types) and 'ambisonic_order' (int).

        Returns:
          SA3DBox using channel ordering code 0, normalization code 0 and
          the identity channel map [0, 1, ..., num_channels - 1].

        Raises:
          KeyError: a required key is missing from audio_metadata, or the
              ambisonic type is not in SA3DBox.ambisonic_types.
        """
        new_box = SA3DBox()
        new_box.ambisonic_type = SA3DBox.ambisonic_types[
            audio_metadata["ambisonic_type"]]
        new_box.ambisonic_order = audio_metadata["ambisonic_order"]
        new_box.ambisonic_channel_ordering = 0
        new_box.normalization_type = 0
        new_box.num_channels = num_channels

        # Assumes that the channel sequence is [0, 1, ... num_channels - 1].
        new_box.channel_map = list(range(num_channels))

        # Fixed fields plus one uint32 per channel. The size is added to
        # whatever box.Box.__init__ left in content_size (presumably 0 --
        # TODO confirm against box.Box).
        new_box.content_size += _SA3D_FIXED_FIELDS_SIZE + 4 * num_channels
        return new_box

    @staticmethod
    def _name_for(table, code):
        """Returns the name mapped to code in table, or a placeholder.

        A box parsed from a file may carry a code missing from the table;
        a readable placeholder is returned instead of letting StopIteration
        escape into the caller.
        """
        for name, value in table.items():
            if value == code:
                return name
        return "unknown (%d)" % code

    def ambisonic_type_name(self):
        """Returns the name of this box's ambisonic type code."""
        return SA3DBox._name_for(
            SA3DBox.ambisonic_types, self.ambisonic_type)

    def ambisonic_channel_ordering_name(self):
        """Returns the name of this box's channel ordering code."""
        return SA3DBox._name_for(
            SA3DBox.ambisonic_ordering, self.ambisonic_channel_ordering)

    def normalization_type_name(self):
        """Returns the name of this box's normalization type code."""
        return SA3DBox._name_for(
            SA3DBox.ambisonic_normalization, self.normalization_type)

    def _channel_map_string(self):
        """Formats the channel map as '[a, b, ...]'."""
        return '[' + ', '.join('%d' % v for v in self.channel_map) + ']'

    def print_atom(self, console):
        """ Prints the contents of this spatial audio (SA3D) atom to the
            console.

        Args:
          console: function, called once per output line with a string.
        """
        console("\t\tAmbisonic Type: %s" % self.ambisonic_type_name())
        console("\t\tAmbisonic Order: %d" % self.ambisonic_order)
        console("\t\tChannel Ordering: %s"
                % self.ambisonic_channel_ordering_name())
        console("\t\tNormalization Type: %s"
                % self.normalization_type_name())
        console("\t\tNumber of Channel: %d" % self.num_channels)
        console("\t\tChannel Mapping: %s" % self._channel_map_string())

    def get_metadata_string(self):
        """ Outputs a concise single line audio metadata string. """
        return "%s, %s, %s, Order %d, %d Channels, Channel Map: %s" % (
            self.normalization_type_name(),
            self.ambisonic_channel_ordering_name(),
            self.ambisonic_type_name(),
            self.ambisonic_order,
            self.num_channels,
            self._channel_map_string())

    def save(self, in_fh, out_fh, delta):
        """Writes this box (header, fixed fields, channel map) to out_fh.

        Args:
          in_fh: file handle, unused; the box is written from its fields.
          out_fh: file handle, destination for the serialized box.
          delta: unused here.
        """
        if self.header_size == 16:
            # Extended header: 32-bit size of 1, then the box type, then
            # the real 64-bit size. This is the order the container loader
            # reads them in (name first, then the ">Q" size).
            out_fh.write(struct.pack(">I", 1))
            out_fh.write(self.name)
            out_fh.write(struct.pack(">Q", self.size()))
        elif self.header_size == 8:
            out_fh.write(struct.pack(">I", self.size()))
            out_fh.write(self.name)

        out_fh.write(struct.pack(
            _SA3D_FIXED_FIELDS_FORMAT,
            self.version,
            self.ambisonic_type,
            self.ambisonic_order,
            self.ambisonic_channel_ordering,
            self.normalization_type,
            self.num_channels))
        for channel in self.channel_map:
            if channel is not None:
                out_fh.write(struct.pack(">I", int(channel)))


# ---------------------------------------------------------------------------
# spatialmedia/spherical.py -- the two small container classes that the same
# change adds to spherical.py, reproduced here because they occupy the tail
# of this span.
# ---------------------------------------------------------------------------

class Metadata(object):
    """Metadata to inject; both fields start as None."""

    def __init__(self):
        self.video = None   # spherical video metadata
        self.audio = None   # spatial audio metadata


class ParsedMetadata(object):
    """Metadata read back from a file."""

    def __init__(self):
        self.video = dict()  # parsed spherical metadata keyed by track name
        self.audio = None    # SA3D box found in the file, if any
def _atoms_named(atoms, name):
    """Returns, in order, the atoms in atoms whose name equals name."""
    return [atom for atom in atoms if atom.name == name]


def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console):
    """Adds spatial audio metadata to the first audio track of the input
    mpeg4_file. Returns False on failure.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
      in_fh: file handle, Source for uncached file contents.
      audio_metadata: dictionary ('ambisonic_type': string,
          'ambisonic_order': int). Supported ambisonic types are
          'periphonic' and 'horizontal'.
      console: function, called with a string for each error message.

    Returns:
      The result of inject_spatial_audio_atom for the first track whose
      handler type reads as sound; True when no such track is found.
    """
    tracks = _atoms_named(
        mpeg4_file.moov_box.contents, mpeg.constants.TAG_TRAK)
    for track in tracks:
        for media in _atoms_named(track.contents, mpeg.constants.TAG_MDIA):
            handlers = _atoms_named(
                media.contents, mpeg.constants.TAG_HDLR)
            for handler in handlers:
                # The 4-byte handler type is read 8 bytes into the hdlr
                # payload and compared with the sound handler tag.
                in_fh.seek(handler.content_start() + 8)
                if in_fh.read(4) == mpeg.constants.TAG_SOUN:
                    return inject_spatial_audio_atom(
                        in_fh, media, audio_metadata, console)
    return True


def mpeg4_add_audio_metadata(mpeg4_file, in_fh, audio_metadata, console):
    """Adds spatial audio metadata, refusing files with several audio tracks.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
      in_fh: file handle, Source for uncached file contents.
      audio_metadata: dictionary ('ambisonic_type': string,
          'ambisonic_order': int).
      console: function, called with a string for each error message.

    Returns:
      False (after reporting through console) when more than one audio
      track is found; otherwise the result of mpeg4_add_spatial_audio.
    """
    num_audio_tracks = get_num_audio_tracks(mpeg4_file, in_fh)
    if num_audio_tracks > 1:
        console("Error: Expected 1 audio track. Found %d" % num_audio_tracks)
        return False

    return mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console)


def inject_spatial_audio_atom(in_fh, audio_media_atom, audio_metadata, console):
    """Appends an SA3D atom to every mp4a sample description of a track.

    Walks mdia -> minf -> stbl -> stsd -> mp4a below audio_media_atom. For
    each mp4a atom the channel count read from the file must equal the
    number of components implied by the requested ambisonic type and order.

    Args:
      in_fh: file handle, Source for uncached file contents.
      audio_media_atom: the mdia atom of the audio track.
      audio_metadata: dictionary ('ambisonic_type': string,
          'ambisonic_order': int).
      console: function, called with a string for each error message.

    Returns:
      False when the channel count cannot be read or does not match the
      expected component count; True otherwise (including when no mp4a
      sample description is found).
    """
    ambisonic_type = audio_metadata["ambisonic_type"]
    ambisonic_order = audio_metadata["ambisonic_order"]

    for minf in _atoms_named(
            audio_media_atom.contents, mpeg.constants.TAG_MINF):
        for stbl in _atoms_named(minf.contents, mpeg.constants.TAG_STBL):
            for stsd in _atoms_named(stbl.contents, mpeg.constants.TAG_STSD):
                for sample_description in _atoms_named(
                        stsd.contents, mpeg.constants.TAG_MP4A):
                    in_fh.seek(sample_description.position +
                               sample_description.header_size + 16)
                    num_channels = get_num_audio_channels(
                        sample_description, in_fh)

                    # A missing or negative count means the descriptor
                    # could not be parsed. Report that directly instead of
                    # formatting None with %d or claiming "-1 channel(s)".
                    if num_channels is None or num_channels < 0:
                        console("Error: Failed to read the number of "
                                "audio channels.")
                        return False

                    num_ambisonic_components = \
                        get_expected_num_audio_components(
                            ambisonic_type, ambisonic_order)
                    if num_channels != num_ambisonic_components:
                        err_msg = "Error: Found %d audio channel(s). "\
                                  "Expected %d channel(s) for %s ambisonics "\
                                  "of order %d."\
                            % (num_channels,
                               num_ambisonic_components,
                               ambisonic_type,
                               ambisonic_order)
                        console(err_msg)
                        return False

                    sa3d_atom = mpeg.SA3DBox.create(
                        num_channels, audio_metadata)
                    sample_description.contents.append(sa3d_atom)
    return True
def _read_uint8(in_fh):
    """Reads one byte from in_fh and returns it as an unsigned int."""
    return struct.unpack(">B", in_fh.read(1))[0]


def get_descriptor_length(in_fh):
    """Derives the length of the MP4 elementary stream descriptor at the current
    position in the input file.

    The length is stored in up to four bytes. Each byte contributes its low
    seven bits, most significant group first, and a set high bit means that
    another length byte follows.

    Args:
      in_fh: file handle positioned at the first length byte. It is left
          just past the last length byte consumed.

    Returns:
      int, the decoded descriptor length.

    Raises:
      struct.error: the file ends before the length is complete.
    """
    descriptor_length = 0
    for _ in range(4):
        size_byte = _read_uint8(in_fh)
        descriptor_length = (descriptor_length << 7) | (size_byte & 0x7f)
        # Only the high bit signals continuation. Comparing the whole byte
        # with 0x80 would stop early on bytes such as 0x81.
        if not size_byte & 0x80:
            break
    return descriptor_length


def get_expected_num_audio_components(ambisonics_type, ambisonics_order):
    """ Returns the expected number of ambisonic components for a given ambisonic
    type ['periphonic', 'horizontal'] and ambisonic order.

    Args:
      ambisonics_type: string, 'periphonic' or 'horizontal'.
      ambisonics_order: int, ambisonic order.

    Returns:
      int, (order + 1)^2 for 'periphonic', 2 * order + 1 for 'horizontal',
      and 0 for any other type.
    """
    return {
        'periphonic': (ambisonics_order + 1) * (ambisonics_order + 1),
        'horizontal': (2 * ambisonics_order) + 1,
    }.get(ambisonics_type, 0)


def _read_esds_channel_configuration(esds_atom, in_fh):
    """Parses one esds atom and returns its channel configuration, or -1.

    Expects descriptor tags 3, 4 and 5 in that order, each followed by a
    variable-length size; the fixed skips between them are those used by
    the original parser.
    """
    # Skip the first 4 bytes of the esds payload (presumably the full-box
    # version/flags -- TODO confirm).
    in_fh.seek(esds_atom.content_start() + 4)

    # Verify the read descriptor is an elementary stream descriptor
    if _read_uint8(in_fh) != 3:  # Not an MP4 elementary stream.
        print("Error: failed to read elementary stream descriptor.")
        return -1
    get_descriptor_length(in_fh)
    in_fh.seek(3, 1)  # Seek to the decoder configuration descriptor

    # Verify the read descriptor is a decoder config. descriptor.
    if _read_uint8(in_fh) != 4:
        print("Error: failed to read decoder config. descriptor.")
        return -1
    get_descriptor_length(in_fh)
    in_fh.seek(13, 1)  # offset to the decoder specific config descriptor.

    # Verify the read descriptor is a decoder specific info descriptor,
    # and that it is long enough to hold the 16 bits parsed below. This is
    # input validation, so it must not be an assert (stripped under -O).
    if (_read_uint8(in_fh) != 5 or
            get_descriptor_length(in_fh) < 2):
        print("Error: failed to read MP4 audio decoder specific config.")
        return -1

    # Bit layout of the 16 bits as read here: 5 bits object type, 4 bits
    # sampling frequency index, 4 bits channel configuration.
    decoder_descriptor = struct.unpack(">H", in_fh.read(2))[0]
    sampling_frequency_index = (decoder_descriptor & 0x0780) >> 7
    if sampling_frequency_index == 0:
        # TODO: If the sample rate is 96kHz an additional 24 bit offset
        # value here specifies the actual sample rate.
        # NOTE(review): only index 0 is rejected here; confirm which
        # indices are meant to count as "greater than 48khz".
        print("Error: Greater than 48khz audio is currently not supported.")
        return -1
    return (decoder_descriptor & 0x0078) >> 3


def get_num_audio_channels(mp4a_atom, in_fh):
    """Reads the number of audio channels from AAC's AudioSpecificConfig descriptor
    within the esds child atom of the input mp4a atom.

    Args:
      mp4a_atom: atom expected to be named mp4a, with its children loaded.
      in_fh: file handle, Source for uncached file contents. Its position
          is restored before returning once parsing has been attempted.

    Returns:
      int, the channel configuration value, or -1 when the atom is not an
      mp4a atom, has no esds child, or the descriptors cannot be parsed.
    """
    if mp4a_atom.name != mpeg.constants.TAG_MP4A:
        return -1

    saved_position = in_fh.tell()
    channel_configuration = -1  # Result when no esds child is present.
    try:
        for element in mp4a_atom.contents:
            if element.name != mpeg.constants.TAG_ESDS:
                continue
            channel_configuration = _read_esds_channel_configuration(
                element, in_fh)
            if channel_configuration < 0:
                break
    finally:
        # Restore the caller's position on success and on every failure.
        in_fh.seek(saved_position)
    return channel_configuration


def get_num_audio_tracks(mpeg4_file, in_fh):
    """ Returns the number of audio track in the input mpeg4 file.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to inspect.
      in_fh: file handle, Source for uncached file contents. It is left
          positioned after the last handler type that was read.

    Returns:
      int, the number of hdlr atoms (under moov/trak/mdia) whose handler
      type reads as sound.
    """
    num_audio_tracks = 0
    for element in mpeg4_file.moov_box.contents:
        if element.name != mpeg.constants.TAG_TRAK:
            continue
        for sub_element in element.contents:
            if sub_element.name != mpeg.constants.TAG_MDIA:
                continue
            for mdia_sub_element in sub_element.contents:
                if mdia_sub_element.name != mpeg.constants.TAG_HDLR:
                    continue
                # The 4-byte handler type is read 8 bytes into the hdlr
                # payload and compared with the sound handler tag.
                in_fh.seek(mdia_sub_element.content_start() + 8)
                if in_fh.read(4) == mpeg.constants.TAG_SOUN:
                    num_audio_tracks += 1
    return num_audio_tracks