diff --git a/spatialmedia/gui.py b/spatialmedia/gui.py
index ba6b3bd..d36653b 100755
--- a/spatialmedia/gui.py
+++ b/spatialmedia/gui.py
@@ -60,13 +60,20 @@ def action_open(self):
         self.set_message("Opened file: %s\n" % ntpath.basename(self.in_file))
 
         console = Console()
-        metadata = spherical.parse_metadata(self.in_file, console.append)
+        parsedMetadata = spherical.parse_metadata(self.in_file, console.append)
+
+        metadata = None
+        audio_metadata = None
+        if parsedMetadata:
+            metadata = parsedMetadata.video
+            audio_metadata = parsedMetadata.audio
 
         for line in console.log:
             if "Error" in line:
                 self.set_error("Failed to load file %s"
                                % ntpath.basename(self.in_file))
                 self.var_spherical.set(0)
+                self.var_spatial_audio.set(0)
                 self.disable_state()
                 self.button_open.configure(state="normal")
                 self.button_quit.configure(state="normal")
@@ -75,10 +82,18 @@ def action_open(self):
         self.enable_state()
         self.checkbox_spherical.configure(state="normal")
 
+        infile = os.path.abspath(self.in_file)
+        file_extension = os.path.splitext(infile)[1].lower()
+        self.enable_spatial_audio =\
+            True if (file_extension == ".mp4") else False
+
         if not metadata:
             self.var_spherical.set(0)
             self.var_3d.set(0)
 
+        if not audio_metadata:
+            self.var_spatial_audio.set(0)
+
         if metadata:
             metadata = metadata.itervalues().next()
             self.var_spherical.set(1)
@@ -93,6 +108,12 @@ def action_open(self):
             else:
                 self.var_3d.set(0)
 
+        if audio_metadata:
+            self.var_spatial_audio.set(1)
+            self.options_ambisonics["text"] =\
+                audio_metadata.get_metadata_string()
+
+
         self.update_state()
 
     def action_inject_delay(self):
@@ -103,8 +124,19 @@ def action_inject_delay(self):
         xml = spherical.generate_spherical_xml(stereo=stereo)
 
         console = Console()
+        c = console.append
+
+        metadata = spherical.Metadata()
+        metadata.video = xml
+
+        if self.var_spatial_audio.get():
+            # Default ambisonics audio metadata
+            audio_metadata = {'ambisonic_order': 1,
+                              'ambisonic_type': 'periphonic'}
+            metadata.audio = audio_metadata
+
         spherical.inject_metadata(
-            self.in_file, self.save_file, xml, console.append)
+            self.in_file, self.save_file, metadata, console.append)
         self.set_message("Successfully saved file to %s\n"
                          % ntpath.basename(self.save_file))
         self.button_open.configure(state="normal")
@@ -131,6 +163,9 @@ def action_inject(self):
     def action_set_spherical(self):
         self.update_state()
 
+    def action_set_spatial_audio(self):  # Spatial Audio checkbox command.
+        self.update_state()
+
     def action_set_3d(self):
         self.update_state()
 
@@ -140,8 +175,10 @@ def enable_state(self):
 
     def disable_state(self):
         self.checkbox_spherical.configure(state="disabled")
+        self.checkbox_spatial_audio.configure(state="disabled")
         self.checkbox_3D.configure(state="disabled")
         self.options_projection.configure(state="disabled")
+        self.options_ambisonics.configure(state="disabled")
         self.button_inject.configure(state="disabled")
         self.button_open.configure(state="disabled")
         self.button_quit.configure(state="disabled")
@@ -152,10 +189,22 @@ def update_state(self):
             self.checkbox_3D.configure(state="normal")
             self.options_projection.configure(state="normal")
             self.button_inject.configure(state="normal")
+            if self.enable_spatial_audio:
+                self.checkbox_spatial_audio.configure(state="normal")
+                if self.var_spatial_audio.get():
+                    self.options_ambisonics.configure(state="normal")
+                else:
+                    self.options_ambisonics.configure(state="disable")
         else:
             self.checkbox_3D.configure(state="disabled")
             self.options_projection.configure(state="disabled")
             self.button_inject.configure(state="disabled")
+            self.checkbox_spatial_audio.configure(state="disabled")
+            if self.var_spatial_audio.get():
+                self.options_ambisonics.configure(state="normal")
+            else:
+                self.options_ambisonics.configure(state="disable")
+            self.options_ambisonics.configure(state="disabled")
 
     def set_error(self, text):
         self.label_message["text"] = text
@@ -185,6 +234,20 @@ def create_widgets(self):
         self.checkbox_spherical["command"] = self.action_set_spherical
         self.checkbox_spherical.grid(row=row, column=column, padx=14, pady=2)
 
+        # Spatial Audio Checkbox
+        row += 1
+        column = 0
+        self.label_spatial_audio = Label(self)
+        self.label_spatial_audio["text"] = "Spatial Audio"
+        self.label_spatial_audio.grid(row=row, column=column)
+
+        column += 1
+        self.var_spatial_audio = IntVar()
+        self.checkbox_spatial_audio = \
+           Checkbutton(self, variable=self.var_spatial_audio)
+        self.checkbox_spatial_audio["command"] = self.action_set_spatial_audio
+        self.checkbox_spatial_audio.grid(row=row, column=column, padx=0, pady=0)
+
         # 3D
         column = 0
         row = row + 1
@@ -210,6 +273,18 @@ def create_widgets(self):
         self.options_projection["text"] = "Equirectangular"
         self.options_projection.grid(row=row, column=column, padx=14, pady=2)
 
+        # Ambisonics Type
+        column = 0
+        row = row + 1
+        self.label_ambisonics = Label(self)
+        self.label_ambisonics["text"] = "Ambisonics Type"
+        self.label_ambisonics.grid(row=row, column=column, padx=14, pady=2)
+        column += 1
+
+        self.options_ambisonics = Label(self)
+        self.options_ambisonics["text"] = "1st Order, ACN, SN3D, Periphonic"
+        self.options_ambisonics.grid(row=row, column=column, padx=14, pady=2)
+
         # Message Box
         row = row + 1
         column = 0
@@ -261,7 +336,7 @@ def __init__(self, master=None):
         master.attributes("-topmost", True)
         master.focus_force()
         self.after(50, lambda: master.attributes("-topmost", False))
-
+        self.enable_spatial_audio = False
 
 def main():
     root = Tk()
diff --git a/spatialmedia/mpeg/__init__.py b/spatialmedia/mpeg/__init__.py
index 6442984..d5475cb 100644
--- a/spatialmedia/mpeg/__init__.py
+++ b/spatialmedia/mpeg/__init__.py
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import spatialmedia.mpeg.sa3d
 import spatialmedia.mpeg.box
 import spatialmedia.mpeg.constants
 import spatialmedia.mpeg.container
@@ -23,7 +24,8 @@
 load = mpeg4_container.load
 
 Box = box.Box
+SA3DBox = sa3d.SA3DBox
 Container = container.Container
 Mpeg4Container = mpeg4_container.Mpeg4Container
 
-__all__ = ["box", "mpeg4", "container", "constants"]
+__all__ = ["box", "mpeg4", "container", "constants", "sa3d"]
diff --git a/spatialmedia/mpeg/constants.py b/spatialmedia/mpeg/constants.py
index c36149e..78bc59e 100755
--- a/spatialmedia/mpeg/constants.py
+++ b/spatialmedia/mpeg/constants.py
@@ -30,6 +30,8 @@
 TAG_XML = "xml "
 TAG_HDLR = "hdlr"
 TAG_FTYP = "ftyp"
+TAG_ESDS = "esds"
+TAG_SOUN = "soun"
 
 # Container types.
 TAG_MOOV = "moov"
@@ -37,15 +39,20 @@
 TAG_META = "meta"
 TAG_TRAK = "trak"
 TAG_MDIA = "mdia"
+TAG_MP4A = "mp4a"
 TAG_MINF = "minf"
 TAG_STBL = "stbl"
+TAG_STSD = "stsd"
 TAG_UUID = "uuid"
+TAG_SA3D = "SA3D"
 
 CONTAINERS_LIST = [
     TAG_MDIA,
     TAG_MINF,
+    TAG_MP4A,
     TAG_MOOV,
     TAG_STBL,
+    TAG_STSD,
     TAG_TRAK,
     TAG_UDTA,
     ]
diff --git a/spatialmedia/mpeg/container.py b/spatialmedia/mpeg/container.py
index 77915fc..0e9bf37 100755
--- a/spatialmedia/mpeg/container.py
+++ b/spatialmedia/mpeg/container.py
@@ -25,7 +25,7 @@
 
 from spatialmedia.mpeg import box
 from spatialmedia.mpeg import constants
-
+from spatialmedia.mpeg import sa3d
 
 def load(fh, position, end):
     if position is None:
@@ -37,7 +37,10 @@ def load(fh, position, end):
     name = fh.read(4)
 
     if name not in constants.CONTAINERS_LIST:
-        return box.load(fh, position, end)
+        if name == constants.TAG_SA3D:
+            return sa3d.load(fh, position, end)
+        else:
+            return box.load(fh, position, end)
 
     if size == 1:
         size = struct.unpack(">Q", fh.read(8))[0]
@@ -51,13 +54,30 @@ def load(fh, position, end):
         print "Error: Container box size exceeds bounds."
         return None
 
+    padding = 0
+    stsd_version = 0
+    if (name == constants.TAG_STSD):
+        padding = 8
+
+    if (name == constants.TAG_MP4A):
+        current_pos = fh.tell()
+        fh.seek(current_pos + 8)
+        sample_description_version = struct.unpack(">h", fh.read(2))[0]
+        fh.seek(current_pos)
+
+        if sample_description_version == 1:
+          padding = 28+16 # Mov
+        else:
+          padding = 28 # Mp4
+
     new_box = Container()
     new_box.name = name
     new_box.position = position
     new_box.header_size = header_size
     new_box.content_size = size - header_size
+    new_box.padding = padding
     new_box.contents = load_multiple(
-        fh, position + header_size, position + size)
+        fh, position + header_size + padding, position + size)
 
     if new_box.contents is None:
         return None
@@ -81,16 +101,17 @@ def load_multiple(fh, position=None, end=None):
 class Container(box.Box):
     """MPEG4 container box contents / behaviour."""
 
-    def __init__(self):
+    def __init__(self, padding=0):
         self.name = ""
         self.position = 0
         self.header_size = 0
         self.content_size = 0
         self.contents = list()
+        self.padding = padding
 
     def resize(self):
         """Recomputes the box size and recurses on contents."""
-        self.content_size = 0
+        self.content_size = self.padding
         for element in self.contents:
             if isinstance(element, Container):
                 element.resize()
@@ -176,5 +197,9 @@ def save(self, in_fh, out_fh, delta):
             out_fh.write(struct.pack(">I", self.size()))
             out_fh.write(self.name)
 
+        if self.padding > 0:
+            in_fh.seek(self.content_start())
+            box.tag_copy(in_fh, out_fh, self.padding)
+
         for element in self.contents:
             element.save(in_fh, out_fh, delta)
diff --git a/spatialmedia/mpeg/mpeg4_container.py b/spatialmedia/mpeg/mpeg4_container.py
index 3201ad6..464c031 100755
--- a/spatialmedia/mpeg/mpeg4_container.py
+++ b/spatialmedia/mpeg/mpeg4_container.py
@@ -94,6 +94,7 @@ def __init__(self):
         self.first_mdat_box = None
         self.ftyp_box = None
         self.first_mdat_position = None
+        self.padding = 0
 
     def merge(self, element):
         """Mpeg4 containers do not support merging."""
diff --git a/spatialmedia/mpeg/sa3d.py b/spatialmedia/mpeg/sa3d.py
new file mode 100644
index 0000000..ad3ef23
--- /dev/null
+++ b/spatialmedia/mpeg/sa3d.py
@@ -0,0 +1,173 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2015 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""MPEG SA3D box processing classes.
+
+   Enables the injection of an SA3D box into MPEG-4 files. The SA3D box
+   specification conforms to that outlined in docs/spatial-audio-rfc.md
+"""
+
+from spatialmedia.mpeg import box
+from spatialmedia.mpeg import constants
+
+import struct
+
+
+def load(fh, position=None, end=None):
+    """Loads the SA3D box located at position in an mp4 file.
+
+    Args:
+      fh: file handle, input file handle.
+      position: int or None, box offset; None uses the current position.
+      end: int or None, offset the box must not pass; None skips the check.
+
+    Returns:
+      new_box: box, SA3D box loaded from the file location or None.
+    """
+    if position is None:
+        position = fh.tell()
+
+    fh.seek(position)
+    new_box = SA3DBox()
+    new_box.position = position
+    size = struct.unpack(">I", fh.read(4))[0]
+    name = fh.read(4)
+
+    if name != constants.TAG_SA3D:
+        print("Error: atom is not an SA3D atom.")
+        return None
+
+    # Python 2 orders None below every int, so an unguarded comparison
+    # would reject every box whenever end is omitted.
+    if end is not None and position + size > end:
+        print("Error: SA3D atom size exceeds bounds.")
+        return None
+
+    new_box.content_size = size - new_box.header_size
+    (new_box.version, new_box.ambisonic_type, new_box.ambisonic_order,
+     new_box.ambisonic_channel_ordering, new_box.normalization_type,
+     new_box.num_channels) = struct.unpack(">BBIBBI", fh.read(12))
+    for _ in range(new_box.num_channels):
+        new_box.channel_map.append(
+            struct.unpack(">I", fh.read(4))[0])
+    return new_box
+
+
+class SA3DBox(box.Box):
+    """Spatial audio (SA3D) box describing ambisonic audio metadata."""
+
+    # Name -> stored code tables for the enumerated SA3D fields.
+    ambisonic_types = {'periphonic': 0, 'horizontal': 1}
+    ambisonic_ordering = {'ACN': 0}
+    ambisonic_normalization = {'SN3D': 0}
+
+    def __init__(self):
+        box.Box.__init__(self)
+        self.name = constants.TAG_SA3D
+        self.header_size = 8
+        self.version = 0
+        self.ambisonic_type = 0
+        self.ambisonic_order = 0
+        self.ambisonic_channel_ordering = 0
+        self.normalization_type = 0
+        self.num_channels = 0
+        self.channel_map = list()
+
+    @staticmethod
+    def create(num_channels, audio_metadata):
+        """Builds a version 0, ACN, SN3D box with an identity channel map.
+
+        Args:
+          num_channels: int, number of audio channels to map.
+          audio_metadata: dict, 'ambisonic_type' (a key of ambisonic_types)
+              and 'ambisonic_order' (int).
+        """
+        new_box = SA3DBox()
+        new_box.ambisonic_type = SA3DBox.ambisonic_types[
+            audio_metadata["ambisonic_type"]]
+        new_box.ambisonic_order = audio_metadata["ambisonic_order"]
+        new_box.num_channels = num_channels
+        # Assumes that the channel sequence is [0,1,2, ... num_channels].
+        new_box.channel_map = list(range(num_channels))
+        # 4 x uint8 + 2 x uint32 fixed fields, then a uint32 per channel.
+        new_box.content_size += 12 + 4 * len(new_box.channel_map)
+        return new_box
+
+    @staticmethod
+    def _name_for(table, code):
+        """Returns the key of table stored as code, or an 'unknown' label."""
+        for key, value in table.items():
+            if value == code:
+                return key
+        # A parsed file may carry a code this tool does not know about.
+        return "unknown (%s)" % code
+
+    def ambisonic_type_name(self):
+        """Returns the name of this box's ambisonic type."""
+        return self._name_for(self.ambisonic_types, self.ambisonic_type)
+
+    def ambisonic_channel_ordering_name(self):
+        """Returns the name of this box's channel ordering."""
+        return self._name_for(
+            self.ambisonic_ordering, self.ambisonic_channel_ordering)
+
+    def normalization_type_name(self):
+        """Returns the name of this box's normalization type."""
+        return self._name_for(
+            self.ambisonic_normalization, self.normalization_type)
+
+    def print_atom(self, console):
+        """Prints the contents of this spatial audio (SA3D) atom to console."""
+        console("\t\tAmbisonic Type: %s" % self.ambisonic_type_name())
+        console("\t\tAmbisonic Order: %d" % self.ambisonic_order)
+        console("\t\tChannel Ordering: %s"
+                % self.ambisonic_channel_ordering_name())
+        console("\t\tNormalization Type: %s"
+                % self.normalization_type_name())
+        console("\t\tNumber of Channel: %d" % self.num_channels)
+        console("\t\tChannel Mapping: %s" %
+                ('[' + ', '.join('%d' % v for v in self.channel_map) + ']'))
+
+    def get_metadata_string(self):
+        """Outputs a concise single line audio metadata string."""
+        return "%s, %s, %s, Order %d, %d Channels, Channel Map: %s" % (
+            self.normalization_type_name(),
+            self.ambisonic_channel_ordering_name(),
+            self.ambisonic_type_name(),
+            self.ambisonic_order,
+            self.num_channels,
+            ('[' + ', '.join('%d' % v for v in self.channel_map) + ']'))
+
+    def save(self, in_fh, out_fh, delta):
+        """Writes the box header and SA3D fields to out_fh."""
+        # in_fh and delta are unused; Container.save passes them to children.
+        if self.header_size == 16:
+            # 64-bit form: size field of 1, then the type, then the size.
+            out_fh.write(struct.pack(">I", 1))
+            out_fh.write(self.name)
+            out_fh.write(struct.pack(">Q", self.size()))
+        elif self.header_size == 8:
+            out_fh.write(struct.pack(">I", self.size()))
+            out_fh.write(self.name)
+
+        out_fh.write(struct.pack(
+            ">BBIBBI", self.version, self.ambisonic_type,
+            self.ambisonic_order, self.ambisonic_channel_ordering,
+            self.normalization_type, self.num_channels))
+        for i in self.channel_map:
+            if i is not None:
+                out_fh.write(struct.pack(">I", int(i)))
diff --git a/spatialmedia/spherical.py b/spatialmedia/spherical.py
index 21125be..87e4c6c 100755
--- a/spatialmedia/spherical.py
+++ b/spatialmedia/spherical.py
@@ -89,6 +89,16 @@
     "CroppedAreaTopPixels",
 ]
 
+class Metadata(object):  # Metadata handed to inject_metadata.
+    def __init__(self):
+        self.video = None  # Spherical XML to inject, or None.
+        self.audio = None  # Ambisonics settings dict, or None.
+
+class ParsedMetadata(object):  # Result built by parse_spherical_mpeg4.
+    def __init__(self):
+        self.video = dict()  # trackName -> parsed spherical metadata.
+        self.audio = None  # SA3D box found under an mp4a atom, if any.
+
 SPHERICAL_PREFIX = "{http://ns.google.com/videos/1.0/spherical/}"
 SPHERICAL_TAGS = dict()
 for tag in SPHERICAL_TAGS_LIST:
@@ -151,6 +161,73 @@ def mpeg4_add_spherical(mpeg4_file, in_fh, metadata):
     mpeg4_file.resize()
     return True
 
+def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console):
+    """Injects spatial audio metadata into the first audio track found in
+       mpeg4_file. Returns the injection result, or True without a track.
+
+    Args:
+      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
+      in_fh: file handle, Source for uncached file contents.
+      audio_metadata: dict, 'ambisonic_type' and 'ambisonic_order' entries.
+      console: function, passed on to inject_spatial_audio_atom.
+    """
+    for track in mpeg4_file.moov_box.contents:
+        if track.name != mpeg.constants.TAG_TRAK:
+            continue
+        for media in track.contents:
+            if media.name != mpeg.constants.TAG_MDIA:
+                continue
+            for handler in media.contents:
+                if handler.name != mpeg.constants.TAG_HDLR:
+                    continue
+                in_fh.seek(handler.content_start() + 8)
+                if in_fh.read(4) == mpeg.constants.TAG_SOUN:
+                    return inject_spatial_audio_atom(
+                        in_fh, media, audio_metadata, console)
+    return True
+
+def mpeg4_add_audio_metadata(mpeg4_file, in_fh, audio_metadata, console):
+    """Adds spatial audio metadata unless the file has several audio tracks."""
+    track_count = get_num_audio_tracks(mpeg4_file, in_fh)
+    if track_count > 1:
+        console("Error: Expected 1 audio track. Found %d" % track_count)
+        return False
+    return mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console)
+
+def inject_spatial_audio_atom(in_fh, audio_media_atom, audio_metadata, console):
+    """Adds an SA3D atom to each mp4a sample description of an audio mdia.
+
+    Returns False, after reporting to console, on a channel count mismatch.
+    """
+    expected_channels = get_expected_num_audio_components(
+        audio_metadata["ambisonic_type"], audio_metadata["ambisonic_order"])
+    for minf in audio_media_atom.contents:
+        if minf.name != mpeg.constants.TAG_MINF:
+            continue
+        for stbl in minf.contents:
+            if stbl.name != mpeg.constants.TAG_STBL:
+                continue
+            for stsd in stbl.contents:
+                if stsd.name != mpeg.constants.TAG_STSD:
+                    continue
+                for mp4a in stsd.contents:
+                    if mp4a.name != mpeg.constants.TAG_MP4A:
+                        continue
+                    num_channels = get_num_audio_channels(mp4a, in_fh)
+                    if num_channels != expected_channels:
+                        console("Error: Found %d audio channel(s). "
+                                "Expected %d channel(s) for %s ambisonics "
+                                "of order %d."
+                                % (num_channels, expected_channels,
+                                   audio_metadata["ambisonic_type"],
+                                   audio_metadata["ambisonic_order"]))
+                        return False
+                    # Replace any existing SA3D atom, never add a duplicate.
+                    mp4a.contents[:] = [child for child in mp4a.contents if
+                                        child.name != mpeg.constants.TAG_SA3D]
+                    mp4a.contents.append(
+                        mpeg.SA3DBox.create(num_channels, audio_metadata))
+    return True
 
 def parse_spherical_xml(contents, console):
     """Returns spherical metadata for a set of xml data.
@@ -202,7 +279,7 @@ def parse_spherical_mpeg4(mpeg4_file, fh, console):
     Returns:
       Dictionary stored as (trackName, metadataDictionary)
     """
-    metadataSets = dict()
+    metadata = ParsedMetadata()
     track_num = 0
     for element in mpeg4_file.moov_box.contents:
         if element.name == mpeg.constants.TAG_TRAK:
@@ -222,10 +299,27 @@ def parse_spherical_mpeg4(mpeg4_file, fh, console):
                             contents = sub_element.contents[16:]
                         else:
                             contents = fh.read(sub_element.content_size - 16)
-                        metadataSets[trackName] = \
+                        metadata.video[trackName] = \
                             parse_spherical_xml(contents, console)
-    return metadataSets
 
+            if sub_element.name == mpeg.constants.TAG_MDIA:
+                for mdia_sub_element in sub_element.contents:
+                    if mdia_sub_element.name != mpeg.constants.TAG_MINF:
+                        continue
+                    for stbl_elem in mdia_sub_element.contents:
+                        if stbl_elem.name != mpeg.constants.TAG_STBL:
+                            continue
+                        for stsd_elem in stbl_elem.contents:
+                            if stsd_elem.name != mpeg.constants.TAG_STSD:
+                                continue
+                            for mp4a_elem in stsd_elem.contents:
+                                if mp4a_elem.name != mpeg.constants.TAG_MP4A:
+                                    continue
+                                for sa3d_elem in mp4a_elem.contents:
+                                    if sa3d_elem.name == mpeg.constants.TAG_SA3D:
+                                        sa3d_elem.print_atom(console)
+                                        metadata.audio = sa3d_elem
+    return metadata
 
 def parse_mpeg4(input_file, console):
     with open(input_file, "rb") as in_fh:
@@ -248,9 +342,13 @@ def inject_mpeg4(input_file, output_file, metadata, console):
         if mpeg4_file is None:
             console("Error file could not be opened.")
 
-        if not mpeg4_add_spherical(mpeg4_file, in_fh, metadata):
+        if not mpeg4_add_spherical(mpeg4_file, in_fh, metadata.video):
             console("Error failed to insert spherical data")
 
+        if metadata.audio:
+            if not mpeg4_add_audio_metadata(mpeg4_file, in_fh, metadata.audio, console):
+                console("Error failed to insert spatial audio data")
+
         console("Saved file settings")
         parse_spherical_mpeg4(mpeg4_file, in_fh, console)
 
@@ -278,7 +376,7 @@ def parse_metadata(src, console):
         return parse_mpeg4(infile, console)
 
     console("Unknown file type")
-    return dict()
+    return None
 
 
 def inject_metadata(src, dest, metadata, console):
@@ -300,7 +398,10 @@ def inject_metadata(src, dest, metadata, console):
 
     extension = os.path.splitext(infile)[1].lower()
 
-    if extension in MPEG_FILE_EXTENSIONS:
+    if (extension in MPEG_FILE_EXTENSIONS):
+        if (metadata.audio and extension != ".mp4"):
+            error("Error: Spatial audio current not supported for %s ." %
+                  extension)
         inject_mpeg4(infile, outfile, metadata, console)
         return
 
@@ -377,3 +478,94 @@ def generate_spherical_xml(stereo=None, crop=None):
                      additional_xml +
                      SPHERICAL_XML_FOOTER)
     return spherical_xml
+
+
+def get_descriptor_length(in_fh):
+    """Reads the size field of the MP4 descriptor at in_fh's position.
+
+    Up to four bytes hold 7 bits each; a set top bit means another follows.
+    """
+    descriptor_length = 0
+    for _ in range(4):
+        size_byte = struct.unpack(">B", in_fh.read(1))[0]
+        descriptor_length = (descriptor_length << 7) | (size_byte & 0x7f)
+        if not size_byte & 0x80:
+            break
+    return descriptor_length
+
+
+def get_expected_num_audio_components(ambisonics_type, ambisonics_order):
+    """Returns the component count expected for an ambisonic type
+       ('periphonic' or 'horizontal') and order; 0 for any other type.
+    """
+    side = ambisonics_order + 1
+    counts = {'periphonic': side * side,
+              'horizontal': (2 * ambisonics_order) + 1}
+    return counts.get(ambisonics_type, 0)
+
+
+def get_num_audio_channels(mp4a_atom, in_fh):
+    """Reads the number of audio channels from AAC's AudioSpecificConfig descriptor
+       within the esds child atom of the input mp4a atom.
+
+    Returns the channel configuration of the first esds child, or -1 when the
+    atom is not mp4a, has no esds child or its descriptors cannot be read.
+    The file position of in_fh is restored before returning.
+    """
+    if mp4a_atom.name != mpeg.constants.TAG_MP4A:
+        return -1
+
+    # (expected tag, bytes to skip after the length, message on mismatch)
+    descriptors = (
+        (3, 3, "Error: failed to read elementary stream descriptor."),
+        (4, 13, "Error: failed to read decoder config. descriptor."),
+        (5, 0, "Error: failed to read MP4 audio decoder specific config."),
+    )
+    p = in_fh.tell()
+    try:
+        for element in mp4a_atom.contents:
+            if element.name != mpeg.constants.TAG_ESDS:
+                continue
+            in_fh.seek(element.content_start() + 4)
+            for tag, skip, message in descriptors:
+                if struct.unpack(">B", in_fh.read(1))[0] != tag:
+                    print(message)
+                    return -1
+                descriptor_size = get_descriptor_length(in_fh)
+                in_fh.seek(skip, 1)
+            # descriptor_size now belongs to the decoder specific info.
+            if descriptor_size < 2:
+                print("Error: MP4 audio decoder specific config is too short.")
+                return -1
+            decoder_descriptor = struct.unpack(">H", in_fh.read(2))[0]
+            # Bit layout: 5 bits object type, 4 bits sampling frequency
+            # index, 4 bits channel configuration.
+            sampling_frequency_index = (0x0780 & decoder_descriptor) >> 7
+            if sampling_frequency_index == 0:
+                # TODO: If the sample rate is 96kHz an additional 24 bit offset
+                # value here specifies the actual sample rate.
+                print("Error: Greater than 48khz audio is currently not "
+                      "supported.")
+                return -1
+            return (0x0078 & decoder_descriptor) >> 3
+        return -1
+    finally:
+        in_fh.seek(p)
+
+
+def get_num_audio_tracks(mpeg4_file, in_fh):
+    """Returns how many hdlr boxes under trak/mdia declare a sound handler."""
+    audio_handlers = 0
+    for track in mpeg4_file.moov_box.contents:
+        if track.name != mpeg.constants.TAG_TRAK:
+            continue
+        for media in track.contents:
+            if media.name != mpeg.constants.TAG_MDIA:
+                continue
+            for handler in media.contents:
+                if handler.name != mpeg.constants.TAG_HDLR:
+                    continue
+                in_fh.seek(handler.content_start() + 8)
+                if in_fh.read(4) == mpeg.constants.TAG_SOUN:
+                    audio_handlers += 1
+    return audio_handlers