Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added new class to read mp4 (m4v) metadata. Hope to eventually be able to write as well.
  • Loading branch information...
commit ab8c6ea5cb8a562a8dee475afe46a8c1fadc697c 1 parent 88f7568
joelmeans authored

Showing 1 changed file with 300 additions and 0 deletions. Show diff stats Hide diff stats

  1. +300 0 lib/pymetadatamanager/mp4.py
300 lib/pymetadatamanager/mp4.py
... ... @@ -0,0 +1,300 @@
  1 +############################################################################
  2 +# Copyright (C) 2011 by Joel Means,,, #
  3 +# means.joel@gmail.com #
  4 +# #
  5 +# This program is free software; you can redistribute it and/or modify #
  6 +# it under the terms of the GNU General Public License as published by #
  7 +# the Free Software Foundation; either version 2 of the License, or #
  8 +# (at your option) any later version. #
  9 +# #
  10 +# This program is distributed in the hope that it will be useful, #
  11 +# but WITHOUT ANY WARRANTY; without even the implied warranty of #
  12 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
  13 +# GNU General Public License for more details. #
  14 +# #
  15 +# You should have received a copy of the GNU General Public License #
  16 +# along with this program; if not, write to the #
  17 +# Free Software Foundation, Inc., #
  18 +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #
  19 +############################################################################
  20 +
  21 +__author__="jmeans"
  22 +__date__ ="$Apr 3, 2011 12:29:59 PM$"
  23 +
  24 +import os
  25 +import logging
  26 +import struct
  27 +from PyQt4 import QtXml, QtCore
  28 +
# Four-character codes of atoms that are treated as leaves (no nested atoms).
# The list is not consulted by the parser code visible in this module; it is
# kept as the counterpart of PARENT_ATOMS.
# 'name' and 'cprt' each appear twice; that is harmless for membership tests.
CHILD_ATOMS = ['ftyp', 'mdat', 'pdin', 'mfhd', 'tfhd', 'trun', 'tfra',
               'mfro', 'free', 'skip', 'uuid', 'mvhd', 'iods', 'drm ',
               'tkhd', 'clef', 'prof', 'enof', 'mdhd', 'hdlr', 'vmhd',
               'smhd', 'hmhd', 'nmhd', 'gmhd', 'url ', 'urn ',
               'alis', 'cios', 'stts', 'ctts', 'stsz', 'stz2',
               'stsc', 'stco', 'co64', 'stss', 'stsh', 'stdp', 'padb',
               'sdtp', 'sbgp', 'stps', 'elst', 'mehd', 'trex',
               'subs', 'xml ', 'bxml', 'iloc', 'pitm', 'infe',
               'frma', 'imif', 'schm', 'skcr', 'user', 'key ',
               'iviv', 'righ', 'name', 'priv', 'iKMS', 'iSFM', 'iSLT',
               'IKEY', 'hint', 'dpnd', 'ipir', 'mpod', 'sync', 'chap',
               'ipmc', 'tims', 'tsro', 'snro', 'srpp', 'rtp ', 'sdp ',
               'name', 'trpy', 'nump', 'tpyl', 'totl', 'npck', 'maxr',
               'dmed', 'dimm', 'drep', 'tmin', 'tmax', 'pmax', 'dmax',
               'payt', 'tpay', 'alac', 'avcC', 'damr', 'd263', 'dawp',
               'devc', 'dqcp', 'dsmv', 'bitr', 'btrt', 'm4ds', 'ftab',
               'ihdr', 'colr', 'fiel', 'jp2p', 'jsub', 'orfo', 'cprt',
               'cprt', 'titl', 'auth', 'perf', 'gnre', 'dcsp', 'albm',
               # The comma after 'tsel' was missing, which silently fused it
               # with the next literal into the bogus entry 'tseldata'.
               'yrrc', 'rtng', 'clsf', 'kywd', 'loci', 'ID32', 'tsel',
               'data', 'esds', 'ac-3',
               ]
  50 +
# Atom types whose payload is parsed as a sequence of nested atoms.  Unless a
# type also has an entry in OFFSETS, its children are assumed to start 8 bytes
# after the start of the atom (see MP4._make_atom_element).
PARENT_ATOMS = [
    'moov', 'moof', 'traf', 'mfra', 'trak', 'tref', 'mdia',
    'tapt', 'minf', 'dinf', 'stbl', 'edts', 'udta', 'mvex',
    'ipro', 'sinf', 'hnti', 'hinf', 'jp2h', 'ilst', '----',
    'stik', 'trkn', 'disk', 'tmpo', 'cptr', 'cpil', 'covr',
    'rtng', 'pcst', 'catg', 'keyw', 'purl', 'egid', 'desc',
    'tvnn', 'tvsh', 'tven', 'tvsn', 'tves', 'purd', 'pgap',
    'meta', 'ldes', 'hdvd',
    # Types whose first byte is 0xA9.
    '\xa9nam', '\xa9too', '\xa9alb', '\xa9art', '\xa9cmt',
    '\xa9gen', '\xa9day', '\xa9wrt', '\xa9grp', '\xa9lyr',
]
  61 +
# Parent atom types whose 'data' payload is rendered as decimal integer text
# instead of characters (see MP4._read_data_atom).
INTEGER_DATA_TYPES = [
    'tvsn',
    'tves',
    'hdvd',
    'stik',
]
  63 +
# Number of bytes between the start of an atom and its first nested atom, for
# the atom types where that distance is not the default of 8 used by
# MP4._make_atom_element.
OFFSETS = {
    'meta': 12,
    'dref': 16,
    'iinf': 14,
    'stsd': 16,
    'schi': 8,
    'mp4s': 16,
    'srtp': 24,
    'rtp ': 24,
    'alac': 36,
    'mp4a': 36,
    'samr': 36,
    'sawb': 36,
    'sawp': 36,
    'sevc': 36,
    'sqcp': 36,
    'ssmv': 36,
    'drms': 36,
    'tx3g': 46,
    'mjp2': 86,
    'mp4v': 86,
    'avc1': 86,
    'jpeg': 86,
    's263': 86,
    'drmi': 86,
}
  70 +
# Display names for the integer codes stored in a 'stik' atom; used by
# MP4._read_data_atom to turn the code into text.
STIKS = {
    0: 'Movie',
    1: 'Normal',
    2: 'Audiobook',
    5: 'Whacked Bookmark',
    6: 'Music Video',
    9: 'Short Film',
    10: 'TV Show',
    11: 'Booklet',
}
  80 +
class MP4(object):
    """Parse the atom tree of an MP4 (M4V) file into a QDomDocument.

    The whole file is scanned when the object is constructed.  Every atom
    becomes a DOM element named after its four-character type and carries
    ``offset`` and ``size`` attributes.  For ``data`` and ``name`` atoms the
    decoded payload is attached as a text node.  Only reading is implemented.

    Attributes:
        logger: logger named ``pymetadatamanager.mp4``.
        file:   the underlying file object, opened in binary mode.
        atoms:  the QDomDocument holding the parsed tree.
        root:   the ``<atoms>`` root element of that document.
    """

    def __init__(self, filename):
        """Open ``filename`` and parse all of its atoms.

        Raises whatever ``open`` raises when the file cannot be opened.
        """
        self.logger = logging.getLogger('pymetadatamanager.mp4')
        self.file = open(filename, 'rb')
        self.atoms = QtXml.QDomDocument()
        self.root = self.atoms.createElement('atoms')
        self.atoms.appendChild(self.root)
        try:
            self._set_all_atoms()
        except Exception:
            # Do not leave the handle open if parsing blows up.
            self.close()
            raise

    def __del__(self):
        """Release the file handle when the object is collected."""
        self.close()

    def close(self):
        """Close the underlying file; safe to call more than once."""
        # getattr: __init__ may have failed before self.file was assigned.
        handle = getattr(self, 'file', None)
        if handle is not None and not handle.closed:
            handle.close()

    # -- low-level readers ------------------------------------------------

    def _read_unsigned(self, offset, fmt):
        """Read one unsigned integer with struct format ``fmt`` at ``offset``.

        Raises EOFError when fewer bytes than the format needs are available.
        """
        width = struct.calcsize(fmt)
        self.file.seek(offset, os.SEEK_SET)
        raw = self.file.read(width)
        if raw is None or len(raw) != width:
            raise EOFError
        return struct.unpack(fmt, raw)[0]

    def _read64(self, offset):
        """Return the big-endian unsigned 64-bit integer at ``offset``."""
        return self._read_unsigned(offset, ">Q")

    def _read32(self, offset):
        """Return the big-endian unsigned 32-bit integer at ``offset``."""
        return self._read_unsigned(offset, ">L")

    def _read16(self, offset):
        """Return the big-endian unsigned 16-bit integer at ``offset``."""
        return self._read_unsigned(offset, ">H")

    def _read8(self, offset):
        """Return the unsigned byte at ``offset``."""
        return self._read_unsigned(offset, ">B")

    # -- integer -> characters ---------------------------------------------

    def _to_string(self, data, width):
        """Render the low ``width`` bytes of ``data`` as characters.

        The most significant byte comes first, so a value read by one of the
        big-endian readers turns back into the characters found in the file.
        """
        shifts = range(8 * (width - 1), -1, -8)
        return ''.join(['%c' % ((data >> shift) & 0xff) for shift in shifts])

    def _to_string64(self, data):
        """Return the 8 characters encoded in the 64-bit integer ``data``."""
        return self._to_string(data, 8)

    def _to_string32(self, data):
        """Return the 4 characters encoded in the 32-bit integer ``data``."""
        return self._to_string(data, 4)

    def _to_string16(self, data):
        """Return the 2 characters encoded in the 16-bit integer ``data``."""
        return self._to_string(data, 2)

    def _to_string8(self, data):
        """Return the single character encoded in the low byte of ``data``."""
        return self._to_string(data, 1)

    # -- atom parsing -------------------------------------------------------

    def _parse_single_atom(self, offset):
        """Parse the atom that starts at ``offset``.

        Returns the DOM element built for it, or None when no usable atom is
        found there: the header runs past the end of the file, or the
        declared size is smaller than the 8-byte header (this includes the
        size 0 that the original code already rejected).
        """
        try:
            atom_size = self._read32(offset)
            atom_type = self._to_string32(self._read32(offset + 4))
            if atom_size == 1:
                # A 32-bit size of 1 means the real size follows the type
                # as a 64-bit integer.
                # NOTE(review): children of such an atom would still be
                # searched from offset + 8 by _make_atom_element -- confirm
                # whether extended-size container atoms need support.
                atom_size = self._read64(offset + 8)
            if atom_size < 8:
                return None
            return self._make_atom_element(atom_size, atom_type, offset)
        except EOFError:
            return None

    def _parse_multiple_atoms(self, offset, max_offset):
        """Parse consecutive atoms in the byte range [offset, max_offset).

        Returns the list of DOM elements, in file order.  Parsing stops at
        the first position where no atom can be read; without that stop the
        loop would spin forever because the offset could not advance.
        """
        atoms = []
        while offset < max_offset:
            atom = self._parse_single_atom(offset)
            if atom is None:
                break
            atoms.append(atom)
            offset += int(atom.attribute('size'))
        return atoms

    def _make_atom_element(self, size, type, offset):
        """Build the DOM element for one atom, recursing into containers.

        Args:
            size:   total atom size in bytes.
            type:   four-character atom type; used as the element name.
            offset: position of the atom in the file.

        Returns the element with ``offset`` and ``size`` attributes set and,
        for types listed in PARENT_ATOMS or OFFSETS, its child atoms appended.
        """
        elem_atom = self.atoms.createElement(type)
        elem_atom.setAttribute('offset', offset)
        elem_atom.setAttribute('size', size)
        if type not in PARENT_ATOMS and type not in OFFSETS:
            return elem_atom

        # Children start after the plain 8-byte header unless OFFSETS gives
        # a different distance for this type.
        first_child = offset + OFFSETS.get(type, 8)
        for child in self._parse_multiple_atoms(first_child, offset + size):
            tag = child.tagName()
            child_size = child.attribute('size')
            child_offset = child.attribute('offset')
            if tag == 'data':
                text = self._read_data_atom(child_size, child_offset, type)
                child.appendChild(self.atoms.createTextNode(text))
            elif tag == 'name':
                text = self._read_name_atom(child_size, child_offset, type)
                child.appendChild(self.atoms.createTextNode(text))
            elem_atom.appendChild(child)
        return elem_atom

    def _read_payload(self, pos, end_of_atom, as_integer=False):
        """Decode the bytes in [pos, end_of_atom) into text.

        The range is consumed greedily in chunks of 8, 4, 2 and 1 bytes.
        Each chunk is rendered as its decimal value when ``as_integer`` is
        true, otherwise as the characters it contains.
        """
        readers = (
            (8, self._read64, self._to_string64),
            (4, self._read32, self._to_string32),
            (2, self._read16, self._to_string16),
            (1, self._read8, self._to_string8),
        )
        pieces = []
        while pos < end_of_atom:
            left = end_of_atom - pos
            for width, read, to_string in readers:
                if left >= width:
                    value = read(pos)
                    pieces.append(str(value) if as_integer
                                  else to_string(value))
                    pos += width
                    break
        return ''.join(pieces)

    def _read_data_atom(self, size, offset, type):
        """Return the payload of a ``data`` atom as text.

        Args:
            size:   size of the ``data`` atom (anything ``int()`` accepts).
            offset: file position of the ``data`` atom (likewise).
            type:   type of the enclosing atom; selects the decoding.

        ``trkn`` yields "N" or "N of M".  Types in INTEGER_DATA_TYPES yield
        decimal text, and ``stik`` is further mapped through STIKS.
        Everything else is returned as characters.
        """
        end_of_atom = int(size) + int(offset)
        # The payload is read from 16 bytes after the start of the atom.
        pos = int(offset) + 16
        if type == 'trkn':
            track_num = self._read16(pos + 2)
            total_tracks = self._read16(pos + 4)
            if total_tracks == 0:
                return "%d" % (track_num,)
            return "%d of %d" % (track_num, total_tracks)

        data = self._read_payload(pos, end_of_atom,
                                  type in INTEGER_DATA_TYPES)
        if type == 'stik':
            try:
                data = STIKS[int(data)]
            except (KeyError, ValueError):
                # Unknown code or empty payload: keep the raw text instead
                # of aborting the parse of the whole file.
                pass
        return data

    def _read_name_atom(self, size, offset, type):
        """Return the payload of a ``name`` atom as characters.

        The payload is taken to start 12 bytes into the atom when the
        enclosing atom ``type`` is ``----`` and 8 bytes in otherwise.
        """
        buff = 12 if type == '----' else 8
        end_of_atom = int(size) + int(offset)
        return self._read_payload(int(offset) + buff, end_of_atom)

    def _set_all_atoms(self):
        """Parse the whole file and hang every top-level atom on the root."""
        self.file.seek(0, os.SEEK_END)
        file_size = self.file.tell()
        for atom in self._parse_multiple_atoms(0, file_size):
            self.root.appendChild(atom)

    def print_metadata(self):
        """Print the tags found under moov/udta/meta/ilst, one per line."""
        moov = self.root.firstChildElement('moov')
        udta = moov.firstChildElement('udta')
        meta = udta.firstChildElement('meta')
        ilst = meta.firstChildElement('ilst')
        tags = ilst.childNodes()
        total_tags = tags.length()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Total tags: %d" % (total_tags,))
        for i in range(total_tags):
            tag = tags.at(i)
            tag_name = tag.toElement().tagName()
            data = tag.firstChildElement('data').text()
            print("%s = %s" % (tag_name, data))
  300 +

0 comments on commit ab8c6ea

Please sign in to comment.
Something went wrong with that request. Please try again.