# This is sample code for Chapter 5, Objects & Object Orientation

In [1]:
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.3 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"

import os
import sys
from UserDict import UserDict

def stripnulls(data):
    """Return data with NUL characters replaced by spaces and the ends trimmed.

    Every NUL character becomes a single space first; leading and trailing
    whitespace is then stripped from the result.
    """
    without_nulls = data.replace("\00", " ")
    return without_nulls.strip()


# UserDict is a class that acts like a dictionary, that you can subclass from. 
# There are also classes UserList and UserString.
class FileInfo(UserDict):
    "store file metadata"
    
    # Python does not call the superclass __init__ automatically: a subclass
    # that defines its own __init__ has to invoke it explicitly.
    def __init__(self, filename=None):
        """Initialize the mapping and store filename under the "name" key.

        filename -- value assigned to self["name"]; defaults to None.
        """
        
        # Explicit call to the superclass initializer. It must run before the
        # item assignment below, because UserDict.__init__ is what creates
        # the underlying self.data dictionary.
        UserDict.__init__(self)
        # The instance is used like a dictionary here. Item assignment goes
        # through __setitem__, so a subclass that overrides __setitem__
        # (as MP3FileInfo does) gets to react to the "name" assignment.
        self["name"] = filename
    
class MP3FileInfo(FileInfo):
    "store ID3v1.0 MP3 tags"
    # Maps each tag name to (start, end, converter): the slice of the
    # 128-byte tag block that holds the field, and the function applied to
    # that slice to produce the stored value.
    tagDataMap = {"title"   : (  3,  33, stripnulls),
                  "artist"  : ( 33,  63, stripnulls),
                  "album"   : ( 63,  93, stripnulls),
                  "year"    : ( 93,  97, stripnulls),
                  "comment" : ( 97, 126, stripnulls),
                  "genre"   : (127, 128, ord)}

    def __parse(self, filename):
        "empty the mapping, then load tags from the last 128 bytes of filename"
        self.clear()
        try:
            mp3 = open(filename, "rb", 0)
            try:
                # whence=2: the offset is relative to the end of the file
                mp3.seek(-128, 2)
                block = mp3.read(128)
            finally:
                # close the file whether or not the seek/read succeeded
                mp3.close()
            # without the 'TAG' marker there is nothing to extract
            if block[:3] != 'TAG':
                return
            fields = self.tagDataMap.items()
            for tag, (start, end, convert) in fields:
                self[tag] = convert(block[start:end])
        except IOError:
            # best effort: on an I/O error the tags are simply left unset
            pass

    def __setitem__(self, key, item):
        "store key/item; assigning a truthy 'name' first parses that file"
        if key == "name":
            if item:
                self.__parse(item)
        FileInfo.__setitem__(self, key, item)

def listDirectory(directory, fileExtList):
    """Return file info objects for files in directory with given extensions.

    directory   -- path of the directory to list (not recursive).
    fileExtList -- extensions to keep, each including its leading dot
                   (e.g. ".mp3"); matched against the extension of the
                   os.path.normcase()d file name.

    Each matching path is wrapped in the class named <EXT>FileInfo
    (extension upper-cased, dot removed) looked up in the module that
    defines FileInfo; if that module has no such attribute, plain FileInfo
    is used instead.

    Any error raised by os.listdir() for the directory propagates.
    """
    names = [os.path.normcase(f) for f in os.listdir(directory)]
    fileList = [os.path.join(directory, f) for f in names
                if os.path.splitext(f)[1] in fileExtList]

    # note function def'd inside another function; its default argument is
    # evaluated each time listDirectory runs and binds the defining module
    def getFileInfoClass(filename, module=sys.modules[FileInfo.__module__]):
        "get file info class from filename extension"
        subclass = "%sFileInfo" % os.path.splitext(filename)[1].upper()[1:]
        # getattr with a default replaces the "x and y or z" idiom, which
        # silently falls through to z whenever y happens to be falsy
        return getattr(module, subclass, FileInfo)
    return [getFileInfoClass(f)(f) for f in fileList]

In [3]:
# Run the code this way
for info in listDirectory("music_singles", [".mp3"]):
        print "\n".join(["%s=%s" % (k, v) for k, v in info.items()])
        print

album=Cross Road
comment=YEAR: 1994
name=music_singles/01 - Bon Jovi - Livin' on a Prayer.mp3
title=Livin' on a Prayer
artist=Bon Jovi
year=1994
genre=12

name=music_singles/01 - Computer World.mp3

name=music_singles/01 - Europe Endless.mp3

album=The Look Of Love - The Very Be
comment=Amazon.com Song ID: 20886239
name=music_singles/01 - The Look Of Love (Part 1).mp3
title=The Look Of Love (Part 1)
artist=ABC
year=2002
genre=12

album=So
comment=
name=music_singles/01_Red Rain_Peter Gabriel.mp3
title=Red Rain
artist=Peter Gabriel
year=1986
genre=12



In [10]:
# Define an instance of the FileInfo class
f = FileInfo("/PyRefreshers/music_singles/01_Red Rain_Peter Gabriel.mp3")

# Built-in attributes of the object: its class, the class docstring, and
# (via UserDict.__repr__) the dictionary contents.
# str.join() only accepts strings, so each value is passed through str() first.
print '\n'.join((str(f.__class__), str(f.__doc__), str(f)))


__main__.FileInfo
store file metadata
{'name': '/PyRefreshers/music_singles/01_Red Rain_Peter Gabriel.mp3'}


The UserDict.update(dict) method copies keys and values from the dict argument.

Always define all attributes and give them reasonable init values in the __init__ method.

UserDict does not inherit from the built-in dict. It wraps a real dictionary stored in self.data and implements the dictionary methods by forwarding them to that object.

If you look at the 'copy' method, it uses introspection. First it checks whether the object's class is exactly UserDict; if so, it simply builds a new UserDict from a copy of the underlying data. Otherwise the object is an instance of a subclass of UserDict, and since UserDict doesn't know how to construct its subclasses, it falls back on the "copy" module, which can copy any Python object.

In [12]:
# Now we look at the source code for the UserDict class.
# inspect.getsource() returns the source text of the given object as a string.
import inspect
print inspect.getsource(UserDict)

class UserDict:
    def __init__(self, dict=None, **kwargs):
        self.data = {}
        if dict is not None:
            self.update(dict)
        if len(kwargs):
            self.update(kwargs)
    def __repr__(self): return repr(self.data)
    def __cmp__(self, dict):
        if isinstance(dict, UserDict):
            return cmp(self.data, dict.data)
        else:
            return cmp(self.data, dict)
    def __len__(self): return len(self.data)
    def __getitem__(self, key):
        if key in self.data:
            return self.data[key]
        if hasattr(self.__class__, "__missing__"):
            return self.__class__.__missing__(self, key)
        raise KeyError(key)
    def __setitem__(self, key, item): self.data[key] = item
    def __delitem__(self, key): del self.data[key]
    def clear(self): self.data.clear()
    def copy(self):
        if self.__class__ is UserDict:
            return UserDict(self.data.copy())
        import copy
        data = self.data
        try

You can also inherit from the built-in data type 'dict'. 

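Note that if you do this, you do not need to call dict.__init__ in your __init__ method: a dict subclass has no separate self.data attribute to set up, because the object itself is the dictionary.
This is in contrast to the case where you inherit from UserDict, whose __init__ must run in order to create self.data.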

## Special class methods

### In Python, all special methods (like __setitem__) and built-in attributes (like __doc__) follow a standard naming convention: they both start with and end with two underscores. Don't name your own methods and attributes this way, because it will only confuse you (and others) later.

Some class methods are called without explicitly calling a method. For example, in a dict object,

    >f["name"]
    >f.__getitem__("name")
    
do the same thing. 

But you can still redefine them in child classes to make them do what you want, 
just as you'd redefine operator[]() in C++ subclasses. 

For example, in the class MP3FileInfo, __setitem__ is overridden this way (note that __parse is an MP3FileInfo method):

    def __setitem__(self, key, item):
        if key == "name" and item:
            self.__parse(item) # <<<=== MP3FileInfo method
        FileInfo.__setitem__(self, key, item)
        
 ### Some fun special class methods (you can redefine these in child classes)
 
     __repr__ returns a string representation of an object. It gets called when you enter the object's name in the interpreter.
     __call__ is a class method that lets you call an instance of a class as though it were a function.
     __len__ will be called on your class if you call len(instance)
     __delitem__ is what gets called if you do del instance[key]
     __cmp__ is what gets called if you use == ('equals')
     
     
## Class attributes

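These are like static class variables in C++, I think.
They are called class attributes, and instance variables are called data attributes.
The difference is that class attributes are defined in the class body and shared by every instance, while data attributes are set on a particular instance (self), typically in the __init__ method.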

    class MP3FileInfo(FileInfo):
        "store ID3v1.0 MP3 tags"
        tagDataMap = {"title" : ( 3, 33, stripnulls),
                      "artist" : ( 33, 63, stripnulls),
                      "album" : ( 63, 93, stripnulls),
                      "year" : ( 93, 97, stripnulls),
                      "comment" : ( 97, 126, stripnulls),
                      "genre" : (127, 128, ord)}
               
## Private functions: Python has them. Who knew?

If the name of a Python function, class method, or attribute starts with (but doesn't end with) two underscores, it's private; everything else is public. 

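Python has no concept of protected class methods (accessible only in their own class and descendant classes). Class methods are either private (accessible only in their own class) or public (accessible from anywhere). Note that privacy is implemented by name mangling rather than real access control: inside a class C, the name __name is rewritten to _C__name, so the attribute is simply not found under its original name from outside the class.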

In [24]:
class MyClass(dict):
    def __init__(self, initdict):
        # keeps a reference to the caller's dictionary; no copy is made
        self.data = initdict
    # Two leading underscores and no trailing ones: inside the class body
    # Python mangles this name to _MyClass__add2.
    def __add2(self):
        if 2 not in self.data: 
            self.data[2]=3
    
ob = MyClass({})
# Fails on purpose: the object has no attribute stored under the name _add2
# (see the AttributeError below).
ob._add2()

AttributeError: 'MyClass' object has no attribute '_add2'

In [25]:
# side note: assignment binds a second name to the same dictionary object; it doesn't make a copy.
mydict = {}
mydict[3]=5
# md2 and mydict now refer to one and the same dictionary
md2 = mydict
md2[4] = 6
# the key added through md2 shows up here as well
mydict

{3: 5, 4: 6}

## Copy module

Assignment statements in Python do not copy objects, they create bindings between a target and an object. For collections that are mutable or contain mutable items, a copy is sometimes needed so one can change one copy without changing the other. This module provides generic shallow and deep copy operations.

In [26]:
import copy
md3 = {}
md3[3]=5
# deepcopy builds an independent dictionary, so md4 can change without touching md3
md4 = copy.deepcopy(md3)
md4[4] = 6
md3 #unchanged

{3: 5}

Every Python class has a built-in class attribute __module__, which is the name of the module in which the class is defined.

Using this feature together with sys.modules means you can always get a ref to the module in which a class is defined. 