In [1]:
# create test files
import os

def mkfile(filename, body=None):
    """Create (or overwrite) a text file.

    Args:
        filename: Path of the file to write.
        body: Text to store in the file. When omitted (``None``), the
            ``filename`` string itself is written, which gives every file
            created this way distinct content.
    """
    # Test against None instead of truthiness so that an explicit empty
    # string yields an empty file rather than falling back to the file name.
    content = filename if body is None else body
    with open(filename, 'w') as f:
        f.write(content)

def make_example_dir(top):
    """Build the example directory tree used by the filecmp demos under ``top``.

    The tree consists of two sibling directories, ``dir1`` and ``dir2``,
    containing entries that are unique to one side, common to both,
    identical, different, and of mismatched type (a file on one side and a
    directory on the other).

    The function is safe to call repeatedly: existing directories are
    reused and files are overwritten.

    Args:
        top: Root directory of the example tree. It is created, together
            with any missing parent directories, if it does not exist.
    """
    # exist_ok=True makes the whole function re-runnable (e.g. when the
    # notebook cell is executed a second time).
    os.makedirs(top, exist_ok=True)
    curdir = os.getcwd()
    os.chdir(top)
    try:
        os.makedirs('dir1', exist_ok=True)
        os.makedirs('dir2', exist_ok=True)

        # Files that exist on one side only.
        mkfile('dir1/file_only_in_dir1')
        mkfile('dir2/file_only_in_dir2')

        # Directories that exist on one side only.
        os.makedirs('dir1/dir_only_in_dir1', exist_ok=True)
        os.makedirs('dir2/dir_only_in_dir2', exist_ok=True)

        # A directory present on both sides.
        os.makedirs('dir1/common_dir', exist_ok=True)
        os.makedirs('dir2/common_dir', exist_ok=True)

        # Same name, same content.
        mkfile('dir1/common_file', 'this file is the same')
        mkfile('dir2/common_file', 'this file is the same')

        # Same name, different content: no body is given, so each file is
        # written with its own relative path (equal length, different text).
        mkfile('dir1/not_the_same')
        mkfile('dir2/not_the_same')

        # Same name, different type: a file in dir1, a directory in dir2.
        mkfile('dir1/file_in_dir1', 'This is a file in dir1')
        os.makedirs('dir2/file_in_dir1', exist_ok=True)
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(curdir)

In [3]:
# Build the top-level tree first, then nest a full copy of it inside each
# side's common_dir so the comparison has sub-directories to descend into.
for example_root in ('test_files',
                     'test_files/dir1/common_dir',
                     'test_files/dir2/common_dir'):
    make_example_dir(example_root)

In [2]:
import filecmp

def _show_cmp(relative_name):
    """Print filecmp.cmp() for the dir1/dir2 copies of a file.

    The default (shallow) comparison is printed first, followed by the
    comparison with ``shallow=False``.
    """
    left = 'test_files/dir1/' + relative_name
    right = 'test_files/dir2/' + relative_name
    print(filecmp.cmp(left, right))
    print(filecmp.cmp(left, right, shallow=False))


print('Common_file:')
_show_cmp('common_file')

print('Not the Same:')
_show_cmp('not_the_same')

Common_file:
True
True
Not the Same:
False
False


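shallow 参数决定 cmp() 是只比较文件元数据，还是连文件内容也一并比较。默认（shallow=True）先比较由 os.stat() 得到的文件签名（文件类型、大小、修改时间）：签名完全相同就直接认为两个文件相同，因此同一时刻创建且大小相同的文件会被认为是相同的，即使它们的内容不同。当 shallow 为 False 时，文件内容总是会参与比较。  
但这里运行的结果和书中并不相同：not_the_same 在默认的 shallow 模式下也返回了 False。这两个文件的类型和大小都相同，所以只能是修改时间不一致导致签名不同；签名不同时 cmp() 会继续比较大小和内容，而这两个文件的内容并不相同，于是返回 False。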

In [9]:
# Names that appear in both directories (files and sub-directories alike).
d1_contents = set(os.listdir('test_files/dir1'))
d2_contents = set(os.listdir('test_files/dir2'))
common = list(d1_contents & d2_contents)

# Keep only the common names that are regular files on the dir1 side.
common_files = list(filter(
    lambda entry: os.path.isfile(os.path.join('test_files/dir1', entry)),
    common,
))
print('Common files: {}'.format(common_files))

# Compare just those files between the two directories (no recursion).
match, mismatch, errors = filecmp.cmpfiles(
    'test_files/dir1',
    'test_files/dir2',
    common_files,
)
print('Match: {}'.format(match))
print('Mismatch: {}'.format(mismatch))
print('Errors: {}'.format(errors))

Common files: ['file_in_dir1', 'not_the_same', 'common_file']
Match: ['common_file']
Mismatch: ['file_in_dir1', 'not_the_same']
Errors: []


要在不递归的情况下比较两个目录中的一组文件，请使用 cmpfiles()。它的参数是两个目录名称以及要比较的公共文件名列表，返回三个列表：内容匹配的文件、不匹配的文件，以及无法比较的文件。

## 目录比较

In [3]:
# Compare the two example directories and print a summary of the top level.
dc = filecmp.dircmp(a='test_files/dir1', b='test_files/dir2')
dc.report()

diff test_files/dir1 test_files/dir2
Only in test_files/dir1 : ['dir_only_in_dir1', 'file_only_in_dir1']
Only in test_files/dir2 : ['dir_only_in_dir2', 'file_only_in_dir2']
Identical files : ['common_file']
Differing files : ['not_the_same']
Common subdirectories : ['common_dir']
Common funny cases : ['file_in_dir1']


In [4]:
# Like dc.report(), but the report is then repeated recursively for every
# common sub-directory.
dc.report_full_closure()

diff test_files/dir1 test_files/dir2
Only in test_files/dir1 : ['dir_only_in_dir1', 'file_only_in_dir1']
Only in test_files/dir2 : ['dir_only_in_dir2', 'file_only_in_dir2']
Identical files : ['common_file']
Differing files : ['not_the_same']
Common subdirectories : ['common_dir']
Common funny cases : ['file_in_dir1']

diff test_files/dir1/common_dir test_files/dir2/common_dir
Common subdirectories : ['dir1', 'dir2']

diff test_files/dir1/common_dir/dir1 test_files/dir2/common_dir/dir1
Identical files : ['common_file', 'file_in_dir1', 'file_only_in_dir1', 'not_the_same']
Common subdirectories : ['common_dir', 'dir_only_in_dir1']

diff test_files/dir1/common_dir/dir1/common_dir test_files/dir2/common_dir/dir1/common_dir

diff test_files/dir1/common_dir/dir1/dir_only_in_dir1 test_files/dir2/common_dir/dir1/dir_only_in_dir1

diff test_files/dir1/common_dir/dir2 test_files/dir2/common_dir/dir2
Identical files : ['common_file', 'file_only_in_dir2', 'not_the_same']
Common subdirectories : ['c

In [5]:
import pprint
# Names listed in `ignore` are left out of the comparison entirely.
dc = filecmp.dircmp('test_files/dir1', 'test_files/dir2', ignore=['common_file'])

# (heading, dircmp attribute) pairs that are pretty-printed under their heading.
for heading, attribute in (
    ('LEFT:', 'left_list'),
    ('RIGHT:', 'right_list'),
    ('COMMON:', 'common'),
    ('LEFT only:', 'left_only'),
    ('RIGHT only:', 'right_only'),
    ('COMMON dir:', 'common_dirs'),
    ('COMMON files:', 'common_files'),
    # "funny" entries: a file in one directory but a sub-directory in the other
    ('COMMON funny:', 'common_funny'),
):
    print(heading)
    pprint.pprint(getattr(dc, attribute))

# Comparison results, printed on a single line each.
for heading, attribute in (
    ('Same  :', 'same_files'),
    ('Diff  :', 'diff_files'),
    ('Funny :', 'funny_files'),
    ('Subdir :', 'subdirs'),
):
    print(heading, getattr(dc, attribute))

LEFT:
['common_dir',
 'dir_only_in_dir1',
 'file_in_dir1',
 'file_only_in_dir1',
 'not_the_same']
RIGHT:
['common_dir',
 'dir_only_in_dir2',
 'file_in_dir1',
 'file_only_in_dir2',
 'not_the_same']
COMMON:
['common_dir', 'file_in_dir1', 'not_the_same']
LEFT only:
['dir_only_in_dir1', 'file_only_in_dir1']
RIGHT only:
['dir_only_in_dir2', 'file_only_in_dir2']
COMMON dir:
['common_dir']
COMMON files:
['not_the_same']
COMMON funny:
['file_in_dir1']
Same  : []
Diff  : ['not_the_same']
Funny : []
Subdir : {'common_dir': <filecmp.dircmp object at 0x11245ac50>}
