## Check Diff

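This notebook recursively compares two folder trees and reports:
<br>
a) files or directories that exist in only one of the two trees
<br>
b) files with the same name whose contents differ (an HTML diff is generated for each)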

In [3]:
import os
import shutil
import filecmp
import difflib


def same_folders(dcmp):
    """Return True when the two directory trees behind ``dcmp`` are identical.

    Args:
        dcmp: a ``filecmp.dircmp`` instance for the pair of folders to check.

    Returns:
        bool: False as soon as any level has differing common files or
        entries that exist on only one side; True otherwise.
    """
    # Any differing file or one-sided entry at this level is a mismatch.
    if dcmp.diff_files or dcmp.left_only or dcmp.right_only:
        return False
    # Every common subdirectory must match, not just the first one visited.
    return all(same_folders(sub_dcmp) for sub_dcmp in dcmp.subdirs.values())

    
def gen_diff_file(file0, file1, path):
    """Append a unified diff of two text files to ``<stem>_diff.txt`` in ``path``.

    Args:
        file0: path of the "from" file; its base name (up to the first dot)
            names the output file.
        file1: path of the "to" file.
        path: existing directory in which the diff file is written.

    Raises:
        OSError: if either input cannot be read or the output cannot be written.
    """
    # Context managers close the inputs promptly instead of leaking handles.
    with open(file0) as from_fh:
        text1 = from_fh.readlines()
    with open(file1) as to_fh:
        text2 = to_fh.readlines()

    stem = os.path.basename(file0).split('.')[0]
    txtout = os.path.join(path, '%s_diff.txt' % stem)
    # Append mode: files sharing a stem (e.g. foo.c / foo.h) accumulate in one report.
    with open(txtout, 'a') as f:
        f.writelines(difflib.unified_diff(text1, text2))


def gen_diff_html(fromfile, tofile, path):
    """Write a side-by-side HTML diff of two text files into ``path``.

    The output is named ``<stem>_diff.html`` where ``<stem>`` is the base name
    of ``tofile`` up to its first dot; an existing file of that name is
    overwritten.

    Args:
        fromfile: path of the "from" file (left column).
        tofile: path of the "to" file (right column).
        path: existing directory in which the HTML report is written.

    Raises:
        OSError: if either input cannot be read or the output cannot be written.
    """
    # Context managers close the inputs promptly instead of leaking handles.
    with open(fromfile) as from_fh:
        fromlines = from_fh.readlines()
    with open(tofile) as to_fh:
        tolines = to_fh.readlines()

    # context=True with numlines=10 keeps only changed regions plus 10 lines around them.
    diff = difflib.HtmlDiff().make_file(fromlines, tolines, context=True, numlines=10)

    stem = os.path.basename(tofile).split('.')[0]
    htmlout = os.path.join(path, '%s_diff.html' % stem)
    with open(htmlout, 'w') as f:
        f.write(diff)


def gen_diff_report(dcmp, report_folder):
    """Print one-sided entries and write HTML diffs for a directory comparison.

    Walks ``dcmp`` recursively. At each level it prints the entries found only
    on the right or only on the left, writes an HTML diff into
    ``report_folder`` for every common file that differs, and then recurses
    into each common subdirectory using a same-named sub-folder of
    ``report_folder``.

    Args:
        dcmp: a ``filecmp.dircmp`` instance for the pair of folders.
        report_folder: existing directory that receives this level's reports.
    """
    print("\nLooking for diff files in %s: " % (report_folder))
    if dcmp.right_only:
        print("Unmatched files/directories in %s:" % (dcmp.right))
        print(dcmp.right_only)
    if dcmp.left_only:
        print("Unmatched files/directories in %s:" % (dcmp.left))
        print(dcmp.left_only)

    for name in dcmp.diff_files:
        print("Generating diff file for %s" % (name))
        # gen_diff_file takes the same arguments if a plain-text diff is wanted instead.
        gen_diff_html(os.path.join(dcmp.left, name), os.path.join(dcmp.right, name), report_folder)

    for sub_dir, sub_dcmp in dcmp.subdirs.items():
        subdir_path = os.path.join(report_folder, sub_dir)
        # exist_ok avoids a crash when the report tree is left over from an earlier run.
        os.makedirs(subdir_path, exist_ok=True)
        gen_diff_report(sub_dcmp, subdir_path)

        
def check_diff_top(folder0, folder1):
    """Compare two folder trees and build a diff report when they differ.

    When the trees match, prints "No difference found.". Otherwise the
    ``diff_folder`` directory under the current working directory is deleted
    if present, recreated empty, and filled by ``gen_diff_report``.

    Args:
        folder0: path of the left-hand folder.
        folder1: path of the right-hand folder.

    Raises:
        OSError: if the report folder cannot be removed or created.
    """
    dcmp = filecmp.dircmp(folder0, folder1)
    if same_folders(dcmp):
        print("No difference found.")
        return

    report_folder = os.path.join(os.getcwd(), 'diff_folder')
    # Start from a clean report folder so stale diffs never mix with new ones.
    if os.path.exists(report_folder):
        shutil.rmtree(report_folder)
    # Let a creation failure surface here; swallowing it only defers the
    # error to the first attempt to write a report into the missing folder.
    os.makedirs(report_folder, exist_ok=True)
    gen_diff_report(dcmp, report_folder)

        
# Candidate folders for comparison. These are absolute, machine-specific
# paths; edit them to match the local checkout before running this cell.
folder_a = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\gtrack_3112'
folder_b = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\gtrack_3206_aop'
folder_c = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\gtrack_3303'

# Additional folders; not used by the call below in this cell.
folder_ta = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\a_test'
folder_tb = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\b_test'
folder_tc = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\c_test'
        
# Compare folder_a against folder_c; any report lands in ./diff_folder.
check_diff_top(folder_a, folder_c)


Looking for diff files in C:\Users\bing.chen\Desktop\codePlayground\check_diff\diff_folder: 
Generating diff file for gtrack.h

Looking for diff files in C:\Users\bing.chen\Desktop\codePlayground\check_diff\diff_folder\include: 
Generating diff file for gtrack_2d.h
Generating diff file for gtrack_3d.h
Generating diff file for gtrack_int.h
Generating diff file for gtrack_listlib.h

Looking for diff files in C:\Users\bing.chen\Desktop\codePlayground\check_diff\diff_folder\src: 
Generating diff file for gtrack_create.c
Generating diff file for gtrack_delete.c
Generating diff file for gtrack_module.c
Generating diff file for gtrack_step.c
Generating diff file for gtrack_unit_event.c
Generating diff file for gtrack_unit_predict.c
Generating diff file for gtrack_unit_score.c
Generating diff file for gtrack_unit_start.c
Generating diff file for gtrack_unit_update.c
Generating diff file for gtrack_utilities_2d.c
Generating diff file for gtrack_utilities_3d.c

Looking for diff files in C:\User

In [1]:
# import difflib
# import sys

# fromfile = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\gtrack_3112\gtrack.h'
# tofile = r'C:\Users\bing.chen\Desktop\codePlayground\check_diff\gtrack_3303\gtrack.h'
# fromlines = open(fromfile).readlines()
# tolines = open(tofile).readlines()

# diff = difflib.HtmlDiff().make_file(fromlines, tolines, context=True)

# with open('diff.html', 'w') as f:
#     f.write(diff)
    
    
    
# def gen_diff_html(fromfile, tofile, path):
#     fromlines = open(fromfile).readlines()
#     tolines = open(tofile).readlines()
#     diff = difflib.HtmlDiff().make_file(fromlines, tolines, context=True)
#     htmlout = os.path.join(path, '%s_diff.html'%((os.path.basename(tofile)).split('.')[0]))
#     with open('diff.html', 'w') as f:
#         f.write(diff)