In [1]:
# Load .esp json representation, count references, compare
from pprint import pprint
import time
import json
from jsondiff import diff as JsonDiff   #https://github.com/xlwings/jsondiff
from deepdiff import DeepDiff
# also see for comparing regardless of order https://stackoverflow.com/questions/25851183/how-to-compare-two-json-objects-with-the-same-elements-in-a-different-order-equa
def pp(i, c=False):
    return pprint(i, width=100, compact=c)

# Simple reference counting for cells

In [None]:
fileold = "TR_Mainland.json"
filenew = "TR_Mainland - Copy.json"
path = ""

In [2]:
def count_references(json_filename):
    with open(json_filename) as file:
        f = json.load(file)
    f = [x for x in f if x['type']=="Cell"]
    #f = [x for x in f if 'atmosphere_data' not in x]    #exteriors only
    for i in range(0, len(f)):
        f[i]['references'] = len(f[i]['references'])    #replace list of references with total number of references
    f = {str(x['data']['grid']) if 'atmosphere_data' not in x else x['id']: x['references'] for x in f}
    return f

In [3]:
count_refs_new = count_references(path+fileold)
count_refs_old = count_references(path+filenew)
print(len(count_refs_new))
print(len(count_refs_old))

3700
3700


In [4]:
compare_ref_counts = {}
for key in count_refs_new.keys():
    if key in count_refs_old.keys():
        compare_ref_counts[key] = count_refs_new[key] - count_refs_old[key] 
compare_ref_counts = [(x, y) for x, y in compare_ref_counts.items() if y != 0]
compare_ref_counts.sort()
for x in compare_ref_counts: 
    print(x[0],x[1]) 

Balmora, Guild of Fighters -1
[11, 22] -9
[12, 20] -2
[12, 22] -11
[13, 19] -1
[13, 20] -11
[13, 21] -17
[2, -15] -27
[3, -15] -10
[3, -16] 1
[4, -16] 8
[42, 16] -1
[42, 17] -27


# JSON differences

In [5]:
file1="C:\Games\Morrowind LowRes\Data Files\TR_Mainland.json"
file2="C:\Games\Morrowind LowRes\Data Files\TR_Mainland - Copy.json"

In [6]:
with open(file1) as file:
    json1 = json.load(file)
with open(file2) as file:
    json2 = json.load(file)

In [9]:
types = []

for x in json1+json2:
    if x['type'] not in types:
        types.append(x['type'])

In [10]:
json1data = {"typecounts":{t:len([x for x in json1 if x['type']==t]) for t in types}}
json2data = {"typecounts":{t:len([x for x in json2 if x['type']==t]) for t in types}}
# compare numbers of entries for each type
[(t, json1data['typecounts'][t], json2data['typecounts'][t], json1data['typecounts'][t]==json2data['typecounts'][t]) for t in types]

[('Header', 1, 1, True),
 ('GlobalVariable', 174, 174, True),
 ('Faction', 1, 1, True),
 ('Sound', 18, 18, True),
 ('Script', 1657, 1657, True),
 ('StartScript', 1, 1, True),
 ('LandscapeTexture', 223, 224, False),
 ('Spell', 136, 136, True),
 ('Static', 308, 308, True),
 ('Door', 183, 183, True),
 ('MiscItem', 413, 413, True),
 ('Weapon', 162, 162, True),
 ('Container', 817, 817, True),
 ('Creature', 286, 286, True),
 ('Bodypart', 13, 13, True),
 ('Light', 188, 188, True),
 ('Enchantment', 244, 244, True),
 ('Npc', 3880, 3880, True),
 ('Armor', 86, 86, True),
 ('Clothing', 191, 191, True),
 ('RepairTool', 1, 1, True),
 ('Activator', 1095, 1095, True),
 ('Apparatus', 9, 9, True),
 ('Ingredient', 52, 52, True),
 ('Book', 567, 567, True),
 ('Alchemy', 70, 70, True),
 ('LevelledItem', 24, 24, True),
 ('LevelledCreature', 11, 11, True),
 ('Cell', 3700, 3700, True),
 ('Landscape', 1790, 1790, True),
 ('PathGrid', 1809, 1824, False),
 ('Dialogue', 2071, 2071, True),
 ('Info', 32097, 32097, T

# Header type

In [11]:
test1 = [x for x in json1 if x.get('type') == "Header"][0]
test2 = [x for x in json2 if x.get('type') == "Header"][0]
jsondiff_dict = (JsonDiff(test1, test2), JsonDiff(test2, test1))
deepdiff_dict = DeepDiff(test1, test2)
pp(jsondiff_dict)
pp(deepdiff_dict)

({'masters': {delete: [4]}, 'num_objects': 52293},
 {'masters': {insert: [(4, ['TD_Addon.esm', 1912481])]}, 'num_objects': 52277})
{'iterable_item_removed': {"root['masters'][4]": ['TD_Addon.esm', 1912481]},
 'values_changed': {"root['num_objects']": {'new_value': 52293, 'old_value': 52277}}}


# Cell types


In [12]:
# list cells by grid if ext, by name if int
test1 = {x['id'] if 'atmosphere_data' in x else str(x['data']['grid']):x for x in json1 if x['type'] == "Cell"}
test2 = {x['id'] if 'atmosphere_data' in x else str(x['data']['grid']):x for x in json2 if x['type'] == "Cell"}
testmissing = {x['id'] if 'atmosphere_data' in x else str(x['data']['grid']):x for x in json2 if x['type'] == "Cell" and x['id'] != 'Balmora, Guild of Fighters'}

cell_ids = []

for x in test1.keys():
    if x not in cell_ids:
        cell_ids.append(x)
for x in test2.keys():
    if x not in cell_ids:
        cell_ids.append(x)

# use dict.get() in case keys are missing
cells_unequal = [id for id in cell_ids if test1.get(id)!=test2.get(id)]
#difference_dict = {id:(diff(test1.get(id), test2.get(id)), diff(test2.get(id), test1.get(id))) for id in cells_unequal}
difference_dict = {id:DeepDiff(test1.get(id), test2.get(id)) for id in cells_unequal}

In [13]:
difference_dict[cells_unequal[-1]]

{'iterable_item_added': {"root['references'][0]": {'mast_index': 1,
   'refr_index': 460129,
   'id': 'com_chest_02_fg_supply',
   'temporary': True,
   'translation': [0.0, 0.0, 0.0],
   'rotation': [0.0, 0.0, 0.0],
   'scale': 1.12,
   'owner_faction': 'Fighters Guild',
   'owner_faction_rank': 0,
   'health_left': 0,
   'deleted': 4729956}}}

In [14]:
pprint(test1[cells_unequal[-1]]['references'])
pprint(test2[cells_unequal[-1]]['references'])


[]
[{'deleted': 4729956,
  'health_left': 0,
  'id': 'com_chest_02_fg_supply',
  'mast_index': 1,
  'owner_faction': 'Fighters Guild',
  'owner_faction_rank': 0,
  'refr_index': 460129,
  'rotation': [0.0, 0.0, 0.0],
  'scale': 1.12,
  'temporary': True,
  'translation': [0.0, 0.0, 0.0]}]


In [15]:
pp(difference_dict[cells_unequal[-1]])

{'iterable_item_added': {"root['references'][0]": {'deleted': 4729956,
                                                   'health_left': 0,
                                                   'id': 'com_chest_02_fg_supply',
                                                   'mast_index': 1,
                                                   'owner_faction': 'Fighters Guild',
                                                   'owner_faction_rank': 0,
                                                   'refr_index': 460129,
                                                   'rotation': [0.0, 0.0, 0.0],
                                                   'scale': 1.12,
                                                   'temporary': True,
                                                   'translation': [0.0, 0.0, 0.0]}}}


# Check to what extent IDs are unique

In [19]:
# check with types do not have IDs?
test=[x for x in json1 if not x.get('id')]
no_id_types = list(set([x['type'] for x in test]))
print(no_id_types)
# check whether IDs are unique?
test=[x for x in json1 if x.get('id') and x['type'] not in no_id_types]
print(len(test)==len(set([x['id'] for x in test])))
# some non-unique IDs, figure out which
from collections import Counter
duplicate_ids={k:v for k, v in Counter([x['id'] for x in test]).items() if v>1}
print(duplicate_ids)
# what are they?
pp([x for x in test if x['id'] in duplicate_ids.keys()])
# well, at least they have different types, so we can still count on IDs being unique within identical-type groups.

['PathGrid', 'Info', 'Cell', 'Header', 'Landscape']
False
{'TR_m3_OE_MG_PuzzleExit': 2, 'TR_m4_HH_ScribPie': 2}
[{'bytecode': 'KLUv/QBYlQcA0o0yMYCp6WZCc4HMDMzAzIqLtMTdTtsd+xcS8mDqg2X4FzAHVuEtEG5BQKwNWDmPu8uLJO3/X5a1RkTCCWwxtJiY/7KsSey1RPZSBhlRZJw0YMXDTLt90+6AFwp86YZ7WRyVYcYEBxokmLhxd3+02qtifGnfigk4Bs1yEBxq4sfc/R3yMARxu+X3uz+I8TDzpmdLPSuKRj0nJiQcZJzIiCIPIuSD9IAFlb5QE3MPiDAEsdWzouy1AEGvW6lYz5ZuR1nNKKVHZAjAXWunCA0ARqi10udv1kIp4GdqCJBG0gwOsqykXQBj5RNYMsDRvgWJwQw=',
  'flags': [0, 0],
  'header': {'bytecode_length': 383,
             'num_floats': 0,
             'num_longs': 0,
             'num_shorts': 1,
             'variables_length': 5},
  'id': 'TR_m3_OE_MG_PuzzleExit',
  'text': 'Begin TR_m3_OE_MG_PuzzleExit\r\n'
          '\r\n'
          'short open\r\n'
          '\r\n'
          'if ( T_Glb_GetTeleportingDisabled == 0 )\r\n'
          '\tPlayer->StartScript TR_m3_OE_MG_PuzzleEnableTele\r\n'
          '\tset T_Glb_GetTeleportingDisabled to 1\r\n'
          '\tDisableTeleporting\

# First DeepDiff pass

In [190]:
#deepdiff is faster but less legible - use to find initial areas to further investigate?

#test_jsondiff = {}
test_deepdiff_all_types = {}
for t in types:

    print("Processing "+t+' '*(16-len(t)), end='')
    start = time.time()
    if t == 'Landscape':
        test1={str(x['grid']):x for x in json1 if x['type']==t}
        test2={str(x['grid']):x for x in json2 if x['type']==t}
    elif t == 'Cell':
        test1={x['id'] if 'atmosphere_data' in x else str(x['data']['grid']): x for x in json1 if x['type']==t}
        test2={x['id'] if 'atmosphere_data' in x else str(x['data']['grid']): x for x in json2 if x['type']==t}
    elif t == 'PathGrid':
        test1={str(x['data']['grid']) if x['data']['grid']==[0, 0] and x['cell'] == "Ashlands Region" else x['cell'] if x['data']['grid']==[0, 0] else str(x['data']['grid']): x for x in json1 if x['type']==t}
        test2={str(x['data']['grid']) if x['data']['grid']==[0, 0] and x['cell'] == "Ashlands Region" else x['cell'] if x['data']['grid']==[0, 0] else str(x['data']['grid']): x for x in json2 if x['type']==t}
    elif t == 'Info':
        # info doesn't store its parent Dialogue entry, so fetch both Dialogue and Info and nest them in a dictonary for the sake of comparison.
        test1d=[x for x in json1 if x['type']=='Dialogue' or x['type']=='Info']
        test2d=[x for x in json2 if x['type']=='Dialogue' or x['type']=='Info']
        topic = None
        test1 = {}
        for x in test1d:
            if x['type']=='Dialogue':
                topic = x['id']
                test1[topic] = []
            else:
                test1[topic].append(x)
        topic = None
        test2 = {}
        for x in test2d:
            if x['type']=='Dialogue':
                topic = x['id']
                test2[topic] = []
            else:
                test2[topic].append(x)
    elif t not in ['Landscape', 'Header', 'PathGrid', 'Cell', 'Info']:  #catch-all for entries that have an id?
        test1={x['id']: x for x in json1 if x['type']==t}
        test2={x['id']: x for x in json2 if x['type']==t}
    else:
        test1=[x for x in json1 if x['type']==t]
        test2=[x for x in json2 if x['type']==t]
    if test1 == test2:
        test_deepdiff_all_types[t]={}
        print("\tidentical")
    else:
        if isinstance(test1, dict)&isinstance(test2, dict):
            keys = []   # first record all unique keys - keys = test1.keys() | test2.keys() could also work but doesn't preserve ordering, so let's just use this for now.
            for x in test1.keys():
                if x not in keys:
                    keys.append(x)
            for x in test2.keys():
                if x not in keys:
                    keys.append(x)
            keys = [k for k in keys if test1.get(k)!=test2.get(k)]
            test_deepdiff_all_types[t]={}
            for k in keys:
                test_deepdiff_all_types[t][k] = DeepDiff(test1.get(k), test2.get(k), ignore_order=False)
        else:
            #test_jsondiff[t] = diff(test1, test2)
            test_deepdiff_all_types[t] = DeepDiff(test1, test2, ignore_order=False)
        end = time.time()
        l = [len(test_deepdiff_all_types[t][k] or '') for k in test_deepdiff_all_types[t].keys()]
        #print(" \t\tdelta="+str(len(test_jsondiff[t] or '')))
        print("\tdelta="+str(sum(l or ''))+' '*max((10-len(str(sum(l or '')))),0)+' time='+str(round(end-start,5)))


Processing Header          	delta=2          time=0.01852
Processing GlobalVariable  	identical
Processing Faction         	identical
Processing Sound           	identical
Processing Script          	identical
Processing StartScript     	identical
Processing LandscapeTexture	delta=139        time=0.03403
Processing Spell           	identical
Processing Static          	identical
Processing Door            	identical
Processing MiscItem        	identical
Processing Weapon          	identical
Processing Container       	identical
Processing Creature        	identical
Processing Bodypart        	identical
Processing Light           	identical
Processing Enchantment     	identical
Processing Npc             	delta=4          time=0.15413
Processing Armor           	identical
Processing Clothing        	identical
Processing RepairTool      	identical
Processing Activator       	identical
Processing Apparatus       	identical
Processing Ingredient      	identical
Processing Book            	

# JsonDiff instead

In [219]:
#jsondiff is more legible but slower, though with the preliminary matching we made, it is acceptable.

test_jsondiff_all_types = {}
#test_deepdiff_all_types = {}
for t in types:

    print("Processing "+t+' '*(16-len(t)), end='')
    start = time.time()
    if t == 'Landscape':
        test1={str(x['grid']):x for x in json1 if x['type']==t}
        test2={str(x['grid']):x for x in json2 if x['type']==t}
    elif t == 'Cell':
        test1={x['id'] if 'atmosphere_data' in x else str(x['data']['grid']): x for x in json1 if x['type']==t}
        test2={x['id'] if 'atmosphere_data' in x else str(x['data']['grid']): x for x in json2 if x['type']==t}
    elif t == 'PathGrid':
        test1={str(x['data']['grid']) if x['data']['grid']==[0, 0] and x['cell'] == "Ashlands Region" else x['cell'] if x['data']['grid']==[0, 0] else str(x['data']['grid']): x for x in json1 if x['type']==t}
        test2={str(x['data']['grid']) if x['data']['grid']==[0, 0] and x['cell'] == "Ashlands Region" else x['cell'] if x['data']['grid']==[0, 0] else str(x['data']['grid']): x for x in json2 if x['type']==t}
    elif t == 'Info':
        # info doesn't store its parent Dialogue entry, so fetch both Dialogue and Info and nest them in a dictonary for the sake of comparison.
        test1d=[x for x in json1 if x['type']=='Dialogue' or x['type']=='Info']
        test2d=[x for x in json2 if x['type']=='Dialogue' or x['type']=='Info']
        topic = None
        test1 = {}
        for x in test1d:
            if x['type']=='Dialogue':
                topic = x['id']
                test1[topic] = []
            else:
                test1[topic].append(x)
        topic = None
        test2 = {}
        for x in test2d:
            if x['type']=='Dialogue':
                topic = x['id']
                test2[topic] = []
            else:
                test2[topic].append(x)
    elif t not in ['Landscape', 'Header', 'PathGrid', 'Cell', 'Info']:  #catch-all for entries that have an id?
        test1={x['id']: x for x in json1 if x['type']==t}
        test2={x['id']: x for x in json2 if x['type']==t}
    else:
        test1=[x for x in json1 if x['type']==t]
        test2=[x for x in json2 if x['type']==t]
    if test1 == test2:
        test_jsondiff_all_types[t]={}
    else:
        if isinstance(test1, dict)&isinstance(test2, dict):
            keys = []   # first record all unique keys - keys = test1.keys() | test2.keys() could also work but doesn't preserve ordering, so let's just use this for now.
            for x in test1.keys():
                if x not in keys:
                    keys.append(x)
            for x in test2.keys():
                if x not in keys:
                    keys.append(x)
            keys = [k for k in keys if test1.get(k)!=test2.get(k)]
            test_jsondiff_all_types[t]={}
            for k in keys:
                test_jsondiff_all_types[t][k] = (JsonDiff(test1.get(k), test2.get(k)), JsonDiff(test2.get(k), test1.get(k)))
        else:
            #test_jsondiff[t] = diff(test1, test2)
            test_jsondiff_all_types[t] = (JsonDiff(test1, test2), JsonDiff(test2, test1))
    l = len(test_jsondiff_all_types[t])
    end = time.time()

    #print(" \t\tdelta="+str(len(test_jsondiff[t] or '')))
    print("\tdelta="+str(l)+' '*max((10-len(str(l))),0)+' time='+str(round(end-start,5)))


Processing Header          	delta=2          time=0.01952
Processing GlobalVariable  	delta=0          time=0.01451
Processing Faction         	delta=0          time=0.01501
Processing Sound           	delta=0          time=0.01501
Processing Script          	delta=0          time=0.01601
Processing StartScript     	delta=0          time=0.01451
Processing LandscapeTexture	delta=139        time=0.02702
Processing Spell           	delta=0          time=0.02002
Processing Static          	delta=0          time=0.02002
Processing Door            	delta=0          time=0.02102
Processing MiscItem        	delta=0          time=0.02002
Processing Weapon          	delta=0          time=0.01751
Processing Container       	delta=0          time=0.01651
Processing Creature        	delta=0          time=0.01852
Processing Bodypart        	delta=0          time=0.02202
Processing Light           	delta=0          time=0.02052
Processing Enchantment     	delta=0          time=0.01751
Processing Npc