In [1]:
import json

def trim(year):
    """Reduce every feature in ``{year}.json`` to a single flat ring, in place.

    For a feature whose geometry type is ``'MultiPolygon'`` the coordinates
    are replaced by ``coordinates[0][0]``; for any other type they are
    replaced by ``coordinates[0]``.  The geometry ``type`` field itself is
    left untouched.  The file is then overwritten with the result.

    Running this more than once on the same file is safe: a feature whose
    coordinates are already a flat list of ``[x, y, ...]`` points is skipped
    instead of being indexed one level too deep (which would replace the
    ring with a single point or a bare number).

    Parameters
    ----------
    year : anything usable in an f-string
        Stem of the file name; the file read and written is ``f'{year}.json'``.
    """
    path = f'{year}.json'

    with open(path) as json_file:
        data = json.load(json_file)

    for feature in data['features']:
        geometry = feature['geometry']
        is_multi = geometry['type'] == 'MultiPolygon'
        coords = geometry['coordinates']

        # A number at depth two means coords is already [[x, y], ...], i.e.
        # this feature was trimmed by an earlier run; leave it alone.
        if coords and coords[0] and isinstance(coords[0][0], (int, float)):
            continue

        geometry['coordinates'] = coords[0][0] if is_multi else coords[0]

    with open(path, 'w') as outfile:
        json.dump(data, outfile)

In [2]:
import copy

class LinkedVertex:
    """One node of a doubly linked chain of polygon vertices.

    Attributes (all plain instance attributes set in ``__init__``):
      crd  -- the coordinate this node stands for
      prev -- the preceding node, or None
      nxt  -- the following node, or None
      src  -- caller-supplied tag saying where the vertex came from, or None
    """

    def __init__(self, crd, prev = None, nxt = None,src = None):
        # Payload first, then the two links.
        self.crd, self.src = crd, src
        self.prev, self.nxt = prev, nxt
        
def verticesEQ(v1,v2):
    """Return whether two coordinates agree in their first two components.

    Only indices 0 and 1 are compared; any further components are ignored.
    The second component is looked at only when the first ones match.
    """
    same_first = v1[0] == v2[0]
    if not same_first:
        return same_first
    return v1[1] == v2[1]


class LinkedVertexHash:
    """Fixed-size, separately chained hash set of ``LinkedVertex`` nodes.

    Nodes are keyed by their ``crd`` (compared with ``verticesEQ``), so at
    most one node per distinct coordinate is stored.

    Attributes:
      els     -- list of buckets; each slot is None or a list of nodes
      buckets -- number of slots in ``els`` (always >= 1)
      size    -- number of nodes currently stored
    """

    def __init__(self,buckets = 2500000):
        # Clamp to one bucket so vHash never takes a modulus by zero.
        buckets = max(1, int(buckets))
        self.els = [None]*buckets
        self.buckets = buckets
        self.size = 0

    def items(self):
        """Yield every stored node, bucket by bucket."""
        for b in self.els:
            if b:
                for el in b:
                    yield el

    def contains(self,el):
        """Look up a coordinate.

        ``el`` is a coordinate (not a node).  Returns the stored node whose
        ``crd`` matches it, or ``False`` when there is none.
        """
        h = self.vHash(el)
        if self.els[h]:
            for v in self.els[h]:
                if verticesEQ(el,v.crd):
                    return v
        return False

    def add(self,el):
        """Store node ``el`` unless a node with the same coordinate exists."""
        if self.contains(el.crd):
            return

        self.size+=1
        h = self.vHash(el.crd)

        if self.els[h] is None:
            self.els[h] = [el]
        else:
            self.els[h].append(el)

    def addPoly(self,items,src=None):
        """Link the coordinates in ``items`` into a closed ring and store them.

        One ``LinkedVertex`` tagged with ``src`` is created per coordinate and
        chained to its neighbours; the last node is linked back to the first.
        Every node is offered to ``add``, so a coordinate that is already
        present stays part of the ring but is not stored a second time.
        An empty ``items`` is a no-op.
        """
        first = None
        prev = None
        for v in items:
            newVertex = LinkedVertex(crd = v, prev = prev, nxt = None,src = src)
            self.add(newVertex)

            if prev is None:
                first = newVertex
            else:
                prev.nxt = newVertex

            prev = newVertex

        if first is None:
            # Nothing was added; there is no ring to close.
            return

        # Close the ring: last -> first and first -> last.
        prev.nxt = first
        first.prev = prev

    def remove(self,el):
        """Remove the stored node whose coordinate equals ``el.crd``.

        Raises:
          KeyError -- if no node with that coordinate is stored.
        """
        h = self.vHash(el.crd)
        if self.els[h]:
            for i,v in enumerate(self.els[h]):
                # Compare coordinates, not the node object itself.
                if verticesEQ(el.crd,v.crd):
                    self.els[h].pop(i)
                    self.size-=1
                    return
        raise KeyError(f'self set does not contain {el.crd}')

    def vHash(self,v):
        """Map coordinate ``v`` to a bucket index in ``range(self.buckets)``.

        Combines the first two components with powers of 53 and truncates
        the result to an integer before reducing it modulo the bucket count.
        """
        p = 53
        return int((v[0]*(p)+v[1]*(p**2)))%self.buckets

    def asList(self):
        """Return all stored nodes as one new list, in bucket order."""
        res = []

        for b in self.els:
            if b:
                res+=b

        return res

    @staticmethod
    def union(s1,s2):
        """Return a new set holding the nodes of ``s1`` plus those of ``s2``.

        Node objects are shared with the inputs.  For a coordinate present
        in both, the node from ``s1`` is the one kept.
        """
        res = LinkedVertexHash()

        for i in s1.items():
            res.add(i)

        for i in s2.items():
            res.add(i)

        return res

    @staticmethod
    def intersection(s1,s2):
        """Return a new set of the ``s1`` nodes whose coordinate is in ``s2``.

        Node objects are shared with ``s1``.
        """
        res = LinkedVertexHash()

        for i in s1.items():
            if s2.contains(i.crd):
                res.add(i)

        return res

    @staticmethod
    def difference(s1,s2):
        """Return a new set of the ``s1`` nodes whose coordinate is not in ``s2``.

        The result has as many buckets as ``s1`` and shares node objects with
        it, like ``union`` and ``intersection`` do.  It is built by filtering
        rather than by deep-copying ``s1``: copying recurses once per linked
        node and overflows the interpreter stack on long rings.
        """
        res = LinkedVertexHash(buckets = s1.buckets)

        for i in s1.items():
            if not s2.contains(i.crd):
                res.add(i)

        return res

In [3]:
def setFromJSON(data):
    """Build a ``LinkedVertexHash`` from all features of a loaded JSON document.

    Each feature's ``['geometry']['coordinates']`` is handed to ``addPoly`` as
    one ring, tagged with the feature's index in ``data['features']``.
    NOTE(review): this treats ``coordinates`` as a flat list of points, which
    appears to be the shape ``trim`` produces -- confirm the input files have
    been trimmed first.

    The table is sized at roughly 1.3 buckets per coordinate, with a floor of
    one bucket so that lookups on a set built from an empty document do not
    divide by zero.  Features with no coordinates are skipped.

    Parameters
    ----------
    data : dict
        Parsed document with a ``'features'`` list.

    Returns
    -------
    LinkedVertexHash
    """
    states = data['features']

    # Total number of coordinates across all features drives the table size.
    vSize = sum(len(s['geometry']['coordinates']) for s in states)

    vSet = LinkedVertexHash(buckets = max(1, int(vSize*1.3)))

    for i,s in enumerate(states):
        crds = s['geometry']['coordinates']
        if len(crds) > 0:
            vSet.addPoly(crds,i)

    return vSet

In [4]:
# Read the three boundary snapshots into memory, oldest first.
with open('1790.json') as fh:
    data1790 = json.load(fh)

with open('1840.json') as fh:
    data1840 = json.load(fh)

with open('1880.json') as fh:
    data1880 = json.load(fh)

In [5]:
from itertools import islice

def decompose(new, prev):
    """Encode each feature of ``new`` in terms of vertex runs found in ``prev``.

    A vertex set is built from ``prev`` with ``setFromJSON``.  Every feature's
    coordinate list in ``new`` is then walked once.  A coordinate that is not
    in the set is emitted as is.  A coordinate that is in the set (the
    "hook") is emitted as a dict::

        {'hook': <hook coordinate>, 'length': j, 'src': <hook.src>}

    where ``j`` is how many of the coordinates that follow in ``new`` also
    follow the hook, in order, along its ``nxt`` chain in ``prev``.  Those
    ``j`` coordinates are covered by the dict and not emitted separately.
    A run stops at the first mismatch, when the chain comes back to the
    hook's own coordinate, or at the end of the feature's coordinate list --
    it never wraps around to the start, so ``1 + length`` summed over the
    dicts plus the number of plain coordinates always equals the number of
    input coordinates.

    Parameters
    ----------
    new, prev : dict
        Parsed documents with a ``'features'`` list, as accepted by
        ``setFromJSON``.

    Returns
    -------
    list of list
        One list per feature of ``new``, mixing plain coordinates and run
        dicts as described above.
    """
    prevSet = setFromJSON(prev)

    compressedStates = []
    for s in new['features']:
        crds = s['geometry']['coordinates']
        n = len(crds)
        vs = []

        i = 0
        while i < n:
            c = crds[i]
            hook = prevSet.contains(c)
            if not hook:
                vs.append(c)
                i += 1
                continue

            # Extend the run while the next coordinate exists, matches the
            # next node in prev, and that node is not the hook coordinate
            # again (which would mean the ring has been walked all the way).
            j = 0
            line = hook.nxt
            while (i + j + 1 < n and
                   verticesEQ(crds[i + j + 1], line.crd) and
                   not verticesEQ(line.crd, hook.crd)):
                j += 1
                line = line.nxt

            vs.append({'hook':hook.crd, 'length':j, 'src':hook.src})
            # Skip the hook itself plus the j coordinates the run covers.
            i += j + 1

        compressedStates.append(vs)
    return compressedStates

In [8]:
# Encode the 1790 features against the 1880 vertex set and save the result.
new, prev = data1790, data1880

compressed = decompose(new, prev)

with open('compressed2.json', 'w') as out_fh:
    json.dump({'states': compressed}, out_fh)