# Note book looking at options for sorting and making unique cells

Cells don't hash but do equate. This note book looks at options for sorting and making a unique list of these Cells.

In [45]:
from operator import attrgetter
import random
import cProfile

In [4]:
cube1 = [{'name':'time','vals':[1]},{'name':'place', 'vals':[1,2,3]}]
cube2 = [{'name':'time','vals':[2]},{'name':'place', 'vals':[1,2,3]}]

In [7]:
def merge(cubes, names):
    newcube = {name:[] for name in names }
     
    return newcube

merge([cube1, cube2], ['time','place'])

{'place': [], 'time': []}

In [40]:

class Cell(object):
    def __init__(self, val):
        self.val = val
    def __repr__(self):
        return "Cell(%s)" % self.val
    def __eq__(self, other):
        return self.val == other.val
    """
    def __gt__(self, other):
        return self.val > other.val
    def __lt__(self,other):
        return self.val < other.val
        """
    
[c.val for c in sorted([Cell(1), Cell(2), Cell(3), Cell(1), Cell(5)], key=attrgetter('val'))]


[1, 1, 2, 3, 5]

In [41]:
def get_some(n, numtypes=50):
    return [Cell(random.randint(0,numtypes)) for _ in range(n)]



In [55]:
class HashWrap(object):
    def __init__(self, v):
        self.val = v

    def __eq__(self, other):
        return self.val == other.val

    def __hash__(self):
        return self.val.val

    def __repr__(self):
        return 'HashWrap(%s)' % self.val
    


In [42]:
items = get_some(100,30)

In [49]:
def sort_and_unique(items):
    sorted_items = sorted(items, key=attrgetter('val'))
    return [sorted_items[0]] + [sorted_items[i + 1] for i in range(len(sorted_items) -1) if not sorted_items[i + 1] == sorted_items[i] ]



In [54]:
items = get_some(10000000,1000)
cProfile.run('sort_and_unique(items)')


         10000006 function calls in 18.714 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  9999999    5.531    0.000    5.531    0.000 <ipython-input-40-3395f0f7f5ab>:7(__eq__)
        1    0.000    0.000   18.435   18.435 <ipython-input-49-e8d2e16975b5>:1(sort_and_unique)
        1    7.025    7.025   12.556   12.556 <ipython-input-49-e8d2e16975b5>:3(<listcomp>)
        1    0.280    0.280   18.714   18.714 <string>:1(<module>)
        1    0.000    0.000   18.714   18.714 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.len}
        1    5.879    5.879    5.879    5.879 {built-in method builtins.sorted}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [63]:
def wrap_and_set(items):
    return set(HashWrap(item) for item in items)

items = get_some(10000000,1000)
cProfile.run('wrap_and_set(items)')

         49998003 function calls in 38.951 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  9998999    3.294    0.000    3.294    0.000 <ipython-input-40-3395f0f7f5ab>:7(__eq__)
 10000000    3.359    0.000    3.359    0.000 <ipython-input-55-fdcd7945b7d1>:2(__init__)
  9998999    7.054    0.000   10.349    0.000 <ipython-input-55-fdcd7945b7d1>:5(__eq__)
 10000000    3.614    0.000    3.614    0.000 <ipython-input-55-fdcd7945b7d1>:8(__hash__)
        1   13.736   13.736   38.951   38.951 <ipython-input-63-06a1ad9a7034>:1(wrap_and_set)
 10000001    7.893    0.000   11.252    0.000 <ipython-input-63-06a1ad9a7034>:2(<genexpr>)
        1    0.000    0.000   38.951   38.951 <string>:1(<module>)
        1    0.000    0.000   38.951   38.951 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


