In [42]:
# reload python modules
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [43]:
import pandas as pd
from snapdiff.snapper import snapper

In [45]:
from snapdiff.utils import get_path, get_normalized_code

In [46]:
get_path(get_normalized_code)

'snapdiff/utils.py'

In [13]:
@snapper(mode='diff')
def function(s, x, y):
    return x + y + s

In [15]:
function(1, 2, 3)

6

In [6]:
from deepdiff import DeepDiff, Delta

In [7]:
from pathlib import Path

In [8]:
pathone = Path('snapdiff/snapshots/function/1/2/3/1.json')
pathtwo = Path('snapdiff/snapshots/function/1/2/3/1.json')

In [9]:
str(pathone)

'snapdiff/snapshots/function/1/2/3/1.json'

In [10]:
pathone == pathtwo

True

In [11]:
pathone = Path('snapdiff/snapshots/function/1/2/3/1.json')
pathtwo = Path('snapdiff/snapshots/function/1/2/3/1.json')
b = DeepDiff([2,[pathone, 1,6], 1,2,8], [0, [pathtwo, 1,6],pathtwo,2,3])
Delta(b)

<Delta: {'type_changes': {'root[2]': {'old_type': <class 'int'>, 'new_type': <class 'pathlib.PosixPath'>,...}>

In [12]:
from datadiff import diff

In [33]:
from deepdiff import DeepDiff
from deepdiff.operator import BaseOperator
from pathlib import Path

class PathInsensitiveOperator(BaseOperator):
    def match(self, level):
        # Only match when both objects are Path instances
        return isinstance(level.t1, Path) and isinstance(level.t2, Path)
    
    def give_up_diffing(self, level, diff_instance):
        # Compare paths as lowercase resolved strings for insensitivity
        path1, path2 = map(lambda p: p.resolve().as_posix().lower(), (level.t1, level.t2))
        return path1 == path2  # Return True if paths are considered the same

# Instantiate the operator and use it in DeepDiff
path_operator = PathInsensitiveOperator()
diff = DeepDiff([pathtwo, 5, 7], [1, 5,7],
    custom_operators=[PathInsensitiveOperator()]
)
print(diff)  # Should show no difference if paths match under custom comparison


{'type_changes': {'root[0]': {'old_type': <class 'pathlib.PosixPath'>, 'new_type': <class 'int'>, 'old_value': PosixPath('snapdiff/snapshots/function/1/2/3/1.json'), 'new_value': 1}}}


In [43]:
from deepdiff import DeepDiff
from deepdiff.operator import BaseOperator
from pathlib import Path

class PathComparisonOperator(BaseOperator):
    def match(self, level):
        # Ensure both objects at this level are Path instances
        return isinstance(level.t1, Path) and isinstance(level.t2, Path)
    
    def give_up_diffing(self, level, diff_instance):
        # Convert paths to lowercase and resolve for comparison
        path1, path2 = map(lambda p: p.resolve().as_posix().lower(), (level.t1, level.t2))
        if path1 != path2:
            # Use custom_report_result to add a custom difference entry
            diff_instance.custom_report_result(
                "path_difference", 
                level, 
                {"old_value": path1, "new_value": path2}
            )
            return True  # Stop further diffing at this level
        return False  # Paths are the same, continue with standard diffing

# Testing the custom operator in DeepDiff
path_operator = PathComparisonOperator()
diff = DeepDiff(
    {"file_path": Path("/users/test/file.txt")},
    {"file_path": Path("/users/test/file.txt")},
    custom_operators=[path_operator]
)
print(diff)


{'unprocessed': ["root['file_path']: /users/test/file.txt and /users/test/file.txt"]}


In [36]:
import pandas as pd
from deepdiff import DeepDiff
from deepdiff.operator import BaseOperator

class DataFrameComparisonOperator(BaseOperator):
    def __init__(self, ignore_columns=None, ignore_index=True):
        self.ignore_columns = ignore_columns or []
        self.ignore_index = ignore_index
    
    def match(self, level):
        # Ensure both objects at this level are DataFrames
        return isinstance(level.t1, pd.DataFrame) and isinstance(level.t2, pd.DataFrame)
    
    def give_up_diffing(self, level, diff_instance):
        # Remove specified columns
        df1 = level.t1.drop(columns=self.ignore_columns, errors='ignore')
        df2 = level.t2.drop(columns=self.ignore_columns, errors='ignore')
        
        # Reset indices if ignoring index
        if self.ignore_index:
            df1 = df1.reset_index(drop=True)
            df2 = df2.reset_index(drop=True)
        
        # Compare DataFrames for equality
        return df1.equals(df2)

# Sample DataFrames for comparison
df1 = pd.DataFrame({
    'A': [1, 2, 3],
    'B': ['x', 'y', 'z']
})
df2 = pd.DataFrame({
    'A': [1, 2, 3],
    'B': ['x', 'y', 'w']  # Note the change in the last row
})

# Instantiate and use the custom operator in DeepDiff
df_operator = DataFrameComparisonOperator(ignore_columns=['B'], ignore_index=True)
diff = DeepDiff(
    {'data': df1},
    {'data': df2},
    custom_operators=[df_operator]
)
print(diff)  # Displays differences if the DataFrames are not equal


{}


In [20]:
from deepdiff import DeepDiff
from deepdiff.operator import PrefixOrSuffixOperator
t1 = {
    "key1": ["foo", "bar's food", "jack", "joe"]
}
t2 = {
    "key1": ["foo", "bar", "jill", "joe'car"]
}

DeepDiff(t1, t2)


{'values_changed': {"root['key1'][1]": {'new_value': 'bar',
   'old_value': "bar's food"},
  "root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'},
  "root['key1'][3]": {'new_value': "joe'car", 'old_value': 'joe'}}}

In [21]:

DeepDiff(t1, t2, custom_operators=[
    PrefixOrSuffixOperator()
])


{'values_changed': {"root['key1'][2]": {'new_value': 'jill',
   'old_value': 'jack'}}}

In [74]:
[i for i in dir(a) if not i.startswith('_')]

['CACHE_AUTO_ADJUST_THRESHOLD',
 'affected_paths',
 'affected_root_keys',
 'cache_size',
 'cache_tuning_sample_size',
 'clear',
 'copy',
 'custom_operators',
 'custom_report_result',
 'cutoff_distance_for_pairs',
 'cutoff_intersection_for_pairs',
 'deephash_parameters',
 'encodings',
 'exclude_obj_callback',
 'exclude_obj_callback_strict',
 'exclude_paths',
 'exclude_regex_paths',
 'exclude_types',
 'exclude_types_tuple',
 'from_json_pickle',
 'fromkeys',
 'get',
 'get_ignore_types_in_groups',
 'get_significant_digits',
 'get_stats',
 'group_by',
 'group_by_sort_key',
 'hasher',
 'ignore_encoding_errors',
 'ignore_nan_inequality',
 'ignore_numeric_type_changes',
 'ignore_order',
 'ignore_order_func',
 'ignore_private_variables',
 'ignore_string_case',
 'ignore_string_type_changes',
 'ignore_type_in_groups',
 'ignore_type_subclasses',
 'include_obj_callback',
 'include_obj_callback_strict',
 'include_paths',
 'is_root',
 'items',
 'iterable_compare_func',
 'keys',
 'log_scale_similarity

In [32]:
aa = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
bb = aa.copy()
bb['b'] = 33
aadf = pd.DataFrame(aa, index=[0])
bbdf = pd.DataFrame(bb, index=[0])


diff = DeepDiff(aadf, bbdf)


In [37]:
import inspect

In [39]:
def function(s, x, y):
    return x + y + s

In [41]:
inspect.getfile(function)

'/tmp/ipykernel_326473/156289867.py'

In [34]:
a.to_dict()

{}

In [12]:
from snapdiff.utils import get_normalized_code

In [21]:
import ast
import hashlib
import inspect

class NormalizeNames(ast.NodeTransformer):
    def __init__(self):
        self.func_name_counter = 0
        self.var_name_counter = 0
        self.func_name_map = {}
        self.var_name_map = {}

    def visit_FunctionDef(self, node):
        # Assign a generic name to function names
        if node.name not in self.func_name_map:
            self.func_name_map[node.name] = f"func_{self.func_name_counter}"
            self.func_name_counter += 1
        node.name = self.func_name_map[node.name]
        # Continue transforming function arguments and body
        self.generic_visit(node)
        return node

    def visit_Name(self, node):
        # Assign generic names to variable names used in the function
        if isinstance(node.ctx, ast.Store) or isinstance(node.ctx, ast.Load):
            if node.id not in self.var_name_map:
                self.var_name_map[node.id] = f"var_{self.var_name_counter}"
                self.var_name_counter += 1
            node.id = self.var_name_map[node.id]
        return node

def get_normalized_code(func):
    # Get the source code of the function
    source_code = inspect.getsource(func)
    # Parse the source code into an Abstract Syntax Tree
    parsed_code = ast.parse(source_code)
    # Normalize function and variable names
    normalizer = NormalizeNames()
    normalized_tree = normalizer.visit(parsed_code)
    # Convert the normalized AST back to source code (as a string) for hashing
    normalized_code = ast.dump(normalized_tree, annotate_fields=False)
    # Hash the normalized code
    code_hash = hashlib.sha256(normalized_code.encode()).hexdigest()
    return code_hash, normalized_code

In [22]:
def buble_sort(l):
    n = len(l)
    for i in range(n):
        for j in range(0, n-i-1):
            if l[j] > l[j+1]:
                l[j], l[j+1] = l[j+1], l[j]
    return l

In [23]:
hash_, norm = get_normalized_code(buble_sort)

In [25]:
def buble_sort2(l):
    n = len(l)
    for i in range(n):
        for j in range(0, n-i-1):
            if l[j] > l[j+1]:
                l[j], l[j+1] = l[j+1], l[j]
    return l

In [26]:
hash_2, norm2 = get_normalized_code(buble_sort2)

In [34]:
print(norm2)

Module([FunctionDef('func_0', arguments([], [arg('l')], kwonlyargs=[], kw_defaults=[], defaults=[]), [Assign([Name('var_0', Store())], Call(Name('var_1', Load()), [Name('var_2', Load())], [])), For(Name('var_3', Store()), Call(Name('var_4', Load()), [Name('var_0', Load())], []), [For(Name('var_5', Store()), Call(Name('var_4', Load()), [Constant(0), BinOp(BinOp(Name('var_0', Load()), Sub(), Name('var_3', Load())), Sub(), Constant(1))], []), [If(Compare(Subscript(Name('var_2', Load()), Name('var_5', Load()), Load()), [Gt()], [Subscript(Name('var_2', Load()), BinOp(Name('var_5', Load()), Add(), Constant(1)), Load())]), [Assign([Tuple([Subscript(Name('var_2', Load()), Name('var_5', Load()), Store()), Subscript(Name('var_2', Load()), BinOp(Name('var_5', Load()), Add(), Constant(1)), Store())], Store())], Tuple([Subscript(Name('var_2', Load()), BinOp(Name('var_5', Load()), Add(), Constant(1)), Load()), Subscript(Name('var_2', Load()), Name('var_5', Load()), Load())], Load()))], [])], [])], [

In [49]:
import inspect
import ast
import hashlib
import yaml
import os
import json
from deepdiff import DeepDiff
import astor


def get_normalized_code(func):
    # get the source code of the function
    source_code = inspect.getsource(func)
    # parse the source code into an Abstract Syntax Tree
    parsed_code = ast.parse(source_code)
    # convert the Abstract Syntax Tree back to normalized source code
    normalized_code = ast.dump(parsed_code, annotate_fields=True, indent=4)
    # hash the normalized code
    formatted_code = astor.to_source(parsed_code)
    code_hash = hashlib.sha256(normalized_code.encode()).hexdigest()
    return code_hash, normalized_code, formatted_code


In [50]:
get_normalized_code(function)[1])

Module(
    body=[
        FunctionDef(
            name='function',
            args=arguments(
                posonlyargs=[],
                args=[
                    arg(arg='s'),
                    arg(arg='x'),
                    arg(arg='y')],
                kwonlyargs=[],
                kw_defaults=[],
                defaults=[]),
            body=[
                Expr(
                    value=Constant(value='asdfasdf')),
                Return(
                    value=BinOp(
                        left=BinOp(
                            left=Name(id='x', ctx=Load()),
                            op=Add(),
                            right=Name(id='y', ctx=Load())),
                        op=Add(),
                        right=Call(
                            func=Name(id='str', ctx=Load()),
                            args=[
                                Name(id='s', ctx=Load())],
                            keywords=[])))],
            decorator_list=[
      

In [52]:
DeepDiff('asdf', 'sdfg', )

{'values_changed': {'root': {'new_value': 'sdfg', 'old_value': 'asdf'}}}

In [55]:
i = 0
a = i if i else (2 if 2==2 else 3)

In [56]:
a

2

In [None]:
# 1 - the function name changed but the hash is the same
# 2 - the function name changed and the hash is different
# 3 - the function name is the same and the hash is different
# 4 - the function name is the same and the hash is the same

In [21]:
import json
import uuid

ID_FILE = "snapper_ids.json"

# Load ID mappings from a file
def load_id_map():
    try:
        with open(ID_FILE, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}

# Save ID mappings to a file
def save_id_map(id_map):
    with open(ID_FILE, "w") as f:
        json.dump(id_map, f)

# Decorator function to automatically assign a unique ID to each function
def snapper():
    def decorator(func):
        id_map = load_id_map()

        # If the function has been wrapped before, reuse its ID
        if func.__name__ in id_map:
            unique_id = id_map[func.__name__]
        else:
            # Generate a new ID for a new function
            unique_id = str(uuid.uuid4())
            id_map[func.__name__] = unique_id
            save_id_map(id_map)  # Save the updated mapping

        # Assign the ID to the function (or Snapper instance)
        func._snapper_id = unique_id

        # Wrap the function as needed here
        def wrapper(*args, **kwargs):
            print(f"Function {func.__name__} has ID: {func._snapper_id}")
            return func(*args, **kwargs)

        return wrapper

    return decorator



In [35]:

# Example usage
@snapper()
def func3(x, y, z):
    return x + y + z


In [36]:

# Run functions to see the ID assignment
func3(1, 2, 3)  # This should print the ID for func1

Function func3 has ID: a3d671bc-ede7-4653-914a-5f41df6b2b64


6

Function func1 has ID: fe79bafc-1880-4ff8-8c24-9a69973fb80e


3

In [25]:

func2(3, 4)  # This should print the same ID if it's logically the same function


Function func2 has ID: d543b916-a0a7-49e9-b2d1-a859aec8f11e


12