In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# New Algorithm

I came up with a new algorithm, which I will call timeDP2. This algorithm adds noise to the gradients of the values.
Given a vector of length T, we linearly interpolate between its values to obtain an extended vector. We keep only the interpolated midpoints, then calculate the gradient from each midpoint to the corresponding original value, and finally add noise to those gradients with either the Laplace mechanism or the bounded Laplace mechanism.

I want to see what the output looks like.

First I will create the project folder `timeDP2` and define a Laplace noise mechanism (or a bounded Laplace noise mechanism).

In [8]:
%%writefile ../timeDP2/mechanism/__init__.py
# %%writefile ../timeDP2/utils/__init__.py
# %%writefile ../timeDP2/__init__.py


Writing ../timeDP2/mechanism/__init__.py


In [45]:
%%writefile ../timeDP2/mechanism/laplace_mechanism.py
import numpy as np

def laplace_mechanism(value : float, sensitivity: float, epsilon:float) -> float:
    '''
    Adds zero-centred Laplace noise to a single value.

    value       : the number to perturb
    sensitivity : sensitivity of the query; must not be negative
    epsilon     : privacy budget; must be strictly positive

    The noise scale is sensitivity / epsilon.
    Returns value plus one Laplace draw as a plain Python float.
    Raises ValueError when epsilon <= 0 or sensitivity < 0.
    '''
    # Reject invalid parameters up front instead of failing with a
    # ZeroDivisionError or an opaque numpy error about the scale.
    if epsilon <= 0:
        raise ValueError('epsilon should be larger than 0')
    if sensitivity < 0:
        raise ValueError('sensitivity should not be negative')

    scale = sensitivity / epsilon
    # size=1 yields a one-element array; .item() unwraps it to a Python scalar.
    noise = np.random.laplace(0, scale=scale, size=1)
    return value + noise.item()

Writing ../timeDP2/mechanism/laplace_mechanism.py


Now that we have finished writing the mechanisms, we will start writing the algorithm code.

First, given a dataset, which is expected to be a vector of length T, we will write a function that extends it into a vector of length 2*T. Then we will calculate the gradients and keep only the new values. We will try out the new code on a small toy dataset.

In [74]:
from pathlib import Path

# The project root is taken to be the parent of the current working directory.
CURDIR = Path().cwd()
PROJECT_DIR = CURDIR.parent
# Load the array stored under the 'result' key of the processed .npz archive.
data = np.load(PROJECT_DIR.joinpath('data/processed/death_numpy.npz'))['result']

# Swap the first two axes, take the first slice, wrap it in a DataFrame and
# keep column 0 of its transpose as a one-dimensional toy series.
# NOTE(review): the meaning of each axis of `data` is not visible here - confirm what index 0 selects.
toy_vector = pd.DataFrame(np.transpose(data, (1,0,-1))[0]).T[0]
toy_vector.head()

0    68.0
1    65.0
2    63.0
3    63.0
4    66.0
Name: 0, dtype: float32

In [117]:
# Scratch experiment: zip() accepts only iterables, so the trailing integer 2
# is what triggers the TypeError shown in this cell's output.
for i,z in zip([1,2,3,4,5],[6,7,8,9,10],2):
    print(i,z)

TypeError: 'int' object is not iterable

In [124]:
%%writefile ../timeDP2/utils/functions.py

import numpy as np
from scipy import interpolate

def make_extended_vector(vector : np.array):
    '''
    Takes a vector of length T and returns an extended vector of length 2*T.

    The original values are placed at the odd positions 1, 3, ..., 2*T-1.
    Every integer position 0 .. 2*T-1 is then evaluated by linear
    interpolation; position 0 lies before the first original value and is
    therefore linearly extrapolated.
    '''
    n_steps = len(vector)
    # Odd positions carry the original samples.
    original_positions = np.arange(1, 2 * n_steps, 2)
    # Every integer position of the extended vector.
    all_positions = np.arange(2 * n_steps)

    interpolator = interpolate.interp1d(original_positions, vector, fill_value='extrapolate')
    return interpolator(all_positions)


def make_coordinate_list(x_vector : list, y_vector : list, step: int):
    '''
    Pairs x and y values into (x, y) tuples and keeps every `step`-th pair,
    starting with the first one.
    '''
    paired = list(zip(x_vector, y_vector))
    return paired[::step]


def calculate_gradient(two_points:tuple, dx=1):
    '''
    Returns the gradient between two values that are dx apart.

    two_points : (x_0, x_1), the start value and the end value
    dx         : distance between the two values, default 1; must be positive

    Raises ValueError when dx is zero or negative.
    '''
    # Validate dx before touching the points, so a bad dx is always reported first.
    if dx <= 0 :
        raise ValueError('dx should be larger than 0')

    start_value, end_value = two_points
    return (end_value - start_value)/dx


def give_calculated_gradient_list(extended_vector:list, step=2):
    '''
    This function takes the extended vector as its input and calculates the
    gradient between each selected point and the point right after it.

    extended_vector : sequence of values
    step            : distance between the start indices of consecutive
                      pairs. default : 2, which pairs (0, 1), (2, 3), ...

    Returns a list with one gradient per pair. A start index without a
    following element is skipped, so an odd trailing value is ignored.
    Raises ValueError when step is smaller than 1.
    '''
    if step < 1:
        raise ValueError('step should be at least 1')

    # The last usable start index is len - 2, because every pair needs a successor.
    last_start = len(extended_vector) - 1
    chunks = [
        (extended_vector[start], extended_vector[start + 1])
        for start in range(0, last_start, step)
    ]
    return [calculate_gradient(chunk) for chunk in chunks]


def calculate_linear_function_form(coordinate: tuple, gradient: float):
    '''
    Returns the parameters (a, b) of the line y = a*x + b that has the given
    gradient and passes through the given coordinate.

    coordinate : (x, y)
    gradient   : slope a of the line
    '''
    point_x, point_y = coordinate
    # Solve y = a*x + b for the intercept b.
    intercept = point_y - point_x*gradient
    return gradient, intercept

Overwriting ../timeDP2/utils/functions.py


In [123]:
make_coordinate_list([1,2,3,4,5,6],[7,8,9,10,11,12], 2)

[(1, 7), (3, 9), (5, 11)]

Now we write the noise algorithm and the algorithm that restores the series to its original form.

In [127]:
%%writefile ../timeDP2/algorithm/timedp_algorithm.py
# import os, sys
# sys.path.append('../')

from timeDP2.utils.functions import *
from timeDP2.mechanism import bounded_laplace_mechanism as BLM
from timeDP2.mechanism import laplace_mechanism as LM

class TimeDP:
    '''
    Noise-adding helper: it selects a differential-privacy mechanism and
    applies it to single values.
    '''

    def __init__(self, epsilon, delta, mechanism_type : str):
        '''
        epsilon        : forwarded to the selected mechanism
        delta          : forwarded to the bounded Laplace mechanism only
        mechanism_type : 'laplace' selects the original Laplace mechanism;
                         any other value selects the bounded Laplace mechanism
        '''
        self.epsilon = epsilon
        self.delta = delta
        self.mechanism_type = mechanism_type
        self.mechanism = self._dp_mechanism()

    def _dp_mechanism(self):
        '''
        Returns the mechanism function that matches mechanism_type.
        '''
        use_laplace = self.mechanism_type == 'laplace'
        return LM.laplace_mechanism if use_laplace else BLM.boundedlaplacemechanism

    def calculate_dp_value(self, val, sens):
        '''
        Applies the selected mechanism to one value.

        val  : value to perturb
        sens : sensitivity; only used by the Laplace mechanism
        '''
        use_laplace = self.mechanism_type == 'laplace'
        if use_laplace:
            arguments = dict(value=val, sensitivity=sens, epsilon=self.epsilon)
        else:
            # The bounded mechanism is called with fixed D and b values; sens is not forwarded.
            arguments = dict(value=val, D=None, b=0.1, epsilon=self.epsilon, delta=self.delta)
        return self.mechanism(**arguments)


class Vector_creator:
    '''
    Builds a noised version of a vector.

    The pipeline, run once in the constructor, is:
      1. extend the vector to twice its length by linear interpolation,
      2. take the coordinates at the even positions of the extended vector,
      3. compute the gradient of each (even, odd) pair of extended values,
      4. perturb every gradient with the mechanism held by `timedp`,
      5. build a line through each even-position coordinate with the
         perturbed gradient,
      6. evaluate each line at the following odd position.
    '''

    def __init__(self, vector: np.array, timedp: object, sensitivity: float = 0.1):
        '''
        vector      : the series to perturb
        timedp      : object exposing `mechanism_type` and
                      `calculate_dp_value(val, sens)`
        sensitivity : sensitivity passed to the Laplace mechanism, default 0.1
        '''
        self.vector = vector
        self.vector_length = len(vector)
        self.timedp = timedp
        self.mechanism_type = timedp.mechanism_type
        self.sensitivity = sensitivity
        # Order matters below: each step reads attributes set by the previous ones.
        self.extended_vector = self.make_extended_vector()
        self.coordinates = self.make_coordinates()
        self.gradient_list = self.prepare_for_gradient()
        self.new_gradients = self.make_new_gradient()
        self.new_function_forms = self.calculate_function_form()
        self.new_vector = self.make_new_vector()

    def make_extended_vector(self):
        '''
        makes extended vector
        '''
        # Resolves to the module-level helper, not to this method.
        return make_extended_vector(vector=self.vector)

    def make_coordinates(self):
        '''
        makes the (x, y) coordinate list of the even positions of the extended vector
        '''
        extend_vector_length = self.vector_length * 2
        xs = list(range(0, extend_vector_length))
        return make_coordinate_list(xs, self.extended_vector, 2)

    def prepare_for_gradient(self):
        '''
        calculates the gradient of every (even, odd) pair of the extended vector
        '''
        return give_calculated_gradient_list(self.extended_vector, step=2)

    def create_boundary(self, gradient):
        '''
        returns a (lower, upper) boundary for a gradient:
        (2*gradient, 0) when negative, (0, 2*gradient) when positive, (-1, 1) otherwise
        '''
        if gradient < 0 :
            return 2*gradient, 0
        elif gradient > 0 :
            return 0, 2*gradient
        else :
            return -1, 1

    def create_boundary_list(self):
        '''
        boundary for every gradient in gradient_list
        '''
        return [self.create_boundary(gradient) for gradient in self.gradient_list]

    def make_new_gradient(self):
        '''
        perturbs every gradient with the mechanism of `timedp`
        '''
        perturb = self.timedp.calculate_dp_value
        if self.mechanism_type == 'laplace':
            return [perturb(gradient, self.sensitivity) for gradient in self.gradient_list]

        # The boundary is passed as the second positional argument. Whether it
        # is used depends on timedp; TimeDP.calculate_dp_value in this file
        # does not forward it to the bounded mechanism.
        boundary_list = self.create_boundary_list()
        return [perturb(gradient, boundary)
                for gradient, boundary in zip(self.gradient_list, boundary_list)]

    def calculate_function_form(self):
        '''
        list of (gradient and b), built from the perturbed gradients
        '''
        return [calculate_linear_function_form(coordinate, gradient)
                for coordinate, gradient in zip(self.coordinates, self.new_gradients)]

    def make_new_value_based_on_new_gradient(self, gradient, b, x):
        '''
        evaluates y = gradient*x + b
        '''
        y = x*gradient + b
        return y

    def make_new_vector(self):
        '''
        evaluates every line at the odd position that follows its anchor point
        '''
        extend_vector_length = self.vector_length * 2
        xs = range(1, extend_vector_length, 2)
        return [self.make_new_value_based_on_new_gradient(g, b, x)
                for (g, b), x in zip(self.new_function_forms, xs)]
        
        
        


Overwriting ../timeDP2/algorithm/timedp_algorithm.py


In [8]:
# NOTE(review): the element expression before `for` is missing, so this cell is not valid Python as saved.
# The recorded output lists lambdas taking (x, y, z), so the lost expression was presumably a lambda - confirm.
[ for (i,u), p in zip([(1,2),(3,4),(5,6)],['a','b','c'])]

[<function __main__.<listcomp>.<lambda>(x, y, z)>,
 <function __main__.<listcomp>.<lambda>(x, y, z)>,
 <function __main__.<listcomp>.<lambda>(x, y, z)>]

In [94]:
# Scratch check: len() of a six-element list.
l = [1,2,3,4,5,6]
len(l)

6