In [1]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import scipy.stats
import seaborn as sns
from scipy.stats import laplace
from sklearn.metrics import mean_squared_error


In [2]:
# Keyword arguments for psycopg2.connect. Every field can be overridden through
# the corresponding libpq environment variable so that real credentials do not
# have to be written into the notebook; the fallbacks are the previous
# hard-coded local-development values.
param_dic = {
    "host"      : os.environ.get("PGHOST", "localhost"),
    "database"  : os.environ.get("PGDATABASE", "bachelorBesoeg2014"),
    "user"      : os.environ.get("PGUSER", "postgres"),
    "password"  : os.environ.get("PGPASSWORD", "password"),
    "port"      : os.environ.get("PGPORT", "5432")
}

def execQuery(params_dic,query):
    """Runs a query against PostgreSQL and returns every fetched row

    Parameters:
    params_dic (dict): Keyword arguments forwarded to psycopg2.connect.
    query (str): SQL text handed to cursor.execute.

    Returns:
    list: The rows from cursor.fetchall(), or None when connecting or
    executing failed (the error is printed, not raised).

    """
    connection = None
    cursor = None
    succeeded = False
    try:
        connection = psycopg2.connect(**params_dic)
        cursor = connection.cursor()
        cursor.execute(query)
        record = cursor.fetchall()
        succeeded = True
        return record
    except (Exception, psycopg2.Error) as error :
        print ("Error while connecting to PostgreSQL", error)
        return None
    finally:
        # Release whatever was opened even when the query failed; previously the
        # error path dropped its reference and left the connection open.
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
        if succeeded:
            print("Executed query and closed connection.")
#"""select count(*) as number_of_dates, count(DISTINCT  count_) as distinct_count from _775147;"""
# Pull the time_ column; each fetched row is a 1-tuple, so unwrap it.
query = """select time_ from _775147;"""
result = execQuery(param_dic, query)
dates = [row[0] for row in result]

Executed query and closed connection.
<class 'list'>
<class 'tuple'>
<class 'datetime.date'>


In [3]:
# Fetch every count together with its date and line the counts up with `dates`
# by key. Two separate SELECTs without ORDER BY give no guarantee that row i of
# one corresponds to row i of the other.
query = """select time_, count_ from _775147;"""
result = execQuery(param_dic, query)
count_by_date = dict(result)
# The lookup by date only works if every date occurs once in the table.
assert len(count_by_date) == len(result), "duplicate time_ values in _775147"
counts = [count_by_date[date] for date in dates]

Executed query and closed connection.


In [4]:
#counts

In [8]:
# Scratch check: log2 of the series length, the number of tree layers it needs,
# and the padded (power-of-two) length.
T = 1794
print(
    np.log2(T),
    int(np.ceil(np.log2(T))),
    2**np.ceil(np.log2(T)),
    sep="\n",
)


10.80896417491926
11
2048.0


In [12]:
def get_index(date_idx, n_layers):
    """Lists the node indices on the way from a leaf up to the root

    Parameters:
    date_idx (int): Index of the leaf in the bottom layer of the tree.
    n_layers (int): Number of halving steps between the bottom layer and the root.

    Returns:
    list: n_layers + 1 indices, bottom layer first. Entry k is date_idx halved
    k times with floor division; the last entry is always 0.

    """
    leaf = int(date_idx)
    # Shifting right by k is the same as k successive floor divisions by 2.
    return [leaf >> level for level in range(n_layers)] + [0]

In [13]:
# Series length as loaded, then padded up to the next power of two.
T = len(dates)
print(T)
T = 2 ** np.ceil(np.log2(T))
print(T)
# Number of halving steps from the bottom layer to the root.
n_layers = int(np.log2(T))
print(f'n_layers = {n_layers}')
# T is already a power of two here, so the last leaf is simply T - 1.
print(get_index(T - 1, n_layers))

1794
2048.0
n_layers = 11
[2047, 1023, 511, 255, 127, 63, 31, 15, 7, 3, 1, 0]


In [14]:
# Spot-check a few (leaf, height) pairs, one path per output line.
print(
    *(get_index(leaf, height) for leaf, height in ((0, 1), (0, 2), (0, 3), (12, 4))),
    sep="\n",
)

[0, 0]
[0, 0, 0]
[0, 0, 0, 0]
[12, 6, 3, 1, 0]


In [15]:
# Path for the first leaf at the full tree height.
print(get_index(date_idx=0, n_layers=n_layers))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [16]:
# log2 of the current value of T; displayed as the cell's result.
np.log2(T)

11.0

In [17]:
# 2 raised to np.ceil(0): prints 1.0 (a float, not an int).
print(2**np.ceil(0))

1.0


In [18]:
# `epsilon` is otherwise first assigned in a later cell, so on a fresh kernel
# this cell raised NameError. Define it before use.
epsilon = 0.7
# One frozen Laplace distribution with scale log2(T)/epsilon, reused for every draw.
node_noise = laplace(scale=(np.log2(T))/epsilon)
# Drawing several values and drawing a single value both return a numpy array.
print( type(node_noise.rvs(int(np.ceil(np.log2(1794))))))
print( node_noise.rvs(int(np.ceil(np.log2(1794)))))
print( type(node_noise.rvs(1)))
print( node_noise.rvs(1))

<class 'numpy.ndarray'>
[-35.28586081  -1.23109471   2.55302941 -30.80437906  -1.46634152
   8.90055945 -32.6139728   -7.84837736   1.36831592 -11.7462362
  23.95923493]
<class 'numpy.ndarray'>
[-35.07773817]


In [19]:
class con_obs:
    """Noisy running totals over a dated series of counts

    A full binary tree with n_layers + 1 levels is filled with independent
    Laplace(scale=zeta) draws, one per node. Every position i of the series is
    perturbed by the sum of the draws on the path from the root to leaf i, and
    noise_counts[i] holds the running total of those perturbed counts. Range
    queries are answered as differences of running totals.

    NOTE(review): the name suggests the "continual observation" binary
    mechanism from the differential-privacy literature; whether this noise
    layout gives the intended privacy guarantee is not established by the code.
    """

    def __init__(self, T, epsilon, dates, counts, random_state=None):
        """Builds the noise tree and the noisy running totals

        Parameters:
        T (int): Length of the series; padded up to the next power of two.
        epsilon (float): Divides log2(T) to give the Laplace scale; must be positive.
        dates (sequence): Hashable date keys, one per count, in series order.
        counts (sequence): The true counts, aligned with dates.
        random_state (None, int, RandomState or Generator): Source of randomness
        for the Laplace draws. None keeps the previous behaviour (numpy's global
        state); an int seeds a private RandomState so a run can be reproduced.

        Raises:
        ValueError: if T < 1, epsilon <= 0, dates and counts differ in length,
        or there are more counts than the padded T can hold.

        """
        if T < 1:
            raise ValueError("T must be at least 1")
        if epsilon <= 0:
            raise ValueError("epsilon must be positive")
        if len(dates) != len(counts):
            raise ValueError("dates and counts must have the same length")

        # Integer arithmetic instead of int(np.log2(...)): a float logarithm that
        # lands a hair below a whole number would silently drop a layer.
        self.n_layers = (int(np.ceil(T)) - 1).bit_length()
        # Number of leaves: the smallest power of two that is >= T.
        self.T = 2**self.n_layers
        if len(counts) > self.T:
            raise ValueError("T is too small for the number of counts")

        self.epsilon = epsilon
        # NOTE(review): the scale uses the caller's T, not the padded self.T;
        # kept as it was, confirm this is intended.
        self.zeta = (np.log2(T))/epsilon
        self.dates = dates

        # A bare int would restart the same stream on every rvs call, making the
        # layers share their leading draws; turn it into one shared RandomState.
        if isinstance(random_state, (int, np.integer)):
            random_state = np.random.RandomState(random_state)
        self.random_state = random_state

        # laplaces[k] holds the 2**k draws of layer k (layer 0 is the root).
        self.laplaces = self.__init_laplace()

        # date key -> position in the series
        self.idx_dict = {date: position for position, date in enumerate(dates)}

        self.real_counts = counts

        self.noise_counts = self.__process(self.real_counts)


    def __init_laplace(self):
        """Draws the Laplace noise for every node, one array per layer

        Returns:
        list: n_layers + 1 arrays; entry k has 2**k values.

        """
        node_noise = laplace(scale=self.zeta)
        return [
            node_noise.rvs(size=2**layer, random_state=self.random_state)
            for layer in range(self.n_layers + 1)
        ]


    def __process(self, counts):
        """Accumulates the counts, adding each position's root-to-leaf noise

        Parameters:
        counts (sequence): The true counts in series order.

        Returns:
        numpy.ndarray: Entry i is the sum over j <= i of counts[j] plus the
        noise summed along the path of leaf j.

        """
        # Sized from the argument itself rather than from a notebook global.
        noise_counts = np.zeros(len(counts))
        # Explicit running total; the first element no longer reads index -1.
        running_total = 0.0
        for idx, date_count in enumerate(counts):
            # get_index runs leaf -> root; self.laplaces runs root -> leaf.
            path = reversed(self.get_index(idx, self.n_layers))
            path_noise = 0.0
            for layer_noise, node in zip(self.laplaces, path):
                path_noise = path_noise + layer_noise[node]
            running_total = date_count + running_total + path_noise
            noise_counts[idx] = running_total
        return noise_counts

    def get_index(self, date_idx, n_layers):
        """Lists the node indices on the way from a leaf up to the root

        Parameters:
        date_idx (int): Index of the leaf in the bottom layer of the tree.
        n_layers (int): Number of halving steps between the bottom layer and the root.

        Returns:
        list: n_layers + 1 indices, bottom layer first. Entry k is date_idx
        halved k times with floor division; the last entry is always 0.

        """
        leaf = int(date_idx)
        # Shifting right by k is the same as k successive floor divisions by 2.
        return [leaf >> level for level in range(n_layers)] + [0]

    def __date_to_index(self, value):
        """Maps a 'YYYY-MM-DD' string (or an already-parsed date key) to its position"""
        if isinstance(value, str):
            value = datetime.strptime(value,'%Y-%m-%d').date()
        return self.idx_dict[value]

    def __date_range_indices(self, dates):
        """Returns (first, last) positions; a single date gives first == last"""
        first = self.__date_to_index(dates[0])
        last = self.__date_to_index(dates[1]) if len(dates) >= 2 else first
        return first, last

    def answer(self, dates):
        """Returns the noisy count for one date or an inclusive date range

        Parameters:
        dates (sequence): One or two 'YYYY-MM-DD' strings: (day,) for a single
        day, (start, end) for an inclusive range. Entries after the second are
        ignored.

        Returns:
        float: noise_counts[end] minus noise_counts[start - 1], or just
        noise_counts[end] when start is the first position.

        Raises:
        KeyError: if a date is not part of the series.

        """
        first, last = self.__date_range_indices(dates)
        noisy_total = self.noise_counts[last]
        # Position 0 has no predecessor whose running total could be subtracted.
        if first:
            noisy_total = noisy_total - self.noise_counts[first-1]
        return noisy_total


    def real_answer(self, dates):
        """Returns the true count for one date or an inclusive date range

        Parameters:
        dates (sequence): Same format as for answer.

        Returns:
        The stored count for a single date, or the numpy sum of the counts
        from start to end inclusive.

        Raises:
        KeyError: if a date is not part of the series.

        """
        first, last = self.__date_range_indices(dates)
        if len(dates) < 2:
            return self.real_counts[first]
        return np.sum(self.real_counts[first:last+1])

        
# Build the noisy series over every loaded date with epsilon = 0.7.
T = len(dates)
epsilon = 0.7
c_o = con_obs(T=T, epsilon=epsilon, dates=dates, counts=counts)

In [20]:
# For each query print the noisy answer followed by the true answer, one per line.
print(
    *(
        lookup(span)
        for span in (
            ('2014-01-08','2014-01-09'),
            ('2014-01-20',),
            ('2014-01-02','2014-01-05'),
        )
        for lookup in (c_o.answer, c_o.real_answer)
    ),
    sep="\n",
)

535.0211868711058
538
338.3931718058029
315
406.342082191646
488
