# Noise Generation

In [11]:
from ast import literal_eval
import collections
import os
import re

import pandas as pd
import numpy as np

In [12]:
# Filesystem layout. PROJECT_ROOT is a relative path, so it is resolved
# against the kernel's working directory
# (presumably the notebook's own folder -- TODO confirm).
PROJECT_ROOT = '..'
# Directory under the project root that holds the input CSV.
BUILD_PATH = os.path.join(PROJECT_ROOT, 'build')
# CSV of trajectories read by the data-loading cell.
TRAJ_FILE = os.path.join(BUILD_PATH, 'selected.Jan.csv')

### Hyperparameters

In [13]:
# NOTE(review): `theta` is not referenced by any cell visible in this part of
# the notebook; its meaning and valid range should be confirmed where it is used.
theta = 0.25

## Compute Risk

### Construct ITDs

In [14]:
class Trajectory(tuple):
    """Movement trajectory (Def. 1): an immutable sequence of locations.

    Different from the definition, time is not recorded in the trajectory,
    which should not affect the computation.

    This is a ``tuple`` subclass, so the elements are stored by
    ``tuple.__new__`` and equality is inherited from ``tuple``.

    Args:
        locations: Iterable of hashable location identifiers. Lists, tuples
            and one-shot iterators/generators are all accepted.

    Attributes:
        locations: Plain ``tuple`` holding the same elements as the
            trajectory itself.
    """
    def __init__(self, locations):
        # `tuple.__new__` has already consumed `locations` when we get here.
        # Re-reading the argument would give an empty tuple for a one-shot
        # iterator and make hash and equality disagree, so copy from `self`.
        self.locations = tuple(self)

    def __hash__(self):
        # Hash on the contents so equal trajectories share Counter/dict slots.
        return hash(self.locations)

    def __repr__(self):
        return '<Trajectory {}>'.format(str(self.locations))
    
class ITDBuilder(object):
    """Collects one user's trajectories before an ITD is constructed.

    Attributes:
        uid: The identifier passed to the constructor.
        trajs: ``collections.Counter`` mapping each added trajectory to the
            number of times it has been added.
    """
    def __init__(self, uid):
        self.trajs = collections.Counter()
        self.uid = uid

    def add(self, trajectory):
        """Record one more occurrence of ``trajectory``."""
        # Wrapping in a 1-tuple makes Counter.update count the trajectory
        # itself rather than iterating over its elements.
        self.trajs.update((trajectory,))
        
class ITD(object):
    """Individual trajectory database (Def. 2, 3, 4, 5).

    Args:
        builder: Object exposing ``uid`` and ``trajs`` (a mapping from
            trajectory to its count for this user).
        traj_freq: Mapping from trajectory to the count it is divided by
            when computing the per-trajectory risk.

    Attributes:
        uid: Copied from ``builder.uid``.
        traj2idx / idx2traj: Trajectory <-> positional index, in the
            iteration order of ``builder.trajs``.
        freq: Per-index count of the trajectory for this user (Def. 3).
        risk: Per-index ``count / traj_freq[trajectory]`` (Def. 5, Eq. 2).
        privacy_risk: Sum of ``risk`` (Def. 4, Eq. 1).
    """
    def __init__(self, builder, traj_freq):
        self.uid = builder.uid
        # Snapshot the (trajectory, count) pairs once so every derived
        # structure shares the same ordering.
        pairs = list(builder.trajs.items())
        self.traj2idx = {traj: pos for pos, (traj, _) in enumerate(pairs)}
        self.idx2traj = {pos: traj for traj, pos in self.traj2idx.items()}
        self.freq = [count for _, count in pairs]                     # Def. 3
        self.risk = [count / traj_freq[traj] for traj, count in pairs]  # Def. 5, Eq. 2
        self.privacy_risk = sum(self.risk)                            # Def. 4, Eq. 1

    def riskest(self):
        """Return the trajectory with the highest risk.

        When several trajectories share the maximum risk, the one with the
        smallest index (i.e. the one that appears first) is returned.
        """
        top = max(range(len(self.risk)), key=self.risk.__getitem__)
        return self.idx2traj[top]

    def __repr__(self):
        return '<ITD uid:{}>'.format(self.uid)

In [15]:
# Load data. The file is opened in a `with` block so the handle is closed
# as soon as pandas has finished reading it.
with open(TRAJ_FILE, 'r') as traj_fh:
    df = pd.read_csv(traj_fh,
                     header=0,
                     names=['uid', 'date', 'traj_site', 'traj_arr'],
                     parse_dates=['date'])
# Both trajectory columns are parsed from their textual form with
# `literal_eval`, which only accepts Python literals (no arbitrary code).
df['traj_site'] = df['traj_site'].apply(literal_eval)
df['traj_arr'] = df['traj_arr'].apply(literal_eval)

# traj_freq: how many rows (over all users) carry each trajectory.
# itd_bdlrs: one ITDBuilder per user ID, filled row by row.
traj_freq = collections.Counter()
itd_bdlrs = {}
# Only `uid` and `traj_arr` are needed here, so iterate over those two
# columns directly instead of materialising a Series per row.
for uid, areas in zip(df['uid'], df['traj_arr']):
    # Construct the trajectory using area codes.
    traj = Trajectory(areas)
    if uid not in itd_bdlrs:
        itd_bdlrs[uid] = ITDBuilder(uid)
    itd_bdlrs[uid].add(traj)
    traj_freq[traj] += 1

# A mapping from user ID to the corresponding ITD
ITDS = {bdlr.uid: ITD(bdlr, traj_freq) for bdlr in itd_bdlrs.values()}

#### Print Examples (Optional)

In [16]:
# Show every derived structure of one ITD (user 6) as a sanity check.
example_itd = ITDS[6]
for attr_name in ('idx2traj', 'freq', 'risk', 'privacy_risk'):
    print(getattr(example_itd, attr_name))
print(example_itd.riskest())

{0: <Trajectory (70, 73)>, 1: <Trajectory (6, 7, 3)>, 2: <Trajectory (3,)>, 3: <Trajectory (9, 7)>, 4: <Trajectory (9, 4)>, 5: <Trajectory (4, 3, 6, 10, 19, 11, 20, 19)>, 6: <Trajectory (70,)>}
[1, 1, 1, 2, 1, 1, 5]
[1.0, 0.5, 0.00078003120124805, 0.25, 0.5, 1.0, 0.5]
3.750780031201248
<Trajectory (70, 73)>


### Find ITD Relations

In [17]:
# Riskiest trajectory of every user, computed once and reused below.
riskest_of = {uid: itd.riskest() for uid, itd in ITDS.items()}

# Map each riskiest trajectory to the users for whom it is the riskiest.
riskest2uid = collections.defaultdict(list)
for uid, top_traj in riskest_of.items():
    riskest2uid[top_traj].append(uid)

# Map each trajectory to the users whose ITD contains it.
traj2uid = collections.defaultdict(list)
for uid, itd in ITDS.items():
    for traj in itd.traj2idx:
        traj2uid[traj].append(uid)

# RELATION[uid] is a pair of lists: [strong relations, weak relations].
RELATION = collections.defaultdict(lambda: [[], []])
for uid, top_traj in riskest_of.items():
    # Strong relations: all users sharing this user's riskiest trajectory
    # (the user itself is part of that list).
    RELATION[uid][0].extend(riskest2uid[top_traj])
    # Weak relations: every *other* user whose ITD contains this user's
    # riskiest trajectory gets this user recorded on their weak list.
    for other_uid in traj2uid[top_traj]:
        if other_uid == uid:
            continue
        RELATION[other_uid][1].append(uid)

#### Print Examples (Optional)

In [18]:
# RELATION[uid] holds [strong, weak]; unpack once for the example user.
strong_uids, weak_uids = RELATION[6]
print('Reations with user 6')
print('strong:', strong_uids)
print('weak:  ', weak_uids)

Reations with user 6
strong: [6]
weak:   [105, 182, 188, 487, 587, 681, 698, 741, 1052, 1370, 1585, 1795, 1971, 1972, 2025, 2219, 2278, 2393, 2449, 2583, 2592, 2709]
