### Time Duration Testing
**CSS-490** Demo Timing

**Topics**: Privacy, Unicity

DOI: 10.1038/srep01376

**Importance**: Importance of Time Dimension (range of collected data)

In [1]:
import pandas as pd 
import numpy as np 
from lib import preprocess, unique, geoSanitize as geo

In [2]:
## Proof: One User Compared to themselves
# One User for a single month
data = geo.filterUserMonthRange('000', '2008-10', '2008-11')
# Only keep 2 decimals of lon/lat location
oneUserMonth = unique.locationPrecision(data, 2)
# Create location ids for each unique location
oneUserMonth['loc_id'] = unique.generateLocationID(oneUserMonth)
print(oneUserMonth, end='\n\n')

42 unique location ids of 
1 total number of users. 

      UID  Latitude  Longitude                Time  loc_id
0     000     39.98     116.32 2008-10-23 02:53:04       0
1     000     39.98     116.32 2008-10-23 02:53:10       0
2     000     39.98     116.32 2008-10-23 02:53:15       0
3     000     39.98     116.32 2008-10-23 02:53:20       0
4     000     39.98     116.32 2008-10-23 02:53:25       0
...   ...       ...        ...                 ...     ...
7740  000     39.99     116.33 2008-11-23 10:29:48      26
7741  000     39.99     116.33 2008-11-23 10:29:53      26
7742  000     39.99     116.33 2008-11-23 10:29:58      26
7743  000     39.99     116.33 2008-11-23 10:30:03      26
7744  000     39.99     116.33 2008-11-23 10:30:08      26

[11372 rows x 5 columns]



In [4]:
# Therefore: With one in a population, they are completely unique
unique.inTheCrowd(5, oneUserMonth, oneUserMonth.groupby('UID'))

---
 Final Uniqueness Rate: 1.0 
---


1.0

In [3]:
## Test: Two Users in a single month
# Two users for a single month
data = geo.filterbyMonthRange(['000', '001'], '2008-10', '2009-11')
# Only keep 2 decimals
twoUsersOneMonth = unique.locationPrecision(data, 2)
# gen loc ids
twoUsersOneMonth['loc_id'] = unique.generateLocationID(twoUsersOneMonth)
print(twoUsersOneMonth, end="\n\n")

598 unique location ids of 
2 total number of users. 

       UID  Latitude  Longitude                Time  loc_id
0      000     39.98     116.32 2008-10-23 02:53:04       0
1      000     39.98     116.32 2008-10-23 02:53:10       0
2      000     39.98     116.32 2008-10-23 02:53:15       0
3      000     39.98     116.32 2008-10-23 02:53:20       0
4      000     39.98     116.32 2008-10-23 02:53:25       0
...    ...       ...        ...                 ...     ...
34985  001     39.98     116.33 2008-12-15 00:30:58      32
34986  001     39.98     116.33 2008-12-15 00:31:03      32
34987  001     39.98     116.33 2008-12-15 00:31:08      32
34988  001     39.98     116.33 2008-12-15 00:31:13      32
34989  001     39.98     116.33 2008-12-15 00:31:18      32

[282477 rows x 5 columns]



In [9]:
u = unique.inTheCrowd(5, twoUsersOneMonth, twoUsersOneMonth.groupby('UID'))
userSimilarity = 100 - (u*100)
print(f'{round(userSimilarity)}% similarity')

## Therefore, User 000 and 001 roughly share 4% of their location history

i:440 	 rate = 0.9996
---
 Final Uniqueness Rate: 0.9996 
---
0% similarity


In [4]:
## Test: All Users within a single month
# 182 Users 
data = geo.filterbyMonthRange(fromDate='2008-10', toDate='2008-11')
# Only keep 2 precion 
allUserMonth = unique.locationPrecision(data, 2)
# gen loc id
allUserMonth['loc_id'] = unique.generateLocationID(allUserMonth)
print(allUserMonth, end="\n\n")

15744 unique location ids of 
41 total number of users. 

       UID  Latitude  Longitude        Date      Time  loc_id
0      000     39.98     116.32  2008-10-23  02:53:04       0
1      000     39.98     116.32  2008-10-23  02:53:10       0
2      000     39.98     116.32  2008-10-23  02:53:15       0
3      000     39.98     116.32  2008-10-23  02:53:20       0
4      000     39.98     116.32  2008-10-23  02:53:25       0
...    ...       ...        ...         ...       ...     ...
25982  179     40.01     116.32  2008-11-29  08:15:52       8
25983  179     40.01     116.32  2008-11-29  08:15:54       8
25984  179     40.01     116.32  2008-11-29  08:15:56       8
25985  179     40.01     116.32  2008-11-29  08:15:58       8
25986  179     40.01     116.32  2008-11-29  08:16:00       8

[2946829 rows x 6 columns]



In [5]:
u = unique.inTheCrowd(5, allUserMonth, allUserMonth.groupby('UID'))
userSimilarity = 100 - (u*100)
print(f'{round(userSimilarity)}% similarity')

## Therefore, All users roughly share 31% of their location history

i:3 	 rate = 0.9996
i:5 	 rate = 0.9992
i:16 	 rate = 0.9988
i:24 	 rate = 0.9984
i:27 	 rate = 0.998
i:32 	 rate = 0.9976
i:39 	 rate = 0.9972
i:42 	 rate = 0.9968
i:52 	 rate = 0.9964
i:66 	 rate = 0.996
i:68 	 rate = 0.9956
i:79 	 rate = 0.9952
i:89 	 rate = 0.9948
i:95 	 rate = 0.9944
i:96 	 rate = 0.994
i:105 	 rate = 0.9936
i:128 	 rate = 0.9932
i:135 	 rate = 0.9928
i:138 	 rate = 0.9924
i:154 	 rate = 0.992
i:175 	 rate = 0.9916
i:187 	 rate = 0.9912
i:188 	 rate = 0.9908
i:193 	 rate = 0.9904
i:198 	 rate = 0.99
i:199 	 rate = 0.9896
i:208 	 rate = 0.9892
i:212 	 rate = 0.9888
i:235 	 rate = 0.9884
i:244 	 rate = 0.988
i:248 	 rate = 0.9876
i:260 	 rate = 0.9872
i:266 	 rate = 0.9868
i:271 	 rate = 0.9864
i:273 	 rate = 0.986
i:298 	 rate = 0.9856
i:305 	 rate = 0.9852
i:320 	 rate = 0.9848
i:321 	 rate = 0.9844
i:332 	 rate = 0.984
i:333 	 rate = 0.9836
i:345 	 rate = 0.9832
i:349 	 rate = 0.9828
i:359 	 rate = 0.9824
i:360 	 rate = 0.982
i:361 	 rate = 0.9816
i:370 	 rate = 