In [None]:
# Library used: https://github.com/glassonion1/anonypy

In [None]:
"""
This notebook k-anonymizes tabular data using the Mondrian algorithm.
"""

In [1]:
import anonypy
import pandas as pd


In [2]:
# Read the custom-made dataset and discard the "Unnamed: 0" column
# (presumably an index saved along with the CSV -- verify against the file).
# NOTE(review): the path is an absolute, machine-specific location.
data = pd.read_csv(
    "/home/ammar/Desktop/LMU/Data Security NLP/data/data_datasecurity.csv"
).drop(columns="Unnamed: 0")

In [3]:
# Dimensions of the loaded frame as (rows, columns)
data.shape

(809, 13)

In [None]:
# Basic data description

In [5]:
# Number of distinct values per column: unstack() flattens the frame into a
# single Series keyed by (column name, row label), and grouping on level 0
# regroups the values by column name before counting unique entries.
data.unstack().groupby(level=0).nunique()

AGE                  45
AGENCY                1
ASSIGNMENT           67
CITY_RESIDENT?        2
DEGREE               12
EMAIL               809
FIRST_NAME          762
GENDER                2
IP_ADDRESS          809
LAST_NAME           804
RACE/ETHNICITY       10
TITLE                10
YEARS_IN_SERVICE     41
dtype: int64

In [6]:
# Column names in the order they appear in the frame
list(data.columns)

['TITLE',
 'ASSIGNMENT',
 'RACE/ETHNICITY',
 'AGE',
 'YEARS_IN_SERVICE',
 'DEGREE',
 'CITY_RESIDENT?',
 'AGENCY',
 'FIRST_NAME',
 'LAST_NAME',
 'EMAIL',
 'GENDER',
 'IP_ADDRESS']

In [7]:
# Preview the first rows of the raw (not yet anonymized) data
data.head()

Unnamed: 0,TITLE,ASSIGNMENT,RACE/ETHNICITY,AGE,YEARS_IN_SERVICE,DEGREE,CITY_RESIDENT?,AGENCY,FIRST_NAME,LAST_NAME,EMAIL,GENDER,IP_ADDRESS
0,POLICE OFFICER,ADULT/JUVENILE BOOKING,AFRICAN AMERICAN,42.0,7.0,HGH SCHOOL GRADUATE,YES,BPD,Nat,Bilsland,nbilsland1@fda.gov,FEMALE,61.238.184.75
1,POLICE OFFICER,ADULT/JUVENILE BOOKING,AFRICAN AMERICAN,51.0,15.0,B.A. OR B.S. COMPLETED & RECEIVED,NO,BPD,Myrlene,Antoniutti,mantoniutti4@smugmug.com,FEMALE,19.38.52.95
2,POLICE OFFICER,ADULT/JUVENILE BOOKING,AFRICAN AMERICAN,44.0,15.0,HGH SCHOOL GRADUATE,NO,BPD,Kellie,Runacres,krunacres5@unicef.org,FEMALE,98.182.132.18
3,POLICE OFFICER,ADULT/JUVENILE BOOKING,AFRICAN AMERICAN,41.0,19.0,1 YEAR COLLEGE OR LESS,NO,BPD,Naoma,Losemann,nlosemann6@umich.edu,FEMALE,80.0.24.91
4,POLICE OFFICER,ANALYTICAL INTELLIGENCE SEC,AFRICAN AMERICAN,41.0,18.0,2 YEARS COLLEGE; NO A. A.,NO,BPD,Cinda,Parlott,cparlotta@histats.com,FEMALE,195.64.119.229


In [12]:

# All column names of the loaded frame, in frame order
columns = data.columns.tolist()
# Columns that main() casts to the pandas "category" dtype
categorical = {
    'TITLE',
    'ASSIGNMENT',
    'RACE/ETHNICITY',
    'DEGREE',
    'CITY_RESIDENT?',
    'AGENCY',
    "FIRST_NAME",
    "LAST_NAME",
    'GENDER',
    "IP_ADDRESS",
}

def main(k=2):
    """Run Mondrian k-anonymization once per sensitive column.

    Reads the notebook-level ``data``, ``columns`` and ``categorical``
    objects, casts the categorical columns to the pandas ``category`` dtype,
    and calls ``anonypy.Preserver(...).anonymize_k_anonymity`` separately for
    each of ``AGE``, ``YEARS_IN_SERVICE`` and ``RACE/ETHNICITY``.

    Parameters
    ----------
    k : int, default 2
        Value forwarded as ``k`` to ``anonymize_k_anonymity``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per sensitive column, in the order listed above. The shape
        of each frame is printed as it is produced.
    """
    # Explicit copy so the category casts below can never leak into `data`.
    df = pd.DataFrame(data=data, columns=columns, copy=True)
    df = df.astype({name: "category" for name in categorical})

    # 'GENDER' used to be listed twice here; each name now appears once.
    # NOTE(review): FIRST_NAME, LAST_NAME and IP_ADDRESS are direct
    # identifiers and still show up verbatim (comma-joined per group) in the
    # results. Consider dropping them before publishing; they are kept here so
    # the output columns stay the same.
    feature_columns = [
        'TITLE',
        'ASSIGNMENT',
        'DEGREE',
        'CITY_RESIDENT?',
        'GENDER',
        "AGENCY",
        "FIRST_NAME",
        "LAST_NAME",
        "IP_ADDRESS",
    ]
    sensitive_columns = ["AGE", "YEARS_IN_SERVICE", "RACE/ETHNICITY"]

    anonymized_frames = []
    for sensitive_column in sensitive_columns:
        preserver = anonypy.Preserver(df, feature_columns, sensitive_column)
        rows = preserver.anonymize_k_anonymity(k=k)
        anonymized = pd.DataFrame(rows)
        anonymized_frames.append(anonymized)
        print(anonymized.shape)
    return anonymized_frames

In [13]:
# Run the anonymization; `out` is the list returned by main(), one frame per
# sensitive column (AGE, YEARS_IN_SERVICE, RACE/ETHNICITY, in that order).
out = main()

(784, 11)
(781, 11)
(551, 11)


In [None]:
# 2-anonymity based on each of "AGE", "YEARS_IN_SERVICE", "RACE/ETHNICITY"

In [15]:
# Preview of the result that uses AGE as the sensitive column
out[0].head()

Unnamed: 0,TITLE,ASSIGNMENT,DEGREE,CITY_RESIDENT?,GENDER,AGENCY,FIRST_NAME,LAST_NAME,IP_ADDRESS,AGE,count
0,"POLICE COLONEL,POLICE SERGEANT EID","NORTHERN DISTRICT,NEIGHBORHOOD PATROL DIV,NORT...",B.A. OR B.S. COMPLETED & RECEIVED,"NO,YES",FEMALE,BPD,"Marjorie,Mirabella,Bernie","Ciciotti,Withey,Feaver","35.134.6.29,111.165.164.62,198.99.11.154",39.0,3
1,"POLICE LIEUTENANT EID,POLICE SERGEANT EID","DEPLOYMENT UNIT,EQUAL OPPORTUNITY & DIVERSITY SEC",B.A. OR B.S. COMPLETED & RECEIVED,NO,MALE,BPD,"Murvyn,Judon","Bracci,Kewish","73.118.2.193,249.41.86.182",38.0,1
2,"POLICE LIEUTENANT EID,POLICE SERGEANT EID","DEPLOYMENT UNIT,EQUAL OPPORTUNITY & DIVERSITY SEC",B.A. OR B.S. COMPLETED & RECEIVED,NO,MALE,BPD,"Murvyn,Judon","Bracci,Kewish","73.118.2.193,249.41.86.182",43.0,1
3,"POLICE LIEUTENANT EID,POLICE MAJOR/DIRECTOR","GENERAL ACCOUNTABILITY OFFICE,INFORMATION SERV...",B.A. OR B.S. COMPLETED & RECEIVED,NO,FEMALE,BPD,"Beverley,Maurita","Welsby,Postins","134.185.169.71,235.15.164.191",39.0,1
4,"POLICE LIEUTENANT EID,POLICE MAJOR/DIRECTOR","GENERAL ACCOUNTABILITY OFFICE,INFORMATION SERV...",B.A. OR B.S. COMPLETED & RECEIVED,NO,FEMALE,BPD,"Beverley,Maurita","Welsby,Postins","134.185.169.71,235.15.164.191",51.0,1


In [18]:
# Preview of the result that uses YEARS_IN_SERVICE as the sensitive column
out[1].head()

Unnamed: 0,TITLE,ASSIGNMENT,DEGREE,CITY_RESIDENT?,GENDER,AGENCY,FIRST_NAME,LAST_NAME,IP_ADDRESS,YEARS_IN_SERVICE,count
0,"POLICE COLONEL,POLICE SERGEANT EID","NORTHERN DISTRICT,NEIGHBORHOOD PATROL DIV,NORT...",B.A. OR B.S. COMPLETED & RECEIVED,"NO,YES",FEMALE,BPD,"Marjorie,Mirabella,Bernie","Ciciotti,Withey,Feaver","35.134.6.29,111.165.164.62,198.99.11.154",7.0,1
1,"POLICE COLONEL,POLICE SERGEANT EID","NORTHERN DISTRICT,NEIGHBORHOOD PATROL DIV,NORT...",B.A. OR B.S. COMPLETED & RECEIVED,"NO,YES",FEMALE,BPD,"Marjorie,Mirabella,Bernie","Ciciotti,Withey,Feaver","35.134.6.29,111.165.164.62,198.99.11.154",10.0,1
2,"POLICE COLONEL,POLICE SERGEANT EID","NORTHERN DISTRICT,NEIGHBORHOOD PATROL DIV,NORT...",B.A. OR B.S. COMPLETED & RECEIVED,"NO,YES",FEMALE,BPD,"Marjorie,Mirabella,Bernie","Ciciotti,Withey,Feaver","35.134.6.29,111.165.164.62,198.99.11.154",17.0,1
3,"POLICE LIEUTENANT EID,POLICE SERGEANT EID","DEPLOYMENT UNIT,EQUAL OPPORTUNITY & DIVERSITY SEC",B.A. OR B.S. COMPLETED & RECEIVED,NO,MALE,BPD,"Murvyn,Judon","Bracci,Kewish","73.118.2.193,249.41.86.182",16.0,1
4,"POLICE LIEUTENANT EID,POLICE SERGEANT EID","DEPLOYMENT UNIT,EQUAL OPPORTUNITY & DIVERSITY SEC",B.A. OR B.S. COMPLETED & RECEIVED,NO,MALE,BPD,"Murvyn,Judon","Bracci,Kewish","73.118.2.193,249.41.86.182",20.0,1


In [19]:
# Preview of the result that uses RACE/ETHNICITY as the sensitive column
out[2].head()

Unnamed: 0,TITLE,ASSIGNMENT,DEGREE,CITY_RESIDENT?,GENDER,AGENCY,FIRST_NAME,LAST_NAME,IP_ADDRESS,RACE/ETHNICITY,count
0,"POLICE COLONEL,POLICE SERGEANT EID","NORTHERN DISTRICT,NEIGHBORHOOD PATROL DIV,NORT...",B.A. OR B.S. COMPLETED & RECEIVED,"NO,YES",FEMALE,BPD,"Marjorie,Mirabella,Bernie","Ciciotti,Withey,Feaver","35.134.6.29,111.165.164.62,198.99.11.154",AFRICAN AMERICAN,1
1,"POLICE COLONEL,POLICE SERGEANT EID","NORTHERN DISTRICT,NEIGHBORHOOD PATROL DIV,NORT...",B.A. OR B.S. COMPLETED & RECEIVED,"NO,YES",FEMALE,BPD,"Marjorie,Mirabella,Bernie","Ciciotti,Withey,Feaver","35.134.6.29,111.165.164.62,198.99.11.154",WHITE,2
2,"POLICE LIEUTENANT EID,POLICE SERGEANT EID","DEPLOYMENT UNIT,EQUAL OPPORTUNITY & DIVERSITY SEC",B.A. OR B.S. COMPLETED & RECEIVED,NO,MALE,BPD,"Murvyn,Judon","Bracci,Kewish","73.118.2.193,249.41.86.182",WHITE,2
3,"POLICE LIEUTENANT EID,POLICE MAJOR/DIRECTOR","GENERAL ACCOUNTABILITY OFFICE,INFORMATION SERV...",B.A. OR B.S. COMPLETED & RECEIVED,NO,FEMALE,BPD,"Beverley,Maurita","Welsby,Postins","134.185.169.71,235.15.164.191",AFRICAN AMERICAN,1
4,"POLICE LIEUTENANT EID,POLICE MAJOR/DIRECTOR","GENERAL ACCOUNTABILITY OFFICE,INFORMATION SERV...",B.A. OR B.S. COMPLETED & RECEIVED,NO,FEMALE,BPD,"Beverley,Maurita","Welsby,Postins","134.185.169.71,235.15.164.191",WHITE,1
