# ID Card Identification

---

## Import modules

In [1]:
# Module for data manipulation
import pandas as pd
# Module for linear algebra calculation
import numpy as np
# Module for timing
from datetime import datetime
# Module for binary search
from bisect import bisect_left

In [2]:
def binary_search(a, x):
    """Report whether ``x`` occurs in the sorted sequence ``a``.

    ``bisect_left`` gives the leftmost position at which ``x`` could be
    inserted; ``x`` is present only if that position lies inside ``a`` and
    already holds ``x``.

    Returns:
        True when ``x`` is found, False otherwise.
    """
    pos = bisect_left(a, x)
    # An insertion point at the very end means every element is smaller.
    if pos == len(a):
        return False
    return True if a[pos] == x else False

In [3]:
ID = int('1234567891234567')
ID

1234567891234567

In [4]:
# Check length of ID
def checkLength(ID):
    """Check that ``ID`` is 16 characters long once converted to a string.

    Returns:
        A ``(status, length)`` tuple: ``status`` is True only for a length
        of exactly 16, and ``length`` is the measured character count.
    """
    n_chars = len(str(ID))
    return (n_chars == 16, n_chars)

In [32]:
checkLength(ID)

(True, 16)

In [30]:
# Check DOB
def checkDOB(ID):
    """Validate the date-of-birth segment (characters 7-12) of an ID.

    The segment is read as ``ddmmyy``. Values above 400000 have 400000
    subtracted first (the woman's encoding, i.e. day of birth plus 40).
    A date that would lie in the future is moved back one century.

    Returns:
        A ``(status, dob, age)`` tuple. ``dob`` is the date formatted as
        ``dd-mm-YYYY`` or None when the segment is not a valid date.
        ``status`` is True and ``age`` is the whole number of years only
        when the computed age is at least 17; otherwise ``age`` is None.
    """
    invalid = (False, None, None)
    dob = str(ID)[6:12]
    # A short or non-numeric segment cannot be a date; report it as invalid
    # instead of letting int() raise.
    if len(dob) != 6 or not dob.isdecimal():
        return invalid
    dob_person = int(dob)
    # Check the woman's dob
    if dob_person > 400000:
        dob_person -= 400000
    # Convert into datetime. The value is zero-padded back to six digits:
    # without the padding '010190' would become '10190', which strptime
    # reads as day 10, month 1 instead of day 01, month 01.
    try:
        dob_date = datetime.strptime(str(dob_person).zfill(6), '%d%m%y')
        if dob_date > datetime.now():
            dob_date = dob_date.replace(year=dob_date.year - 100)
    except ValueError:
        return invalid
    # Check age
    age = (datetime.now() - dob_date).days / 365.2425
    dob_text = dob_date.strftime('%d-%m-%Y')
    if age >= 17:
        return (True, dob_text, int(age))
    return (False, dob_text, None)

In [31]:
checkDOB(ID)

(False, None, None)

In [33]:
# Check gender
def checkGender(ID):
    """Derive the gender from the seventh character of an ID.

    That character is the first digit of the day-of-birth field: 0-3 is
    reported as 'Man', 4-7 as 'Woman', and anything else is invalid.

    Returns:
        A ``(status, sex)`` tuple; ``sex`` is None when ``status`` is False.
    """
    gender = str(ID)[6:7]
    # An empty slice (ID shorter than 7 characters) or a non-digit is
    # reported as invalid rather than crashing inside int().
    if not gender.isdecimal():
        return (False, None)
    digit = int(gender)
    if digit >= 8:
        return (False, None)
    return (True, 'Man' if digit < 4 else 'Woman')

In [34]:
checkGender(ID)

(True, 'Woman')

In [36]:
# Check computerized last number
def checkComputerizedLastNumber(ID):
    """Validate the trailing computerized number (characters 13 onward).

    The number is valid when it is exactly four digits and is not '0000'.

    Returns:
        A ``(status, last_num)`` tuple, where ``last_num`` is the extracted
        string (possibly empty for a short ID).
    """
    last_num = str(ID)[12:]
    # Compare the extracted number itself; comparing the status flag with
    # '0000' would make this check pass for every input.
    status = (
        len(last_num) == 4
        and last_num.isdecimal()
        and last_num != '0000'
    )
    return (status, last_num)

In [37]:
checkComputerizedLastNumber(ID)

(True, '4567')

In [38]:
# Load the data
# 'code' is read as object (string) so it can be compared with the string
# slice str(ID)[:6] taken from an ID.
df = pd.read_csv('../data/csv/Admin Area Code - ID.csv', sep = ',', dtype = {'code': object})

In [39]:
df.head()

Unnamed: 0,code,province,district,subdistrict
0,110101,Aceh,Kab. Aceh Selatan,Bakongan
1,110102,Aceh,Kab. Aceh Selatan,Kluet Utara
2,110103,Aceh,Kab. Aceh Selatan,Kluet Selatan
3,110104,Aceh,Kab. Aceh Selatan,Labuhan Haji
4,110105,Aceh,Kab. Aceh Selatan,Meukek


In [40]:
str(ID)[:6]

'123456'

In [41]:
def checkLength(ID, data):
    """Look up the administrative area encoded in the first six characters.

    NOTE: this two-argument definition reuses the name of the earlier
    one-argument ``checkLength`` in this notebook and replaces it once run.

    Args:
        ID: the identification number (anything ``str()`` can render).
        data: DataFrame with string 'code' values plus 'province',
            'district' and 'subdistrict' columns.

    Returns:
        ``(status, province, district, subdistrict)``; the three area
        fields are None when the code is not present in ``data``.
    """
    admin_code = str(ID)[:6]
    # Look the code up in the frame that was passed in (not a global), and
    # pick the area columns by name so extra columns such as an exported
    # index column cannot break the three-way unpacking. A plain equality
    # mask also works when the codes are not sorted.
    area_columns = ['province', 'district', 'subdistrict']
    matches = data.loc[data['code'] == admin_code, area_columns]
    if matches.empty:
        return (False, None, None, None)
    prov, district, subdistrict = matches.iloc[0].tolist()
    return (True, prov, district, subdistrict)

In [42]:
checkLength(ID, df)

(False, None, None, None)

---

In [43]:
class identificationID:
    """Validator for the individual segments of a 16-character ID number.

    Each ``check*`` method returns a tuple whose first element is a boolean
    validity flag, followed by the value(s) extracted from that segment.
    """

    def __init__(self, ID):
        self.ID = ID

    # Get value
    def get_value(self):
        """Return the ID rendered as a string."""
        return str(self.ID)

    # Check length of ID
    def checkLength(self):
        """Return ``(status, length)``; status is True for exactly 16 characters."""
        length = len(str(self.ID))
        return (length == 16, length)

    # Check administrative area
    def checkAdminArea(self, data):
        """Look up the administrative area encoded in the first six characters.

        Args:
            data: DataFrame with string 'code' values plus 'province',
                'district' and 'subdistrict' columns.

        Returns:
            ``(status, province, district, subdistrict)``; the area fields
            are None when the code is not present in ``data``.
        """
        admin_code = str(self.ID)[:6]
        # Use the frame passed in (not a global) and select the area
        # columns by name, so extra columns such as an exported index
        # column cannot break the three-way unpacking.
        area_columns = ['province', 'district', 'subdistrict']
        matches = data.loc[data['code'] == admin_code, area_columns]
        if matches.empty:
            return (False, None, None, None)
        prov, district, subdistrict = matches.iloc[0].tolist()
        return (True, prov, district, subdistrict)

    # Check DOB
    def checkDOB(self):
        """Validate the ``ddmmyy`` date-of-birth segment (characters 7-12).

        Values above 400000 have 400000 subtracted first (the woman's
        encoding, i.e. day plus 40). A date that would lie in the future is
        moved back one century.

        Returns:
            ``(status, dob, age)``. ``dob`` is ``dd-mm-YYYY`` or None when
            the segment is not a valid date; ``status`` is True and ``age``
            is set only when the computed age is at least 17.
        """
        invalid = (False, None, None)
        dob = str(self.ID)[6:12]
        # A short or non-numeric segment cannot be a date.
        if len(dob) != 6 or not dob.isdecimal():
            return invalid
        dob_person = int(dob)
        # Check the woman's dob
        if dob_person > 400000:
            dob_person -= 400000
        # Zero-pad back to six digits: otherwise '010190' turns into
        # '10190', which strptime reads as day 10, month 1.
        try:
            dob_date = datetime.strptime(str(dob_person).zfill(6), '%d%m%y')
            if dob_date > datetime.now():
                dob_date = dob_date.replace(year=dob_date.year - 100)
        except ValueError:
            return invalid
        # Check age
        age = (datetime.now() - dob_date).days / 365.2425
        dob_text = dob_date.strftime('%d-%m-%Y')
        if age >= 17:
            return (True, dob_text, int(age))
        return (False, dob_text, None)

    # Check Gender
    def checkGender(self):
        """Derive the gender from the seventh character of the ID.

        Digits 0-3 are reported as 'Man', 4-7 as 'Woman'; anything else
        (including a missing or non-digit character) is invalid.

        Returns:
            ``(status, sex)``; ``sex`` is None when ``status`` is False.
        """
        gender = str(self.ID)[6:7]
        if not gender.isdecimal():
            return (False, None)
        digit = int(gender)
        if digit >= 8:
            return (False, None)
        return (True, 'Man' if digit < 4 else 'Woman')

    # Check computerized number
    def checkComputerizedNumber(self):
        """Validate the trailing computerized number (characters 13 onward).

        Valid when it is exactly four digits and not '0000'.

        Returns:
            ``(status, last_num)`` with the extracted string.
        """
        last_num = str(self.ID)[12:]
        # Compare the extracted number, not the status flag, with '0000'.
        status = (
            len(last_num) == 4
            and last_num.isdecimal()
            and last_num != '0000'
        )
        return (status, last_num)

In [44]:
NIK = identificationID('1234567891234567')

In [45]:
NIK.checkLength()

(True, 16)

In [46]:
NIK.checkAdminArea(data = df)

(False, None, None, None)

In [47]:
NIK.checkDOB()

(False, None, None)

In [48]:
NIK.checkGender()

(True, 'Woman')

In [49]:
NIK.checkComputerizedNumber()

(True, '4567')

---

In [50]:
NIKs = ['1234567891234567', '1234567891234567', '1234567891234567', '1234567891234567']

In [52]:
# Load the data
# 'code' is read as object (string) so it can be compared with the string
# slice str(ID)[:6] taken from an ID.
df = pd.read_csv('../data/csv/Admin Area Code - ID.csv', sep = ',', dtype = {'code': object})

In [53]:
df.head()

Unnamed: 0,code,province,district,subdistrict
0,110101,Aceh,Kab. Aceh Selatan,Bakongan
1,110102,Aceh,Kab. Aceh Selatan,Kluet Utara
2,110103,Aceh,Kab. Aceh Selatan,Kluet Selatan
3,110104,Aceh,Kab. Aceh Selatan,Labuhan Haji
4,110105,Aceh,Kab. Aceh Selatan,Meukek


In [54]:
# Variables for summaries: how many IDs were seen, how many passed every
# check, and how many passed each individual check.
sum_number = 0
sum_valid_number = 0
sum_length = 0
sum_area = 0
sum_dob = 0
sum_gender = 0
sum_computerized = 0

# json objects, keyed by the ID string. A repeated ID overwrites its earlier
# entry, so these can hold fewer entries than sum_number.
data_full = {}
data_limited = {}

# Looping
for nik_raw in NIKs:
    # Assign NIK into object
    NIK = identificationID(nik_raw)
    NIK_value = NIK.get_value()

    # NIK's length
    stat_length, nik_length = NIK.checkLength()
    # NIK's admin area
    stat_area, nik_prov, nik_district, nik_subdistrict = NIK.checkAdminArea(data = df)
    # NIK's DOB
    stat_dob, nik_dob, nik_age = NIK.checkDOB()
    # NIK's gender
    stat_gender, nik_gender = NIK.checkGender()
    # NIK's last computerized number
    stat_comp, nik_comp = NIK.checkComputerizedNumber()

    # Summary: an ID is valid only when every individual check passed.
    valid_all = all((stat_length, stat_area, stat_dob, stat_gender, stat_comp))
    sum_number += 1
    sum_valid_number += valid_all
    sum_length += stat_length
    sum_area += stat_area
    sum_dob += stat_dob
    sum_gender += stat_gender
    sum_computerized += stat_comp

    # Convert into json - data full
    # Entries are inserted in place; rebuilding both dicts with {**a, **b}
    # on every iteration copies everything collected so far each time.
    data_full[NIK_value] = {
        'data': {
            'length': {
                # NOTE(review): this stores the ID string, while nik_length
                # is computed above and never used - confirm which value
                # consumers of this payload expect.
                'value': NIK_value,
                'valid': stat_length
            },
            'area': {
                'value': {
                    'province': nik_prov,
                    'district': nik_district,
                    'subdistrict': nik_subdistrict
                },
                'valid': stat_area
            },
            'dob': {
                'value': {
                    'dob': nik_dob,
                    'age': nik_age
                },
                'valid': stat_dob
            },
            'gender': {
                'value': nik_gender,
                'valid': stat_gender
            },
            'computerized': {
                'value': nik_comp,
                'valid': stat_comp
            }
        },
        'valid': valid_all
    }
    # Convert into json - data limited
    data_limited[NIK_value] = valid_all

# Convert into json - summary
summary = {
    'number': sum_number,
    'valid_number': sum_valid_number,
    'valid': {
        'length': sum_length,
        'area': sum_area,
        'dob': sum_dob,
        'gender': sum_gender,
        'computerized': sum_computerized
    }
}
# Convert into json - master
json_data_full = {'message': 'success','data': data_full, 'summary': summary}
json_data_limited = {'message': 'success','data': data_limited, 'summary': summary}

In [55]:
summary

{'number': 4,
 'valid_number': 0,
 'valid': {'length': 4, 'area': 0, 'dob': 0, 'gender': 4, 'computerized': 4}}

In [56]:
data_full

{'1234567891234567': {'data': {'length': {'value': '1234567891234567',
    'valid': True},
   'area': {'value': {'province': None, 'district': None, 'subdistrict': None},
    'valid': False},
   'dob': {'value': {'dob': None, 'age': None}, 'valid': False},
   'gender': {'value': 'Woman', 'valid': True},
   'computerized': {'value': '4567', 'valid': True}},
  'valid': False}}

In [57]:
len(data_full)

1

In [58]:
json_data_full

{'message': 'success',
 'data': {'1234567891234567': {'data': {'length': {'value': '1234567891234567',
     'valid': True},
    'area': {'value': {'province': None,
      'district': None,
      'subdistrict': None},
     'valid': False},
    'dob': {'value': {'dob': None, 'age': None}, 'valid': False},
    'gender': {'value': 'Woman', 'valid': True},
    'computerized': {'value': '4567', 'valid': True}},
   'valid': False}},
 'summary': {'number': 4,
  'valid_number': 0,
  'valid': {'length': 4, 'area': 0, 'dob': 0, 'gender': 4, 'computerized': 4}}}

In [59]:
data_limited

{'1234567891234567': False}

In [60]:
len(data_limited)

1

In [61]:
json_data_limited

{'message': 'success',
 'data': {'1234567891234567': False},
 'summary': {'number': 4,
  'valid_number': 0,
  'valid': {'length': 4, 'area': 0, 'dob': 0, 'gender': 4, 'computerized': 4}}}