# Dewan Pers - Geocode

---

## Import modules

In [1]:
# Module for data manipulation
import pandas as pd
# Module for linear algebra
import numpy as np
# Module for geocoding (address -> coordinates) - OpenStreetMap Nominatim API
import geopy
from geopy.geocoders import Nominatim
# Modules for geocoding - request rate limiting and timeout errors
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderTimedOut
# Module for time calculation
import time

## Geocode

**Load the data**

In [2]:
# Read the raw Dewan Pers export; the file uses ';' as the field separator
df = pd.read_csv('../data/raw/raw_dewan_pers.csv', sep = ';')
# Report the size of the loaded table
n_rows, n_cols = df.shape
print('Dimension of data: {rows} rows and {cols} columns'.format(rows = n_rows, cols = n_cols))
# Preview the first rows
df.head()

Dimension of data: 1621 rows and 13 columns


Unnamed: 0,Number,Nama Media,Jenis Media,Penanggung Jawab,Pemimpin Redaksi,Badan Hukum,Provinsi,Alamat,Telp,Email,Website,Status,Tgl Approve
0,1,kaltimtoday.co,Siber,Maturidi,Maturidi,Media mandiri perkasa,Kalimantan Timur,"Jalan a wahab syahrani gang walet 1 No 2, sama...",082216557666,kaltimtoday99@gmail.com,www.kaltimtoday.co,Terverifikasi Administratif,2021-07-05
1,2,Tekape.co,Siber,Indar Ismail Jamaluddin,Indar Ismail Jamaluddin,Tempat Kreatifitas Anak Muda Pore,Sulawesi Selatan,"Perumahan New Graha Jannah Zarindah, Blok B3/3...",0853 9696 8189,redaksi@tekape.co | redaksitekape@gmail.com,https://tekape.co/,Terverifikasi Administratif,2021-07-05
2,3,digtara.com,Siber,Irwansyah Putra Nasution,Irwansyah Putra Nasution,MEDIA IBE NUSANTARA,Sumatera Utara,Komplek Taman Setia Budi Indah Blok JJ 10,0821-6648-2003/0852-6109-5279,redaksidigtara@gmail.com,digtara.com,Terverifikasi Administratif dan Faktual,2021-06-30
3,4,CNBC Indonesia,Televisi,Wahyu Daniel Panjaitan,Wahyu Daniel Panjaitan,Trans Berita Bisnis,DKI Jakarta,"Gedung Transmedia Lantai 3A, Jl. Kapten P. Ten...",021-79177000,legaltbb@gmail.com,www.cnbcindonesia.com,Terverifikasi Administratif dan Faktual,2021-06-30
4,5,Swara Kaltim,Cetak,Muhammad Syafranuddin,Mohammad Abdun Kuddu,Media Swara Kaltim,Kalimantan Timur,JL. Gerilya Gg. Keluarga Rt. 102 No. 027 Kelur...,0541 731240,mediaswarakaltim@yahoo.com,https://swarakaltim.com,Terverifikasi Administratif dan Faktual,2021-06-30


**Create a geocode function**

In [3]:
def geocoder(
    area_admin: str
    ) -> tuple:
    """
    Geocode a free-text address with the OpenStreetMap Nominatim service.

    The address is split into words and queried with the words in reversed
    order. While the service returns no match, the query is shortened by
    dropping the word that appears earliest in the original address, so the
    trailing words of the address are tried last. The search stops at the
    first match or when no words are left.

    Parameters
    ----------
    area_admin : str
        Address text to geocode. Non-string values (e.g. None or the NaN that
        pandas uses for missing cells) and blank strings are treated as
        "no address".

    Returns
    -------
    tuple
        ``(address, latitude, longitude)`` of the first match, or
        ``(None, None, None)`` when the input is missing/blank, nothing is
        found, or the request times out.
    """
    not_found = (None, None, None)
    # Missing cells cannot be split into words; bail out before any request
    if not isinstance(area_admin, str):
        return not_found
    # split() without an argument ignores repeated spaces, which would
    # otherwise produce empty tokens and duplicate queries
    words = area_admin.split()
    if not words:
        return not_found
    # A new RateLimiter is created on every call, so it only spaces out the
    # requests made inside this call; this pause spaces out consecutive calls
    time.sleep(1)
    geolocator = Nominatim(user_agent = 'myGeocoder')
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds = 1.5)
    reversed_words = words[::-1]
    location = None
    try:
        # Try the full address first, then progressively shorter queries;
        # the range stops at one word so the loop always terminates
        for n_words in range(len(reversed_words), 0, -1):
            location = geocode(' '.join(reversed_words[0:n_words]), timeout = 10000)
            if location is not None:
                break
    except GeocoderTimedOut as e:
        # Exceptions have no .message attribute in Python 3; format the
        # exception itself
        print('Error: geocode failed on input {} with message {}'.format(area_admin, e))
        location = None
    if location is None:
        return not_found
    return location.address, location.latitude, location.longitude

**Trial and error**

In [4]:
# Sample address used to try out the geocoder on a single input
location = 'Griya Nusa Pesona Blok I No 4 Kelurahan Talang Kelapa Kecamatan Alang-Alang Lebar'
print(location)

Griya Nusa Pesona Blok I No 4 Kelurahan Talang Kelapa Kecamatan Alang-Alang Lebar


In [5]:
# Geocode the sample address and show the three returned values
loc, lat, long = geocoder(area_admin = location)
geocoded = {'loc': loc, 'lat': lat, 'long': long}
print('Location from OSM API: {loc}'.format(**geocoded))
print('Latitude             : {lat}'.format(**geocoded))
print('Longitude            : {long}'.format(**geocoded))

Location from OSM API: Alang Alang Lebar, Palembang, Sumatera Selatan, 30154, Indonesia
Latitude             : -2.9165496
Longitude            : 104.6814308


**Apply to the data frame**

In [None]:
# Geocode the 'Alamat' value of every row and store the three values that
# geocoder returns (address, latitude, longitude) in new columns
geocoded_columns = ['Address Geocoder', 'Latitude', 'Longitude']
df[geocoded_columns] = df.apply(
    lambda row: geocoder(area_admin = row['Alamat']),
    axis = 1,
    result_type = 'expand'
)

In [None]:
# Accumulators, one entry per processed row
elem_idx, time_idx = [], []
loc_idx, lat_idx, long_idx = [], [], []
for idx in range(len(df)):
    location = df.loc[idx, 'Alamat']
    # Measure how long geocoding this single address takes
    start_time = time.time()
    loc, lat, long = geocoder(area_admin = location)
    end_time = time.time()
    elapsed = round(end_time - start_time, ndigits = 2)
    # Record the row label, the elapsed seconds and the geocoding result
    elem_idx.append(idx)
    time_idx.append(elapsed)
    loc_idx.append(loc)
    lat_idx.append(lat)
    long_idx.append(long)
    # Progress trace: row label and the address that was found
    print(idx, '->', loc)

In [None]:
# Preview the first rows of the data frame after the geocoding cells
df.head()