-
Notifications
You must be signed in to change notification settings - Fork 2
/
kodepos.py
62 lines (52 loc) · 1.83 KB
/
kodepos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
"""
Indonesian postal code scraper.
Data sourced from www.nomor.net.
Author: Agus Ibrahim - http://fb.me/mynameisagoes
Credits:
www.nomor.net
https://github.com/rufuspollock/csv2sqlite
http://www.crummy.com/software/BeautifulSoup
http://python-requests.org
"""
import requests
import os
import csv
import sys
import csv2sqlite
from bs4 import BeautifulSoup
from datetime import datetime
def int_arg(argv, index, default):
    """Return ``argv[index]`` as an int, or *default* when it is unusable.

    *default* is returned when the argument is missing, does not consist
    solely of digits, or parses to zero.
    """
    if len(argv) > index and argv[index].isdigit():
        return int(argv[index]) or default
    return default


# Wall-clock start, used for the elapsed-time report at the end of the run.
startTime = datetime.now()
arg = sys.argv
mydir = os.path.dirname(__file__)

# ke: 1-based number of the page to fetch next; x: count of rows written.
ke, x = 1, 0

# Command line: [rows per page] [maximum page count] [output CSV file name]
perpage = int_arg(arg, 1, 1000)
limitpage = int_arg(arg, 2, 84)
fname = arg[3] if len(arg) > 3 and arg[3] else "data.csv"

headers = {'User-Agent': 'Mozilla/5.0 AgusIbrahim/45265'}

# newline="" hands line-ending control to the csv module; without it the
# writer's "\r\n" terminator is translated again on some platforms and every
# record is followed by a blank line.
f = open(os.path.join(mydir, fname), "w", newline="")
tab = "no,kodepos,kel,kodewilayah,kec,dt2,kota,prov"
out = csv.writer(f)
out.writerow(tab.split(","))
def parse(ss):
    """Write every data row found in the HTML text *ss* to the CSV writer.

    A data row is a ``<tr>`` element whose ``bgcolor`` attribute is
    ``#ccffff``; the text of each of its ``<td>`` cells becomes one CSV
    field. The module-level counter ``x`` is incremented once per row
    written.
    """
    global x
    soup = BeautifulSoup(ss, 'html.parser')
    # find_all is the current BeautifulSoup name; findAll is a deprecated alias.
    for record in soup.find_all("tr", bgcolor="#ccffff"):
        out.writerow([cell.text for cell in record.find_all("td")])
        x += 1
# Seconds to wait on the server before giving up; without a timeout
# requests can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 60


def _page_url(page, per_page):
    """Return the listing URL for 1-based *page* with *per_page* rows.

    The first page uses the short query string; later pages additionally
    pass ``no1``, ``no2`` and ``kk`` values derived from the page number.
    """
    if page > 1:
        return (
            "https://www.nomor.net/_kodepos.php?_i=desa-kodepos&daerah=&jobs="
            "&perhal=%s&urut=8&asc=0001000&sby=000000&no1=%s&no2=%s&kk=%s"
            % (per_page, per_page * (page - 2) + 1, per_page * (page - 1), page)
        )
    return (
        "https://www.nomor.net/_kodepos.php?_i=desa-kodepos&daerah=&jobs="
        "&perhal=%s&sby=000000&asc=0001000&urut=8" % per_page
    )


try:
    # Check the page limit before requesting, so no page beyond limitpage
    # is downloaded only to be thrown away.
    while ke <= limitpage:
        c = requests.get(
            _page_url(ke, perpage), headers=headers, timeout=REQUEST_TIMEOUT
        ).text
        # NOTE(review): this dumps the full HTML of every page to stdout;
        # it looks like leftover debugging output - confirm before removing.
        print(c)
        # A page without the data-row colour marker has no rows: stop.
        if "#ccffff" not in c:
            break
        parse(c)
        ke += 1
finally:
    # Close (and flush) the CSV even if a request or parse step raised.
    f.close()

# Elapsed time covers downloading only, not the SQLite conversion below.
endTime = datetime.now() - startTime
print("Convert into sqlite...")
csv2sqlite.convert(
    os.path.join(mydir, fname),
    os.path.join(mydir, os.path.splitext(fname)[0] + ".db"),
    "kodepos",
)
print("All Done, %s data downloaded. time %s" % (x, endTime))