## Establishing a connection with neonDB and testing the connection

In [None]:
import sys
sys.version

'3.10.12 (main, Jul 29 2024, 16:56:48) [GCC 11.4.0]'

In [None]:
from google.colab import userdata
from sqlalchemy import create_engine, URL
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import text

url = URL.create(
    drivername  = "postgresql+psycopg2",
    username    = userdata.get('neondb_uid'),
    password    = userdata.get('neondb_pwd'),
    host        = userdata.get('neondb_host'),
    port        = 5432,
    database    = "neondb",
)

engine = create_engine(url, connect_args={'sslmode': "allow"} ,echo=True)
session_pool = sessionmaker(bind=engine)
with session_pool() as session:
    session.execute(text("SELECT 1"))

2024-09-12 09:31:05,968 INFO sqlalchemy.engine.Engine select pg_catalog.version()


INFO:sqlalchemy.engine.Engine:select pg_catalog.version()


2024-09-12 09:31:05,970 INFO sqlalchemy.engine.Engine [raw sql] {}


INFO:sqlalchemy.engine.Engine:[raw sql] {}


2024-09-12 09:31:06,071 INFO sqlalchemy.engine.Engine select current_schema()


INFO:sqlalchemy.engine.Engine:select current_schema()


2024-09-12 09:31:06,074 INFO sqlalchemy.engine.Engine [raw sql] {}


INFO:sqlalchemy.engine.Engine:[raw sql] {}


2024-09-12 09:31:06,173 INFO sqlalchemy.engine.Engine show standard_conforming_strings


INFO:sqlalchemy.engine.Engine:show standard_conforming_strings


2024-09-12 09:31:06,175 INFO sqlalchemy.engine.Engine [raw sql] {}


INFO:sqlalchemy.engine.Engine:[raw sql] {}


2024-09-12 09:31:06,272 INFO sqlalchemy.engine.Engine BEGIN (implicit)


INFO:sqlalchemy.engine.Engine:BEGIN (implicit)


2024-09-12 09:31:06,275 INFO sqlalchemy.engine.Engine SELECT 1


INFO:sqlalchemy.engine.Engine:SELECT 1


2024-09-12 09:31:06,282 INFO sqlalchemy.engine.Engine [generated in 0.00742s] {}


INFO:sqlalchemy.engine.Engine:[generated in 0.00742s] {}


2024-09-12 09:31:06,381 INFO sqlalchemy.engine.Engine ROLLBACK


INFO:sqlalchemy.engine.Engine:ROLLBACK


In [None]:
import random
import time

def wait_some_seconds():
  actual_seconds = int(random.random() * 10)
  # print(f"Waiting for {actual_seconds} seconds")
  time.sleep(actual_seconds)

print("Testing")
wait_some_seconds()
print("After some waiting")

Testing
After some waiting


# Database Initializations
The following code to be saved in a file named database.py

In [6]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import DeclarativeBase


DATABASE_URL = 'sqlite:///./singapore_addresses.db'
# DATABASE_URL = 'sqlite:////content/drive/MyDrive/singapore_addresses.db'

# engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False})
engine = create_engine(DATABASE_URL)
session_pool = sessionmaker(autocommit=False, autoflush=False, bind=engine)
class Base(DeclarativeBase):
    pass


In [1]:
import os
os.path.exists('singapore_addresses.db')

True

In [3]:
import pytz
pytz.all_timezones

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome',
 'Africa/Timbuktu',
 'Africa/

# Database Models

In [4]:
from database import Base
from sqlalchemy import Column, Integer, String, DECIMAL, DateTime, ForeignKey, TEXT, types
from datetime import datetime
from sqlalchemy.orm import relationship
from sqlalchemy.orm import declared_attr
from sqlalchemy.sql import func
import pytz

class TimestampMixin:
  created_at =        Column(DateTime, default=func.now(tz=pytz.timezone('Singapore')), nullable=False)
  updated_at =        Column(DateTime, default=func.now(tz=pytz.timezone('Singapore')), onupdate=func.now(), nullable=False)

class Location(TimestampMixin, Base):
  __tablename__ = 'locations'
  id =                Column(Integer, primary_key=True, index=True)
  name =              Column(String, unique=False, index=True)
  latitude =          Column(DECIMAL)
  longitude =         Column(DECIMAL)
  postal_code =       Column(String, ForeignKey('postal_code.postal_code'), nullable=True,
                             index=True, unique=False)
  postal_code_index = relationship('PostalCode', foreign_keys=[postal_code])

class PostalCode(TimestampMixin, Base):
  __tablename__ = 'postal_code'
  postal_code =       Column(String, primary_key=True, index=True)
  location_id =       Column(Integer, ForeignKey('locations.id'), nullable=True,
                             index=True)

class OneMapResponse(TimestampMixin, Base):
  __tablename__ =     'onemap_response'
  id =                Column(Integer, primary_key=True, index=True)
  total_pages =       Column(Integer)
  page_number =       Column(Integer)
  total_records =     Column(Integer)
  record_index =      Column(Integer)
  response =          Column(TEXT)
  postal_code =       Column(String, ForeignKey('postal_code.postal_code'), nullable=True,
                             index=True, unique=False)
  postal_code_index = relationship('PostalCode', foreign_keys=[postal_code])




# Run the following line only when the file is new

In [None]:
# if location and postalcode table does not exist, run the following command


Base.metadata.create_all(bind=engine)

In [7]:
from sqlalchemy import inspect
from pprint import pprint
inspector = inspect(engine)
schemas = inspector.get_schema_names()

for schema in schemas:
  print("schema: %s" % schema)
  for table_name in inspector.get_table_names(schema=schema):
    print("Table: %s" % table_name)
    for column in inspector.get_columns(table_name, schema=schema):
      print(f"\t{column}")

schema: main
Table: locations
	{'name': 'id', 'type': INTEGER(), 'nullable': False, 'default': None, 'primary_key': 1}
	{'name': 'name', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'latitude', 'type': DECIMAL(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'longitude', 'type': DECIMAL(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'postal_code', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'created_at', 'type': DATETIME(), 'nullable': False, 'default': None, 'primary_key': 0}
	{'name': 'updated_at', 'type': DATETIME(), 'nullable': False, 'default': None, 'primary_key': 0}
Table: onemap_response
	{'name': 'id', 'type': INTEGER(), 'nullable': False, 'default': None, 'primary_key': 1}
	{'name': 'total_pages', 'type': INTEGER(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'page_number', 'type': INTEGER(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'

In [8]:
!ls -la

'ls' is not recognized as an internal or external command,
operable program or batch file.


# Make API calls to OneMap to get postal code translation to geo lat lng.

In [None]:
with session_pool() as session:
  postalCode = session.query(PostalCode).filter(PostalCode.postal_code=='179094').one_or_none()
  if postalCode is None:
    print('is None')
  else:
    print(postalCode.postal_code)


is None


In [None]:
!jupyter-kernelspec list

Available kernels:
  ir         /usr/local/share/jupyter/kernels/ir
  python3    /usr/local/share/jupyter/kernels/python3


In [5]:
import pandas as pd

# read the postgresql table
table_df = pd.read_sql_table(
    "locations",
    con=engine, index_col=['id'])
table_df

Unnamed: 0_level_0,name,total_pages,page_number,latitude,longitude,postal_code,created_at,updated_at
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,DBS EVERTON PARK,3,1,1.275499,103.841398,080001,2024-09-22 03:22:16,2024-09-22 03:22:16
2,PINNACLE @ DUXTON,3,2,1.275499,103.841398,080001,2024-09-22 03:22:16,2024-09-22 03:22:16
3,THE PINNACLE@DUXTON,3,3,1.275499,103.841398,080001,2024-09-22 03:22:16,2024-09-22 03:22:16
4,MOUNT FABER GREEN,1,1,1.279457,103.817898,090001,2024-09-22 03:22:16,2024-09-22 03:22:16
5,FC,2,1,1.285242,103.779092,110001,2024-09-22 03:22:16,2024-09-22 03:22:16
...,...,...,...,...,...,...,...,...
23414,157D TAMPINES ROAD SINGAPORE 535148,1,1,1.362176,103.891889,535148,2024-10-01 03:39:31,2024-10-01 03:39:31
23415,PRINCETON VALE,1,1,1.366997,103.879585,545148,2024-10-01 03:39:31,2024-10-01 03:39:31
23416,SERANGOON GARDEN ESTATE,1,1,1.369806,103.863581,555148,2024-10-01 03:39:31,2024-10-01 03:39:31
23417,YEW LIAN PARK,1,1,1.351319,103.834363,575148,2024-10-01 03:39:32,2024-10-01 03:39:32


In [7]:
table_df.to_excel('/content/drive/MyDrive/locations.xlsx', index=False)

In [18]:
# do a pivot table of the dataframe, with the column as postal_code, and the max of total_pages ,  sorted in descending order
table_df.pivot_table(index='postal_code', values='total_pages', aggfunc='max').sort_values(by='total_pages', ascending=False)

# table_df.pivot_table(index='postal_code', columns=['Highest', 'Number'], values=['total_pages', 'total_pages'], aggfunc={'Highest':'max', 'Number':'count'}).sort_values(by='total_pages', ascending=False)
# table_df.pivot_table(index='postal_code', values='name', aggfunc='count').sort_values(by='name', ascending=False)
# {'Score': 'mean', 'Grade': 'first'}

Unnamed: 0_level_0,total_pages
postal_code,Unnamed: 1_level_1
574369,19
574349,18
423637,18
574370,17
423950,15
...,...
530620,1
530619,1
530618,1
530617,1


In [None]:
max_page = table_df['total_pages'].max()
table_df[table_df['total_pages'] == max_page]

Unnamed: 0_level_0,name,total_pages,page_number,latitude,longitude,postal_code,created_at,updated_at
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
18871,OCBC UPPER THOMSON ROAD - 7 ELEVEN,19,1,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18872,THOMSON GARDEN ESTATE,19,2,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18873,THOMSON GARDEN ESTATE,19,3,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18874,THOMSON GARDEN ESTATE,19,4,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18875,THOMSON GARDEN ESTATE,19,5,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18876,THOMSON GARDEN ESTATE,19,6,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18877,THOMSON GARDEN ESTATE,19,7,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18878,THOMSON GARDEN ESTATE,19,8,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18879,THOMSON GARDEN ESTATE,19,9,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00
18880,THOMSON GARDEN ESTATE,19,10,1.353398,103.834645,574369,2024-09-26 03:45:00,2024-09-26 03:45:00


In [None]:
table_df['postal_code_number'] = table_df['postal_code'].astype(int)

new_df = pd.DataFrame(columns=['start code', 'end code', 'record count'])
# count the number of records in table_df where postal_code_number is between 0 to 10000
total = 0
for i in range(99):
  start = i*10000
  end = start + 9999
  count = table_df[(table_df['postal_code_number'] >= start) & (table_df['postal_code_number'] <= end)].count()
  if count['postal_code'] == 0:
    continue
  new_df.loc[len(new_df)] = [start, end, count['postal_code']]

  # print(f"{start:06d}-{end:06d}: {count['postal_code']}")
  total += count['postal_code']
display(new_df)
print(f"{'Total':13s}: {total}")


Unnamed: 0,start code,end code,record count
0,50000,59999,27
1,80000,89999,60
2,90000,99999,84
3,100000,109999,109
4,110000,119999,30
5,120000,129999,342
6,130000,139999,33
7,140000,149999,284
8,150000,159999,202
9,160000,169999,242


Total        : 15444


In [None]:
singapore_postal_district = '''
01 01,02,03,04,05,06 Raffles Place,Cecil,Marina,People's Park
02 07,08 Anson,Tanjong Pagar
03 14,15,16 Queenstown,Tiong Bahru
04 09,10 Telok Blangah,Harbourfront
05 11,12,13 Pasir Panjang,Hong Leong Garden,Clementi New Town
06 17 High Street,Beach Road (part)
07 18,19 Middle Road,Golden Mile
08 20,21 Little India
09 22,23 Orchard,Cairnhill,River Valley
10 24,25,26,27 Ardmore,Bukit Timah,Holland Road,Tanglin
11 28,29,30 Watten Estate,Novena,Thomson
12 31,32,33 Balestier,Toa Payoh,Serangoon
13 34,35,36,37 Macpherson,Braddell
14 38,39,40,41 Geylang,Eunos
15 42,43,44,45 Katong,Joo Chiat,Amber Road
16 46,47,48 Bedok,Upper East Coast,Eastwood,Kew Drive
17 49,50,81 Loyang,Changi
18 51,52 Tampines,Pasir Ris
19 53,54,55,82 Serangoon Garden,Hougang,Punggol
20 56,57 Bishan,Ang Mo Kio
21 58,59 Upper Bukit Timah,Clementi Park,Ulu Pandan
22 60,61,62,63,64 Jurong
23 65,66,67,68 Hillview,Dairy Farm,Bukit Panjang,Choa Chu Kang
24 69,70,71 Lim Chu Kang,Tengah
25 72,73 Kranji,Woodgrove
26 77,78 Upper Thomson,Springleaf
27 75,76 Yishun,Sembawang
28 79,80 Seletar
'''

table_df['postal_code_number'] = table_df['postal_code'].astype(int)
df = pd.DataFrame(columns=['Postal Sector', 'Codes', 'Street Names', 'Records Count'])
for line in singapore_postal_district.split('\n'):
  if not line:
    continue
  try:
    _ = line.split(' ')
    postal_sector = _[0]
    codes = _[1] #.split(',')
    records_count = 0
    for postal_district in codes.split(','):
      start = int(postal_district) * 10000
      end = start + 9999
      count = table_df[(table_df['postal_code_number'] >= start) & (table_df['postal_code_number'] <= end)].count()
      records_count += count['postal_code']
      # print(f"{start:06d}-{end:06d}: {count['postal_code']}")
    street_names = ' '.join(_[2:]).replace(',', ', ')
    df.loc[len(df)] = [postal_sector, codes, street_names, records_count]
  except ValueError as e:
    print (f"str(e): {line}")
df.set_index('Postal Sector', inplace=True)
df

Unnamed: 0_level_0,Codes,Street Names,Records Count
Postal Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,10203040506,"Raffles Place, Cecil, Marina, People's Park",27
2,708,"Anson, Tanjong Pagar",60
3,141516,"Queenstown, Tiong Bahru",728
4,910,"Telok Blangah, Harbourfront",193
5,111213,"Pasir Panjang, Hong Leong Garden, Clementi New...",405
6,17,"High Street, Beach Road (part)",0
7,1819,"Middle Road, Golden Mile",56
8,2021,Little India,128
9,2223,"Orchard, Cairnhill, River Valley",0
10,24252627,"Ardmore, Bukit Timah, Holland Road, Tanglin",137


In [None]:
df['Records Count'].sum()

12203