## Establishing a connection with neonDB and testing the connection

In [None]:
import sys
# Show the version string of the Python interpreter running this notebook.
sys.version

In [None]:
from sqlalchemy import create_engine, URL
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import text
from settings import NEONDB_UID, NEONDB_HOST, NEONDB_PWD

# Connection URL for the Neon-hosted PostgreSQL database. Credentials are
# read from the local `settings` module instead of being hard-coded here.
url = URL.create(
    drivername="postgresql+psycopg2",
    username=NEONDB_UID,
    password=NEONDB_PWD,
    host=NEONDB_HOST,
    port=5432,
    database="neondb",
)

# sslmode="require" refuses to connect without TLS. The previous "allow"
# setting attempts an unencrypted connection first and only then retries
# with TLS. echo=True logs every emitted SQL statement for this smoke test.
engine = create_engine(url, connect_args={"sslmode": "require"}, echo=True)
session_pool = sessionmaker(bind=engine)

# Round-trip a trivial query to prove the credentials and network path work.
with session_pool() as session:
    session.execute(text("SELECT 1"))

In [None]:
import random
import time

def wait_some_seconds(max_seconds=10):
    """Sleep for a random whole number of seconds and report the delay.

    Args:
        max_seconds: Exclusive upper bound for the delay. The default of 10
            keeps the original behaviour of waiting between 0 and 9 seconds.

    Returns:
        The number of seconds that were slept.
    """
    # Truncating random() * max_seconds yields an integer in [0, max_seconds).
    actual_seconds = int(random.random() * max_seconds)
    time.sleep(actual_seconds)
    return actual_seconds

# Smoke test: the two messages bracket the random delay so the pause is visible.
print("Testing")
wait_some_seconds()
print("After some waiting")

# Database Initializations
Save the following code in a file named `database.py`.

In [6]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import DeclarativeBase


# SQLite database file, resolved relative to the current working directory.
DATABASE_URL = 'sqlite:///./singapore_addresses.db'
# Alternative absolute location (e.g. a mounted Drive folder):
# DATABASE_URL = 'sqlite:////content/drive/MyDrive/singapore_addresses.db'

# Alternative engine setup that turns off SQLite's same-thread check:
# engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False})
engine = create_engine(DATABASE_URL)

# Session factory bound to the engine above, with automatic flushing disabled.
session_pool = sessionmaker(
    bind=engine,
    autoflush=False,
    autocommit=False,
)
class Base(DeclarativeBase):
    """Shared declarative base class that the ORM models inherit from."""


In [None]:
import os
# Check whether the SQLite database file is present in the working directory.
os.path.exists('singapore_addresses.db')

In [None]:
import pytz
import re
# List time-zone names that contain an "s", then any run of letters, then a
# "g"; names are lower-cased first so the match ignores case.
sg_pattern = re.compile('s[a-z]*g')
[tz for tz in pytz.all_timezones if sg_pattern.search(tz.lower())]

# Database Models

In [4]:
from database import Base
from sqlalchemy import Column, Integer, String, DECIMAL, DateTime, ForeignKey, TEXT, types
from datetime import datetime
from sqlalchemy.orm import relationship
from sqlalchemy.orm import declared_attr
from sqlalchemy.sql import func
import pytz

class TimestampMixin:
    """Mixin that adds ``created_at`` / ``updated_at`` audit columns.

    Both values come from the SQL ``now()`` function, so the timestamp is
    produced by the database when the row is inserted or updated.
    """

    # NOTE(review): ``func.now()`` builds a SQL function call and has no
    # time-zone option. The ``tz=pytz.timezone('Singapore')`` keyword that
    # used to be passed here appears to be discarded by SQLAlchemy, so it
    # never influenced the stored value. It is removed so the code no longer
    # suggests these are Singapore-local times. If Singapore wall-clock
    # values are wanted, use a Python-side callable instead, e.g.
    # ``default=lambda: datetime.now(pytz.timezone('Singapore'))``.
    created_at = Column(DateTime, default=func.now(), nullable=False)
    updated_at = Column(
        DateTime,
        default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )

class Location(TimestampMixin, Base):
    """ORM model for the ``locations`` table: a named point with coordinates."""

    __tablename__ = 'locations'

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, unique=False, index=True)
    latitude = Column(DECIMAL)
    longitude = Column(DECIMAL)
    # Optional reference to a row in ``postal_code``.
    postal_code = Column(
        String,
        ForeignKey('postal_code.postal_code'),
        nullable=True,
        index=True,
        unique=False,
    )
    # ORM-level access to the referenced PostalCode row.
    postal_code_index = relationship('PostalCode', foreign_keys=[postal_code])

class PostalCode(TimestampMixin, Base):
    """ORM model for the ``postal_code`` table, keyed by the code itself."""

    __tablename__ = 'postal_code'

    postal_code = Column(String, primary_key=True, index=True)
    # Optional reference to a row in ``locations``.
    location_id = Column(
        Integer,
        ForeignKey('locations.id'),
        nullable=True,
        index=True,
    )

class OneMapResponse(TimestampMixin, Base):
    """ORM model for the ``onemap_response`` table.

    Each row stores a response as text together with paging and record
    counters and the postal code it is linked to.
    """

    __tablename__ = 'onemap_response'

    id = Column(Integer, primary_key=True, index=True)
    total_pages = Column(Integer)
    page_number = Column(Integer)
    total_records = Column(Integer)
    record_index = Column(Integer)
    response = Column(TEXT)
    # Reference to the row in ``postal_code`` this response is linked to.
    postal_code = Column(
        String,
        ForeignKey('postal_code.postal_code'),
        index=True,
        unique=False,
    )
    # ORM-level access to the referenced PostalCode row.
    postal_code_index = relationship('PostalCode', foreign_keys=[postal_code])




# Run the following cell only when the database file is new

In [None]:
# Create the tables registered on Base's metadata (locations, postal_code,
# onemap_response) in the database that `engine` points to.
# NOTE(review): `engine` is assigned in more than one cell of this notebook;
# confirm it targets the intended database before running this.

Base.metadata.create_all(bind=engine)

In [None]:
from sqlalchemy import inspect
from pprint import pprint
# Walk every schema visible through the engine and dump its tables and columns.
inspector = inspect(engine)
schemas = inspector.get_schema_names()

for schema in schemas:
    print("schema: %s" % schema)
    tables_in_schema = inspector.get_table_names(schema=schema)
    for table_name in tables_in_schema:
        print("Table: %s" % table_name)
        column_infos = inspector.get_columns(table_name, schema=schema)
        for column in column_infos:
            print(f"\t{column}")

In [None]:
from enum import Enum

class testE(Enum):
    """Scratch enumeration with three members numbered from zero."""

    postal_district = 0
    start = 1
    end = 2


# Print the integer value behind the first member.
print(testE.postal_district.value)


# Make API calls to OneMap to translate postal codes into latitude/longitude coordinates.

In [None]:
# Look up one specific postal code and report whether a row exists for it.
with session_pool() as session:
    lookup = session.query(PostalCode).filter(PostalCode.postal_code == '179094')
    postalCode = lookup.one_or_none()
    print('is None' if postalCode is None else postalCode.postal_code)


In [None]:
!jupyter-kernelspec list

In [None]:
import pandas as pd

# Load the whole "locations" table into a DataFrame indexed by its id column.
table_df = pd.read_sql_table("locations", con=engine, index_col=['id'])
table_df

In [7]:
# Write the DataFrame to an Excel workbook at the given path.
# index=False leaves the DataFrame index out of the sheet.
table_df.to_excel('/content/drive/MyDrive/locations.xlsx', index=False)

In [None]:
# Highest total_pages seen for each postal_code, largest first.
(
    table_df
    .pivot_table(index='postal_code', values='total_pages', aggfunc='max')
    .sort_values(by='total_pages', ascending=False)
)

# table_df.pivot_table(index='postal_code', columns=['Highest', 'Number'], values=['total_pages', 'total_pages'], aggfunc={'Highest':'max', 'Number':'count'}).sort_values(by='total_pages', ascending=False)
# table_df.pivot_table(index='postal_code', values='name', aggfunc='count').sort_values(by='name', ascending=False)
# {'Score': 'mean', 'Grade': 'first'}

In [None]:
# Show the rows whose total_pages equals the column-wide maximum.
max_page = table_df['total_pages'].max()
is_max = table_df['total_pages'] == max_page
table_df[is_max]

In [None]:
# Numeric form of the postal code so ranges can be compared arithmetically.
table_df['postal_code_number'] = table_df['postal_code'].astype(int)

new_df = pd.DataFrame(columns=['start code', 'end code', 'record count'])
# Tally records per block of 10,000 consecutive codes (000000-009999,
# 010000-019999, ...), skipping blocks that contain no records.
total = 0
# 100 blocks are needed to cover every six-digit code; range(99) stopped at
# 989999 and left 990000-999999 out of both the table and the total.
for i in range(100):
    start = i * 10000
    end = start + 9999
    # between() is inclusive on both ends, matching the >= / <= comparison.
    in_block = table_df['postal_code_number'].between(start, end)
    count = table_df[in_block].count()
    if count['postal_code'] == 0:
        continue
    new_df.loc[len(new_df)] = [start, end, count['postal_code']]
    total += count['postal_code']
display(new_df)
print(f"{'Total':13s}: {total}")


In [None]:
singapore_postal_district = '''
01 01,02,03,04,05,06 Raffles Place,Cecil,Marina,People's Park
02 07,08 Anson,Tanjong Pagar
03 14,15,16 Queenstown,Tiong Bahru
04 09,10 Telok Blangah,Harbourfront
05 11,12,13 Pasir Panjang,Hong Leong Garden,Clementi New Town
06 17 High Street,Beach Road (part)
07 18,19 Middle Road,Golden Mile
08 20,21 Little India
09 22,23 Orchard,Cairnhill,River Valley
10 24,25,26,27 Ardmore,Bukit Timah,Holland Road,Tanglin
11 28,29,30 Watten Estate,Novena,Thomson
12 31,32,33 Balestier,Toa Payoh,Serangoon
13 34,35,36,37 Macpherson,Braddell
14 38,39,40,41 Geylang,Eunos
15 42,43,44,45 Katong,Joo Chiat,Amber Road
16 46,47,48 Bedok,Upper East Coast,Eastwood,Kew Drive
17 49,50,81 Loyang,Changi
18 51,52 Tampines,Pasir Ris
19 53,54,55,82 Serangoon Garden,Hougang,Punggol
20 56,57 Bishan,Ang Mo Kio
21 58,59 Upper Bukit Timah,Clementi Park,Ulu Pandan
22 60,61,62,63,64 Jurong
23 65,66,67,68 Hillview,Dairy Farm,Bukit Panjang,Choa Chu Kang
24 69,70,71 Lim Chu Kang,Tengah
25 72,73 Kranji,Woodgrove
26 77,78 Upper Thomson,Springleaf
27 75,76 Yishun,Sembawang
28 79,80 Seletar
'''

# Numeric form of the postal code so ranges can be compared arithmetically.
table_df['postal_code_number'] = table_df['postal_code'].astype(int)
df = pd.DataFrame(columns=['Postal Sector', 'Codes', 'Street Names', 'Records Count'])
# Each non-empty line reads "<number> <comma-separated 2-digit codes> <names>".
# NOTE(review): the first field looks like the postal district number and the
# two-digit codes like postal sectors, so the 'Postal Sector' label may be
# swapped; it is kept as-is because it is also used as the index name below.
for line in singapore_postal_district.split('\n'):
    if not line:
        continue
    try:
        # Unpack into named fields rather than binding `_`, which IPython
        # uses for the previous cell's output. A line with fewer than two
        # fields raises ValueError here and is reported below.
        postal_sector, codes, *name_parts = line.split(' ')
        records_count = 0
        for postal_district in codes.split(','):
            # Each two-digit code covers the 10,000 postal codes starting with it.
            start = int(postal_district) * 10000
            end = start + 9999
            in_block = table_df['postal_code_number'].between(start, end)
            count = table_df[in_block].count()
            records_count += count['postal_code']
        street_names = ' '.join(name_parts).replace(',', ', ')
        df.loc[len(df)] = [postal_sector, codes, street_names, records_count]
    except ValueError as e:
        # Interpolate the exception itself; the previous message printed the
        # literal text "str(e)" instead of the error.
        print(f"{e}: {line}")
df.set_index('Postal Sector', inplace=True)
df

In [None]:
# Grand total of the 'Records Count' column across all rows of df.
df['Records Count'].sum()