## Establishing a connection with neonDB and testing the connection

In [1]:

from sqlalchemy import create_engine, URL
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import text

url = URL.create(
    drivername  = "postgresql+psycopg2",
    username    = userdata.get('neondb_uid'),
    password    = userdata.get('neondb_pwd'),
    host        = userdata.get('neondb_host'),
    port        = 5432,
    database    = "neondb",
)

engine = create_engine(url, connect_args={'sslmode': "allow"} ,echo=True)
session_pool = sessionmaker(bind=engine)
with session_pool() as session:
    session.execute(text("SELECT 1"))

ModuleNotFoundError: No module named 'google'

In [None]:
import random
import time

def wait_some_seconds():
  actual_seconds = int(random.random() * 10)
  # print(f"Waiting for {actual_seconds} seconds")
  time.sleep(actual_seconds)

print("Testing")
wait_some_seconds()
print("After some waiting")

Testing
After some waiting


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Database Initializations
The following code to be saved in a file named database.py

In [3]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import DeclarativeBase


# DATABASE_URL = 'sqlite:///./singapore_addresses.db'
DATABASE_URL = 'sqlite:////content/drive/MyDrive/singapore_addresses.db'

# engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False})
engine = create_engine(DATABASE_URL)
session_pool = sessionmaker(autocommit=False, autoflush=False, bind=engine)
class Base(DeclarativeBase):
    pass


In [None]:
import os
os.path.exists('/content/drive/MyDrive/singapore_addresses.db')

True

# Database Models

In [4]:
# from database import Base
from sqlalchemy import Column, Integer, String, Boolean, DECIMAL, DateTime, ForeignKey, TIMESTAMP
from datetime import datetime
from sqlalchemy.orm import relationship
from sqlalchemy.orm import declared_attr
from sqlalchemy.sql import func

class TimestampMixin:
  created_at = Column(DateTime, default=func.now(), nullable=False)
  updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)

class Location(TimestampMixin, Base):
  __tablename__ = 'locations'
  id = Column(Integer, primary_key=True, index=True)
  name = Column(String, unique=False, index=True)
  total_pages = Column(Integer)
  page_number = Column(Integer)
  latitude = Column(DECIMAL)
  longitude = Column(DECIMAL)
  postal_code = Column(String, ForeignKey('postal_code.postal_code'), nullable=True,
                       index=True, unique=False)
  postal_code_index = relationship('PostalCode', foreign_keys=[postal_code])

class PostalCode(TimestampMixin, Base):
  __tablename__ = 'postal_code'
  postal_code = Column(String, primary_key=True, index=True)
  location_id = Column(Integer, ForeignKey('locations.id'), nullable=True,
                       index=True)



In [None]:
del Location, PostalCode

# Run the following line only when the file is new

In [None]:
Base.metadata.create_all(bind=engine)

In [None]:
from sqlalchemy import inspect
from pprint import pprint
inspector = inspect(engine)
schemas = inspector.get_schema_names()

for schema in schemas:
  print("schema: %s" % schema)
  for table_name in inspector.get_table_names(schema=schema):
    print("Table: %s" % table_name)
    for column in inspector.get_columns(table_name, schema=schema):
      print(f"\t{column}")

schema: main
Table: locations
	{'name': 'id', 'type': INTEGER(), 'nullable': False, 'default': None, 'primary_key': 1}
	{'name': 'name', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'total_pages', 'type': INTEGER(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'page_number', 'type': INTEGER(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'latitude', 'type': DECIMAL(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'longitude', 'type': DECIMAL(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'postal_code', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'created_at', 'type': DATETIME(), 'nullable': True, 'default': None, 'primary_key': 0}
	{'name': 'updated_at', 'type': DATETIME(), 'nullable': True, 'default': None, 'primary_key': 0}
Table: postal_code
	{'name': 'postal_code', 'type': VARCHAR(), 'nullable': False, 'default': None, 'primary_key': 1}


In [None]:
!ls -la

total 72
drwxr-xr-x 1 root root  4096 Sep 17 07:37 .
drwxr-xr-x 1 root root  4096 Sep 17 01:55 ..
drwxr-xr-x 4 root root  4096 Sep 13 13:21 .config
drwx------ 6 root root  4096 Sep 17 07:33 drive
drwxr-xr-x 1 root root  4096 Sep 13 13:22 sample_data
-rw-r--r-- 1 root root 49152 Sep 17 07:37 singapore_addresses.db


In [5]:
from google.colab import userdata
userdata.get('onemap') # expires 20 Sep

'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiI5OWI5NWM0YmViZmNlMDlhZGJlODNjZjg1YjFhNjBhOCIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC0xMjIzNjk4OTkyLmFwLXNvdXRoZWFzdC0xLmVsYi5hbWF6b25hd3MuY29tL2FwaS92Mi91c2VyL3Bhc3N3b3JkIiwiaWF0IjoxNzI2NTU1MDA2LCJleHAiOjE3MjY4MTQyMDYsIm5iZiI6MTcyNjU1NTAwNiwianRpIjoiczQ2RGoxSHdKdUdrcWhKUSIsInVzZXJfaWQiOjQ0MjksImZvcmV2ZXIiOmZhbHNlfQ.iqxy504MMuejNh5KgJAxFCUiU7B12aTLr71YDs1QDXo'

# Make API calls to OneMap to get postal code translation to geo lat lng.

In [15]:
import requests
import json
from IPython.display import Markdown, display

headers = {"Authorization": userdata.get('onemap')}

url = "https://www.onemap.gov.sg/api/common/elastic/search"

params = {
    'searchVal'     : "000001",
    'returnGeom'    : "Y",
    'getAddrDetails': "N",
    'pageNum'       : "1"
}

# insert new locations record
char_count = 0
counter = 0
for j in range(122, 131):
  for i in range(1, 100):
    # wait_some_seconds()   # Throttling effect
    postal_code = f"{i:02d}{j:04d}"
    params['searchVal'] = postal_code
    response = requests.request("GET", url, headers=headers, params=params)
    try:
      r = json.loads(response.text)
    except json.JSONDecodeError as e:
      print(f"{str(e)}: [{response.text}]")
      continue
    r = json.loads(response.text)
    if r['found']:
      for index, row in enumerate(r['results']):
        # Create a session from the sessionmaker
        with session_pool() as session:
          # Query for the location where postal_code, page_number and name matches DB
          record = session.query(Location).filter(
              Location.postal_code==postal_code,
              Location.page_number==index,
              Location.name==params['searchVal']).one_or_none()
          if record:
            display_str = f"{params['searchVal']} | {r['found']:2d} | {r['totalNumPages']:2d} | {record.page_number} | [{record.latitude:1.14f}] | [{record.longitude:3.12f}] | {record.name}"
            continue
          else:
            display_str = f"{params['searchVal']} | {r['found']:2d} | {r['totalNumPages']:2d} | {index} | {row['LATITUDE']:16s} | {row['LONGITUDE']:16s} | {row['SEARCHVAL']}"
          print(display_str)
          char_count += len(display_str)

          counter += 1
          newLocation = Location(name=row['SEARCHVAL'],
                                latitude=r['results'][0]['LATITUDE'],
                                longitude=r['results'][0]['LONGITUDE'],
                                total_pages=r['found'],
                                page_number=index+1)
          session.add(newLocation)

          # check if postal code already exist in PostalCode, if not exist insert new Postal code
          postalCode = session.query(PostalCode).filter(PostalCode.postal_code==postal_code).one_or_none()
          if postalCode is None:
            newPostalCode = PostalCode(postal_code=postal_code)
            session.add(newPostalCode)
            newLocation.postal_code_index = newPostalCode
          else:
            newLocation.postal_code_index = postalCode
          session.commit()
  #  else:
  #    print(f"{params['searchVal']} | {r['found']:2d} |    |")
display(Markdown('---'))
print(f"{counter} records added; {char_count} characters")


150121 |  1 |  1 | 0 | 1.28662949776774 | 103.804920144204 | 121 BUKIT MERAH LANE 1 SINGAPORE 150121
160121 |  1 |  1 | 0 | 1.28270270194687 | 103.828491163221 | KIM TIAN TOWERS
310121 |  1 |  1 | 0 | 1.3391626233715  | 103.846243540344 | TOA PAYOH VIEW
320121 |  1 |  1 | 0 | 1.31896793997824 | 103.856341994979 | MCNAIR SPRING
350121 |  2 |  1 | 0 | 1.33576029177039 | 103.865596996588 | 121 POTONG PASIR AVENUE 1 SINGAPORE 350121
350121 |  2 |  1 | 1 | 1.33574084734373 | 103.865665596118 | JALAN BESAR TOWN COUNCIL (POTONG PASIR OFFICE)
380121 |  2 |  1 | 0 | 1.3179301794665  | 103.885314531751 | 121 GEYLANG EAST CENTRAL SINGAPORE 380121
380121 |  2 |  1 | 1 | 1.31804549523722 | 103.885311479882 | FC
460121 |  2 |  1 | 0 | 1.32964201790706 | 103.938821071117 | 121 BEDOK NORTH ROAD SINGAPORE 460121
460121 |  2 |  1 | 1 | 1.3296379833018  | 103.93882252492  | PEACEHAVEN BEDOK MULTI-SERVICE CENTRE
470121 |  1 |  1 | 0 | 1.33130517787872 | 103.909992224709 | EUNOS VISTA
510121 |  1 |  1 | 0 

---

29 records added; 2633 characters


In [14]:
with session_pool() as session:
  postalCode = session.query(PostalCode).filter(PostalCode.postal_code=='090001').one_or_none()
  if postalCode is None:
    print('is None')
  else:
    print(postalCode.postal_code)


090001


In [None]:
!rm -rf singapore_addresses.db

In [None]:
!jupyter-kernelspec list

Available kernels:
  ir         /usr/local/share/jupyter/kernels/ir
  python3    /usr/local/share/jupyter/kernels/python3


In [None]:
!pip list

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
accelerate                       0.34.2
aiohappyeyeballs                 2.4.0
aiohttp                          3.10.5
aiosignal                        1.3.1
alabaster                        0.7.16
albucore                         0.0.15
albumentations                   1.4.14
altair                           4.2.2
annotated-types                  0.7.0
anyio                            3.7.1
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
array_record                     0.5.1
arviz                            0.18.0
asn1crypto                       1.5.1
astropy                          6.1.3
astropy-iers-data                0.2024.9.12.13.29.57
astunparse                       1.6.3
async-timeout                    4.0.3
atpublic                         4.1.0
attrs                            24.2.0
audioread             

# Backing up SQLite db on MyDrive
Do this only if you are not connected directly to the DB on Gdrive

In [None]:
!cp singapore_addresses.db /content/drive/MyDrive

# Restoring from backup

In [None]:
!cp /content/drive/MyDrive/singapore_addresses.db .

In [9]:
import pandas as pd

# read the postgresql table
table_df = pd.read_sql_table(
    "locations",
    con=engine, index_col=['id'])
table_df

Unnamed: 0_level_0,name,total_pages,page_number,latitude,longitude,postal_code,created_at,updated_at
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,DBS EVERTON PARK,3,1,1.275499,103.841398,080001,2024-09-17 09:27:00,2024-09-18 00:56:09
2,PINNACLE @ DUXTON,3,2,1.275499,103.841398,080001,2024-09-17 09:27:00,2024-09-18 00:56:09
3,THE PINNACLE@DUXTON,3,3,1.275499,103.841398,080001,2024-09-17 09:27:00,2024-09-18 00:56:09
4,MOUNT FABER GREEN,1,1,1.279457,103.817898,090001,2024-09-17 09:27:01,2024-09-18 00:56:09
5,FC,2,1,1.285242,103.779092,110001,2024-09-17 09:27:02,2024-09-18 00:56:09
...,...,...,...,...,...,...,...,...
2327,120 MARSILING RISE SINGAPORE 730120,1,1,1.439181,103.780841,730120,2024-09-18 09:18:06,2024-09-18 09:18:06
2328,DBS 120 CANBERRA CRESCENT,2,1,1.445675,103.831508,750120,2024-09-18 09:18:07,2024-09-18 09:18:07
2329,EASTWAVE @ CANBERRA,2,2,1.445675,103.831508,750120,2024-09-18 09:18:07,2024-09-18 09:18:07
2330,CHONG PANG VALE,1,1,1.434821,103.829348,760120,2024-09-18 09:18:08,2024-09-18 09:18:08


In [12]:
# do a pivot table of the dataframe, with the column as postal_code, and the count of postal_code as the value, sorted in descending order
table_df.pivot_table(index='postal_code', values='name', aggfunc='count').sort_values(by='name', ascending=False)

Unnamed: 0_level_0,name
postal_code,Unnamed: 1_level_1
540118,8
330030,6
330025,6
310045,6
270036,5
...,...
320012,1
320011,1
320010,1
320009,1
