In [1]:
from sqlalchemy import create_engine, Column, String, Integer, func, event, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.orm import sessionmaker
from geoalchemy2 import Geometry 
from tqdm import tqdm
from shapely.wkt import dumps

import orjson


In [2]:
%%time
with open('13_266069_040_003 L02 PAS.json', 'r') as file:
    data = orjson.loads(file.read())

import shapely
from shapely.geometry import shape

CPU times: user 10.2 s, sys: 2.2 s, total: 12.4 s
Wall time: 13.8 s


In [3]:
# Peek at the first record to see the structure of one loaded feature.
data[0]

{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[45837, 20092],
    [45836, 20093],
    [45835, 20094],
    [45834, 20095],
    [45833, 20096],
    [45832, 20096],
    [45831, 20096],
    [45830, 20096],
    [45829, 20096],
    [45828, 20096],
    [45827, 20096],
    [45826, 20096],
    [45825, 20096],
    [45824, 20096],
    [45823, 20096],
    [45822, 20096],
    [45821, 20096],
    [45820, 20097],
    [45819, 20098],
    [45818, 20099],
    [45817, 20100],
    [45816, 20100],
    [45815, 20100],
    [45814, 20100],
    [45813, 20100],
    [45812, 20101],
    [45811, 20102],
    [45810, 20103],
    [45809, 20104],
    [45808, 20104],
    [45807, 20104],
    [45806, 20104],
    [45805, 20104],
    [45804, 20105],
    [45803, 20106],
    [45802, 20107],
    [45801, 20108],
    [45800, 20109],
    [45799, 20110],
    [45798, 20111],
    [45797, 20112],
    [45796, 20113],
    [45795, 20114],
    [45794, 20115],
    [45793, 20116],
    [45792, 20117],
    [45791, 

In [4]:
# Declarative base shared by the ORM models in this notebook. Its metadata is
# what `Base.metadata.create_all(engine)` uses to emit CREATE TABLE, so `Base`
# must NOT be rebound after the models are declared: a fresh base has empty
# metadata and no table would be created.
Base = declarative_base()


class GeometryModel(Base):
    """ORM mapping for the ``geometries`` table.

    Columns:
        id: integer primary key.
        name: string label for the geometry.
        centroid: geometry column of type POINT (no SRID is passed).
        geom: geometry column of type POLYGON (no SRID is passed).
    """
    __tablename__ = 'geometries'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    centroid = Column(Geometry('POINT'))
    geom = Column(Geometry('POLYGON'))


In [5]:
from sqlalchemy_utils import database_exists, create_database

# Engine for the target PostgreSQL database (pass echo=True to log emitted SQL).
engine = create_engine('postgresql://postgres@localhost:5333/test2')
print(engine.url)

# Create the database only when it is missing. Still best-effort (the cell never
# raises), but only `Exception` is caught so Ctrl-C is not swallowed, and the
# actual failure reason is reported instead of being hidden.
try:
    if database_exists(engine.url):
        print("already exists")
    else:
        create_database(engine.url)
        print("created")
except Exception as exc:
    print(f"errored: {exc!r}")

postgresql://postgres@localhost:5333/test2
created


In [6]:
# Make sure the PostGIS extension is enabled on every new DBAPI connection.
@event.listens_for(engine, "connect")
def connect(dbapi_connection, connection_record):
    """Enable PostGIS when the engine opens a new raw connection.

    Args:
        dbapi_connection: the raw DBAPI connection that was just created.
        connection_record: the pool's record for that connection (unused).
    """
    with dbapi_connection.cursor() as cursor:
        cursor.execute('CREATE EXTENSION IF NOT EXISTS postgis;')
    # Commit explicitly so the DDL is not left inside an open transaction that a
    # later rollback (e.g. when the connection is returned to the pool) would discard.
    dbapi_connection.commit()

In [7]:
# Create every table registered on Base.metadata.
Base.metadata.create_all(engine)

# Session factory bound to the engine, plus the single session used by the
# insert cell. `sessionmaker` is already imported in the first cell, so it is
# not re-imported here.
Session = sessionmaker(bind=engine)
session = Session()

In [8]:
from tqdm import tqdm

In [9]:
%%time
for _ in range(12):
    batch_size=5_000
    polygons=[]
    with  engine.connect() as conn:
        for geojson in tqdm(data):
            name = geojson["properties"]["classification"]["name"]
            shapely_geom = shape(geojson["geometry"])
            
            polygons.append(GeometryModel(name=name, geom=shapely_geom.wkt,centroid=shapely_geom.centroid.wkt))
        
            if len(polygons) == batch_size:
                session.bulk_save_objects(polygons)
                session.commit()
                polygons.clear()  # Clear the list for the next batch
        
        # Insert any remaining records that didn't fit into the final batch
        if polygons:
            session.bulk_save_objects(polygons)
            session.commit()
session.close()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88605/88605 [00:38<00:00, 2294.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88605/88605 [00:36<00:00, 2408.04it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88605/88605 [00:37<00:00, 2393.29it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

CPU times: user 4min 22s, sys: 2.63 s, total: 4min 25s
Wall time: 7min 21s


In [10]:
%%time
#lets make sure insert worked as expected
with  engine.connect() as conn:
    res=conn.execute(text("select count(geom) from geometries"))
    nresults=res.fetchall()
    print(nresults)

[(1063260,)]
CPU times: user 3.69 ms, sys: 46 µs, total: 3.74 ms
Wall time: 516 ms


In [13]:
%%time
with  engine.connect() as conn:
    res=conn.execute(text("select ST_AsGeoJSON(centroid) as centroid from geometries limit 1000"))
    centroids=res.fetchall()

CPU times: user 2.99 ms, sys: 68 µs, total: 3.06 ms
Wall time: 8.46 ms


In [14]:
# Show the first 100 fetched centroid rows.
centroids[:100]

[('{"type":"Point","coordinates":[45862.132927504,20242.072237595]}',),
 ('{"type":"Point","coordinates":[45812.262488647,20306.061459279]}',),
 ('{"type":"Point","coordinates":[45854.852130326,20261.929323308]}',),
 ('{"type":"Point","coordinates":[45900.81372549,41104.156862745]}',),
 ('{"type":"Point","coordinates":[45901.874369386,20236.459702227]}',),
 ('{"type":"Point","coordinates":[46006.169376694,20198.662601626]}',),
 ('{"type":"Point","coordinates":[45858.962783172,20359.420550162]}',),
 ('{"type":"Point","coordinates":[45792.374420146,20346.953611663]}',),
 ('{"type":"Point","coordinates":[45750.401608789,20334.951736315]}',),
 ('{"type":"Point","coordinates":[45866.58056266,20465.875532822]}',),
 ('{"type":"Point","coordinates":[42552.904761905,46361.19047619]}',),
 ('{"type":"Point","coordinates":[45954.05971937,20293.65349076]}',),
 ('{"type":"Point","coordinates":[45759.335901387,20285.433744222]}',),
 ('{"type":"Point","coordinates":[45982.379725086,20206.214776632]}',