In [None]:
import sys
import os
%load_ext autoreload
%autoreload 2

# Necessary to allow import of local modules (e.g. connection.py) that live next to
# this notebook. The membership check keeps the cell idempotent: re-running it does
# not keep appending the same directory to sys.path.
cwd = os.getcwd()
if cwd not in sys.path:
    sys.path.append(cwd)

from connection import Connection


In [None]:
# The connection URL can be overridden with the OSM_DB_URL environment variable so
# that the notebook is not tied to one developer's local database account; the
# original URL stays as the default, so existing setups keep working unchanged.
db = Connection(
    os.environ.get("OSM_DB_URL", "postgresql+psycopg://ying.tan@localhost/osm_db")
)

In [None]:
# Quick check that the connection works. NOTE(review): get_tables() is defined in the
# local `connection` module; presumably it lists the tables of the connected
# database — confirm there.
db.get_tables()


In [None]:
# Define and run setup_bff_tables(): (re)creates the `vertices` and `edges` tables.
#
# - `vertices` is seeded from `baskets` (id and basket_id both set to the basket id)
#   and from `nodes` (id and osm_id both set to node_id).
# - `edges` is created empty, with GIST indexes on its three geometry columns; it is
#   filled by later cells.
#
# Both tables are dropped with CASCADE first, so running this cell discards whatever
# they contained.
#
# TODO: create foreign key indexes from node1 to reference the vertices table

function_sql = """
CREATE OR REPLACE FUNCTION setup_bff_tables()
returns bool AS $$

begin
    drop table if exists vertices cascade;
    create table vertices (
            id bigserial primary key,
            osm_id bigint unique,
            basket_id bigint unique,
            geom geometry(Point,3857) not null
    );
    -- NOTE: despite its name this is a plain (non-unique) GIST index on geom.
    create index vertices_geom_uniq on vertices using GIST(geom);

    insert into vertices(id, basket_id, geom)
    select id, id, geom from baskets;

    insert into vertices(id, osm_id, geom)
    select node_id, node_id, geom from nodes;

    -- The rows above were inserted with explicit ids, which does not advance the
    -- bigserial sequence. Move it past the largest id so that later inserts that
    -- rely on the default (insert into vertices(geom) ...) do not collide with
    -- existing primary keys.
    perform setval(
        pg_get_serial_sequence('vertices', 'id'),
        coalesce((select max(id) from vertices), 0) + 1,
        false
    );

    drop table if exists edges cascade;
    create table edges (
            id bigserial primary key,
            osm_id bigint,
            node1 bigint,
            node2 bigint,
            node1_point geometry(Point,3857),
            node2_point geometry(Point,3857),
            way geometry(Linestring, 3857),
            distance numeric
    );

    create index edges_node1_gist on edges using GIST(node1_point);
    create index edges_node2_gist on edges using GIST(node2_point);
    create index edges_way_gist on edges using GIST(way);


    return true;
end;
$$
language plpgsql;
"""

db.raw_execute(function_sql)

db.raw_execute("""
select setup_bff_tables();
""")


In [None]:
from dataclasses import dataclass, field
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from sqlalchemy import BigInteger

class Base(DeclarativeBase):
    """Declarative base class for the ORM models defined in this notebook."""
    pass

class Edge(Base):
    """ORM mapping of the ``edges`` table, used for the bulk insert of parsed edges.

    Only the primary key and the three integer columns are mapped. The table created
    by ``setup_bff_tables`` also has ``node1_point``, ``node2_point``, ``way`` and
    ``distance`` columns; those are not mapped here and are filled with raw SQL in a
    later cell.
    """
    __tablename__ = "edges"
    # Primary key; declared as bigserial in the SQL table definition.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Id of the OSM way this edge was derived from.
    osm_id: Mapped[int] = mapped_column(BigInteger)
    # Ids of the two endpoint nodes of the edge.
    node1: Mapped[int] = mapped_column(BigInteger)
    node2: Mapped[int] = mapped_column(BigInteger)

@dataclass
class EdgeData:
    """Plain record holding an edge id and the ids of its two endpoint nodes.

    NOTE(review): not referenced by any of the visible cells.
    """
    id: int
    node1: int
    node2: int
    

class WayItem:
    """An OSM way: its id together with the list of edges collected for it."""

    def __init__(self, id):
        # A fresh list per instance, so that edges are never shared between ways.
        self.edges = list()
        self.id = id

In [None]:
from lxml import etree

from itertools import islice
from collections import deque

osm_file = "../data/nyc_footpaths.osm"
tree = etree.parse(osm_file)

# Give every <node> without children a placeholder <tag> so that it gets parsed.
childless_nodes = [node for node in tree.iter("node") if len(node) == 0]
for node in childless_nodes:
    node.append(etree.Element("tag", k="k", v="v"))

# The modified document is written back over the file it was read from.
tree.write(osm_file)


In [None]:
# Parse the OSM XML and, for every <way>, build one edge per pair of consecutive
# <nd> references: a way with refs [a, b, c] yields the edges a->b and b->c.
#
# Differences from a naive loop, all deliberate:
# - Elements are cleared on their "end" event, so the parsed tree does not pile up
#   in memory while streaming through the file.
# - An edge is only created once both endpoints are known, so a way that has a
#   single <nd> contributes no half-built edge (one without "node2").
# - The way id is converted to int, like the node refs, since it ends up in the
#   BigInteger osm_id column.
ways = deque()
tree = etree.iterparse("../data/nyc_footpaths.osm", events=("start", "end"))
prev_ref = None  # ref of the previous <nd> of the way currently being read
for action, elem in tree:
    if action == "end":
        # Everything we need is read from attributes on the "start" event.
        elem.clear()
        continue

    if elem.tag == "way":
        ways.append(WayItem(int(elem.get("id"))))
        prev_ref = None
    elif elem.tag == "nd":
        ref = int(elem.get("ref"))
        if prev_ref is not None:
            current_way = ways[-1]
            current_way.edges.append(
                {"node1": prev_ref, "osm_id": current_way.id, "node2": ref}
            )
        prev_ref = ref

In [None]:
from sqlalchemy.orm import Session
from sqlalchemy import insert

# Bulk insert the edges, one transaction per batch. A batch is flushed once it holds
# more than BATCH_SIZE edges, and once more after the last way.
BATCH_SIZE = 2000
edges = []
for idx, w in enumerate(ways):
    edges += w.edges
    is_last_way = idx == len(ways) - 1
    # Skip empty batches: if the last way adds no edges right after a flush, the
    # execute() call would otherwise run with an empty parameter list, which is not
    # a bulk insert of zero rows.
    if edges and (len(edges) > BATCH_SIZE or is_last_way):
        with Session(db.engine) as session, session.begin():
            session.execute(
                insert(Edge),
                edges
            )
        edges.clear()

In [None]:
# Fill in the geometry columns of the edges inserted above.
#
# For every edge, node1 and node2 are looked up in `nodes` (by node_id); the update
# then sets:
#   node1_point / node2_point  - the geometries of the two endpoint nodes
#   distance                   - st_distance between the two geometries (in the units
#                                of their SRID; the edges columns are declared with
#                                SRID 3857)
#   way                        - the straight line between them (st_makeline)
#
# All joins are inner joins, so an edge whose node1 or node2 has no row in `nodes`
# is not touched and keeps NULL in these columns.
db.raw_execute("""
with temp_node1 as (
    select e.id, n.node_id, n.geom
    from nodes n
    inner join edges e on e.node1 = n.node_id
),
temp_node2 as (
    select e.id, n.node_id, n.geom
    from nodes n
    inner join edges e on e.node2 = n.node_id
),

temp_joined as (
    select 
        t1.id as id,
        t1.geom as node1,
        t2.geom as node2,
        st_distance(t1.geom, t2.geom) as distance,
        st_makeline(t1.geom, t2.geom) as way
    from
        temp_node1 as t1
    inner join temp_node2 t2 on t1.id = t2.id
)


update edges
    set 
        node1_point = t.node1,
        node2_point = t.node2,
        distance = t.distance,
        way = t.way
    from
        temp_joined t
    where t.id = edges.id;

""")


In [None]:
# For every basket, this SQL pairs the basket with each edge whose way lies within
# 50 units of it and computes the closest point on that way, the distance to it, and
# a per-basket rank by that distance (rownum).
# Because of the LEFT JOIN, a basket with no edge in range still produces one row,
# with a NULL closest_point; the final SELECT lists up to 10 such baskets, i.e. it
# checks that every basket has a closest point on some edge.
# TODO: the original note here is incomplete ("update this to use ...").
sql = """
with

basket_intersections as (
select
    d.id as basketid, 
    e.id as edge_id,
    st_closestpoint(e.way, d.geom) as closest_point,
    st_distance(st_closestpoint(e.way, d.geom), d.geom) as distance,
    row_number() over (partition by d.id order by st_distance(st_closestpoint(e.way, d.geom), d.geom)) as rownum
    from baskets d
    left join edges e on st_dwithin(d.geom, e.way , 50)
)


select * from basket_intersections where closest_point is null limit 10;
"""

# NOTE(review): this query is not executed anywhere in the visible cells. It reads
# from `basket_intersections`, which only exists as a CTE inside `sql` above, so it
# is not runnable on its own as written.
# Intent: count how many closest points lie within 1 unit of their edge's way.
sanity_check = """
select
st_dwithin(e.way, b.closest_point, 1), count(1)
from basket_intersections b
join edges e on b.edge_id = e.id group by st_dwithin(e.way, b.closest_point, 1)
limit 10;
"""
db.execute(sql)


# Sketch of the edge-splitting step (pseudo-code, not executed):
# func (edge record, point_on_edge record):
#   var newid = sequence
#   insert into edges (node1, node2, node1_point, node2_point, distance)
#     edge.node1, newid, edge.node1_point, point_on_edge, st_distance(edge.node1_point, point_on_edge)
#     newid, edge.node2, point_on_edge, edge.node2_point, st_distance(point_on_edge, edge.node2_point))
#
#     

In [None]:
# Create a scratch table (`teststuff`) to test some assumptions. The table is dropped
# first if it already exists, so re-running this cell starts from an empty table.
sql = """
drop table if exists teststuff;
create table teststuff (
    id bigserial primary key,
    val varchar
    );
"""
db.raw_execute(sql)

In [None]:
# insert_node(edge, point, basketid): adds `point` as a new vertex and connects it
# with three new edges: point -> basket, edge.node1 -> point and edge.node2 -> point.
# Returns the WKT text of `point`.
#
# According to the tests, the function aborts in the middle if there is an exception,
# so the transaction is aborted, which is a good thing.
# TODO: complete this function so that we can modify the vertices
#
# Fixes compared to the first draft:
# - the endpoint lookup is restricted to the edge that was passed in (it had no
#   WHERE clause, so it read the endpoints of an arbitrary edge);
# - an unknown basket id raises instead of silently producing NULL geometries;
# - the smoke-test call below had ORDER BY in front of FROM, which is a syntax error.
function_sql = """
drop function if exists insert_node(edges, geometry, integer);
CREATE FUNCTION insert_node(edge edges, point geometry, basketid integer) RETURNS varchar AS $$
declare
    basket vertices%rowtype;
    pointid bigint;
    edgepoints record;
begin

    raise notice 'here, at the start';
    select * into basket from vertices where id = basketid;
    if not found then
        raise exception 'insert_node: no vertex with id %', basketid;
    end if;

    insert into vertices(geom) values (point) returning id into pointid;

    raise notice 'pointid %', pointid;

    -- geometries of the two endpoints of the edge that was passed in
    select v1.geom as node1, v2.geom as node2 into edgepoints from edges e
    inner join vertices v1 on e.node1 = v1.id
    inner join vertices v2 on e.node2 = v2.id
    where e.id = edge.id;


    -- new point <-> basket
    insert into edges(node1, node2, way, distance) values
    (pointid, basketid, st_makeline(point, basket.geom), st_distance(point, basket.geom));

    -- first endpoint of the original edge <-> new point
    insert into edges(node1, node2, way, distance) values
    (edge.node1, pointid, st_makeline(edgepoints.node1, point), st_distance(edgepoints.node1, point));

    -- second endpoint of the original edge <-> new point
    insert into edges(node1, node2, way, distance) values
    (edge.node2, pointid, st_makeline(edgepoints.node2, point), st_distance(edgepoints.node2, point));

    raise notice 'hello %', st_astext(point);
    -- insert into teststuff(val) values('hi');
    return st_astext(point);
end;
$$ language plpgsql;
"""

db.raw_execute(function_sql)

# Smoke test: split the edge with the highest id at the point (1, 1), for vertex 1.
db.raw_execute("""
select insert_node(edges.*, st_setsrid(st_makepoint(1,1), 3857), 1) from edges order by id desc limit 1;
""")

In [None]:
# NOTE(review): neither `r` nor `pd` is defined in the visible cells, so this cell
# fails on a fresh kernel. Presumably `r` is the result object of an earlier
# db.execute(...) call and `pd` is pandas — TODO confirm and define both explicitly.
# r.keys() is not the last expression of the cell, so its value is not displayed.
r.keys()
# Load all fetched rows into a DataFrame.
df = pd.DataFrame(r.fetchall())

In [None]:
# Display the DataFrame built in the previous cell.
df

In [None]:
# Create a variant of generate_series that always includes the end value: it returns
# generate_series(start, stop, step) plus `stop` itself, de-duplicated by the UNION
# and sorted ascending.
# This then allows us to easily get a new set of nodes located along an edge,
# including its far end.
function_sql = """
CREATE OR REPLACE FUNCTION generate_series_with_end(start numeric, stop numeric, step numeric)
RETURNS setof numeric AS $BODY$
begin
    return query (select generate_series(start,stop,step) as res union select stop as res) order by res asc;

    return;
end;
$BODY$
language plpgsql;
"""

db.raw_execute(function_sql)

# Example call with a step that does not divide the range evenly, so the end value
# (10) is only present because of the UNION.
db.execute("""
select generate_series_with_end(0, 10, 2.8);
""")

In [None]:
# Next step towards a table of 'sample points': for each edge, compute a list of
# points along its way. The way gets a linear measure running from 0 to `distance`
# (st_addmeasure), and st_locatealong picks the location at every measure produced by
# generate_series_with_end(0, distance, 5), i.e. every 5 units plus the end of the edge.
# This cell only previews the first 10 results as text; nothing is stored yet.
sql = """
with points as (
    select id, 
    st_locatealong(
        st_addmeasure(way, 0, distance), 
        generate_series_with_end(0, distance, 5)
    ) as pt
    from edges 
)
select id, st_astext(pt) from points limit 10;
 """
db.execute(sql)
# TODO: for each point here, inner join it with the original edge
# and insert the result into edges.


In [None]:
# Pairs of distinct baskets that lie within 10 units of each other, with both
# geometries shown as text in SRID 4326 and the distance between them; limited to 10
# rows. The cross join with b.id != b1.id lists every pair in both orders.
# NOTE(review): the query is only assigned here; it is not executed in the visible cells.
sql = """
select b.id, b1.id, st_astext(st_transform(b.geom, 4326)), st_astext(st_transform(b1.geom, 4326)), st_distance(b.geom, b1.geom) from baskets b cross join baskets b1 where b.id != b1.id and st_dwithin(b.geom, b1.geom, 10)  limit 10;
"""


# for each position in this list