In [1]:
from functools import reduce
from io import BytesIO
from pathlib import Path

import networkx as nx
import polars as pl
import psycopg2
from locus.utils.pl_utils import batch_iter
from tqdm import tqdm

%matplotlib widget

In [2]:
conn = psycopg2.connect(host="localhost", dbname="ldogi", user="postgres", password="1425869")

In [3]:
# Open a cursor to perform database operations
cur = conn.cursor()

# Execute a query
cur.execute("SELECT * FROM dataset")

# Retrieve query results
records = cur.fetchall()

In [4]:
records

[]

In [5]:
PROJECT_ROOT = Path().cwd().parent
PROCESSED_DATA_DIR = PROJECT_ROOT / "data" / "processed"

In [6]:
df = pl.scan_parquet(PROCESSED_DATA_DIR / "LDoGI/shards/shard_1.parquet")
print(df.head().collect())
c = df.select(pl.len()).collect()["len"][0] # count
c

shape: (5, 4)
┌───────┬────────────┬─────────────┬───────────────────────────────────┐
│ id    ┆ latitude   ┆ longitude   ┆ image                             │
│ ---   ┆ ---        ┆ ---         ┆ ---                               │
│ i64   ┆ f64        ┆ f64         ┆ binary                            │
╞═══════╪════════════╪═════════════╪═══════════════════════════════════╡
│ 29999 ┆ -34.574164 ┆ -58.453917  ┆ b"\xff\xd8\xff\xe0\x00\x10JFIF\x… │
│ 30000 ┆ 47.608571  ┆ -122.340247 ┆ b"\xff\xd8\xff\xe0\x00\x10JFIF\x… │
│ 30001 ┆ 43.26605   ┆ -79.90498   ┆ b"\xff\xd8\xff\xe0\x00\x10JFIF\x… │
│ 30002 ┆ 42.267385  ┆ -71.807584  ┆ b"\xff\xd8\xff\xe0\x00\x10JFIF\x… │
│ 30003 ┆ 57.040974  ┆ 8.495188    ┆ b"\xff\xd8\xff\xe0\x00\x10JFIF\x… │
└───────┴────────────┴─────────────┴───────────────────────────────────┘


30000

In [13]:
row_iter = df.head().collect().iter_rows(named=True)

In [16]:
row = next(row_iter)

In [17]:
row

{'id': 30000,
 'latitude': 47.608571,
 'longitude': -122.340247,
 'image': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x08\x06\x06\x07\x06\x05\x08\x07\x07\x07\t\t\x08\n\x0c\x14\r\x0c\x0b\x0b\x0c\x19\x12\x13\x0f\x14\x1d\x1a\x1f\x1e\x1d\x1a\x1c\x1c $.\' ",#\x1c\x1c(7),01444\x1f\'9=82<.342\xff\xdb\x00C\x01\x08\t\t\x0c\x0b\x0c\x18\r\r\x182!\x1c!22222222222222222222222222222222222222222222222222\xff\xc0\x00\x11\x08\x01@\x01\xaa\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x1f\x00\x00\x01\x05\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\xff\xc4\x00\xb5\x10\x00\x02\x01\x03\x03\x02\x04\x03\x05\x05\x04\x04\x00\x00\x01}\x01\x02\x03\x00\x04\x11\x05\x12!1A\x06\x13Qa\x07"q\x142\x81\x91\xa1\x08#B\xb1\xc1\x15R\xd1\xf0$3br\x82\t\n\x16\x17\x18\x19\x1a%&\'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz\x83\x84\x85\x86\x87\x88\x89\x8a\x92\x93\x94\x95\x96\x97\x98\x99\x9a\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xb2\xb

In [18]:
# Latitude, longitude, and other data



latitude = row["latitude"]  # Replace with actual latitude
longitude = row["longitude"]  # Replace with actual longitude

# Create a BytesIO object to handle the binary data
binary_io = BytesIO(row["image"])

In [19]:
# Prepare an INSERT statement with parameters for latitude, longitude, and binary data
insert_str = "INSERT INTO dataset (latitude, longitude, image) VALUES (%s, %s, %s);"


In [20]:
# Execute the query
cur.execute(insert_str, (latitude, longitude, psycopg2.Binary(binary_io.read())))

# Commit the transaction
conn.commit()


In [22]:
cur.execute("SELECT * FROM dataset;")

# Retrieve query results
records = cur.fetchall()
print(records)

[(1, -34.574165, -58.45392, <memory at 0x7fe9201d6680>), (2, 47.60857, -122.34025, <memory at 0x7fe9201d6440>)]
