# Debugging for external process run by Subprocess

In [1]:
from pymeos.db.psycopg import MobilityDB
from pymeos import *
from datetime import datetime, timedelta
import time
from collections import deque
from pympler import asizeof
import gc
from enum import Enum
import numpy as np
from shapely.geometry import Point
import math
import subprocess
import shutil
import os
import sys

DIRECTORY_PATH = os.getcwd()
MATRIX_DIRECTORY_PATH = f'{DIRECTORY_PATH}/matrices'

# AIS Danish maritime dataset
DATABASE_NAME = "mobilitydb"
TPOINT_TABLE_NAME = "processed_data"
TPOINT_ID_COLUMN_NAME = "MMSI"
TPOINT_COLUMN_NAME = "traj"


class Database_connector:
    """
    Singleton class used to connect to the MobilityDB database.
    """
    
    def __init__(self):
        try: 
            connection_params = {
            "host": "localhost",
            "port": 5432,
            "dbname": DATABASE_NAME,
            "user": "postgres",
            "password": "postgres"
            }
            self.table_name = TPOINT_TABLE_NAME
            self.id_column_name = TPOINT_ID_COLUMN_NAME
            self.tpoint_column_name = TPOINT_COLUMN_NAME                  
            self.connection = MobilityDB.connect(**connection_params)

            self.cursor = self.connection.cursor()

            self.cursor.execute(f"SELECT {self.id_column_name} FROM public.{self.table_name};")
            self.ids_list = self.cursor.fetchall()
            self.ids_list = self.ids_list[:int(len(self.ids_list)*PERCENTAGE_OF_OBJECTS)]
        except Exception as e:
            pass

  
    def get_subset_of_tpoints(self, pstart, pend, xmin, ymin, xmax, ymax):
        """
        For each object in the ids_list :
            Fetch the subset of the associated Tpoints between the start and end timestamps
            contained in the STBOX defined by the xmin, ymin, xmax, ymax.
        """
        try:
           
            ids_list = [ f"'{id[0]}'"  for id in self.ids_list]
            ids_str = ', '.join(map(str, ids_list))
          
            query = f"""
                    SELECT 
                        atStbox(
                            a.{self.tpoint_column_name}::tgeompoint,
                            stbox(
                                ST_MakeEnvelope(
                                    {xmin}, {ymin}, -- xmin, ymin
                                    {xmax}, {ymax}, -- xmax, ymax
                                    4326 -- SRID
                                ),
                                tstzspan('[{pstart}, {pend}]')
                            )
                        )
                    FROM public.{self.table_name} as a 
                    WHERE a.{self.id_column_name} in ({ids_str})
                        AND a.{self.tpoint_column_name} IS NOT NULL;
                    """
            self.cursor.execute(query)
            rows = self.cursor.fetchall()
            return rows
        except Exception as e:
            self.log(e)


    def get_min_timestamp(self):
        """
        Returns the min timestamp of the tpoints columns.

        """
        try:
            
            self.cursor.execute(f"SELECT MIN(startTimestamp({self.tpoint_column_name})) AS earliest_timestamp FROM public.{self.table_name};")
            return self.cursor.fetchone()[0]
        except Exception as e:
            pass

    def get_max_timestamp(self):
        """
        Returns the max timestamp of the tpoints columns.

        """
        try:
            self.cursor.execute(f"SELECT MAX(endTimestamp({self.tpoint_column_name})) AS latest_timestamp FROM public.{self.table_name};")
            return self.cursor.fetchone()[0]
        except Exception as e:
            pass


    def close(self):
        """
        Close the connection to the MobilityDB database.
        """
        self.cursor.close()
        self.connection.close()




FPS_DEQUEUE_SIZE = 5 # Length of the dequeue to calculate the average FPS
TIME_DELTA_DEQUEUE_SIZE =  10 # Length of the dequeue to keep the keys to keep in the buffer


PERCENTAGE_OF_OBJECTS = 1 # To not overload the memory, we only take a percentage of the ships in the database
TIME_DELTA_SIZE = 240 # Number of frames associated to one Time delta
GRANULARITY = timedelta(minutes=1) # Time delta between two frames
SRID = 4326
FPS = 60



  

In [2]:
db = Database_connector()

start_date = db.get_min_timestamp()
end_date = db.get_max_timestamp()
total_frames = math.ceil( (end_date - start_date) // GRANULARITY )

timestamps = [start_date + i * GRANULARITY for i in range(total_frames)]
timestamps = [dt.replace(tzinfo=None) for dt in timestamps]
timestamps_strings = [dt.strftime('%Y-%m-%d %H:%M:%S') for dt in timestamps]

TypeError: unsupported operand type(s) for -: 'NoneType' and 'NoneType'

In [4]:
x_min = -180
y_min = -90
x_max = 180
y_max = 90

start_frame= 0
end_frame = 479

arguments = [start_frame, end_frame, PERCENTAGE_OF_OBJECTS, x_min, y_min, x_max, y_max]
arguments = [str(arg) for arg in arguments]
arguments += [timestamps_strings[0]  , str(len(timestamps)), "MINUTE", MATRIX_DIRECTORY_PATH, DATABASE_NAME, TPOINT_TABLE_NAME, TPOINT_ID_COLUMN_NAME, TPOINT_COLUMN_NAME]
            

In [22]:
arguments = ['0', '9', '0.1', '-180', '-90', '180', '90', '2023-06-01 00:00:00', '86399', 'SECOND', '/home/ali/matrices/10_0.1', 'mobilitydb', 'processed_data', 'MMSI', 'traj']
arguments

['0',
 '9',
 '0.1',
 '-180',
 '-90',
 '180',
 '90',
 '2023-06-01 00:00:00',
 '86399',
 'SECOND',
 '/home/ali/matrices/10_0.1',
 'mobilitydb',
 'processed_data',
 'MMSI',
 'traj']

In [4]:
# Command to execute Program B
command = ['/usr/bin/python3', '/home/ali/QGIS-MobilityDB/experiment9_sql_optimization/generate_matrix_before_optimization.py', *arguments]
result = subprocess.run(command, capture_output=True, text=True)

In [5]:
result

CompletedProcess(args=['/usr/bin/python3', '/home/ali/QGIS-MobilityDB/experiment9_sql_optimization/generate_matrix_before_optimization.py', '0', '9', '0.1', '-180', '-90', '180', '90', '2023-06-01 00:00:00', '86399', 'SECOND', '/home/ali/matrices/10_0.1', 'mobilitydb', 'processed_data', 'MMSI', 'traj'], returncode=0, stdout='', stderr='')

In [6]:
loaded_matrix = np.load(f"/home/ali/matrices/matrix_{0}.npy", allow_pickle=True)

FileNotFoundError: [Errno 2] No such file or directory: '/home/ali/matrices/matrix_0.npy'

In [11]:
np.count_nonzero(loaded_matrix != 'POINT EMPTY')

1595

In [1]:
arguments = ['60', '119', '0.1', '-180', '-90', '180', '90', '2023-06-01 00:00:00', '86399', 'SECOND', '/home/ali/matrices/10_0.1', 'mobilitydb', 'pymeos_demo', 'MMSI', 'trajectory']
arguments

120 - 180

['60',
 '119',
 '0.1',
 '-180',
 '-90',
 '180',
 '90',
 '2023-06-01 00:00:00',
 '86399',
 'SECOND',
 '/home/ali/matrices/10_0.1',
 'mobilitydb',
 'pymeos_demo',
 'MMSI',
 'trajectory']

In [2]:
"""

File used to create the matrices for the time deltas between the begin_frame and end_frame.

The matrices are saved in the /home/ali/matrices/ folder.


            # arguments :[ 
            # 0: begin_frame, 
            # 1: end_frame, 
            # 2: PERCENTAGE_OF_OBJECTS, 
            # 3: x_min, 4: y_min, 5: x_max, 6: y_max, 
            # 7: start_timestamp, 
            # 8: total_frames, 
            # 9: granularity, 
            # 10: matrix_directory_path, 
            # 11: database_name, 
            # 12: table_name, 
            # 13: id_column_name, 
            # 14: tpoint_column_name]

To measure the size of matrices folder : du -sh --block-size=MB matrices            

"""

import numpy as np
from shapely.geometry import Point
from pymeos.db.psycopg import MobilityDB

from pymeos import *
import os
import sys
from datetime import timedelta, datetime
from pymeos import *
import time

logs = ""
now = time.time()


args = arguments
logs += f"Args: {args}\n"
begin_frame = int(args[0])
end_frame = int(args[1])
TIME_DELTA_SIZE = end_frame - begin_frame + 1
PERCENTAGE_OF_OBJECTS = float(args[2])


SRID = 4326


DATABASE_NAME = args[11]
TPOINT_TABLE_NAME = args[12]
TPOINT_ID_COLUMN_NAME = args[13]
TPOINT_COLUMN_NAME = args[14]



class Database_connector:
    """
    Singleton class used to connect to the MobilityDB database.
    """
    
    def __init__(self):
        try: 
            connection_params = {
            "host": "localhost",
            "port": 5432,
            "dbname": DATABASE_NAME,
            "user": "postgres",
            "password": "postgres"
            }
            self.table_name = TPOINT_TABLE_NAME
            self.id_column_name = TPOINT_ID_COLUMN_NAME
            self.tpoint_column_name = TPOINT_COLUMN_NAME               
            self.connection = MobilityDB.connect(**connection_params)

            self.cursor = self.connection.cursor()

            self.cursor.execute(f"SELECT {self.id_column_name} FROM public.{self.table_name};")
            self.ids_list = self.cursor.fetchall()
            self.ids_list = self.ids_list[:int(len(self.ids_list)*PERCENTAGE_OF_OBJECTS)]
        except Exception as e:
            pass

  
    def get_subset_of_tpoints(self, pstart, pend, xmin, ymin, xmax, ymax, time_granularity, start_date):
        """
        For each object in the ids_list :
            Fetch the subset of the associated Tpoints between the start and end timestamps
            contained in the STBOX defined by the xmin, ymin, xmax, ymax.
        """
        
        try:
            ids_list = [ f"'{id[0]}'"  for id in self.ids_list]
            ids_str = ', '.join(map(str, ids_list))

            if time_granularity == "SECOND":
                time_value = 1
            elif time_granularity == "MINUTE":
                time_value = 60
            
            # return [self.tpoint_column_name, self.id_column_name, ids_str, xmin, ymin, xmax, ymax, pstart, pend, time_granularity, start_date, time_value]
            query = f"""WITH trajectories as (
                    SELECT 
                        atStbox(
                            a.{self.tpoint_column_name}::tgeompoint,
                            stbox(
                                ST_MakeEnvelope(
                                    {xmin}, {ymin}, -- xmin, ymin
                                    {xmax}, {ymax}, -- xmax, ymax
                                    0 -- SRID
                                ),
                                tstzspan('[{pstart}, {pend}]')
                            )
                        ) as trajectory
                    FROM public.{self.table_name} as a 
                    WHERE a.{self.id_column_name} in ({ids_str})),

                    resampled as (

                    SELECT tsample(traj.trajectory, INTERVAL '1 {time_granularity}', TIMESTAMP '{start_date}')  AS resampled_trajectory
                        FROM 
                            trajectories as traj)
				
                    SELECT
                            EXTRACT(EPOCH FROM (startTimestamp(rs.resampled_trajectory) - '{start_date}'::timestamp))::integer / {time_value} AS start_index ,
                            EXTRACT(EPOCH FROM (endTimestamp(rs.resampled_trajectory) - '{start_date}'::timestamp))::integer / {time_value} AS end_index,
                            rs.resampled_trajectory
                    FROM resampled as rs ;"""
        
            self.cursor.execute(query)
       
            rows = self.cursor.fetchall()
            return rows
        except Exception as e:
            print(query)
            print(e)


    def close(self):
        """
        Close the connection to the MobilityDB database.
        """
        self.cursor.close()
        self.connection.close()


MATRIX_DIRECTORY_PATH = "/home/ali/matrices"
file_name = f"{args[10]}/matrix_{begin_frame}.npy"


  
Time_granularities = {
                    # "MILLISECOND" : timedelta(milliseconds=1),
                      "SECOND" : timedelta(seconds=1),
                      "MINUTE" : timedelta(minutes=1),
                    #   "HOUR" : timedelta(hours=1),
                    }


# check if file does't already exist

if not os.path.exists(file_name):
   
    pymeos_initialize()
    db = Database_connector()

    x_min = float(args[3])
    y_min = float(args[4])
    x_max = float(args[5])
    y_max = float(args[6])

    start_date = args[7]
    start_date = datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')


    total_frames = int(args[8])
    GRANULARITY = Time_granularities[args[9]]

    timestamps = []
    for i in range(total_frames): 
        timestamps.append(start_date + i*GRANULARITY)



    p_start = timestamps[begin_frame]
    p_end = timestamps[end_frame]
    # print(p_start, p_end, x_min, y_min, x_max, y_max)
    now_db = time.time()
    rows = db.get_subset_of_tpoints(p_start, p_end, x_min, y_min, x_max, y_max, args[9], start_date)    

    

In [11]:
logs += f"Time to fetch subset of tpoints: {time.time() - now_db} seconds\n"
        
empty_point_wkt = Point().wkt  # "POINT EMPTY"
matrix = np.full((len(rows), TIME_DELTA_SIZE), empty_point_wkt, dtype=object)

time_ranges = timestamps


In [3]:
len(rows)

582

In [27]:
i=1
rows[i]

(60,
 119,
 TGeomPointSeq({POINT(8.42333 55.4718)@2023-06-01 00:01:00+02, POINT(8.42333 55.4718)@2023-06-01 00:01:01+02, POINT(8.42333 55.4718)@2023-06-01 00:01:02+02, POINT(8.42333 55.4718)@2023-06-01 00:01:03+02, POINT(8.42333 55.4718)@2023-06-01 00:01:04+02, POINT(8.42333 55.4718)@2023-06-01 00:01:05+02, POINT(8.42333 55.4718)@2023-06-01 00:01:06+02, POINT(8.42333 55.4718)@2023-06-01 00:01:07+02, POINT(8.42333 55.4718)@2023-06-01 00:01:08+02, POINT(8.42333 55.4718)@2023-06-01 00:01:09+02, POINT(8.42333 55.4718)@2023-06-01 00:01:10+02, POINT(8.42333 55.4718)@2023-06-01 00:01:11+02, POINT(8.42333 55.4718)@2023-06-01 00:01:12+02, POINT(8.42333 55.4718)@2023-06-01 00:01:13+02, POINT(8.42333 55.4718)@2023-06-01 00:01:14+02, POINT(8.42333 55.4718)@2023-06-01 00:01:15+02, POINT(8.42333 55.4718)@2023-06-01 00:01:16+02, POINT(8.42333 55.4718)@2023-06-01 00:01:17+02, POINT(8.42333 55.4718)@2023-06-01 00:01:18+02, POINT(8.42333 55.4718)@2023-06-01 00:01:19+02, POINT(8.42333 55.4718)@2023-06-01

In [28]:
traj_resampled = rows[i][2]
traj_resampled

TGeomPointSeq({POINT(8.42333 55.4718)@2023-06-01 00:01:00+02, POINT(8.42333 55.4718)@2023-06-01 00:01:01+02, POINT(8.42333 55.4718)@2023-06-01 00:01:02+02, POINT(8.42333 55.4718)@2023-06-01 00:01:03+02, POINT(8.42333 55.4718)@2023-06-01 00:01:04+02, POINT(8.42333 55.4718)@2023-06-01 00:01:05+02, POINT(8.42333 55.4718)@2023-06-01 00:01:06+02, POINT(8.42333 55.4718)@2023-06-01 00:01:07+02, POINT(8.42333 55.4718)@2023-06-01 00:01:08+02, POINT(8.42333 55.4718)@2023-06-01 00:01:09+02, POINT(8.42333 55.4718)@2023-06-01 00:01:10+02, POINT(8.42333 55.4718)@2023-06-01 00:01:11+02, POINT(8.42333 55.4718)@2023-06-01 00:01:12+02, POINT(8.42333 55.4718)@2023-06-01 00:01:13+02, POINT(8.42333 55.4718)@2023-06-01 00:01:14+02, POINT(8.42333 55.4718)@2023-06-01 00:01:15+02, POINT(8.42333 55.4718)@2023-06-01 00:01:16+02, POINT(8.42333 55.4718)@2023-06-01 00:01:17+02, POINT(8.42333 55.4718)@2023-06-01 00:01:18+02, POINT(8.42333 55.4718)@2023-06-01 00:01:19+02, POINT(8.42333 55.4718)@2023-06-01 00:01:20+02

In [29]:
start_index = rows[i][0] 
end_index = rows[i][1]
print(f" start index: {start_index}, end index: {end_index}")            

 start index: 60, end index: 119


In [17]:
values = [point.wkt for point in traj_resampled.values()]
values

['POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.42333 55.4718)',
 'POINT (8.423330952380953 55.4718)',
 'POINT (8.423331904761906 55.4718)',
 'POINT (8.423332857142857 55.4718)',
 'POINT (8.42333380952381 55.4718)',
 'POINT (8.423334761904762 55.4718)',
 'POINT (8.423335714285715 55.4718)',
 'POINT (8.423336

In [21]:
values = np.array(values)
values

array(['POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.423330952380953 55.4718)',
       'POINT (8.423331904761906 55.4718)',
       'POINT (8.423332857142857 55.4718)',
       'POINT (8.42333380952381 55.4718)',
     

In [22]:
values.shape

(60,)

In [30]:
matrix[i].shape

(60,)

In [39]:
matrix[i, start_index:end_index+1] = np.array([point.wkt for point in traj_resampled.values()])

ValueError: could not broadcast input array from shape (60,) into shape (0,)

In [46]:
start_index
end_index

119

In [44]:
matrix[i, 0:10]

array(['POINT EMPTY', 'POINT EMPTY', 'POINT EMPTY', 'POINT EMPTY',
       'POINT EMPTY', 'POINT EMPTY', 'POINT EMPTY', 'POINT EMPTY',
       'POINT EMPTY', 'POINT EMPTY'], dtype=object)

In [36]:
a  = np.array([point.wkt for point in traj_resampled.values()])

In [37]:
np.count_nonzero(a != 'POINT EMPTY')

60

In [38]:
a

array(['POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.42333 55.4718)',
       'POINT (8.42333 55.4718)', 'POINT (8.423330952380953 55.4718)',
       'POINT (8.423331904761906 55.4718)',
       'POINT (8.423332857142857 55.4718)',
       'POINT (8.42333380952381 55.4718)',
     

In [25]:
matrix.shape

(582, 60)

In [12]:


for i in range(len(rows)):
    if rows[i][2] is not None:
        try:
            traj_resampled = rows[i][2]

            start_index = rows[i][0] 
            end_index = rows[i][1]
            matrix[i, start_index:end_index+1] = np.array(traj_resampled.values())
    
        except:
            continue


# np.save(file_name, matrix)

# db.close()
# pymeos_finalize()
# total_time = time.time() - now
# frames_for_30_fps= 30 * total_time
# print(f"================================================================     Matrix {begin_frame} created in {total_time} seconds, {frames_for_30_fps} frames for 30 fps animation.")
# logs += f"time to create and fill the matrix {begin_frame}: {total_time} seconds\n"


In [26]:
np.count_nonzero(matrix != 'POINT EMPTY')

0