In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import psycopg2
from postgis.psycopg import register
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely.geometry import box
import project_path
from db_importer.settings import *
import scipy.stats
from scipy.stats import ks_2samp


class DatabaseConnection(object):
    """Context manager that opens a psycopg2 connection and yields a cursor.

    Connection settings are read from the module-level names ``DB_NAME``,
    ``DB_USER``, ``DB_PASSWORD``, ``DB_HOST`` and ``DB_PORT`` (presumably
    supplied by the ``db_importer.settings`` star import -- confirm).

    On leaving the ``with`` block the connection is committed if no exception
    occurred and rolled back otherwise. The cursor and the connection are
    closed in either case, even if commit/rollback itself raises.
    Exceptions raised inside the ``with`` block are not suppressed.
    """

    def __enter__(self):
        """Open the connection and return a cursor on it."""
        # Keyword arguments let psycopg2 do the quoting; a hand-built DSN
        # string breaks as soon as a value contains a quote or a backslash.
        self.conn = psycopg2.connect(
            dbname=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            host=DB_HOST,
            port=DB_PORT,
        )
        try:
            self.conn.autocommit = True

            # postgis.psycopg.register is applied to the connection
            # (presumably so PostGIS geometry values are adapted -- confirm).
            register(self.conn)
            self.cur = self.conn.cursor()
        except Exception:
            # Do not leak the freshly opened connection if setup fails.
            self.conn.close()
            raise

        return self.cur

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Commit or roll back, then always close cursor and connection."""
        try:
            if exc_type is None:
                self.conn.commit()
            else:
                self.conn.rollback()
        finally:
            # Nested finally: the connection is closed even if closing the
            # cursor fails.
            try:
                self.cur.close()
            finally:
                self.conn.close()

In [None]:
# Axis-aligned rectangles marking where a crossing starts and where it ends.
# The same corner values are used with SRID 4326 in the ride query of this
# notebook, so x is presumably longitude and y latitude -- confirm.
start_rect = box(minx=13.4155, miny=52.5213, maxx=13.4167, maxy=52.5219)
end_rect = box(minx=13.4149, miny=52.5223, maxx=13.4159, maxy=52.5229)

def get_SUMO_durations(x):
    """Return how long one simulated vehicle needs from start_rect to end_rect.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of a single vehicle (the notebook passes one ``vehicle_id``
        group at a time) with the columns ``vehicle_id``, ``vehicle_x``,
        ``vehicle_y`` and ``timestep_time``.

    Returns
    -------
    tuple
        ``(vehicle_id, duration)`` where ``duration`` is the ``timestep_time``
        of the first row inside ``end_rect`` minus that of the first row
        inside ``start_rect``; ``(None, None)`` if the vehicle never has a row
        inside one of the two rectangles.

    Notes
    -----
    NOTE(review): the first row inside ``end_rect`` is used even if it lies
    before the first row inside ``start_rect``, which would yield a negative
    duration -- confirm this cannot happen for the simulated flows.
    """

    def _inside(rect):
        # Boolean mask: one entry per row, True where the position is in rect.
        return x.apply(
            lambda row: rect.contains(Point(row['vehicle_x'], row['vehicle_y'])),
            axis=1,
        )

    in_start = _inside(start_rect)
    in_end = _inside(end_rect)
    vehicle_id = x.iloc[0].vehicle_id

    # Explicit check instead of a bare ``except``: only the expected
    # "vehicle did not pass both rectangles" case maps to (None, None), while
    # genuine errors (e.g. a missing column) and KeyboardInterrupt propagate.
    if not in_start.any() or not in_end.any():
        return None, None

    start = x.loc[in_start, 'timestep_time'].iloc[0]
    end = x.loc[in_end, 'timestep_time'].iloc[0]
    return (vehicle_id, end - start)

In [None]:
from statsmodels.distributions.empirical_distribution import ECDF


# Load every ride whose geometry intersects both rectangles.
# The rectangle corners are bound as query parameters taken from
# start_rect / end_rect so the SQL filter cannot drift away from the
# rectangles that get_ride_durations evaluates later on.
# ``bounds`` is (minx, miny, maxx, maxy), matching the two st_makepoint calls.
with DatabaseConnection() as cur:
    rect_params = tuple(float(v) for v in (*start_rect.bounds, *end_rect.bounds))
    cur.execute("""SELECT
            filename,
            ST_AsGeoJSON(geom_raw) :: json->'coordinates' AS coordinates,
            timestamps,
            velos
        FROM ride
        WHERE st_intersects(geom,
            st_setsrid(st_makebox2d(st_makepoint(%s, %s), st_makepoint(%s, %s)), 4326))
          AND st_intersects(geom, 
            st_setsrid(st_makebox2d(st_makepoint(%s, %s), st_makepoint(%s, %s)), 4326))
          """, rect_params)
    res = cur.fetchall()
    # One row per ride; column order follows the SELECT list above.
    df = pd.DataFrame(res, columns=['filename', 'coords', 'timestamps', 'velo'])

def get_ride_durations(arr, max_duration=150):
    """Return the seconds a recorded ride needs from start_rect to end_rect.

    Parameters
    ----------
    arr : sequence
        ``arr[0]`` is the ride's coordinate sequence (each entry indexable as
        ``coord[0]``, ``coord[1]``) and ``arr[1]`` the matching sequence of
        timestamps. Subtracting two timestamps must give an object with
        ``total_seconds()`` (e.g. ``datetime.datetime`` values).
    max_duration : float, optional
        Crossings that take longer than this many seconds are discarded.
        Defaults to 150, the cut-off previously hard-coded here.

    Returns
    -------
    float or None
        Seconds between the first point inside ``start_rect`` and the first
        later point inside ``end_rect``. ``None`` if the ride does not visit
        both rectangles in that order, if no timestamp exists for the matched
        point, or if the crossing exceeds ``max_duration``.
    """
    coords, timestamps = arr[0], arr[1]
    first = None
    last = None
    for i, coord in enumerate(coords):
        point = Point(coord[0], coord[1])
        if first is None:
            # Still looking for the entry point; the end rectangle is only
            # considered for points after it.
            if start_rect.contains(point):
                first = i
            continue
        if end_rect.contains(point):
            last = i
            break

    # Explicit checks instead of a bare ``except``: only the expected
    # "no crossing found" / "timestamps shorter than coords" cases map to
    # None, while unexpected type errors surface instead of silently
    # emptying the sample.
    if first is None or last is None or last >= len(timestamps):
        return None

    duration = (timestamps[last] - timestamps[first]).total_seconds()
    if duration > max_duration:
        return None
    return duration

# Crossing duration per recorded ride (None where no valid crossing exists),
# then the non-null durations as a plain array.
test = df.apply(
    lambda ride: get_ride_durations(ride[['coords', 'timestamps']].to_numpy()),
    axis=1,
)
simra_durations = test[test.notnull()].values

In [None]:
from scipy.stats import ks_2samp
import os
import pandas as pd
import matplotlib.pyplot as plt

IMPORT_DIRECTORY = '../../im_impl/alex/grid_search_results'

# Collect one (weights, trajectories) pair per result file found below
# IMPORT_DIRECTORY.
files = []
dfs = []
for root, dirnames, filenames in os.walk(IMPORT_DIRECTORY, followlinks=True):
    # os.walk yields entries in filesystem order; sorting makes the order of
    # `dfs` (and of everything derived from it) reproducible.
    dirnames.sort()
    for filename in sorted(filenames):
        if '.xml' in filename:
            continue
        # The weights are the 3rd to 6th '_'-separated parts of the file name.
        # The last part is cut at its first '.', which removes the extension
        # (and would also truncate a weight that contains a decimal point).
        parts = filename.split('_')
        weights = (parts[2], parts[3], parts[4], parts[5].split('.')[0])
        # Use a dedicated name instead of `df` so the SimRa rides loaded in
        # the earlier cell are not overwritten.
        sumo_df = pd.read_csv(os.path.join(root, filename), delimiter=';')
        # Keep only vehicles whose id starts with 'flow'; rows without an id
        # are dropped (na=False).
        sumo_df = sumo_df[sumo_df.vehicle_id.str.startswith('flow', na=False)]
        dfs.append((weights, sumo_df))

len(dfs)

In [None]:
# For every parameterization, compute the crossing duration of each simulated
# vehicle and keep the ones that actually completed the crossing.
durations_arr = []
for weights, sumo_df in dfs:
    # Iterate over the groups explicitly: get_SUMO_durations reads the
    # `vehicle_id` column, which newer pandas versions no longer pass to
    # functions called through groupby(...).apply().
    per_vehicle = pd.Series(
        [get_SUMO_durations(trajectory)[1] for _, trajectory in sumo_df.groupby('vehicle_id')],
        dtype=float,
    )
    # Vehicles without a crossing yield None (NaN here) and are dropped.
    durations_arr.append((weights, per_vehicle.dropna().values))

In [None]:
# Bare expression: the notebook displays the list of (weights, durations)
# tuples built in the previous cell.
durations_arr

In [None]:
### The higher the KS-test p-value (third column of `res`), the better the fit. However, it should be noted that
### the number of cyclists that passed the intersection successfully (second column of `res`) should also be high.
### Here, the best fit is ('2.00', '1.00', '1.00', '15'); however, only 7 cyclists managed
### to pass the intersection. Therefore, this parameterization cannot be considered sufficient.

### Before adopting a specific parameterization for the intersection model, one should also check
### the evaluation results for other scenarios with this notebook.

# Compare every simulated duration sample with the recorded SimRa durations
# using a two-sample Kolmogorov-Smirnov test.
rows = []
for weights, sumo_durations in durations_arr:
    if len(sumo_durations) and len(simra_durations):
        p_value = ks_2samp(sumo_durations, simra_durations).pvalue
    else:
        # ks_2samp rejects empty samples; record "no result" instead of
        # aborting the whole evaluation when no vehicle passed.
        p_value = np.nan
    rows.append((weights, len(sumo_durations), p_value))

# Columns: weights tuple, number of vehicles that passed, KS-test p-value.
# The object array is filled cell by cell because np.array() on such mixed
# rows (tuple, int, float) is ragged and fails on recent NumPy versions.
res = np.empty((len(rows), 3), dtype=object)
for i, (weights, n_passed, p_value) in enumerate(rows):
    res[i, 0] = weights
    res[i, 1] = n_passed
    res[i, 2] = p_value

# Highest p-value first; rows without a p-value (NaN) end up last.
res[np.argsort(-res[:, 2].astype(float), kind='stable')]