In [1]:
import sys
sys.path.append('..')

import attack

In [2]:
import geopandas as gp
import pandas as pd
from tqdm import tqdm
import numpy as np
import libpysal
import itertools
from joblib import Parallel, delayed
import random


In [3]:
# load data frames (~ 40 secs)
import load_data_module

In [4]:
# Load preprocessed data
raw_full_trip_gdf, raw_trip_sp_gdf, raw_trip_ep_gdf, tesselation_gdf = load_data_module.raw_full_trip_gdf, load_data_module.raw_trip_sp_gdf, load_data_module.raw_trip_ep_gdf, load_data_module.tesselation_gdf
assert len(raw_full_trip_gdf) == len(raw_trip_sp_gdf) == len(raw_trip_ep_gdf)

In [5]:
# # Select n random person ids from the dataset
# n_users = 4
# raw_full_trip_gdf, raw_trip_sp_gdf, raw_trip_ep_gdf = attack.select_n_random_users_from_dataframes(n_users, raw_full_trip_gdf, raw_trip_sp_gdf, raw_trip_ep_gdf)

## Evaluation Functions

# Merge Start Points (SP) and End Points (EP) with Tessellation

In [6]:
gdf_sp, gdf_ep = attack.match_boundary_points_with_tessellation(raw_trip_sp_gdf, raw_trip_ep_gdf, tesselation_gdf)

# Extract Full Trips that Start and End within Tessellation Area

In [7]:
full_trip_gdf, trip_sp_gdf, trip_ep_gdf, gdf_sp, gdf_ep = attack.extract_trips_that_start_end_in_tessellation(raw_full_trip_gdf, raw_trip_sp_gdf, raw_trip_ep_gdf, gdf_sp, gdf_ep)


Number of trips that start and end wihin tessellation area: 996
Number of trips outside and therefore dropped: 28


## Build mapping of trip chains

In [8]:
mapping_cont_trips = attack.build_trip_chain_mapping(gdf_sp, gdf_ep)

100%|██████████| 996/996 [00:17<00:00, 57.09it/s]


In [9]:
attack.evaluate_trip_chaining(mapping_cont_trips, full_trip_gdf)

Number of edges (matched) between trips: 340
Number of wrong matches: 12


## Merge trips according to matching

In [10]:
full_trips_concat_gdf, trip_concat_dict = attack.merge_trips_from_matching(gdf_sp, mapping_cont_trips, full_trip_gdf)

Building trip chains...


100%|██████████| 996/996 [00:00<00:00, 10060.68it/s]


Done.
Merging trips...


100%|██████████| 996/996 [00:14<00:00, 69.70it/s] 


Done.
Number of trips that were matched at least once: 985/996
Concatenating MERGED and UNMERGED trips...
Done.


In [11]:
gdf_sp_concat, trip_sp_gdf_concat, gdf_ep_concat, trip_ep_gdf_concat = attack.extract_concatenated_trips(full_trips_concat_gdf, gdf_sp, trip_sp_gdf, gdf_ep, trip_ep_gdf)

## Build Clustering after Concatenation Step

In [12]:
clustering_concat = attack.build_clustering_after_concatenation(full_trips_concat_gdf, trip_concat_dict, full_trip_gdf)

Number of unique clusters: 667


# Build Home Locations (HL)

## From Start Points (SPs)

We use the SP-tessellation matching that still contains all SP (and potential HL), and not just the SP and EP of the concatenated trips. We do this, because we do not want loose potential HL contributed of substrip concatenated in a chain.

In [13]:
gdf_hl_combined_sp = attack.build_hl_from_start_points(gdf_sp)

 There are 60 disconnected components.
 There are 36 islands with ids: 1, 2, 20, 21, 28, 29, 30, 31, 35, 36, 37, 38, 39, 43, 44, 54, 55, 62, 63, 64, 66, 69, 77, 78, 79, 80, 81, 84, 93, 94, 112, 113, 130, 133, 134, 135.


## From End Points (EPs)

In [14]:
gdf_hl_combined_ep = attack.build_hl_from_end_points(gdf_ep)

 There are 104 disconnected components.
 There are 40 islands with ids: 4, 20, 27, 38, 39, 68, 80, 85, 86, 93, 94, 95, 96, 98, 99, 107, 112, 113, 114, 123, 126, 133, 134, 137, 152, 159, 173, 175, 178, 179, 196, 197, 218, 228, 251, 259, 261, 271, 286, 290.


## Merge (concatenate)

In [15]:
gp_combined, HL_table = attack.concatenate_hl(gdf_hl_combined_sp, gdf_hl_combined_ep)

 There are 126 disconnected components.
 There are 53 islands with ids: 4, 29, 52, 78, 79, 126, 146, 147, 154, 155, 164, 176, 177, 179, 189, 190, 191, 194, 209, 216, 225, 226, 229, 240, 254, 264, 277, 284, 285, 295, 302, 303, 315, 316, 342, 343, 348, 349, 354, 356, 363, 374, 375, 377, 390, 392, 394, 397, 405, 406, 424, 425, 444.


Count unique PERSON_IDs per HL:  PERSON_ID
1            89
2            28
3             6
4             2
5             1
dtype: int64
Number of users for which at least on Home Location has been identified:  54
Number of unique HL tiles: 126


# Match trips with Home Location tiles

## Match concatenated trips

In [16]:
HL_table_se_concat, unmatched_trips, double_assigned_trips, nr_unmatched = attack.match_trips_to_HL(gp_combined, HL_table, trip_sp_gdf_concat, trip_ep_gdf_concat, full_trips_concat_gdf)

Number of unmatched trajectories (concatenated) that do neither start nor end in a HL tile: 88/667
Number of trajectories (concatenated) that start AND end in a HL tile: 337/667
Number of trips that match different HL tiles with their SP and EP: 164


## Assign double matched trips to one unique HL

Iterate over all double matched trips and compare them to all other trips that are uniquely assigned in their respective potential HLs that they have been matched with. Then take the HL with the single maximum lcss score between the trip under question and any trip of the assigned HL tiles.

In [17]:
HL_table_trips_concat = attack.assign_double_matched_trips_to_unique_hl(HL_table_se_concat, full_trips_concat_gdf, unmatched_trips, double_assigned_trips, nr_unmatched)


[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:   10.5s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:   11.8s
[Parallel(n_jobs=-2)]: Done  18 tasks      | elapsed:   14.9s
[Parallel(n_jobs=-2)]: Done  27 tasks      | elapsed:   17.3s
[Parallel(n_jobs=-2)]: Done  36 tasks      | elapsed:   19.8s
[Parallel(n_jobs=-2)]: Done  47 tasks      | elapsed:   22.5s
[Parallel(n_jobs=-2)]: Done  58 tasks      | elapsed:   27.2s
[Parallel(n_jobs=-2)]: Done  71 tasks      | elapsed:   30.5s
[Parallel(n_jobs=-2)]: Done  84 tasks      | elapsed:   33.1s
[Parallel(n_jobs=-2)]: Done  99 tasks      | elapsed:   36.2s
[Parallel(n_jobs=-2)]: Done 114 tasks      | elapsed:   40.3s
[Parallel(n_jobs=-2)]: Done 131 tasks      | elapsed:   45.8s
[Parallel(n_jobs=-2)]: Done 148 tasks      | elapsed:   49.4s
[Parallel(n_jobs=-2)]: Done 167 tasks      | elapsed:   53.7s
[Parallel(n_jobs=-2)]: Done 186 tasks      | elapsed:   

## Get trajectories that happened during the same time

In [18]:
full_trips_concat_gdf_overlap_dict = attack.getTripOverlaps(full_trips_concat_gdf)

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-2)]: Done  18 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-2)]: Done  27 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-2)]: Done  36 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-2)]: Done  47 tasks      | elapsed:   11.8s
[Parallel(n_jobs=-2)]: Done  58 tasks      | elapsed:   14.4s
[Parallel(n_jobs=-2)]: Done  71 tasks      | elapsed:   17.5s
[Parallel(n_jobs=-2)]: Done  84 tasks      | elapsed:   20.1s
[Parallel(n_jobs=-2)]: Done  99 tasks      | elapsed:   23.0s
[Parallel(n_jobs=-2)]: Done 114 tasks      | elapsed:   25.8s
[Parallel(n_jobs=-2)]: Done 131 tasks      | elapsed:   29.0s
[Parallel(n_jobs=-2)]: Done 148 tasks      | elapsed:   32.2s
[Parallel(n_jobs=-2)]: Done 167 tasks      | elapsed:   35.6s
[Parallel(n_jobs=-2)]: Done 186 tasks      | elapsed:   

## Create clustering after HL assignment step

In [28]:
def getOverlappingTrips(traj_id_list, full_trips_concat_gdf_overlap_dict):
    """This function finds the overlapping trips for a list of trajectory IDs.

    Args:
        traj_id_list (_type_): List of trajectory IDs.
    """
    overlapping_trips = [item for sublist in [full_trips_concat_gdf_overlap_dict[t] for t in traj_id_list] for item in sublist] # we first get a list of lists and then flatten it
    return overlapping_trips

def findLargestNonSimultaneousSubset(traj_id_list, full_trips_concat_gdf_overlap_dict, RANDOMIZED_SEARCH_THRESHOLD=30, RANDOMIZED_SEARCH_ITERATIONS=1000):
    """This function finds the largest subset of trajectories that are not simultaneous. It uses a determinitic algorithm if the length of the trajectory ID list is smaller than a threshold and a randomized algorithm if the length is larger than the given threshold.

    Args:
        traj_id_list (list): List of trajectory IDs.

    Returns:
        list: List of trajectory IDs that are not simultaneous.
    """
    len_traj_id_list = len(traj_id_list)

    # Deterministic (optimal) algorithm
    if len_traj_id_list <= RANDOMIZED_SEARCH_THRESHOLD:
        # We create a list of all possible subsets of the trajectory ID list with decreasing length
        # We do this iteratively to not overload the memory
        for i in range(len_traj_id_list):
            # Create a list of all possible subsets of the trajectory ID list with length len_traj_id_list - i
            subsets = list(itertools.combinations(traj_id_list, len_traj_id_list - i))
        
            # Sort the list by length of the subsets
            subsets.sort(key=len, reverse=True)

            # Loop through the list of subsets
            for subset in subsets:
                # get all trips that do overlap in time with any of the trips in subset
                overlapping_trips = getOverlappingTrips(subset, full_trips_concat_gdf_overlap_dict)

                # Check if the subset is not simultaneous
                if all([t not in overlapping_trips for t in subset]):
                    # If so, return the subset as list
                    return list(subset)

    # Randomized algorithm (numeric approximation)
    else:
        def randomized_subset_search(traj_id_list):
            subsets = []
            candidates = traj_id_list.copy()
            id = candidates.pop(random.randrange(len(candidates)))
            subset = [id]
            while candidates:
                next = candidates.pop(random.randrange(len(candidates)))

                if all([t not in getOverlappingTrips(subset, full_trips_concat_gdf_overlap_dict) for t in subset]):
                    subset.append(next)
            
            subsets.append(subset)
            return max(subsets, key=len)
        
        # We run the randomized subset search 100 times and return the longest subset
        print(f'Running randomized subset for search for {RANDOMIZED_SEARCH_ITERATIONS} iterations with {len(traj_id_list)} trajectories...')
        result = Parallel(n_jobs=-2, verbose=10)(delayed(randomized_subset_search)(traj_id_list) for _ in range(RANDOMIZED_SEARCH_ITERATIONS))
        print('Done. Length of longest subset: ', len(max(result, key=len)))
        return max(result, key=len)


def build_clustering_after_HL_assignment(HL_table_trips_concat, full_trip_gdf, trip_concat_dict, full_trips_concat_gdf_overlap_dict):
    # This creates the array with clustering IDs after the HL assignment step
    clustering_after_HL = {}
    HL_table_dict = (HL_table_trips_concat.groupby('HL_ID')
        .apply(lambda x: list(dict(x.TRIP_ID).values()))
        .to_dict())

    for index, HL in tqdm(enumerate(HL_table_dict), total=len(HL_table_dict)):
        # Skip HL_ID -1, we will assign a clustering ID to these trips later
        if HL == -1:
            continue

        print(len(HL_table_dict[HL]))

        # find the largest subset of trips that are not simultaneous
        non_simultaneous_subset = findLargestNonSimultaneousSubset(HL_table_dict[HL], full_trips_concat_gdf_overlap_dict)

        # assign hl_id -1 to all trips that are not part of the largest subset
        for trip in HL_table_dict[HL]:
            if trip not in non_simultaneous_subset:
                HL_table_dict[-1].append(trip)

        # Loop through all trips that are assigned to this HL_ID and assign the same clustering ID to all of them
        for trip in non_simultaneous_subset:
            clustering_after_HL[attack.getIndexInList(trip, full_trip_gdf)] = index

            # Check if this trip is a concatenated trip and assign the same clustering ID to all trips that are part of the concatenated trip
            if trip in trip_concat_dict:
                for t in trip_concat_dict[trip]:
                    clustering_after_HL[attack.getIndexInList(t, full_trip_gdf)] = index

    # Assign clustering IDs to all hl_id -1 trips (these are the trips that were not successfully assigned to any HL_ID)
    for index, unm_trip in enumerate(HL_table_dict[-1]):
        clustering_after_HL[attack.getIndexInList(unm_trip, full_trip_gdf)] = index + len(HL_table_dict) - 1 # -1 because we don't want to count the -1 HL_ID in the length of HL_table_dict

        # Check if this trip is a concatenated trip and assign the same clustering ID to all trips that are part of the concatenated trip
        if unm_trip in trip_concat_dict:
            for t in trip_concat_dict[unm_trip]:
                clustering_after_HL[attack.getIndexInList(t, full_trip_gdf)] = index + len(HL_table_dict) - 1

    return clustering_after_HL, HL_table_dict

In [35]:
clustering_after_HL, HL_table_dict = build_clustering_after_HL_assignment(HL_table_trips_concat, 
    full_trip_gdf, 
    trip_concat_dict, 
    full_trips_concat_gdf_overlap_dict)

 16%|█▋        | 14/86 [00:00<00:00, 131.65it/s]

7
1
1
1
1
18
10
4
7
15
4
4
2
1
1
1
2
24
1
3
2
23
65
Running randomized subset for search for 1000 iterations with 65 trajectories...


[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-2)]: Done  18 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.1652s.) Setting batch_size=2.
[Parallel(n_jobs=-2)]: Done  27 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-2)]: Done  36 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0177s.) Setting batch_size=4.
[Parallel(n_jobs=-2)]: Done  52 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0161s.) Setting batch_size=8.
[Parallel(n_jobs=-2)]: Done  78 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-2)]: Done 134 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0593s.) Setting batch_size=16.
[Parallel(n_jobs=-2)]: Done 238 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-2)]: Batch comp

Done. Length of longest subset:  27
1
3
1
3
17
6
6
6
1
1
5
8
1
9
11
2
2
4
3
1
12
4
2
4
36
Running randomized subset for search for 1000 iterations with 36 trajectories...


[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0050s.) Setting batch_size=2.
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0173s.) Setting batch_size=4.
[Parallel(n_jobs=-2)]: Done  22 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Done  40 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0180s.) Setting batch_size=8.
[Parallel(n_jobs=-2)]: Done  74 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0560s.) Setting batch_size=16.
[Parallel(n_jobs=-2)]: Done 138 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0688s.) Setting batch_size=32.
[Parallel(n_jobs=-2)]: Done 242 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-2)]: Done 466 tasks      | elapsed:    0.2s
[Parallel(n

Done. Length of longest subset:  26
4
1
3
2
2
3
6
2
1
5
3
1
3
5
23
12
1
22
19
1
3
1
9
13
4
23


100%|██████████| 86/86 [00:03<00:00, 24.70it/s]


2
6
9
10
12
2
1
1
1
8
8


In [30]:
print("Clustering results after concatenation step:")
print(f"Number of unique clusters: {len(set(clustering_concat))}")
attack.evaluate(clustering_concat, full_trip_gdf)

clustering_HL = list(dict(sorted(clustering_after_HL.items())).values())
print("\nClustering results after HL matching step:")
print(f"Number of unique clusters: {len(set(clustering_HL))}")
attack.evaluate(clustering_HL, full_trip_gdf)

Clustering results after concatenation step:
Number of unique clusters: 667
Homogeneity: 0.995
Completeness: 0.578
V-measure: 0.731
Rand index: 0.971
ARI: 0.061
MI: 3.684
NMI: 0.731
AMI: 0.267

Clustering results after HL matching step:
Number of unique clusters: 227
Homogeneity: 0.908
Completeness: 0.739
V-measure: 0.814
Rand index: 0.978
ARI: 0.524
MI: 3.362
NMI: 0.814
AMI: 0.691


## Assign Trips Without Match

In [31]:
clustering_after_double_assign_HL = attack.assign_trips_without_match(
    clustering_after_HL, HL_table_dict, 
    full_trips_concat_gdf, 
    full_trips_concat_gdf_overlap_dict, 
    full_trip_gdf, 
    trip_concat_dict,
    SIM_THRESH_FOR_NO_MATCH=0.25)


Comparing trips that were not assigned to any HL_ID with trips that were assigned to a HL_ID...


  1%|          | 1/143 [00:05<12:30,  5.29s/it]

existing match and assign cluster id 111.0 to trip 985302


  1%|▏         | 2/143 [00:27<36:03, 15.34s/it]

existing match and assign cluster id 18.0 to trip 988341


  3%|▎         | 4/143 [01:07<40:16, 17.38s/it]

existing match and assign cluster id 43.0 to trip 991154


  3%|▎         | 5/143 [01:40<52:48, 22.96s/it]

existing match and assign cluster id 93.0 to trip 993052


  4%|▍         | 6/143 [02:07<55:07, 24.15s/it]

trip overlaps in time with other trips in the same HL_ID


  5%|▍         | 7/143 [02:39<1:00:49, 26.84s/it]

trip overlaps in time with other trips in the same HL_ID


  6%|▋         | 9/143 [03:07<45:01, 20.16s/it]  

existing match and assign cluster id 113.0 to trip 985360


  7%|▋         | 10/143 [03:24<43:01, 19.41s/it]

trip overlaps in time with other trips in the same HL_ID


  8%|▊         | 11/143 [03:44<43:12, 19.64s/it]

trip overlaps in time with other trips in the same HL_ID


  8%|▊         | 12/143 [03:55<37:08, 17.01s/it]

trip overlaps in time with other trips in the same HL_ID


 10%|▉         | 14/143 [05:01<56:36, 26.33s/it]

no match and assign new cluster id 228 to trips 1007064 1007359


 11%|█         | 16/143 [05:37<47:12, 22.31s/it]

existing match and assign cluster id 116.0 to trip 992968


 12%|█▏        | 17/143 [05:47<39:07, 18.63s/it]

trip overlaps in time with other trips in the same HL_ID


 13%|█▎        | 18/143 [05:52<30:15, 14.52s/it]

trip overlaps in time with other trips in the same HL_ID


 13%|█▎        | 19/143 [05:54<22:24, 10.84s/it]

trip overlaps in time with other trips in the same HL_ID


 15%|█▍        | 21/143 [06:05<16:15,  8.00s/it]

existing match and assign cluster id 43.0 to trip 991152


 15%|█▌        | 22/143 [06:14<16:47,  8.33s/it]

trip overlaps in time with other trips in the same HL_ID


 17%|█▋        | 24/143 [06:24<12:49,  6.46s/it]

existing match and assign cluster id 73.0 to trip 985308


 18%|█▊        | 26/143 [06:39<13:56,  7.15s/it]

trip overlaps in time with other trips in the same HL_ID


 19%|█▉        | 27/143 [06:49<15:03,  7.79s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 1.0, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.0, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.0, 88.0: 0.0, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 1.0, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 111.0: 0.0, 113.0: 0.0, 116.0: 0.0, 117.0: 0.0, 118.0: 0.0, 119.0: 0.0, 121.0: 0.0, 122.0: 0.0, 1

 20%|█▉        | 28/143 [06:52<12:11,  6.37s/it]

Done. Second highest LCSS score is for HL_ID -1.0
trip overlaps in time with other trips in the same HL_ID


 20%|██        | 29/143 [07:02<14:19,  7.54s/it]

trip overlaps in time with other trips in the same HL_ID


 21%|██        | 30/143 [07:05<11:30,  6.11s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 1.0, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.0, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.0, 88.0: 0.0, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 1.0, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 111.0: 0.0, 113.0: 0.0, 116.0: 0.0, 117.0: 0.0, 118.0: 0.0, 119.0: 0.0, 121.0: 0.0, 122.0: 0.0, 1

 22%|██▏       | 31/143 [07:11<11:11,  5.99s/it]

Done. Second highest LCSS score is for HL_ID -1.0
trip overlaps in time with other trips in the same HL_ID


 22%|██▏       | 32/143 [07:27<16:37,  8.98s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 0.4666666666666667, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.0, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.0, 88.0: 0.0, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 0.4666666666666667, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 111.0: 0.0, 113.0: 0.0, 116.0: 0.0, 117.0: 0.0, 118.0: 0.0, 119.0: 

 23%|██▎       | 33/143 [07:29<12:39,  6.90s/it]

Done. Second highest LCSS score is for HL_ID 98.0
existing match and assign cluster id 98.0 to trip 985835


 24%|██▍       | 34/143 [07:46<18:23, 10.13s/it]

trip overlaps in time with other trips in the same HL_ID


 24%|██▍       | 35/143 [08:14<27:41, 15.38s/it]

existing match and assign cluster id 16.0 to trip 1006154


 25%|██▌       | 36/143 [08:45<35:44, 20.04s/it]

existing match and assign cluster id 116.0 to trip 1003014


 26%|██▌       | 37/143 [08:48<26:22, 14.93s/it]

trip overlaps in time with other trips in the same HL_ID


 27%|██▋       | 39/143 [09:00<18:15, 10.53s/it]

trip overlaps in time with other trips in the same HL_ID


 28%|██▊       | 40/143 [09:44<35:34, 20.72s/it]

no match and assign new cluster id 229 to trips 1006134 1009192


 29%|██▊       | 41/143 [10:08<36:53, 21.70s/it]

trip overlaps in time with other trips in the same HL_ID


 29%|██▉       | 42/143 [10:14<28:10, 16.74s/it]

no match and assign new cluster id 230 to trips 1006125 990036


 30%|███       | 43/143 [10:17<21:01, 12.62s/it]

existing match and assign cluster id 32.0 to trip 992952


 31%|███       | 44/143 [10:31<21:52, 13.26s/it]

trip overlaps in time with other trips in the same HL_ID


 31%|███▏      | 45/143 [10:34<16:40, 10.20s/it]

trip overlaps in time with other trips in the same HL_ID


 33%|███▎      | 47/143 [11:13<23:19, 14.58s/it]

existing match and assign cluster id 98.0 to trip 1005143


 34%|███▎      | 48/143 [11:42<30:03, 18.99s/it]

no match and assign new cluster id 231 to trips 1009606 1009672


 34%|███▍      | 49/143 [11:48<23:34, 15.05s/it]

existing match and assign cluster id 73.0 to trip 1009584


 35%|███▍      | 50/143 [11:56<20:11, 13.03s/it]

trip overlaps in time with other trips in the same HL_ID


 36%|███▌      | 51/143 [12:15<22:37, 14.76s/it]

trip overlaps in time with other trips in the same HL_ID


 36%|███▋      | 52/143 [12:28<21:29, 14.17s/it]

trip overlaps in time with other trips in the same HL_ID


 37%|███▋      | 53/143 [12:43<22:00, 14.67s/it]

trip overlaps in time with other trips in the same HL_ID


 39%|███▉      | 56/143 [14:26<34:27, 23.76s/it]

no match and assign new cluster id 232 to trips 987927 987720


 41%|████      | 58/143 [14:52<26:54, 19.00s/it]

trip overlaps in time with other trips in the same HL_ID


 42%|████▏     | 60/143 [15:01<15:41, 11.34s/it]

existing match and assign cluster id 71.0 to trip 1005197


 43%|████▎     | 61/143 [15:24<20:17, 14.84s/it]

existing match and assign cluster id 104.0 to trip 985637


 43%|████▎     | 62/143 [16:35<42:48, 31.71s/it]

no match and assign new cluster id 233 to trips 1009686 993240


 44%|████▍     | 63/143 [16:39<31:15, 23.45s/it]

trip overlaps in time with other trips in the same HL_ID


 45%|████▍     | 64/143 [16:50<26:10, 19.88s/it]

trip overlaps in time with other trips in the same HL_ID


 45%|████▌     | 65/143 [16:54<19:39, 15.12s/it]

trip overlaps in time with other trips in the same HL_ID


 46%|████▌     | 66/143 [16:57<14:42, 11.46s/it]

trip overlaps in time with other trips in the same HL_ID


 47%|████▋     | 67/143 [17:22<19:41, 15.54s/it]

existing match and assign cluster id 50.0 to trip 993129


 48%|████▊     | 68/143 [17:31<16:48, 13.44s/it]

existing match and assign cluster id 53.0 to trip 995006


 48%|████▊     | 69/143 [18:09<25:43, 20.85s/it]

existing match and assign cluster id 61.0 to trip 1011849


 49%|████▉     | 70/143 [18:31<25:51, 21.25s/it]

existing match and assign cluster id 125.0 to trip 989059
{-1.0: 0.33916083916083917, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.08579881656804733, 9.0: 0.011834319526627219, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.05621301775147929, 19.0: 0.04585798816568047, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.004437869822485207, 28.0: 0.03994082840236687, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0029585798816568047, 33.0: 0.33916083916083917, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.06656804733727811, 51.0: 0.0, 52.0: 0.0, 53.0: 0.19230769230769232, 55.0: 0.0, 56.0: 0.05325443786982249, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.08727810650887574, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.004437869822485207, 79.0: 0.0029585798816568047, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.

 50%|████▉     | 71/143 [18:42<21:36, 18.01s/it]

Done. Second highest LCSS score is for HL_ID 33.0
existing match and assign cluster id 33.0 to trip 994383


 51%|█████     | 73/143 [18:53<13:21, 11.46s/it]

existing match and assign cluster id 33.0 to trip 985638


 52%|█████▏    | 74/143 [19:11<15:21, 13.35s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 0.47985347985347987, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.3373205741626794, 9.0: 0.019138755980861243, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.028708133971291867, 19.0: 0.0023923444976076554, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.007177033492822967, 28.0: 0.13157894736842105, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.007177033492822967, 33.0: 0.47985347985347987, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.050239234449760764, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0215311004784689, 55.0: 0.0, 56.0: 0.01674641148325359, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.011961722488038277, 63.0: 0.0, 64.0: 0.0023923444976076554, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.06937799043062201, 79.0: 0.004784688995215311, 80.0: 0.0, 81.0: 0.05502392344497608, 82.0: 0.

 52%|█████▏    | 75/143 [19:18<13:06, 11.57s/it]

Done. Second highest LCSS score is for HL_ID 33.0
existing match and assign cluster id 33.0 to trip 1005990
{-1.0: 0.9092514124293786, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.05683918669131238, 9.0: 0.0006514657980456026, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0014823599169878447, 18.0: 0.02385685884691849, 19.0: 0.050512445095168376, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0029806259314456036, 29.0: 0.0, 30.0: 0.14241960183767227, 31.0: 0.0, 32.0: 0.3057692307692308, 33.0: 0.9092514124293786, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0020325203252032522, 40.0: 0.0014144271570014145, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.008167265599477294, 51.0: 0.009402654867256638, 52.0: 0.001959503592423253, 53.0: 0.05714285714285714, 55.0: 0.0, 56.0: 0.028735632183908046, 57.0: 0.08118811881188119, 58.0: 0.0, 60.0: 0.0, 61.0: 0.025157232704402517, 63.0: 0.0, 64.0: 0.0014144271570014145, 66.0: 0.0, 67.0: 0.0011389521640091116, 68.0: 

 53%|█████▎    | 76/143 [20:02<23:42, 21.23s/it]

Done. Second highest LCSS score is for HL_ID 33.0
existing match and assign cluster id 33.0 to trip 987958


 54%|█████▍    | 77/143 [20:24<23:40, 21.53s/it]

existing match and assign cluster id 33.0 to trip 985191
{-1.0: 0.5467075038284839, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.021439509954058193, 9.0: 0.0030627871362940277, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.1225114854517611, 19.0: 0.0, 21.0: 0.03215926493108729, 23.0: 0.007656967840735069, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.016845329249617153, 29.0: 0.0, 30.0: 0.0030627871362940277, 31.0: 0.0, 32.0: 0.007656967840735069, 33.0: 0.5467075038284839, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.03522205206738132, 42.0: 0.0, 43.0: 0.17304747320061256, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.006125574272588055, 51.0: 0.0015313935681470138, 52.0: 0.0, 53.0: 0.03215926493108729, 55.0: 0.0, 56.0: 0.0, 57.0: 0.013782542113323124, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0030627871362940277, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0030627871362940277, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0030627871362940277, 77.0: 0.0, 78.0: 0.0, 79.

 55%|█████▍    | 78/143 [20:34<19:40, 18.17s/it]

Done. Second highest LCSS score is for HL_ID 33.0
existing match and assign cluster id 33.0 to trip 998502


 55%|█████▌    | 79/143 [20:40<15:14, 14.28s/it]

trip overlaps in time with other trips in the same HL_ID


 56%|█████▌    | 80/143 [21:07<19:13, 18.31s/it]

no match and assign new cluster id 234 to trips 986865 986866


 57%|█████▋    | 81/143 [21:28<19:36, 18.98s/it]

trip overlaps in time with other trips in the same HL_ID


 57%|█████▋    | 82/143 [21:51<20:23, 20.05s/it]

existing match and assign cluster id 33.0 to trip 1006607


 58%|█████▊    | 83/143 [21:54<14:57, 14.97s/it]

trip overlaps in time with other trips in the same HL_ID


 59%|█████▉    | 85/143 [22:04<09:33,  9.89s/it]

existing match and assign cluster id 61.0 to trip 1007183
{-1.0: 0.5238095238095238, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.4139194139194139, 9.0: 0.03663003663003663, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.03296703296703297, 19.0: 0.0, 21.0: 0.0, 23.0: 0.029304029304029304, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.1794871794871795, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.007326007326007326, 33.0: 0.5238095238095238, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.06227106227106227, 42.0: 0.0, 43.0: 0.003663003663003663, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.07326007326007326, 51.0: 0.0, 52.0: 0.0, 53.0: 0.06227106227106227, 55.0: 0.0, 56.0: 0.003663003663003663, 57.0: 0.0, 58.0: 0.0, 60.0: 0.003663003663003663, 61.0: 0.06896551724137931, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.007326007326007326, 77.0: 0.0, 78.0: 0.05128205128205128, 79.0: 0.0, 80.0: 0.0, 81.0: 0.04395604395604396

 60%|██████    | 86/143 [22:10<08:10,  8.61s/it]

Done. Second highest LCSS score is for HL_ID 33.0
existing match and assign cluster id 33.0 to trip 1000524


 61%|██████    | 87/143 [22:20<08:36,  9.22s/it]

trip overlaps in time with other trips in the same HL_ID


 62%|██████▏   | 88/143 [22:43<12:03, 13.16s/it]

existing match and assign cluster id 53.0 to trip 1007188


 62%|██████▏   | 89/143 [22:58<12:27, 13.84s/it]

existing match and assign cluster id 67.0 to trip 992734
{-1.0: 0.45190839694656487, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.0, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.015267175572519083, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.42748091603053434, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.17099236641221374, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.45190839694656487, 88.0: 0.13129770992366413, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 0.0, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 

 63%|██████▎   | 90/143 [23:08<11:07, 12.59s/it]

Done. Second highest LCSS score is for HL_ID -1.0
trip overlaps in time with other trips in the same HL_ID


 64%|██████▎   | 91/143 [23:10<08:10,  9.42s/it]

existing match and assign cluster id 67.0 to trip 985739


 64%|██████▍   | 92/143 [23:24<09:18, 10.95s/it]

existing match and assign cluster id 67.0 to trip 1006145


 65%|██████▌   | 93/143 [23:58<14:48, 17.76s/it]

trip overlaps in time with other trips in the same HL_ID


 66%|██████▌   | 94/143 [24:07<12:27, 15.26s/it]

trip overlaps in time with other trips in the same HL_ID


 66%|██████▋   | 95/143 [24:23<12:19, 15.41s/it]

existing match and assign cluster id 87.0 to trip 1005502
Done.
Assigning clustering IDs to all trips that are part of a new cluster...
Done.





In [33]:
print("Clustering results after concatenation step:")
print(f"Number of unique clusters: {len(set(clustering_concat))}")
attack.evaluate(clustering_concat, full_trip_gdf)

print("\nClustering results after double assign HL step:")
print(f"Number of unique clusters: {len(set(list(dict(sorted(clustering_after_double_assign_HL.items())).values())))}")
attack.evaluate(list(dict(sorted(clustering_after_double_assign_HL.items())).values()), full_trip_gdf)

Clustering results after concatenation step:
Number of unique clusters: 667
Homogeneity: 0.995
Completeness: 0.578
V-measure: 0.731
Rand index: 0.971
ARI: 0.061
MI: 3.684
NMI: 0.731
AMI: 0.267

Clustering results after double assign HL step:
Number of unique clusters: 188
Homogeneity: 0.895
Completeness: 0.755
V-measure: 0.819
Rand index: 0.978
ARI: 0.544
MI: 3.317
NMI: 0.819
AMI: 0.711


In [36]:
clustering_after_double_assign_HL = attack.assign_trips_without_match(
    clustering_after_HL, HL_table_dict, 
    full_trips_concat_gdf, 
    full_trips_concat_gdf_overlap_dict, 
    full_trip_gdf, 
    trip_concat_dict,
    SIM_THRESH_FOR_NO_MATCH=0.5)


Comparing trips that were not assigned to any HL_ID with trips that were assigned to a HL_ID...


  1%|▏         | 2/148 [00:18<24:53, 10.23s/it]

trip overlaps in time with other trips in the same HL_ID


  2%|▏         | 3/148 [00:39<36:31, 15.12s/it]

existing match and assign cluster id 18.0 to trip 988341


  3%|▎         | 5/148 [01:20<41:55, 17.59s/it]

existing match and assign cluster id 43.0 to trip 991154


  5%|▌         | 8/148 [03:11<1:12:17, 30.98s/it]

trip overlaps in time with other trips in the same HL_ID


  6%|▌         | 9/148 [03:50<1:17:37, 33.51s/it]

trip overlaps in time with other trips in the same HL_ID


  9%|▉         | 13/148 [05:13<53:26, 23.75s/it]  

trip overlaps in time with other trips in the same HL_ID


  9%|▉         | 14/148 [05:43<57:00, 25.52s/it]

trip overlaps in time with other trips in the same HL_ID


 10%|█         | 15/148 [05:54<47:18, 21.35s/it]

trip overlaps in time with other trips in the same HL_ID


 11%|█▏        | 17/148 [07:41<1:28:12, 40.40s/it]

no match and assign new cluster id 233 to trips 1007064 1007359


 14%|█▎        | 20/148 [08:43<58:08, 27.26s/it]  

existing match and assign cluster id 100.0 to trip 1009131


 14%|█▍        | 21/148 [08:49<44:28, 21.01s/it]

trip overlaps in time with other trips in the same HL_ID


 15%|█▍        | 22/148 [08:52<32:28, 15.47s/it]

no match and assign new cluster id 234 to trips 1009192 1006134


 17%|█▋        | 25/148 [09:17<23:02, 11.24s/it]

trip overlaps in time with other trips in the same HL_ID


 19%|█▉        | 28/148 [09:39<17:32,  8.77s/it]

existing match and assign cluster id 24.0 to trip 989808


 20%|█▉        | 29/148 [09:48<17:32,  8.84s/it]

trip overlaps in time with other trips in the same HL_ID


 20%|██        | 30/148 [09:58<18:07,  9.21s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 1.0, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.0, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.0, 88.0: 0.0, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 1.0, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 111.0: 0.0, 113.0: 0.0, 116.0: 0.0, 117.0: 0.0, 118.0: 0.0, 119.0: 0.0, 121.0: 0.0, 122.0: 0.0, 1

 21%|██        | 31/148 [10:01<14:32,  7.46s/it]

Done. Second highest LCSS score is for HL_ID -1.0
trip overlaps in time with other trips in the same HL_ID


 22%|██▏       | 32/148 [10:13<17:10,  8.88s/it]

trip overlaps in time with other trips in the same HL_ID


 22%|██▏       | 33/148 [10:16<13:39,  7.13s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 1.0, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.0, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0, 53.0: 0.0, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.0, 88.0: 0.0, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 1.0, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 111.0: 0.0, 113.0: 0.0, 116.0: 0.0, 117.0: 0.0, 118.0: 0.0, 119.0: 0.0, 121.0: 0.0, 122.0: 0.0, 1

 23%|██▎       | 34/148 [10:23<13:11,  6.94s/it]

Done. Second highest LCSS score is for HL_ID -1.0
trip overlaps in time with other trips in the same HL_ID


 24%|██▎       | 35/148 [10:41<19:26, 10.32s/it]

trip overlaps in time with other trips in the same HL_ID


 24%|██▍       | 36/148 [10:43<14:48,  7.93s/it]

trip overlaps in time with other trips in the same HL_ID


 25%|██▌       | 37/148 [11:07<23:26, 12.67s/it]

no match and assign new cluster id 235 to trips 1011700 989686


 26%|██▌       | 38/148 [11:36<31:50, 17.37s/it]

{-1.0: 0.6661931818181818, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0009581603321622484, 9.0: 0.00031938677738741617, 14.0: 0.0012775471095496647, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0014641288433382138, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0007656967840735069, 31.0: 0.0, 32.0: 0.0009615384615384616, 33.0: 0.6661931818181818, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.008403361344537815, 42.0: 0.0, 43.0: 0.009421265141318977, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0011578541103820917, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0012422360248447205, 53.0: 0.07888853401469179, 55.0: 0.0, 56.0: 0.0015772870662460567, 57.0: 0.00031938677738741617, 58.0: 0.0, 60.0: 0.0, 61.0: 0.18436154949784792, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.011033099297893681, 68.0: 0.012106537530266344, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.000846740050804403, 76.0: 0.0, 77.0: 0.00038402457757296467, 78.0: 0.02425531914893617, 79.0: 0.0047337

 26%|██▋       | 39/148 [12:16<44:06, 24.28s/it]

Done. Second highest LCSS score is for HL_ID -1.0
no match and assign new cluster id 236 to trips 1006860 1005119


 27%|██▋       | 40/148 [13:19<1:04:37, 35.90s/it]

trip overlaps in time with other trips in the same HL_ID


 28%|██▊       | 41/148 [13:22<46:27, 26.05s/it]  

trip overlaps in time with other trips in the same HL_ID


 29%|██▉       | 43/148 [13:35<28:13, 16.12s/it]

trip overlaps in time with other trips in the same HL_ID


 30%|███       | 45/148 [13:59<23:19, 13.59s/it]

trip overlaps in time with other trips in the same HL_ID


 31%|███       | 46/148 [14:25<29:04, 17.10s/it]

trip overlaps in time with other trips in the same HL_ID


 32%|███▏      | 47/148 [14:30<22:50, 13.57s/it]

no match and assign new cluster id 237 to trips 1006125 990036


 32%|███▏      | 48/148 [14:33<17:14, 10.34s/it]

existing match and assign cluster id 32.0 to trip 992952


 33%|███▎      | 49/148 [14:47<18:55, 11.47s/it]

trip overlaps in time with other trips in the same HL_ID


 34%|███▍      | 50/148 [14:50<14:38,  8.97s/it]

trip overlaps in time with other trips in the same HL_ID


 35%|███▌      | 52/148 [15:32<23:56, 14.96s/it]

existing match and assign cluster id 98.0 to trip 1005143


 36%|███▌      | 53/148 [16:07<33:07, 20.92s/it]

no match and assign new cluster id 238 to trips 1009606 1009672


 36%|███▋      | 54/148 [16:14<26:22, 16.83s/it]

existing match and assign cluster id 73.0 to trip 1009584


 37%|███▋      | 55/148 [16:26<23:55, 15.44s/it]

trip overlaps in time with other trips in the same HL_ID


 38%|███▊      | 56/148 [16:48<26:22, 17.20s/it]

trip overlaps in time with other trips in the same HL_ID


 39%|███▊      | 57/148 [17:04<25:40, 16.93s/it]

trip overlaps in time with other trips in the same HL_ID


 39%|███▉      | 58/148 [17:17<23:37, 15.74s/it]

trip overlaps in time with other trips in the same HL_ID


 41%|████      | 61/148 [18:56<33:59, 23.45s/it]

no match and assign new cluster id 239 to trips 987927 987720


 45%|████▍     | 66/148 [19:38<18:02, 13.20s/it]

existing match and assign cluster id 104.0 to trip 985637


 45%|████▌     | 67/148 [20:53<42:47, 31.70s/it]

no match and assign new cluster id 240 to trips 1009686 993240


 46%|████▌     | 68/148 [20:58<31:41, 23.77s/it]

trip overlaps in time with other trips in the same HL_ID


 47%|████▋     | 69/148 [21:10<26:42, 20.29s/it]

trip overlaps in time with other trips in the same HL_ID


 47%|████▋     | 70/148 [21:17<21:12, 16.32s/it]

trip overlaps in time with other trips in the same HL_ID


 48%|████▊     | 71/148 [21:21<16:00, 12.47s/it]

trip overlaps in time with other trips in the same HL_ID


 49%|████▊     | 72/148 [21:58<25:16, 19.95s/it]

existing match and assign cluster id 33.0 to trip 1002089


 50%|█████     | 74/148 [23:30<42:31, 34.48s/it]

trip overlaps in time with other trips in the same HL_ID


 51%|█████     | 75/148 [24:23<48:35, 39.94s/it]

{-1.0: 0.7413793103448276, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.21619718309859154, 9.0: 0.2009991184249192, 14.0: 0.014668689934243804, 15.0: 0.00025290844714213456, 16.0: 0.0, 18.0: 0.01669195751138088, 19.0: 0.006502302898943376, 21.0: 0.0, 23.0: 0.03183023872679045, 24.0: 0.0, 26.0: 0.014721345951629864, 27.0: 0.0, 28.0: 0.42654808959156787, 29.0: 0.0, 30.0: 0.013016845329249618, 31.0: 0.0, 32.0: 0.03716814159292035, 33.0: 0.7413793103448276, 34.0: 0.006557377049180328, 35.0: 0.0003874467260751647, 36.0: 0.0, 39.0: 0.0, 40.0: 0.1588628762541806, 42.0: 0.003003003003003003, 43.0: 0.0009337068160597573, 44.0: 0.005434782608695652, 47.0: 0.0026041666666666665, 48.0: 0.0, 49.0: 0.005434782608695652, 50.0: 0.08167467398764584, 51.0: 0.0011061946902654867, 52.0: 0.0, 53.0: 0.3914110429447853, 55.0: 0.0011402508551881414, 56.0: 0.0, 57.0: 0.0035407182599898838, 58.0: 0.0, 60.0: 0.0, 61.0: 0.03423566878980892, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0014801657785671995,

 51%|█████▏    | 76/148 [25:30<57:29, 47.92s/it]

Done. Second highest LCSS score is for HL_ID 33.0
trip overlaps in time with other trips in the same HL_ID


 53%|█████▎    | 79/148 [26:25<35:08, 30.55s/it]

existing match and assign cluster id 125.0 to trip 989059


 56%|█████▌    | 83/148 [27:53<27:11, 25.09s/it]

trip overlaps in time with other trips in the same HL_ID


 57%|█████▋    | 85/148 [28:23<21:22, 20.36s/it]

existing match and assign cluster id 33.0 to trip 1006856


 58%|█████▊    | 86/148 [28:44<21:25, 20.73s/it]

trip overlaps in time with other trips in the same HL_ID
{-1.0: 0.538, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.01, 9.0: 0.008, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.006, 21.0: 0.0, 23.0: 0.004, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.538, 29.0: 0.0, 30.0: 0.002, 31.0: 0.0, 32.0: 0.148, 33.0: 0.526, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.098, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.008, 48.0: 0.0, 49.0: 0.0, 50.0: 0.008, 51.0: 0.0, 52.0: 0.0, 53.0: 0.1, 55.0: 0.0, 56.0: 0.0, 57.0: 0.008, 58.0: 0.0, 60.0: 0.0, 61.0: 0.002, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.004, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.096, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.004, 87.0: 0.084, 88.0: 0.0, 89.0: 0.0, 91.0: 0.0, 93.0: 0.0, 95.0: 0.0, 96.0: 0.0, 97.0: 0.0, 98.0: 0.0, 100.0: 0.0, 101.0: 0.0, 104.0: 0.0, 108.0: 0.0, 109.0: 0.0, 111.0: 0.0, 113.0: 0.0, 116.0: 0.0, 117.0: 0.0, 118.0: 0.0, 11

 59%|█████▉    | 87/148 [28:59<19:25, 19.10s/it]

Done. Second highest LCSS score is for HL_ID 28.0
trip overlaps in time with other trips in the same HL_ID


 59%|█████▉    | 88/148 [29:28<22:00, 22.01s/it]

existing match and assign cluster id 33.0 to trip 985191
{-1.0: 0.5862068965517241, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.08064516129032258, 14.0: 0.016129032258064516, 15.0: 0.0008488964346349745, 16.0: 0.006289308176100629, 18.0: 0.0, 19.0: 0.001697792869269949, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.11969439728353141, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.001697792869269949, 33.0: 0.5862068965517241, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0025466893039049238, 43.0: 0.0018674136321195146, 44.0: 0.11120543293718166, 47.0: 0.01867572156196944, 48.0: 0.0023094688221709007, 49.0: 0.23026315789473684, 50.0: 0.1640826873385013, 51.0: 0.0, 52.0: 0.0, 53.0: 0.5161290322580645, 55.0: 0.019524617996604415, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.025466893039049237, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.

 60%|██████    | 89/148 [29:52<22:17, 22.66s/it]

Done. Second highest LCSS score is for HL_ID -1.0
no match and assign new cluster id 241 to trips 1004742 990381
{-1.0: 0.4847161572052402, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.10043668122270742, 9.0: 0.4279475982532751, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.0, 19.0: 0.06550218340611354, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.004366812227074236, 28.0: 0.4759825327510917, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.004366812227074236, 33.0: 0.896551724137931, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.0, 42.0: 0.0, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.004366812227074236, 51.0: 0.0, 52.0: 0.0, 53.0: 0.043668122270742356, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.0, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.0, 77.0: 0.0, 78.0: 0.0, 79.0: 0.0, 80.0: 0.0, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.0, 87.0: 0.0, 88.0: 0.0, 89.0: 0.0, 91.0: 0.

 61%|██████    | 90/148 [30:01<17:46, 18.38s/it]

Done. Second highest LCSS score is for HL_ID 33.0
trip overlaps in time with other trips in the same HL_ID


 61%|██████▏   | 91/148 [30:20<17:46, 18.72s/it]

trip overlaps in time with other trips in the same HL_ID


 63%|██████▎   | 93/148 [31:04<17:11, 18.76s/it]

trip overlaps in time with other trips in the same HL_ID


 66%|██████▌   | 97/148 [31:44<09:18, 10.95s/it]

existing match and assign cluster id 61.0 to trip 1007183


 66%|██████▌   | 98/148 [31:53<08:43, 10.47s/it]

existing match and assign cluster id 33.0 to trip 985293
{-1.0: 0.5238095238095238, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.4139194139194139, 9.0: 0.03663003663003663, 14.0: 0.0, 15.0: 0.0, 16.0: 0.0, 18.0: 0.03296703296703297, 19.0: 0.0, 21.0: 0.0, 23.0: 0.029304029304029304, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.1794871794871795, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.007326007326007326, 33.0: 0.5238095238095238, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.0, 40.0: 0.06227106227106227, 42.0: 0.0, 43.0: 0.003663003663003663, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.07326007326007326, 51.0: 0.0, 52.0: 0.0, 53.0: 0.06227106227106227, 55.0: 0.0, 56.0: 0.003663003663003663, 57.0: 0.0, 58.0: 0.0, 60.0: 0.003663003663003663, 61.0: 0.06896551724137931, 63.0: 0.0, 64.0: 0.0, 66.0: 0.0, 67.0: 0.0, 68.0: 0.0, 69.0: 0.0, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0, 76.0: 0.007326007326007326, 77.0: 0.0, 78.0: 0.05128205128205128, 79.0: 0.0, 80.0: 0.0, 81.0: 0.04395604395604396,

 67%|██████▋   | 99/148 [32:00<07:35,  9.30s/it]

Done. Second highest LCSS score is for HL_ID 33.0
existing match and assign cluster id 33.0 to trip 1000524


 68%|██████▊   | 100/148 [32:14<08:37, 10.78s/it]

trip overlaps in time with other trips in the same HL_ID


 68%|██████▊   | 101/148 [32:37<11:10, 14.27s/it]

existing match and assign cluster id 53.0 to trip 1007188


 69%|██████▉   | 102/148 [33:14<16:15, 21.21s/it]

existing match and assign cluster id 67.0 to trip 996560
{-1.0: 0.6752136752136753, 1.0: 0.0, 2.0: 0.0, 3.0: 0.0, 4.0: 0.0, 7.0: 0.0, 8.0: 0.0, 9.0: 0.0, 14.0: 0.0, 15.0: 0.013097949886104784, 16.0: 0.0, 18.0: 0.0, 19.0: 0.0, 21.0: 0.0, 23.0: 0.0, 24.0: 0.0, 26.0: 0.0, 27.0: 0.0, 28.0: 0.0, 29.0: 0.0, 30.0: 0.0, 31.0: 0.0, 32.0: 0.0, 33.0: 0.14542836573074155, 34.0: 0.0, 35.0: 0.0, 36.0: 0.0, 39.0: 0.15163398692810456, 40.0: 0.0, 42.0: 0.10276073619631902, 43.0: 0.0, 44.0: 0.0, 47.0: 0.0, 48.0: 0.0, 49.0: 0.0, 50.0: 0.0, 51.0: 0.0, 52.0: 0.0006531678641410843, 53.0: 0.21798875702685822, 55.0: 0.0, 56.0: 0.0, 57.0: 0.0, 58.0: 0.0, 60.0: 0.0, 61.0: 0.002145922746781116, 63.0: 0.0, 64.0: 0.0, 66.0: 0.059322033898305086, 67.0: 0.6752136752136753, 68.0: 0.0049261083743842365, 69.0: 0.0008658008658008658, 71.0: 0.0, 73.0: 0.0, 75.0: 0.0017084282460136675, 76.0: 0.0, 77.0: 0.002277904328018223, 78.0: 0.0, 79.0: 0.0, 80.0: 0.011958997722095672, 81.0: 0.0, 82.0: 0.0, 84.0: 0.0, 85.0: 0.00066577

 70%|██████▉   | 103/148 [33:43<17:36, 23.49s/it]

Done. Second highest LCSS score is for HL_ID 67.0
existing match and assign cluster id 67.0 to trip 992642


 70%|███████   | 104/148 [33:53<14:12, 19.37s/it]

trip overlaps in time with other trips in the same HL_ID


 71%|███████   | 105/148 [34:01<11:30, 16.05s/it]

trip overlaps in time with other trips in the same HL_ID


 72%|███████▏  | 106/148 [34:04<08:30, 12.16s/it]

existing match and assign cluster id 87.0 to trip 1006131


 73%|███████▎  | 108/148 [34:53<13:02, 19.56s/it]

trip overlaps in time with other trips in the same HL_ID


 74%|███████▎  | 109/148 [35:04<11:00, 16.94s/it]

trip overlaps in time with other trips in the same HL_ID


 75%|███████▌  | 111/148 [35:42<11:54, 19.30s/it]

trip overlaps in time with other trips in the same HL_ID
Done.
Assigning clustering IDs to all trips that are part of a new cluster...
Done.





In [37]:
print("Clustering results after concatenation step:")
print(f"Number of unique clusters: {len(set(clustering_concat))}")
attack.evaluate(clustering_concat, full_trip_gdf)

print("\nClustering results after double assign HL step:")
print(f"Number of unique clusters: {len(set(list(dict(sorted(clustering_after_double_assign_HL.items())).values())))}")
attack.evaluate(list(dict(sorted(clustering_after_double_assign_HL.items())).values()), full_trip_gdf)

Clustering results after concatenation step:
Number of unique clusters: 667
Homogeneity: 0.995
Completeness: 0.578
V-measure: 0.731
Rand index: 0.971
ARI: 0.061
MI: 3.684
NMI: 0.731
AMI: 0.267

Clustering results after double assign HL step:
Number of unique clusters: 205
Homogeneity: 0.903
Completeness: 0.747
V-measure: 0.817
Rand index: 0.978
ARI: 0.536
MI: 3.343
NMI: 0.817
AMI: 0.702
