In [2]:
!pip install pyproj
!pip install geopandas
!pip install selenium 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyproj
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 8.2 MB/s 
Installing collected packages: pyproj
Successfully installed pyproj-3.2.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 6.1 MB/s 
Collecting fiona>=1.8
  Downloading Fiona-1.8.22-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 666 kB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cl

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import os, sys, io
import folium
from shapely.geometry import Point, LineString
from pyproj import CRS
import geopandas as gpd
from PIL import Image
import pandas as pd
import json
import shapely.wkt

In [7]:
# Input files. NOTE: mtrain_file and train_file currently point at the same CSV.
mtrain_file = '/content/drive/MyDrive/Colab Notebooks/AI6128_Project2/data/train_1000.csv'
mm_file = '/content/drive/MyDrive/Colab Notebooks/AI6128_Project2/porto/match_result.csv'
train_file = '/content/drive/MyDrive/Colab Notebooks/AI6128_Project2/data/train_1000.csv'

# Directory that receives the rendered PNG files.
results_path = '/content/drive/MyDrive/Colab Notebooks/AI6128_Project2/output/'

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of the former `if not os.path.exists(...)` guard.
os.makedirs(results_path, exist_ok=True)

# One colour per plotted trajectory. The plotting cells index this list directly
# with the trajectory number, so only indices 0-9 are valid.
colors_list = ['crimson', 'blue',  'deepskyblue', 'purple', 'orange', 'green', 'orangered','cyan', 'magenta', 'teal']
# Candidate folium tile names; the cells in this notebook only use ftiles[1].
ftiles = ['OpenStreetMap','Stamen Terrain','Stamen Toner','Mapbox Bright','Mapbox Control Room']

In [8]:
# Notebook-wide switches read by the plotting and saving cells below.
# PLOT_LINES gates the folium.PolyLine layers; it is reassigned by later cells.
PLOT_LINES = True
# PLOT_POINTS gates the per-vertex folium.Circle markers in the route cells.
PLOT_POINTS = False
# SAVE_IMAGES gates writing the combined map to results_path.
SAVE_IMAGES = True
# SAVE_INDIVIDUAL gates the per-trajectory cells.
SAVE_INDIVIDUAL = True

In [9]:

def get_train1000_df(file_path, dedup=False):
    """Load the ``POLYLINE`` column of a trajectory CSV as parsed JSON.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        dedup: When true, drop every point that equals the point immediately
            before it in the same trajectory (consecutive duplicates only).

    Returns:
        DataFrame with a single ``POLYLINE`` column; each cell holds the value
        ``json.loads`` produced for that row.
    """
    train1000 = pd.read_csv(file_path,
                            sep=",", usecols=['POLYLINE'],
                            converters={'POLYLINE': json.loads})
    if dedup:
        cleaned_list = []
        dedup_ctr = 0
        for k in train1000["POLYLINE"]:
            dedup_ks = [pt for i, pt in enumerate(k) if i == 0 or pt != k[i - 1]]
            # Compare against the de-duplicated list. The original called len()
            # on the boolean `dedup` flag, which raised TypeError every time.
            if len(dedup_ks) != len(k):
                dedup_ctr += 1
            cleaned_list.append(dedup_ks)

        print(f"Removed duplicated points from {dedup_ctr} trajectories.")
        train1000["POLYLINE"] = cleaned_list

    return train1000

In [10]:
crs = CRS("EPSG:4326")
def get_lineString_gdf(train_df, verbose=True):
    """Build a GeoDataFrame of LineStrings from the ``POLYLINE`` column.

    Rows with no coordinates or with a single coordinate cannot form a
    LineString and are skipped, so the result may have fewer rows than
    ``train_df`` and its index does not map back to the input rows.

    Args:
        train_df: DataFrame with a ``POLYLINE`` column of coordinate lists.
        verbose: Print a message for every skipped row and the final count.

    Returns:
        GeoDataFrame with one ``geometry`` column, using the module-level ``crs``.
    """
    linestr_obj = []
    for idx, coords in enumerate(train_df['POLYLINE']):
        if len(coords) == 0:
            if verbose:
                print(f"Missing coordinates at row {idx}!")
                # Print the value already in hand. The original looked the row
                # up again by label using this positional counter, which fails
                # (or shows the wrong row) when the index is not 0..n-1.
                print(coords)
            continue

        if len(coords) < 2:
            if verbose:
                print(f"Insufficient points to form LineString. Coords only have {len(coords)} point.")
            continue

        # LineString accepts the coordinate sequence directly; wrapping every
        # pair in a Point first is unnecessary.
        linestr_obj.append(LineString(coords))

    if verbose:
        print(f"Total number of LineString: {len(linestr_obj)}")

    # Passing geometry= explicitly also yields a valid frame when every row was skipped.
    ls_gdf = gpd.GeoDataFrame(geometry=linestr_obj, crs=crs)
    return ls_gdf


In [11]:
def get_coords(geom):
    """Return the vertices of ``geom`` as a list of ``(y, x)`` tuples.

    Each ``(x, y)`` pair from ``geom.coords`` is swapped; every vertex must
    unpack into exactly two values.
    """
    swapped = []
    for x, y in geom.coords:
        swapped.append((y, x))
    return swapped
def get_lineString_coords(ls_gdf):
    """Apply ``get_coords`` to the ``geometry`` of every row of ``ls_gdf``.

    Returns whatever the row-wise ``apply`` produces: one ``get_coords`` result
    per row.
    """
    def _row_to_coords(row):
        return get_coords(row.geometry)

    return ls_gdf.apply(_row_to_coords, axis=1)

In [12]:
# Load the raw trajectories. train_file and mtrain_file are the same path in the
# config cell, so this reads the same CSV twice.
train1000 = get_train1000_df(train_file)
train1000m = get_train1000_df(mtrain_file)
# Convert to LineStrings, then to one list of (y, x) tuples per trajectory.
mlsdf = get_lineString_gdf(train1000m)
mlcoords = get_lineString_coords(mlsdf)
# Alias, not a copy: tlcoords and mlcoords refer to the same object.
tlcoords = mlcoords

Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Missing coordinates at row 762!
[]
Insufficient points to form LineString. Coords only have 1 point.
Total number of LineString: 990


In [13]:
def get_global_xy_bounds(gdf, adjustx=0., adjusty=0.):
    """Return the bounding box covering every point of every trajectory.

    Args:
        gdf: Iterable of trajectories, each an iterable of points indexable
            with ``[0]`` and ``[1]``.
        adjustx: Amount added to the minimum and subtracted from the maximum
            of the first component (positive values shrink the box).
        adjusty: Same as ``adjustx`` for the second component.

    Returns:
        ``[[min0 + adjustx, min1 + adjusty], [max0 - adjustx, max1 - adjusty]]``.

    Raises:
        ValueError: If ``gdf`` contains no points.
    """
    # Collect both components in one pass instead of four scans of the data.
    firsts, seconds = [], []
    for lcoord in gdf:
        for point in lcoord:
            firsts.append(point[0])
            seconds.append(point[1])

    if not firsts:
        raise ValueError("get_global_xy_bounds() needs at least one coordinate")

    # Use the true extrema. The former +/-9999 sentinels silently clamped the
    # box for any coordinate outside that range (e.g. projected metres).
    x_min, x_max = min(firsts), max(firsts)
    y_min, y_max = min(seconds), max(seconds)
    return [[x_min + adjustx, y_min + adjusty], [x_max - adjustx, y_max - adjusty]]

In [14]:
def get_porto_coords(adjusted=True):
    """Return the ``(lat, lon)`` tuple used as the initial map centre.

    With ``adjusted`` truthy the base point is shifted by +0.01 in latitude
    and -0.02 in longitude; otherwise the base point is returned unchanged.
    """
    lat_shift, lon_shift = (0.01, 0.02) if adjusted else (0.0, 0.0)
    return (41.1496100 + lat_shift, -8.6109900 - lon_shift)

In [15]:
# Overview map: the first ten GPS trajectories drawn as points on one map.
f = folium.Figure(width=1500, height=1000)
fmap = folium.Map(get_porto_coords(), zoom_start=13, tiles=ftiles[1], zoom_control = False, max_bounds=True).add_to(f)
# Fit the viewport to the bounding box of the same ten trajectories.
bounds = get_global_xy_bounds(tlcoords[:10])
fmap.fit_bounds(bounds)

# NOTE: this overrides the notebook-wide flag from the config cell; it stays
# False for later cells until it is reassigned.
PLOT_LINES = False
for idx, lcoord in enumerate(tlcoords[:10]):
    # One feature group per trajectory; idx also selects the colour, so this
    # loop relies on colors_list having at least ten entries.
    feature_group = folium.FeatureGroup()
    if PLOT_LINES:
        feature_group.add_child(folium.PolyLine(lcoord, color=colors_list[idx], weight=3))
    for p in lcoord:
        feature_group.add_child(folium.Circle(p, radius=5, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
    fmap.add_child(feature_group)    

# Last expression of the cell, so the notebook renders the map.
fmap

In [16]:

def get_xy_bounds(lcoord):
    """Return the bounding box of a single trajectory.

    Args:
        lcoord: Iterable of points indexable with ``[0]`` and ``[1]``.

    Returns:
        ``[[min0, min1], [max0, max1]]`` over the two point components.

    Raises:
        ValueError: If ``lcoord`` contains no points.
    """
    firsts, seconds = [], []
    for point in lcoord:
        firsts.append(point[0])
        seconds.append(point[1])

    if not firsts:
        raise ValueError("get_xy_bounds() needs at least one coordinate")

    # Use the true extrema. The former +/-9999 sentinels silently clamped the
    # box for any coordinate outside that range (e.g. projected metres).
    return [[min(firsts), min(seconds)], [max(firsts), max(seconds)]]


def save_fmap_plot(fmap, filename, delay=5) -> None:
    """Render ``fmap`` to PNG and write it to ``filename + ".png"``.

    Args:
        fmap: Map object exposing ``_to_png``; the cells in this notebook pass
            a ``folium.Map``.
        filename: Output path WITHOUT extension; ``".png"`` is always appended.
        delay: Value forwarded to ``fmap._to_png``. Presumably the number of
            seconds to wait for tiles before the screenshot is taken - confirm
            against the folium version in use. Defaults to the previously
            hard-coded 5.
    """
    # NOTE(review): _to_png is a private folium API and may change between releases.
    img_data = fmap._to_png(delay)
    # The context manager releases the decoded image once it has been written.
    with Image.open(io.BytesIO(img_data)) as img:
        img.save(filename + ".png")

In [20]:
# Trajectories to render individually. Each value indexes tlcoords, which only
# holds trajectories that produced a LineString, so it is not necessarily the
# row number in the CSV.
prob_traj = [2, 3]
if SAVE_INDIVIDUAL:
    for idx in prob_traj:
        lcoord = tlcoords[idx]
        # A fresh map per trajectory, fitted to that trajectory's bounding box.
        fmap = folium.Map(get_porto_coords(), zoom_start=13.5, tiles=ftiles[1], zoom_control = False)
        bounds = get_xy_bounds(lcoord)
        fmap.fit_bounds(bounds)
        feature_group = folium.FeatureGroup()
        # idx also selects the colour, so it must be a valid colors_list index.
        for p in lcoord:
            feature_group.add_child(folium.Circle(p, radius=1, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
        fmap.add_child(feature_group)
        # Written to <results_path>/6fmap_gps_<idx>.png.
        save_fmap_plot(fmap, os.path.join(results_path, f'6fmap_gps_{idx}'))

In [21]:
def get_mm_results_df(filepath):
    """Read only the ``mgeom`` column of the comma-separated file at ``filepath``.

    Returns a DataFrame with that single column.
    """
    return pd.read_csv(filepath, usecols=['mgeom'], sep=",")

In [22]:
# Load the map-matching output (one WKT string per row in `mgeom`) and preview it.
mm_results = get_mm_results_df(mm_file) 
mm_results.head()

Unnamed: 0,mgeom
0,"LINESTRING(-8.6186233 41.141456,-8.6183463 41...."
1,"LINESTRING(-8.6398592 41.159752,-8.6400962 41...."
2,"LINESTRING(-8.6135064 41.141371,-8.6134714 41...."
3,"LINESTRING(-8.5747537 41.151899,-8.5747993 41...."
4,"LINESTRING(-8.6457599 41.180528,-8.645788 41.1..."


In [23]:
# Show row count, non-null count and dtype of the loaded column.
mm_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   mgeom   1000 non-null   object
dtypes: object(1)
memory usage: 7.9+ KB


In [24]:
def get_geom_gdf(mm_results, verbose=True):
    """Parse the WKT strings in ``mm_results['mgeom']`` into a GeoDataFrame.

    Rows that fail to parse are skipped, so the result may have fewer rows than
    ``mm_results`` and its index does not map back to the input rows.

    Args:
        mm_results: DataFrame with an ``mgeom`` column of WKT strings.
        verbose: Print a message per skipped row (1-based row number) and the
            final geometry count.

    Returns:
        GeoDataFrame with one ``geometry`` column, using the module-level ``crs``.
    """
    linestr_obj = []
    for idx, ls in enumerate(mm_results['mgeom']):
        try:
            line = shapely.wkt.loads(ls)
            if len(line.coords) == 1 and verbose:
                print(f"Only 1 point at row {idx+1}")
            linestr_obj.append(line)
        except Exception:
            # Narrowed from a bare `except:` so that KeyboardInterrupt and
            # SystemExit still propagate; parse failures are skipped as before.
            if verbose:
                print(f"Missing coordinates at row {idx+1}! Row data: {ls}")

    if verbose:
        print(f"Total number of LineString: {len(linestr_obj)}")

    # Passing geometry= explicitly also yields a valid frame when every row was skipped.
    geom_gdf = gpd.GeoDataFrame(geometry=linestr_obj, crs=crs)
    return geom_gdf

In [25]:
# Parse the matched geometries. Rows that fail to parse are skipped, so `routes`
# can be shorter than mm_results.
routes = get_geom_gdf(mm_results)

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:

Missing coordinates at row 10! Row data: LINESTRING()
Missing coordinates at row 17! Row data: LINESTRING()
Missing coordinates at row 25! Row data: LINESTRING()
Missing coordinates at row 27! Row data: LINESTRING()
Missing coordinates at row 34! Row data: LINESTRING()
Missing coordinates at row 36! Row data: LINESTRING()
Missing coordinates at row 45! Row data: LINESTRING()
Missing coordinates at row 46! Row data: LINESTRING()
Missing coordinates at row 47! Row data: LINESTRING()
Missing coordinates at row 58! Row data: LINESTRING()
Missing coordinates at row 63! Row data: LINESTRING()
Missing coordinates at row 70! Row data: LINESTRING()
Missing coordinates at row 75! Row data: LINESTRING()
Missing coordinates at row 81! Row data: LINESTRING()
Missing coordinates at row 87! Row data: LINESTRING()
Missing coordinates at row 88! Row data: LINESTRING()
Missing coordinates at row 89! Row data: LINESTRING()
Missing coordinates at row 90! Row data: LINESTRING()
Missing coordinates at row 9

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:

Missing coordinates at row 222! Row data: LINESTRING()
Missing coordinates at row 224! Row data: LINESTRING()
Missing coordinates at row 225! Row data: LINESTRING()
Missing coordinates at row 227! Row data: LINESTRING()
Missing coordinates at row 229! Row data: LINESTRING()
Missing coordinates at row 230! Row data: LINESTRING()
Missing coordinates at row 231! Row data: LINESTRING()
Missing coordinates at row 232! Row data: LINESTRING()
Missing coordinates at row 233! Row data: LINESTRING()
Missing coordinates at row 234! Row data: LINESTRING()
Missing coordinates at row 236! Row data: LINESTRING()
Missing coordinates at row 238! Row data: LINESTRING()
Missing coordinates at row 240! Row data: LINESTRING()
Missing coordinates at row 242! Row data: LINESTRING()
Missing coordinates at row 244! Row data: LINESTRING()
Missing coordinates at row 245! Row data: LINESTRING()
Missing coordinates at row 246! Row data: LINESTRING()
Missing coordinates at row 247! Row data: LINESTRING()
Missing co

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:

Missing coordinates at row 282! Row data: LINESTRING()
Missing coordinates at row 284! Row data: LINESTRING()
Missing coordinates at row 290! Row data: LINESTRING()
Missing coordinates at row 291! Row data: LINESTRING()
Missing coordinates at row 293! Row data: LINESTRING()
Missing coordinates at row 295! Row data: LINESTRING()
Missing coordinates at row 302! Row data: LINESTRING()
Missing coordinates at row 303! Row data: LINESTRING()
Missing coordinates at row 305! Row data: LINESTRING()
Missing coordinates at row 306! Row data: LINESTRING()
Missing coordinates at row 307! Row data: LINESTRING()
Missing coordinates at row 309! Row data: LINESTRING()
Missing coordinates at row 314! Row data: LINESTRING()
Missing coordinates at row 318! Row data: LINESTRING()
Missing coordinates at row 319! Row data: LINESTRING()
Missing coordinates at row 325! Row data: LINESTRING()
Missing coordinates at row 326! Row data: LINESTRING()
Missing coordinates at row 327! Row data: LINESTRING()
Missing co

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:

Missing coordinates at row 386! Row data: LINESTRING()
Missing coordinates at row 387! Row data: LINESTRING()
Missing coordinates at row 390! Row data: LINESTRING()
Missing coordinates at row 391! Row data: LINESTRING()
Missing coordinates at row 394! Row data: LINESTRING()
Missing coordinates at row 395! Row data: LINESTRING()
Missing coordinates at row 396! Row data: LINESTRING()
Missing coordinates at row 405! Row data: LINESTRING()
Missing coordinates at row 408! Row data: LINESTRING()
Missing coordinates at row 410! Row data: LINESTRING()
Missing coordinates at row 413! Row data: LINESTRING()
Missing coordinates at row 414! Row data: LINESTRING()
Missing coordinates at row 416! Row data: LINESTRING()
Missing coordinates at row 418! Row data: LINESTRING()
Missing coordinates at row 419! Row data: LINESTRING()
Missing coordinates at row 422! Row data: LINESTRING()
Missing coordinates at row 423! Row data: LINESTRING()
Missing coordinates at row 425! Row data: LINESTRING()
Missing co

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:IllegalArgumentException: point array must contain 0 or >1 elements

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encou

Missing coordinates at row 557! Row data: LINESTRING()
Missing coordinates at row 558! Row data: LINESTRING()
Missing coordinates at row 563! Row data: LINESTRING(-8.5661302 41.175915)
Missing coordinates at row 564! Row data: LINESTRING()
Missing coordinates at row 574! Row data: LINESTRING()
Missing coordinates at row 575! Row data: LINESTRING()
Missing coordinates at row 576! Row data: LINESTRING()
Missing coordinates at row 579! Row data: LINESTRING()
Missing coordinates at row 590! Row data: LINESTRING()
Missing coordinates at row 592! Row data: LINESTRING()
Missing coordinates at row 593! Row data: LINESTRING()
Missing coordinates at row 596! Row data: LINESTRING()
Missing coordinates at row 602! Row data: LINESTRING()
Missing coordinates at row 610! Row data: LINESTRING()
Missing coordinates at row 612! Row data: LINESTRING()
Missing coordinates at row 614! Row data: LINESTRING()
Missing coordinates at row 618! Row data: LINESTRING()
Missing coordinates at row 619! Row data: LIN

ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:shapely.geos:ParseException: Expected number but encountered ')'
ERROR:

Missing coordinates at row 780! Row data: LINESTRING()
Missing coordinates at row 787! Row data: LINESTRING()
Missing coordinates at row 793! Row data: LINESTRING()
Missing coordinates at row 799! Row data: LINESTRING()
Missing coordinates at row 803! Row data: LINESTRING()
Missing coordinates at row 820! Row data: LINESTRING()
Missing coordinates at row 822! Row data: LINESTRING()
Missing coordinates at row 824! Row data: LINESTRING()
Missing coordinates at row 832! Row data: LINESTRING()
Missing coordinates at row 833! Row data: LINESTRING()
Missing coordinates at row 837! Row data: LINESTRING()
Missing coordinates at row 838! Row data: LINESTRING()
Missing coordinates at row 850! Row data: LINESTRING()
Missing coordinates at row 855! Row data: LINESTRING()
Missing coordinates at row 857! Row data: LINESTRING()
Missing coordinates at row 858! Row data: LINESTRING()
Missing coordinates at row 859! Row data: LINESTRING()
Missing coordinates at row 860! Row data: LINESTRING()
Missing co

In [26]:
# One list of (y, x) tuples per parsed route; preview the first rows.
routesdf = get_lineString_coords(routes)
routesdf.head()

0    [(41.141456, -8.6186233), (41.14133, -8.618346...
1    [(41.159752, -8.6398592), (41.15979, -8.640096...
2    [(41.141371, -8.6135064), (41.14139, -8.613471...
3    [(41.151899, -8.5747537), (41.151865, -8.57479...
4    [(41.180528, -8.6457599), (41.180475, -8.64578...
dtype: object

In [27]:
# Re-enable line drawing (an earlier cell set this flag to False).
PLOT_LINES = True
fmap = folium.Map(get_porto_coords(), zoom_start=14, tiles=ftiles[1], zoom_control = False)

# Draw the first ten matched routes, one feature group and one colour each.
for idx, lcoord in enumerate(routesdf[:10]):
    feature_group = folium.FeatureGroup()
    if PLOT_LINES:
        feature_group.add_child(folium.PolyLine(lcoord, color=colors_list[idx], weight=3))
        
    if PLOT_POINTS:
        for p in lcoord:
            feature_group.add_child(folium.Circle(p, radius=1, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
    fmap.add_child(feature_group)    

# Last expression of the cell, so the notebook renders the map.
fmap

In [None]:
# Save whatever map `fmap` currently refers to as <results_path>/all.png.
if SAVE_IMAGES:
    save_fmap_plot(fmap, os.path.join(results_path, 'all'))

In [38]:
# Render the first matched route on its own map, fitted to its bounding box.
if SAVE_INDIVIDUAL:
    for idx, lcoord in enumerate(routesdf[:1]):
        fmap = folium.Map(get_porto_coords(), zoom_start=13.5, tiles=ftiles[1], zoom_control = False)
        bounds = get_xy_bounds(lcoord)
        fmap.fit_bounds(bounds)
        feature_group = folium.FeatureGroup()
        if PLOT_LINES:
            feature_group.add_child(folium.PolyLine(lcoord, color=colors_list[idx], weight=3))
        if PLOT_POINTS:
            for p in lcoord:
                feature_group.add_child(folium.Circle(p, radius=1, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
        fmap.add_child(feature_group)
        # A bare `fmap` expression inside an if/for block is evaluated and
        # discarded, so the original cell showed nothing. display() is the
        # IPython built-in that renders the object explicitly.
        display(fmap)