In [136]:
import os,  io
import folium

from shapely.geometry import Point, LineString
from pyproj import CRS
import geopandas as gpd

from PIL import Image

import pandas as pd

import json

import shapely.wkt

In [137]:
# Input files, all resolved relative to the local data directory.
data_path = 'data'

mtrain_file = os.path.join(data_path, 'improved_train_1000.csv')
mm_file = os.path.join(data_path, 'improved_match_result.csv')
train_file = os.path.join(data_path, 'train_1000.csv')

# Directory that receives the exported PNG maps.
results_path = 'output/task_6'
# exist_ok=True avoids the check-then-create race of testing os.path.exists first
# and keeps the cell idempotent on re-run.
os.makedirs(results_path, exist_ok=True)
# One colour per plotted trajectory; the plotting cells index this list by trajectory number.
colors_list = ['crimson', 'blue',  'deepskyblue', 'purple', 'orange', 'green', 'orangered','cyan', 'magenta', 'teal']
# Tile-set names passed to folium.Map(tiles=...).
# NOTE(review): availability of these built-in names depends on the installed folium version — confirm.
ftiles = ['OpenStreetMap','Stamen Terrain','Stamen Toner','Mapbox Bright','Mapbox Control Room']

In [138]:
# Draw each trajectory as a folium PolyLine in the plotting cells.
PLOT_LINES = True
# Draw every coordinate of a trajectory as a small folium Circle.
PLOT_POINTS = False
# Export the combined map to a PNG file.
SAVE_IMAGES = True
# Export one PNG file per trajectory.
SAVE_INDIVIDUAL = True

In [139]:

def get_train1000_df(file_path, dedup=False):
    """Load the ``POLYLINE`` column of a trajectory CSV as parsed Python lists.

    Parameters
    ----------
    file_path : str or path-like
        CSV file containing a ``POLYLINE`` column whose cells are JSON arrays.
    dedup : bool, default False
        When True, drop every point that is identical to the point directly
        before it in the same trajectory and print how many trajectories
        were changed.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (``POLYLINE``) holding one list per row.
    """
    train1000 = pd.read_csv(file_path,
                            sep=",", usecols=['POLYLINE'],
                            converters={'POLYLINE': json.loads})
    if dedup:
        cleaned_list = []
        dedup_ctr = 0
        for k in train1000["POLYLINE"]:
            dedup_ks = [p for i, p in enumerate(k) if i == 0 or p != k[i-1]]
            # Compare against the de-duplicated list; the original compared
            # against the boolean `dedup` flag, which raised TypeError.
            if len(dedup_ks) != len(k):
                dedup_ctr += 1
            cleaned_list.append(dedup_ks)

        print(f"Removed duplicated points from {dedup_ctr} trajectories.")
        train1000["POLYLINE"] = cleaned_list

    return train1000

In [140]:


# Coordinate reference system attached to every GeoDataFrame built in this notebook.
crs = CRS("EPSG:4326")
def get_lineString_gdf(train_df, verbose=True):
    """Build a GeoDataFrame of LineStrings from the ``POLYLINE`` column.

    Rows with no coordinates or with a single coordinate cannot form a
    LineString and are skipped, so the result can have fewer rows than
    ``train_df`` and its index does not map back to the input rows.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame with a ``POLYLINE`` column of coordinate sequences.
    verbose : bool, default True
        Print a message for every skipped row and the final LineString count.

    Returns
    -------
    geopandas.GeoDataFrame
        One ``geometry`` column of LineStrings, using the module-level ``crs``.
    """
    linestr_obj = []
    for idx, coords in enumerate(train_df['POLYLINE']):
        if len(coords) > 1:
            # LineString accepts the coordinate sequence directly; no need for
            # intermediate Point objects.
            linestr_obj.append(LineString(coords))
        elif len(coords) == 1:
            if verbose:
                print(f"Insufficient points to form LineString. Coords only have {len(coords)} point.")
        elif verbose:
            print(f"Missing coordinates at row {idx}!")
            # `idx` is positional, so print the value at hand rather than
            # looking it up by label (which breaks on a non-default index).
            print(coords)

    if verbose:
        print(f"Total number of LineString: {len(linestr_obj)}")

    # Passing geometry= keeps construction valid even when no row survived;
    # a CRS cannot be assigned to a frame that has no geometry column.
    ls_gdf = gpd.GeoDataFrame(geometry=linestr_obj, crs=crs)
    return ls_gdf


In [141]:
def get_coords(geom):
    """Return the coordinates of ``geom`` as a list of ``(y, x)`` tuples.

    Every ``(x, y)`` pair read from ``geom.coords`` is emitted with its two
    components swapped.
    """
    swapped = []
    for x, y in geom.coords:
        swapped.append((y, x))
    return swapped
def get_lineString_coords(ls_gdf):
    """Apply ``get_coords`` to the ``geometry`` of every row of ``ls_gdf``.

    Returns the result of the row-wise ``apply`` (one coordinate list per row).
    """
    def _row_to_coords(row):
        return get_coords(row.geometry)

    return ls_gdf.apply(_row_to_coords, axis=1)

In [142]:
# NOTE(review): `train1000` is loaded here but not used by any visible cell — confirm it is still needed.
train1000 = get_train1000_df(train_file)
# Load the trajectories from the "improved" CSV and convert them to LineStrings;
# rows with fewer than two points are reported and left out.
train1000m = get_train1000_df(mtrain_file)
mlsdf = get_lineString_gdf(train1000m)
# One list of (y, x) tuples per LineString.
mlcoords = get_lineString_coords(mlsdf)
# NOTE(review): `tlcoords` is only an alias of `mlcoords`, not data derived from `train1000` — confirm this is intended.
tlcoords = mlcoords

Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Insufficient points to form LineString. Coords only have 1 point.
Missing coordinates at row 762!
[]
Insufficient points to form LineString. Coords only have 1 point.
Total number of LineString: 990


In [143]:
def get_global_xy_bounds(gdf, adjustx=0., adjusty=0.):
    """Return ``[[min0, min1], [max0, max1]]`` over all points of all coordinate lists.

    ``gdf`` is an iterable of coordinate lists; component 0 and component 1
    of every point are reduced independently. The minima are capped at 9999
    and the maxima floored at -9999. ``adjustx`` / ``adjusty`` are added to
    the minima and subtracted from the maxima, shrinking the box.
    """
    firsts = [pt[0] for line in gdf for pt in line]
    seconds = [pt[1] for line in gdf for pt in line]

    low_x = min(9999, min(firsts))
    high_x = max(-9999, max(firsts))
    low_y = min(9999, min(seconds))
    high_y = max(-9999, max(seconds))

    lower_corner = [low_x + adjustx, low_y + adjusty]
    upper_corner = [high_x - adjustx, high_y - adjusty]
    return [lower_corner, upper_corner]

In [144]:
def get_porto_coords(adjusted=True):
    """Return the ``(lat, lon)`` tuple used as the map centre.

    With ``adjusted`` truthy the latitude is shifted by +0.01 and the
    longitude by -0.02; otherwise the base coordinates are returned as-is.
    """
    base_lat, base_lon = 41.1496100, -8.6109900
    if not adjusted:
        return (base_lat, base_lon)
    return (base_lat + 0.01, base_lon - 0.02)

In [145]:

def get_xy_bounds(lcoord):
    """Return ``[[min0, min1], [max0, max1]]`` for one list of 2-component points.

    Component 0 and component 1 are reduced independently. The minima are
    capped at 9999 and the maxima floored at -9999.
    """
    firsts = [pt[0] for pt in lcoord]
    seconds = [pt[1] for pt in lcoord]

    low_x = min(9999, min(firsts))
    high_x = max(-9999, max(firsts))
    low_y = min(9999, min(seconds))
    high_y = max(-9999, max(seconds))

    return [[low_x, low_y], [high_x, high_y]]


def save_fmap_plot(fmap, filename) -> None:
    """Render ``fmap`` to PNG and write it to ``filename`` + ``".png"``.

    The PNG bytes come from ``fmap._to_png(5)`` and are written out through
    a PIL image.
    """
    png_bytes = fmap._to_png(5)
    buffer = io.BytesIO(png_bytes)
    Image.open(buffer).save(filename+".png")

In [146]:
# Trajectory indices to export as individual point plots.
# NOTE(review): presumably "prob" means problematic trajectories — confirm.
prob_traj = [2, 3]
if SAVE_INDIVIDUAL:
    for idx in prob_traj:
        # Looked up in `tlcoords`, which only holds trajectories that formed a LineString,
        # so `idx` is not necessarily the CSV row number.
        lcoord = tlcoords[idx]
        fmap = folium.Map(get_porto_coords(), zoom_start=13.5, tiles=ftiles[1], zoom_control = False)
        # Fit the view to this trajectory's bounding box.
        bounds = get_xy_bounds(lcoord)
        fmap.fit_bounds(bounds)
        feature_group = folium.FeatureGroup()
        # One small filled circle per point; the colour is picked by trajectory index,
        # so `idx` must stay below len(colors_list).
        for p in lcoord:
            feature_group.add_child(folium.Circle(p, radius=1, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
        fmap.add_child(feature_group)
        save_fmap_plot(fmap, os.path.join(results_path, f'6fmap_gps_{idx}'))

In [147]:
def get_mm_results_df(filepath):
    """Read only the ``mgeom`` column of the comma-separated file at ``filepath``."""
    return pd.read_csv(filepath, usecols=['mgeom'], sep=",")

In [148]:
# Load the `mgeom` column of the map-matching result file and preview the first rows.
mm_results = get_mm_results_df(mm_file) 
mm_results.head()

Unnamed: 0,mgeom
0,"LINESTRING(-8.6186233 41.141456,-8.6183463 41...."
1,"LINESTRING(-8.6398592 41.159752,-8.6400962 41...."
2,"LINESTRING(-8.6135064 41.141371,-8.6134714 41...."
3,"LINESTRING(-8.5747537 41.151899,-8.5747993 41...."
4,"LINESTRING(-8.6457599 41.180528,-8.645788 41.1..."


In [149]:
# Show row count, non-null count and dtype of the loaded match results.
mm_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   mgeom   1000 non-null   object
dtypes: object(1)
memory usage: 7.9+ KB


In [150]:

def get_geom_gdf(mm_results, verbose=True):
    """Parse the WKT strings in ``mm_results['mgeom']`` into a GeoDataFrame.

    Rows whose WKT cannot be parsed (for example ``LINESTRING()``) are
    skipped, so the result can have fewer rows than ``mm_results`` and its
    index does not map back to the input rows.

    Parameters
    ----------
    mm_results : pandas.DataFrame
        Frame with an ``mgeom`` column of WKT strings.
    verbose : bool, default True
        Print a message for every skipped row (row numbers are 1-based) and
        the final geometry count.

    Returns
    -------
    geopandas.GeoDataFrame
        One ``geometry`` column of parsed geometries, using the module-level ``crs``.
    """
    linestr_obj = []
    for idx, ls in enumerate(mm_results['mgeom']):
        try:
            line = shapely.wkt.loads(ls)
            if len(line.coords) == 1 and verbose:
                print(f"Only 1 point at row {idx+1}")
            linestr_obj.append(line)
        # `Exception` instead of a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate. The exception class raised for invalid
        # WKT differs between shapely versions, hence no narrower type.
        except Exception:
            if verbose:
                print(f"Missing coordinates at row {idx+1}! Row data: {ls}")

    if verbose:
        print(f"Total number of LineString: {len(linestr_obj)}")

    # Passing geometry= keeps construction valid even when every row failed
    # to parse; a CRS cannot be assigned to a frame with no geometry column.
    geom_gdf = gpd.GeoDataFrame(geometry=linestr_obj, crs=crs)
    return geom_gdf

In [151]:
# Parse the WKT strings into geometries; rows that fail to parse are reported and left out.
routes = get_geom_gdf(mm_results)

ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseExcepti

Missing coordinates at row 10! Row data: LINESTRING()
Missing coordinates at row 17! Row data: LINESTRING()
Missing coordinates at row 25! Row data: LINESTRING()
Missing coordinates at row 27! Row data: LINESTRING()
Missing coordinates at row 34! Row data: LINESTRING()
Missing coordinates at row 36! Row data: LINESTRING()
Missing coordinates at row 45! Row data: LINESTRING()
Missing coordinates at row 46! Row data: LINESTRING()
Missing coordinates at row 47! Row data: LINESTRING()
Missing coordinates at row 58! Row data: LINESTRING()
Missing coordinates at row 63! Row data: LINESTRING()
Missing coordinates at row 70! Row data: LINESTRING()
Missing coordinates at row 75! Row data: LINESTRING()
Missing coordinates at row 81! Row data: LINESTRING()
Missing coordinates at row 87! Row data: LINESTRING()
Missing coordinates at row 88! Row data: LINESTRING()
Missing coordinates at row 89! Row data: LINESTRING()
Missing coordinates at row 90! Row data: LINESTRING()
Missing coordinates at row 9

ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseExcepti

Missing coordinates at row 318! Row data: LINESTRING()
Missing coordinates at row 319! Row data: LINESTRING()
Missing coordinates at row 325! Row data: LINESTRING()
Missing coordinates at row 326! Row data: LINESTRING()
Missing coordinates at row 327! Row data: LINESTRING()
Missing coordinates at row 328! Row data: LINESTRING()
Missing coordinates at row 333! Row data: LINESTRING()
Missing coordinates at row 334! Row data: LINESTRING()
Missing coordinates at row 337! Row data: LINESTRING()
Missing coordinates at row 344! Row data: LINESTRING()
Missing coordinates at row 345! Row data: LINESTRING()
Missing coordinates at row 346! Row data: LINESTRING()
Missing coordinates at row 347! Row data: LINESTRING()
Missing coordinates at row 349! Row data: LINESTRING()
Missing coordinates at row 350! Row data: LINESTRING()
Missing coordinates at row 351! Row data: LINESTRING()
Missing coordinates at row 354! Row data: LINESTRING()
Missing coordinates at row 355! Row data: LINESTRING()
Missing co

ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseException: Expected number but encountered ')'
ParseExcepti

Missing coordinates at row 476! Row data: LINESTRING()
Missing coordinates at row 479! Row data: LINESTRING()
Missing coordinates at row 486! Row data: LINESTRING()
Missing coordinates at row 488! Row data: LINESTRING()
Missing coordinates at row 489! Row data: LINESTRING()
Missing coordinates at row 496! Row data: LINESTRING()
Missing coordinates at row 500! Row data: LINESTRING()
Missing coordinates at row 502! Row data: LINESTRING()
Missing coordinates at row 505! Row data: LINESTRING()
Missing coordinates at row 506! Row data: LINESTRING()
Missing coordinates at row 515! Row data: LINESTRING()
Missing coordinates at row 517! Row data: LINESTRING()
Missing coordinates at row 519! Row data: LINESTRING()
Missing coordinates at row 522! Row data: LINESTRING()
Missing coordinates at row 538! Row data: LINESTRING()
Missing coordinates at row 539! Row data: LINESTRING()
Missing coordinates at row 540! Row data: LINESTRING()
Missing coordinates at row 544! Row data: LINESTRING()
Missing co

In [152]:
# Convert every route geometry to a list of (y, x) tuples and preview the first rows.
routesdf = get_lineString_coords(routes)
routesdf.head()

0    [(41.141456, -8.6186233), (41.14133, -8.618346...
1    [(41.159752, -8.6398592), (41.15979, -8.640096...
2    [(41.141371, -8.6135064), (41.14139, -8.613471...
3    [(41.151899, -8.5747537), (41.151865, -8.57479...
4    [(41.180528, -8.6457599), (41.180475, -8.64578...
dtype: object

In [153]:
# NOTE(review): this re-assignment overrides the PLOT_LINES value set in the configuration cell — confirm it is intended.
PLOT_LINES = True
# Single map that collects the first ten routes.
fmap = folium.Map(get_porto_coords(), zoom_start=14, tiles=ftiles[1], zoom_control = False)

# The slice is limited to ten routes, matching the ten entries of colors_list.
for idx, lcoord in enumerate(routesdf[:10]):
    feature_group = folium.FeatureGroup()
    if PLOT_LINES:
        feature_group.add_child(folium.PolyLine(lcoord, color=colors_list[idx], weight=3))
        
    if PLOT_POINTS:
        for p in lcoord:
            feature_group.add_child(folium.Circle(p, radius=1, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
    fmap.add_child(feature_group)    

# Bare expression so the notebook renders the map as the cell output.
fmap

In [154]:
# Export the current `fmap` as 'improved_all.png' in the results directory.
if SAVE_IMAGES:
    save_fmap_plot(fmap, os.path.join(results_path, 'improved_all'))

In [155]:
# Export each of the first ten routes as its own PNG.
# NOTE: the loop rebinds the notebook-level `fmap`, so after this cell it refers to the
# last per-route map rather than the combined map.
if SAVE_INDIVIDUAL:
    for idx, lcoord in enumerate(routesdf[:10]):
        fmap = folium.Map(get_porto_coords(), zoom_start=13.5, tiles=ftiles[1], zoom_control = False)
        # Fit the view to this route's bounding box.
        bounds = get_xy_bounds(lcoord)
        fmap.fit_bounds(bounds)
        feature_group = folium.FeatureGroup()
        if PLOT_LINES:
            feature_group.add_child(folium.PolyLine(lcoord, color=colors_list[idx], weight=3))
        if PLOT_POINTS:
            for p in lcoord:
                feature_group.add_child(folium.Circle(p, radius=1, color=colors_list[idx], fill=True, fill_color=colors_list[idx], fill_opacity=1))
        fmap.add_child(feature_group)
        save_fmap_plot(fmap, os.path.join(results_path, f'fmap_route_{idx}'))