In [1]:

import pandas as pd
from geopandas import GeoDataFrame
from shapely.geometry import Point
import fiona

# Location of the raw GPS CSV, then a two-row preview to check the columns.
csv_file = '/home/bidur/map_match_gps_data/raw_data/3368.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)
df.head(n=2)

Unnamed: 0,ap_id,timestamp,latitude,longitude
0,3368610588,2019-07-03 11:17:30,14.2918,120.925776
1,3368610588,2019-07-03 15:57:30,14.2918,120.925776


### Unrealistic timestamp values for the same position

In [2]:
# Group identical coordinates next to each other so their timestamps can be compared.
df.sort_values(by=['latitude', 'longitude'])

Unnamed: 0,ap_id,timestamp,latitude,longitude
18,3368610588,2019-07-03 14:27:30,14.291069,120.927235
5,3368610588,2019-07-03 14:26:30,14.291081,120.927256
25,3368610588,2019-07-03 14:26:30,14.291083,120.927258
7,3368610588,2019-07-03 14:08:30,14.291151,120.926815
9,3368610588,2019-07-03 15:11:30,14.291308,120.9247
10,3368610588,2019-07-03 13:23:30,14.291308,120.9247
24,3368610588,2019-07-03 15:39:30,14.291308,120.9247
0,3368610588,2019-07-03 11:17:30,14.2918,120.925776
1,3368610588,2019-07-03 15:57:30,14.2918,120.925776
2,3368610588,2019-07-03 16:42:30,14.2918,120.925776


### Build a GeoPandas GeoDataFrame

In [3]:

# Shapely points are (x, y), i.e. (longitude, latitude).
geometry = [Point(lon, lat) for lon, lat in zip(df.longitude, df.latitude)]
# The coordinates are plain latitude/longitude degrees, i.e. WGS 84 = EPSG:4326
# (https://epsg.io/4326). The previous value 'epsg:4362' was a transposed code,
# and the {'init': ...} dict form is deprecated by pyproj (it emits a
# FutureWarning); an authority string is the supported spelling.
crs = 'EPSG:4326'
geo_df = GeoDataFrame(df, crs=crs, geometry=geometry)

  return _prepare_from_string(" ".join(pjargs))


### sort by nearest location
- https://stackoverflow.com/questions/31456683/plot-line-from-gps-points

In [4]:
import numpy as np
from scipy.spatial.distance import pdist, squareform

def find_gps_sorted(xy_coord, k0=0):
    """Order points into one continuous path by greedy nearest-neighbour search.

    Starting at index ``k0``, the path repeatedly jumps to the closest point
    that has not been visited yet. Distances are Euclidean on the raw
    coordinate values; ties are resolved in favour of the lowest index.

    Parameters
    ----------
    xy_coord : array-like, shape (N, D)
        Point coordinates, one row per point. Converted with ``np.asarray``,
        so nested lists are accepted as well as arrays.
    k0 : int, optional
        Index of the point the path starts from (default 0).

    Returns
    -------
    sorted_order : numpy.ndarray of int, shape (N,)
        ``sorted_order[i]`` is the row index of the point visited at step ``i``.
    xy_sorted : numpy.ndarray, shape (N, D)
        ``xy_coord`` re-ordered along the path (``xy_coord[sorted_order]``).

    Notes
    -----
    The full N x N distance matrix is built up front, so memory grows
    quadratically with the number of points.
    """
    xy_coord = np.asarray(xy_coord)
    n_points = len(xy_coord)

    # `np.int` was removed in NumPy 1.24; the builtin `int` yields the same dtype.
    sorted_order = np.zeros(n_points, dtype=int)
    if n_points == 0:
        # Nothing to order; avoid indexing into empty arrays below.
        return sorted_order, xy_coord[sorted_order]

    distance_matrix = squareform(pdist(xy_coord, metric='euclidean'))
    unvisited = np.ones(n_points, dtype=bool)

    current = k0
    for step in range(n_points):
        sorted_order[step] = current
        unvisited[current] = False

        candidates = np.flatnonzero(unvisited)
        if candidates.size == 0:
            break

        # Closest point that is still unvisited (first minimum wins on ties).
        nearest = np.argmin(distance_matrix[current, candidates])
        current = candidates[nearest]

    return sorted_order, xy_coord[sorted_order]

In [5]:
# Pull the coordinates out as a plain array with columns (longitude, latitude),
# then walk them greedily starting from the first row.
xy_coord2 = df.loc[:, ["longitude", "latitude"]].to_numpy()
sorted_order, xy_coord_sorted = find_gps_sorted(xy_coord2, 0)

In [20]:
# sorted_order[i] is the row visited at step i, so assigning it directly would
# label row i with *another* row's index. The inverse permutation (argsort of a
# permutation) gives each row its own position along the path.
df['order'] = np.argsort(sorted_order)
df.head()

Unnamed: 0,ap_id,timestamp,latitude,longitude,geometry,order
0,3368610588,2019-07-03 11:17:30,14.2918,120.925776,POINT (120.92578 14.29180),0
1,3368610588,2019-07-03 15:57:30,14.2918,120.925776,POINT (120.92578 14.29180),1
2,3368610588,2019-07-03 16:42:30,14.2918,120.925776,POINT (120.92578 14.29180),2
3,3368610588,2019-07-03 10:47:30,14.2918,120.925776,POINT (120.92578 14.29180),3
4,3368610588,2019-07-03 14:25:30,14.2918,120.925776,POINT (120.92578 14.29180),4


In [19]:
# Display the coordinates in path order (columns: longitude, latitude).
xy_coord_sorted

array([[120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925776,  14.2918  ],
       [120.925059,  14.292225],
       [120.9247  ,  14.291308],
       [120.9247  ,  14.291308],
       [120.9247  ,  14.291308],
       [120.923983,  14.292702],
       [120.926815,  14.291151],
       [120.927235,  14.291069],
       [120.927256,  14.291081],
       [120.927258,  14.291083],
       [120.91871 ,  14.311615],
       [120.925128,  14.338867],
       [120.925128,  14.338867],
       [120.920755,  14.341276],
       [120.920755,  14.341276],
       [120.920755,  14.341276],
       [120.924321,  14.344976]])

In [17]:
# Write the frame to '3368_ordered' (no file extension) without the index column.
df.to_csv(path_or_buf='3368_ordered', index=False)

In [22]:
# Earliest and latest timestamp strings in the data.
# NOTE(review): min/max compare the column values as stored; if they are strings
# this is lexicographic and only chronological for a uniform format - confirm.
ts_start, ts_end = df['timestamp'].min(), df['timestamp'].max()

In [61]:
from datetime import datetime 
def get_timestamp_from_str(ts_str):
    """Parse a timestamp string into a ``datetime``.

    The format is assembled from the features present in the string, so all
    combinations of the following are accepted:

    * date/time separator: a space or ``T``
    * optional fractional seconds (``.ffffff``)
    * optional trailing ``Z``

    Examples: ``2019-07-03 11:17:30``, ``2019-07-03 11:17:30.250000``,
    ``2019-07-01T12:37:25Z``, ``2019-07-01T12:37:25.5Z``.

    Parameters
    ----------
    ts_str : str
        Timestamp text in one of the layouts above.

    Returns
    -------
    datetime.datetime
        A naive datetime; a trailing ``Z`` is matched literally and no
        timezone is attached.

    Raises
    ------
    ValueError
        If the string does not match the assembled format.
    """
    # Previously a '.' always selected the space-separated format, so ISO
    # strings with fractional seconds (e.g. '...T12:37:25.5Z') failed to parse.
    separator = 'T' if 'T' in ts_str else ' '
    fmt = '%Y-%m-%d' + separator + '%H:%M:%S'
    if '.' in ts_str:  # fractional seconds present
        fmt += '.%f'
    if ts_str.endswith('Z'):  # e.g. 2019-07-01T12:37:25Z
        fmt += 'Z'
    return datetime.strptime(ts_str, fmt)

ts1 = get_timestamp_from_str(ts_start)
ts2 = get_timestamp_from_str(ts_end)

# Spread the ordered points evenly over the observed time span: the first point
# gets ts1 and the last one ts2. With a single point there are no gaps to
# divide by, so the divisor is clamped to 1 to avoid a ZeroDivisionError.
n_points = len(xy_coord_sorted)
ts_interval = (ts2 - ts1) / max(n_points - 1, 1)
arr_ts = [ts1 + ts_interval * counter for counter in range(n_points)]
    

In [74]:
# Assemble the re-ordered track: coordinates in path order plus the evenly
# spaced synthetic timestamps.
ordered_df = pd.DataFrame(
    {
        'longitude': xy_coord_sorted[:, 0],
        'latitude': xy_coord_sorted[:, 1],
        'timestamp': arr_ts,
    }
)
# Cast the timestamp column to second resolution.
ordered_df['timestamp'] = ordered_df['timestamp'].astype('datetime64[s]')
# Constant identifier column for every row.
ordered_df['ap_id'] = 3368

In [75]:
# Display the re-ordered track with its interpolated timestamps.
ordered_df

Unnamed: 0,longitude,latitude,timestamp,ap_id
0,120.925776,14.2918,2019-07-03 00:14:30,3368
1,120.925776,14.2918,2019-07-03 01:10:22,3368
2,120.925776,14.2918,2019-07-03 02:06:15,3368
3,120.925776,14.2918,2019-07-03 03:02:08,3368
4,120.925776,14.2918,2019-07-03 03:58:01,3368
5,120.925776,14.2918,2019-07-03 04:53:54,3368
6,120.925776,14.2918,2019-07-03 05:49:46,3368
7,120.925776,14.2918,2019-07-03 06:45:39,3368
8,120.925776,14.2918,2019-07-03 07:41:32,3368
9,120.925776,14.2918,2019-07-03 08:37:25,3368


In [76]:
# Save the re-ordered, re-timestamped track without the index column.
ordered_df.to_csv(path_or_buf='3368_ordered_ts.csv', index=False)