In [288]:
import geopandas as gpd
import pandas as pd

# Read the activity points and index the frame by point id; `id` also stays
# available as a regular column (drop=False).
data = gpd.read_file('./activity_points.geojson')
data.set_index('id', drop=False, inplace=True)
# Parse both time columns into pandas datetimes.
data['timestamp'] = pd.to_datetime(data['timestamp'])
data['created_at'] = pd.to_datetime(data['created_at'])
print('total data points:', data.shape[0])
# Ad-hoc inspection snippets, left disabled:
#print(data.crs)
#print(data.dtypes)
#data.accuracy.sort_values().tail()
#data.speed

total data points: 380


In [289]:
## Looking at date based properties.
print('Min timestamp:', data.timestamp.min(), 'Max timestamp:', data.timestamp.max())
print('Min created_at:', data.created_at.min(), 'Max created_at:', data.created_at.max())
# created_at looks earlier than timestamp overall; count the rows where it is not.
# NOTE(review): in the recorded run the min/max of the two columns are roughly
# three hours apart -- possibly a timezone offset between them; TODO confirm.
print('created_at larger than timestamp:', int((data.created_at > data.timestamp).sum()))
# In the recorded run 11 of the 380 points have created_at later than timestamp.
# These are ignored and created_at is assumed to be the time of the coordinate.

# Are points with similar times near each other? For every point, count the
# points created within the following hour that lie within a small coordinate
# tolerance of it.

# Coordinate tolerance for "near". This equals the tolerance that
# geom_almost_equals(other, decimal=2.5) applied (0.5 * 10**-decimal), i.e.
# about 0.0016 coordinate units per axis.
# NOTE(review): assuming the coordinates are lon/lat degrees, that is on the
# order of 175 m rather than the ~300 m originally noted -- TODO confirm the CRS.
NEAR_TOLERANCE = 0.5 * 10 ** -2.5

# Add a period column: created_at shifted one minute into the future, expressed
# as a 60-minute period. The period starts on the whole minute of the shifted
# time, so a point never falls inside its own period.
data['period'] = pd.DatetimeIndex(data.created_at).shift(1, freq='min').to_period(freq='60min')
near_points_series = pd.Series(0, index=data.index)
# For every row, select the points created inside its period (both bounds
# inclusive) and count those whose geometry matches within the tolerance.
for row in data.itertuples():
    in_period = data.created_at.between(row.period.start_time, row.period.end_time)
    candidates = data[in_period]
    if candidates.empty:
        # Nothing was created in this window; the count stays 0.
        continue
    is_near = candidates.geom_equals_exact(row.geometry, NEAR_TOLERANCE)
    near_points_series.loc[row.id] = int(is_near.sum())

data['near_points'] = near_points_series
print('Added points near other points')

Min timestamp: 2015-11-11 09:03:01 Max timestamp: 2015-12-14 22:57:35
Min created_at: 2015-11-11 06:03:12 Max created_at: 2015-12-14 19:57:38
created_at larger than timestamp: 11


In [291]:
# Show the points that have more than two near points.
data.loc[data['near_points'].gt(2)]

Unnamed: 0_level_0,accuracy,altitude,bearing,created_at,current_dominating_activity,current_dominating_activity_confidence,feature,geometry,id,previous_dominating_activity,previous_dominating_activity_confidence,route,speed,timestamp,period,near_points
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
37,12.3,25.6,141,2015-11-12 05:08:02,still,85,passive_tracking,POINT (39.2885333 -6.813225),37,in_vehicle,62,,141,2015-11-12 07:34:23,2015-11-12 05:09,5
38,96.0,36.7,81,2015-11-12 05:08:02,still,77,passive_tracking,POINT (39.2883233 -6.8127149),38,in_vehicle,46,,81,2015-11-12 07:51:47,2015-11-12 05:09,5
40,96.0,8.9,169,2015-11-12 05:08:02,in_vehicle,46,passive_tracking,POINT (39.28868 -6.813395),40,still,56,,169,2015-11-12 08:07:53,2015-11-12 05:09,5
178,36.0,0.0,0,2015-11-29 16:22:51,,0,passive_tracking,POINT (39.2792084 -6.7656276),178,in_vehicle,62,,0,2015-11-29 19:22:49,2015-11-29 16:23,3
185,40.5,0.0,0,2015-11-29 17:00:24,,0,passive_tracking,POINT (39.2800122 -6.7652424),185,on_bicycle,17,,0,2015-11-29 20:00:14,2015-11-29 17:01,3
186,13.9,0.0,0,2015-11-29 16:58:36,on_foot,37,passive_tracking,POINT (39.2796774 -6.7654564),186,on_bicycle,17,,0,2015-11-29 19:58:32,2015-11-29 16:59,4
336,40.5,0.0,0,2015-12-04 09:21:35,,0,passive_tracking,POINT (39.2877971 -6.8123043),336,still,77,,0,2015-12-04 12:17:08,2015-12-04 09:22,3
402,5.0,-21.0,355,2015-12-05 13:01:28,,0,passive_tracking,POINT (39.311293 -6.8284456),402,still,62,,355,2015-12-05 16:01:22,2015-12-05 13:02,3
406,5.0,-21.0,355,2015-12-05 14:32:44,,0,passive_tracking,POINT (39.311293 -6.8284456),406,still,69,,355,2015-12-05 17:32:38,2015-12-05 14:33,3
411,5.0,-21.0,355,2015-12-05 13:47:57,still,40,passive_tracking,POINT (39.311293 -6.8284456),411,,0,,355,2015-12-05 16:47:54,2015-12-05 13:48,4


In [292]:
#data

In [293]:
# Looking at duplicate data: for each column, count the rows whose value
# already occurred in an earlier row.
print('Duplicate timestamps:', int(data.timestamp.duplicated().sum()))
print('Duplicate created_at:', int(data.created_at.duplicated().sum()))
# Compare locations by their full WKB representation. A fixed-width 32-byte
# string key would be truncated, so geometries whose text is longer than
# 32 bytes and differs only after that point would be reported as duplicates.
# Missing geometries are keyed as None and therefore count as duplicates of
# each other.
location_keys = pd.Series(
    [None if geom is None else geom.wkb for geom in data.geometry],
    index=data.index,
)
print('Duplicate locations:', int(location_keys.duplicated().sum()))

Duplicate timestamps: 12
Duplicate created_at: 45
Duplicate locations: 49


In [296]:
# Preparing to save the data: look up the GeoDataFrame.to_file signature and
# list the drivers Fiona reports as supported. Nothing is written to disk in
# this cell.
import fiona

help(data.to_file)
fiona.supported_drivers

Help on method to_file in module geopandas.geodataframe:

to_file(filename, driver='ESRI Shapefile', schema=None, **kwargs) method of geopandas.geodataframe.GeoDataFrame instance
    Write this GeoDataFrame to an OGR data source
    
    A dictionary of supported OGR providers is available via:
    >>> import fiona
    >>> fiona.supported_drivers
    
    Parameters
    ----------
    filename : string
        File path or file handle to write to.
    driver : string, default 'ESRI Shapefile'
        The OGR format driver used to write the vector file.
    schema : dict, default None
        If specified, the schema dictionary is passed to Fiona to
        better control how the file is written.
    
    The *kwargs* are passed to fiona.open and can be used to write
    to multi-layer data, store data within archives (zip files), etc.



{'ARCGEN': 'r',
 'AeronavFAA': 'r',
 'BNA': 'raw',
 'DGN': 'raw',
 'DXF': 'raw',
 'ESRI Shapefile': 'raw',
 'GPSTrackMaker': 'raw',
 'GPX': 'raw',
 'GeoJSON': 'rw',
 'Idrisi': 'r',
 'MapInfo File': 'raw',
 'OpenFileGDB': 'r',
 'PCIDSK': 'r',
 'SEGY': 'r',
 'SUA': 'r'}