In [1]:
import pandas as pd
import geopandas as gpd
import skmob
from skmob.preprocessing import filtering, compression
from skmob.tessellation import tilers
from shapely import wkt, MultiPolygon

In [2]:
column_names = ['userid', 'timecome','date','lat','long','count','timeleave','duration']
data = pd.read_csv('../../data/raw/stay_points_07/2017-07-02.txt', names=column_names)

In [4]:
to_tdf = data[['userid', 'lat', 'long', 'timecome']]
tdf = skmob.TrajDataFrame(to_tdf, latitude='lat', longitude='long', datetime='timecome', user_id='userid')
print(tdf.crs)
tdf.head()

{'init': 'epsg:4326'}


Unnamed: 0,uid,lat,lng,datetime
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10
1,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.787984,-95.592063,2017-07-02 06:28:41
2,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.788166,-95.592103,2017-07-02 12:34:14
3,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.058383,-95.390059,2017-07-02 10:36:48
4,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.065311,-95.430054,2017-07-02 11:56:01


In [27]:
tdf_test = tdf[0:10000]
tessellation_test = tilers.tiler.get("squared", base_shape='Houston, Texas', meters=5000)
tessellation_test.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [43]:
gdf = gpd.GeoDataFrame(tdf_test, geometry=gpd.points_from_xy(tdf_test.lat, tdf_test.lng))
gdf = gdf.set_crs('epsg:4326')

In [47]:
tessellation_test.head()

Unnamed: 0,tile_ID,geometry
0,0,"POLYGON ((-95.90974 30.00492, -95.90974 30.043..."
1,1,"POLYGON ((-95.90974 30.04381, -95.90974 30.082..."
2,2,"POLYGON ((-95.90974 30.08268, -95.90974 30.121..."
3,3,"POLYGON ((-95.86483 30.00492, -95.86483 30.043..."
4,4,"POLYGON ((-95.81991 29.81025, -95.81991 29.849..."


In [49]:
gdf.head()

Unnamed: 0,uid,lat,lng,datetime,geometry
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10,POINT (29.13787 -95.06345)
1,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.787984,-95.592063,2017-07-02 06:28:41,POINT (29.78798 -95.59206)
2,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.788166,-95.592103,2017-07-02 12:34:14,POINT (29.78817 -95.59210)
3,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.058383,-95.390059,2017-07-02 10:36:48,POINT (30.05838 -95.39006)
4,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.065311,-95.430054,2017-07-02 11:56:01,POINT (30.06531 -95.43005)


In [50]:
test = gpd.sjoin(gdf, tessellation_test, how='left', op='within')

  if await self.run_code(code, result, async_=asy):


In [51]:
test

Unnamed: 0,uid,lat,lng,datetime,geometry,index_right,tile_ID
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10,POINT (29.13787 -95.06345),,
1,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.787984,-95.592063,2017-07-02 06:28:41,POINT (29.78798 -95.59206),,
2,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.788166,-95.592103,2017-07-02 12:34:14,POINT (29.78817 -95.59210),,
3,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.058383,-95.390059,2017-07-02 10:36:48,POINT (30.05838 -95.39006),,
4,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.065311,-95.430054,2017-07-02 11:56:01,POINT (30.06531 -95.43005),,
...,...,...,...,...,...,...,...
9995,021c7c985e7b3627b7b5ef683d12d082655cb35fcb8546...,29.636211,-95.805768,2017-07-02 06:20:07,POINT (29.63621 -95.80577),,
9996,021c7c985e7b3627b7b5ef683d12d082655cb35fcb8546...,29.671327,-95.749123,2017-07-02 09:35:10,POINT (29.67133 -95.74912),,
9997,021c7c985e7b3627b7b5ef683d12d082655cb35fcb8546...,29.636183,-95.805904,2017-07-02 11:36:31,POINT (29.63618 -95.80590),,
9998,021c7c985e7b3627b7b5ef683d12d082655cb35fcb8546...,29.636202,-95.804921,2017-07-02 18:00:03,POINT (29.63620 -95.80492),,


In [28]:
fdf = tdf_test.to_flowdataframe(tessellation=tessellation_test, self_loops=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.sort_values(by=self.__operate_on(), ascending=True, inplace=True)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  fdf = tdf_test.to_flowdataframe(tessellation=tessellation_test, self_loops=True)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  tile_ids = gpd.sjoin(gdf, tessellation, how=how, op='within')[[constants.TILE_ID]]


In [6]:
bg_shapes = pd.read_csv('../../data/processed/bg_shapefile.csv')
bg_shapes = gpd.GeoDataFrame(bg_shapes, geometry=bg_shapes['geometry'].apply(wkt.loads))

print(bg_shapes.shape)
bg_shapes = bg_shapes.drop(bg_shapes[bg_shapes['geometry'].apply(lambda geom: isinstance(geom, MultiPolygon))].index)
print(bg_shapes.shape)

(4152, 12)
(4143, 12)


In [23]:
bg_shapes = pd.read_csv('../../data/processed/bg_shapefile.csv')
bg_shapes = gpd.GeoDataFrame(bg_shapes, geometry=bg_shapes['geometry'].apply(wkt.loads))

print(bg_shapes.shape)
bg_shapes = bg_shapes.drop(bg_shapes[bg_shapes['geometry'].apply(lambda geom: isinstance(geom, MultiPolygon))].index)
print(bg_shapes.shape)

bg_shapes.reset_index(inplace=True, drop=True)
bg_shapes.reset_index(inplace=True)

bg_shapes.rename(columns={'index':'tile_ID'}, inplace=True)
tessellation_columns = ['tile_ID', 'geometry']
bg_tessellation = bg_shapes[tessellation_columns]


bg_tessellation = bg_tessellation.set_crs('epsg:4326')
bg_tessellation.head()

(4152, 12)
(4143, 12)


Unnamed: 0,tile_ID,geometry
0,0,"POLYGON ((-95.56428 30.18810, -95.56368 30.192..."
1,1,"POLYGON ((-95.48293 30.28567, -95.48143 30.288..."
2,2,"POLYGON ((-95.51639 30.17621, -95.51416 30.178..."
3,3,"POLYGON ((-95.45590 30.29504, -95.45300 30.295..."
4,4,"POLYGON ((-95.39666 30.10288, -95.39003 30.108..."


In [25]:
fdf = tdf_test.to_flowdataframe(tessellation=bg_tessellation, self_loops=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.sort_values(by=self.__operate_on(), ascending=True, inplace=True)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  fdf = tdf_test.to_flowdataframe(tessellation=bg_tessellation, self_loops=True)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  tile_ids = gpd.sjoin(gdf, tessellation, how=how, op='within')[[constants.TILE_ID]]


ValueError: Inconsistency - origin and destination IDs must be present in the tessellation.

In [None]:
bg_tessellation.crs

In [None]:
fdf = tdf.to_flowdataframe(tessellation=bg_tessellation, self_loops=True)