In [1]:
import pandas as pd
import geopandas as gpd
import skmob
from skmob.preprocessing import filtering, compression
from skmob.tessellation import tilers

In [4]:
column_names = ['userid', 'timecome','date','lat','long','count','timeleave','duration']
data = pd.read_csv('../../data/raw/stay_points_07/2017-07-02.txt', names=column_names)

In [5]:
to_tdf = data[['userid', 'lat', 'long', 'timecome']]
tdf = skmob.TrajDataFrame(to_tdf, latitude='lat', longitude='long', datetime='timecome', user_id='userid')
tdf.head()

Unnamed: 0,uid,lat,lng,datetime
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10
1,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.787984,-95.592063,2017-07-02 06:28:41
2,00009827ea7c3bd4007e921ac8816407955a54deb76482...,29.788166,-95.592103,2017-07-02 12:34:14
3,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.058383,-95.390059,2017-07-02 10:36:48
4,0000bd2c0530443b29fee5a4a9adc2fcc5afc891821320...,30.065311,-95.430054,2017-07-02 11:56:01


In [30]:
bg_shapes = pd.read_csv('../../data/processed/bg_shapefile.csv')

In [16]:
from shapely import wkt

In [31]:
bg_shapes = gpd.GeoDataFrame(bg_shapes, geometry=bg_shapes['geometry'].apply(wkt.loads))

In [33]:
bg_shapes.reset_index(inplace=True)

In [36]:
bg_shapes.rename(columns={'index':'tile_ID'}, inplace=True)
tessellation_columns = ['tile_ID', 'geometry']
bg_tessellation = bg_shapes[tessellation_columns]
bg_tessellation.head()

Unnamed: 0,tile_ID,geometry
0,0,"POLYGON ((-95.56428 30.18810, -95.56368 30.192..."
1,1,"POLYGON ((-95.48293 30.28567, -95.48143 30.288..."
2,2,"POLYGON ((-95.51639 30.17621, -95.51416 30.178..."
3,3,"POLYGON ((-95.45590 30.29504, -95.45300 30.295..."
4,4,"POLYGON ((-95.39666 30.10288, -95.39003 30.108..."


In [72]:
tessellation_lg = tilers.tiler.get("squared", base_shape="Houston, Texas", meters=300)

In [73]:
tessellation_lg.shape

(25777, 2)

In [74]:
tessellation_lg.head()

Unnamed: 0,tile_ID,geometry
0,0,"POLYGON ((-95.90974 30.09356, -95.90974 30.095..."
1,1,"POLYGON ((-95.90974 30.09590, -95.90974 30.098..."
2,2,"POLYGON ((-95.90974 30.09823, -95.90974 30.100..."
3,3,"POLYGON ((-95.90974 30.10056, -95.90974 30.102..."
4,4,"POLYGON ((-95.90974 30.10289, -95.90974 30.105..."


In [77]:
bg_tessellation.shape

(4152, 2)

In [45]:
type(tessellation_lg.tile_ID[0])

str

In [78]:
tessellation_lg.drop("geometry", axis=1, inplace=True)

tessellation_lg = tessellation_lg.merge(bg_tessellation[["tile_ID", "geometry"]], on="tile_ID")

# Remove excess rows in TILES
tessellation_lg = tessellation_lg.iloc[:bg_tessellation.shape[0]]


In [81]:
tessellation_lg.head()

Unnamed: 0,tile_ID,geometry
0,0,"POLYGON ((-95.56428 30.18810, -95.56368 30.192..."
1,1,"POLYGON ((-95.48293 30.28567, -95.48143 30.288..."
2,2,"POLYGON ((-95.51639 30.17621, -95.51416 30.178..."
3,3,"POLYGON ((-95.45590 30.29504, -95.45300 30.295..."
4,4,"POLYGON ((-95.39666 30.10288, -95.39003 30.108..."


In [48]:
bg_tessellation.tile_ID = bg_tessellation.tile_ID.astype('str')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [86]:
from shapely.geometry import Polygon, Point

In [92]:
tdf.head()

Unnamed: 0,uid,lat,lng,datetime
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10
1354799,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.13787,-95.06342,2017-07-02 04:11:10
1190331,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.298177,-94.913601,2017-07-02 08:24:43
1478526,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.29817,-94.9136,2017-07-02 08:24:43
1190332,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.299064,-94.907767,2017-07-02 09:28:45


In [93]:
gdf = gpd.GeoDataFrame(tdf, geometry=gpd.points_from_xy(tdf.lng, tdf.lat), crs=tdf._crs)


  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [96]:
gdf.head()

Unnamed: 0,uid,lat,lng,datetime,geometry
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10,POINT (-95.06345 29.13787)
1354799,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.13787,-95.06342,2017-07-02 04:11:10,POINT (-95.06342 29.13787)
1190331,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.298177,-94.913601,2017-07-02 08:24:43,POINT (-94.91360 29.29818)
1478526,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.29817,-94.9136,2017-07-02 08:24:43,POINT (-94.91360 29.29817)
1190332,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.299064,-94.907767,2017-07-02 09:28:45,POINT (-94.90777 29.29906)


In [97]:
tile_ids = gpd.sjoin(gdf, bg_tessellation, how='left', op='within')

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: None

  tile_ids = gpd.sjoin(gdf, bg_tessellation, how='left', op='within')


In [111]:
d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}


[<MULTIPOLYGON (((-95.005 29.614, -94.997 29.615, -94.996 29.615, -95.004 29....>,
 <MULTIPOLYGON (((-94.978 29.684, -94.977 29.685, -94.976 29.685, -94.974 29....>,
 <MULTIPOLYGON (((-94.502 29.511, -94.501 29.512, -94.498 29.512, -94.493 29....>,
 <MULTIPOLYGON (((-94.915 29.273, -94.913 29.275, -94.909 29.274, -94.909 29....>,
 <MULTIPOLYGON (((-94.823 29.324, -94.823 29.325, -94.822 29.327, -94.822 29....>,
 <MULTIPOLYGON (((-94.643 29.474, -94.64 29.475, -94.64 29.473, -94.642 29.47...>,
 <MULTIPOLYGON (((-94.929 29.287, -94.923 29.288, -94.92 29.287, -94.921 29.2...>,
 <MULTIPOLYGON (((-94.786 29.551, -94.782 29.55, -94.785 29.549, -94.787 29.5...>,
 <MULTIPOLYGON (((-94.718 29.729, -94.717 29.732, -94.717 29.734, -94.714 29....>]

In [112]:
a = [i for i in range(len(multi))]
d2 = {'ind' : a, 'geometry' : multi}
d2

{'ind': [0, 1, 2, 3, 4, 5, 6, 7, 8],
 'geometry': [<MULTIPOLYGON (((-95.005 29.614, -94.997 29.615, -94.996 29.615, -95.004 29....>,
  <MULTIPOLYGON (((-94.978 29.684, -94.977 29.685, -94.976 29.685, -94.974 29....>,
  <MULTIPOLYGON (((-94.502 29.511, -94.501 29.512, -94.498 29.512, -94.493 29....>,
  <MULTIPOLYGON (((-94.915 29.273, -94.913 29.275, -94.909 29.274, -94.909 29....>,
  <MULTIPOLYGON (((-94.823 29.324, -94.823 29.325, -94.822 29.327, -94.822 29....>,
  <MULTIPOLYGON (((-94.643 29.474, -94.64 29.475, -94.64 29.473, -94.642 29.47...>,
  <MULTIPOLYGON (((-94.929 29.287, -94.923 29.288, -94.92 29.287, -94.921 29.2...>,
  <MULTIPOLYGON (((-94.786 29.551, -94.782 29.55, -94.785 29.549, -94.787 29.5...>,
  <MULTIPOLYGON (((-94.718 29.729, -94.717 29.732, -94.717 29.734, -94.714 29....>]}

In [114]:
gdf = gpd.GeoDataFrame(d2, crs="EPSG:4326")

In [117]:
gdf_of_points = gpd.GeoDataFrame(tdf, geometry=gpd.points_from_xy(tdf.lng, tdf.lat), crs=tdf._crs)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [118]:
the_culprits = gpd.sjoin(gdf_of_points, gdf, how='left', op='within')

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  the_culprits = gpd.sjoin(gdf_of_points, gdf, how='left', op='within')


In [120]:
the_culprits.ind.nunique()

8

In [100]:
new_data = tdf.merge(tile_ids, right_index=True, left_index=True)

Unnamed: 0,uid,lat,lng,datetime,geometry
0,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.137871,-95.063447,2017-07-02 04:11:10,POINT (-95.06345 29.13787)
1354799,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.13787,-95.06342,2017-07-02 04:11:10,POINT (-95.06342 29.13787)
1190331,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.298177,-94.913601,2017-07-02 08:24:43,POINT (-94.91360 29.29818)
1478526,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.29817,-94.9136,2017-07-02 08:24:43,POINT (-94.91360 29.29817)
1190332,000070ec83acc05efcd9216ef9cf8725e2bbd41c7a9d2c...,29.299064,-94.907767,2017-07-02 09:28:45,POINT (-94.90777 29.29906)


In [83]:
tdf.mapping(tessellation_lg)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


UnboundLocalError: cannot access local variable 'tile_ids' where it is not associated with a value

In [82]:
fdf = tdf.to_flowdataframe(tessellation=tessellation_lg, self_loops=True)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


UnboundLocalError: cannot access local variable 'tile_ids' where it is not associated with a value

In [102]:
tessellation_lg = tilers.tiler.get("squared", base_shape="Houston, Texas", meters=15000)

In [109]:
poly = 0
non_poly = 0
multi = []
for x in bg_tessellation.geometry:
    if isinstance(x, Polygon):
        poly += 1
    else :
        non_poly+= 1
        multi.append(x)

print(poly, non_poly)

4143 9


In [110]:
multi.plot()

AttributeError: 'list' object has no attribute 'plot'

In [104]:
if all(isinstance(x, Polygon) for x in bg_tessellation.geometry):
    print("YAY")

In [101]:
fdf = tdf.to_flowdataframe(tessellation=bg_tessellation, self_loops=True)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


UnboundLocalError: cannot access local variable 'tile_ids' where it is not associated with a value

In [None]:
fdf = skmob.FlowDataFrame.from_file("NY_commuting_flows_2011.csv",  
                                        tessellation=bg_tessellation,
                                        tile_id='tile_ID',
                                        sep=",")