In [7]:
from shapely.geometry import Point, Polygon
from shapely import wkt
import skmob
from skmob.preprocessing import filtering, detection
from skmob.utils.plot import plot_gdf
from skmob.tessellation import tilers
import pandas as pd
import geopandas as gpd
import webbrowser
import numpy as np
import networkx as nx
import datetime
import os

pd.set_option('display.max_columns', 500)

In [8]:
def toAdj(fdf, tess):
    names = list(tess.tile_ID.values)
    adj = pd.DataFrame(columns=names, index = names)
    for i, row in enumerate(fdf.itertuples(), 1):
         adj.at[str(row.origin), str(row.destination)] = row.flow
    return adj

In [9]:
def filter_tessellation_land(tessellation, shape_file_land):    
    tiles_in_land = gpd.sjoin(tessellation, shape_file_land, how='left', op='intersects')
    tiles_in_land = tiles_in_land.groupby(['tile_ID'],sort=False,as_index=False).first()
    #land = tiles_in_land.dropna().drop(["index_right","boro_code","boro_name","shape_area","shape_leng"],axis=1)
    #water = tiles_in_land[tiles_in_land['index_right'].isnull()].drop(["index_right","boro_code","boro_name","shape_area","shape_leng"],axis=1)    
    land = tiles_in_land.dropna()[['tile_ID', 'geometry']]
    water = tiles_in_land[tiles_in_land['index_right'].isnull()][['tile_ID', 'geometry']]     
    crs = {'init': 'epsg:4326'}
    land = gpd.GeoDataFrame(land, crs=crs, geometry='geometry')
    water = gpd.GeoDataFrame(water, crs=crs, geometry='geometry')     
    return {"land":land, "water":water}

In [10]:
new_dir = os.path.join('', "Filtered")
if not os.path.exists(new_dir):
    os.mkdir(new_dir)

In [5]:
for i in range (1,13):
    print(i)
    for year in range (2018,2020):
        df = pd.read_csv('data/' +year + "%.2d" % i +'-citibike-tripdata.csv.zip')
        df['start_date'] =  pd.to_datetime(df['starttime']).dt.strftime('%Y-%m-%d')
        df['stop_date'] =  pd.to_datetime(df['stoptime']).dt.strftime('%Y-%m-%d')
        df = df[df['start_date'] == df['stop_date']]

        df['date'] = df['start_date']
        df = df[['date','start station latitude', 'start station longitude', 'end station latitude', 'end station longitude']]

        df_start = df[['date', 'start station latitude', 'start station longitude']]
        df_start['ind'] = df_start.index
        df_start['lat'] = df['start station latitude']
        df_start['lon'] = df['start station longitude']
        df_start = df_start[['date', 'lat', 'lon', 'ind']]


        df_end =  df[['date', 'end station latitude', 'end station longitude']]
        df_end['ind'] = df_end.index
        df_end['lat'] = df['end station latitude']
        df_end['lon'] = df['end station longitude']
        df_end = df_end[['date', 'lat', 'lon', 'ind']]

        result = pd.concat([df_start, df_end])
        result = result.sort_values(by=['ind'])

        data = result.groupby('date')

        for row,group in data:
            p = os.path.join(new_dir, "{}.csv".format(row))
            group.to_csv(p, index=False)


1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2
3
4
5
6
7
8
9
10
11
12


# From Tdf to Flow to AdjMatrix #

In [17]:
meters = 1840
tesselletion = tilers.tiler.get("squared", meters=meters, base_shape="New York City, New York")

shape_file_land = gpd.read_file("NYC_shapeNoWaterArea.geojson")

shape_file_land_MAN = shape_file_land.iloc[[4]]

res_inter = filter_tessellation_land(tesselletion, shape_file_land_MAN )
tess_nyc = res_inter['land']

  return _prepare_from_string(" ".join(pjargs))
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  
  return _prepare_from_string(" ".join(pjargs))


In [18]:
tess_nyc

Unnamed: 0,tile_ID,geometry
154,154,"POLYGON ((-74.06074 40.67827, -74.06074 40.690..."
155,155,"POLYGON ((-74.06074 40.69080, -74.06074 40.703..."
170,170,"POLYGON ((-74.04421 40.67827, -74.04421 40.690..."
171,171,"POLYGON ((-74.04421 40.69080, -74.04421 40.703..."
186,186,"POLYGON ((-74.02768 40.67827, -74.02768 40.690..."
...,...,...
358,358,"POLYGON ((-73.92851 40.85353, -73.92851 40.866..."
359,359,"POLYGON ((-73.92851 40.86603, -73.92851 40.878..."
360,360,"POLYGON ((-73.92851 40.87853, -73.92851 40.891..."
394,394,"POLYGON ((-73.91198 40.86603, -73.91198 40.878..."


In [19]:
plot_gdf(tess_nyc, style_func_args={'fillColor':'gray', 'color':'black', 'opacity': 0.2}, zoom = 9) 

# Reshaping #

In [20]:
new_dir = os.path.join('', "Adj")
if not os.path.exists(new_dir):
    os.mkdir(new_dir)

In [None]:
directory = './Filtered'
i = 1
l = []

for filename in os.listdir(directory):
    
    base= os.path.basename(filename)
    name = os.path.splitext(base)[0]
    print(i)
    
    df = pd.read_csv('./Filtered/'+filename)
    tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', 
                               user_id='ind', datetime='date')
    fdf = tdf.to_flowdataframe(tess_nyc, self_loops=True)

    adj = toAdj(fdf, tess_nyc)
    adj = adj.fillna(0)
    arr = adj.to_numpy()

    np.save("./Adj/" +name +'.npy', arr)    
    i += 1
