In [1]:
import pandas as pd
import numpy as np
import pickle as pkl
import geopandas as gpd

In [4]:
df = pd.read_csv('data/processed/processed_data.csv')
zone_id = df['zone'].unique()

In [2]:
zipfile = 'data/taxi_zones.shp'
zones = gpd.read_file(zipfile)
zones = zones.sort_values(['LocationID']).reset_index(drop=True)

In [5]:
Manhattan = zones[zones['LocationID'].isin(zone_id)]
Manhattan.head(10)

Unnamed: 0,OBJECTID,Shape_Leng,Shape_Area,zone,LocationID,borough,geometry
3,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.467 203714.076, 992068.667 20..."
11,12,0.036661,4.2e-05,Battery Park,12,Manhattan,"POLYGON ((979908.772 196066.565, 979980.852 19..."
12,13,0.050281,0.000149,Battery Park City,13,Manhattan,"POLYGON ((980801.310 201248.869, 980697.386 20..."
23,24,0.047,6.1e-05,Bloomingdale,24,Manhattan,"POLYGON ((995453.114 230274.267, 995312.583 23..."
40,41,0.052793,0.000143,Central Harlem,41,Manhattan,"POLYGON ((998716.913 234240.397, 999458.736 23..."
41,42,0.092709,0.000264,Central Harlem North,42,Manhattan,"POLYGON ((1002413.191 243934.560, 1002388.021 ..."
42,43,0.099739,0.00038,Central Park,43,Manhattan,"POLYGON ((991852.973 217950.807, 991725.244 21..."
44,45,0.045907,9.1e-05,Chinatown,45,Manhattan,"POLYGON ((984941.821 199431.157, 984958.446 19..."
47,48,0.043747,9.4e-05,Clinton East,48,Manhattan,"POLYGON ((986694.313 214463.846, 986568.184 21..."
49,50,0.055748,0.000173,Clinton West,50,Manhattan,"POLYGON ((985170.372 221087.389, 985817.170 22..."


In [6]:
zones_centroid = Manhattan['geometry'].centroid
zones_polygon = Manhattan['geometry']

### Eculidean

In [7]:
euc_dist = zones_centroid.apply(lambda x:zones_centroid.distance(x)).to_numpy().flatten()

In [8]:
census_adj = pd.DataFrame(np.array([np.repeat(Manhattan['LocationID'].to_numpy(), len(Manhattan)),
                                    np.tile(Manhattan['LocationID'].to_numpy(), len(Manhattan)),
                                    euc_dist]).T, columns=['start_id','end_id','euc'])

In [9]:
census_adj['start_id'] = census_adj['start_id'].astype(int)
census_adj['end_id'] = census_adj['end_id'].astype(int)

### Connectivity

In [10]:
touch = zones_polygon.apply(lambda x: zones_polygon.touches(x)).to_numpy(dtype=np.float32).flatten()

In [11]:
census_adj['con'] = touch

In [12]:
census_adj.head()

Unnamed: 0,start_id,end_id,euc,con
0,4,4,0.0,0.0
1,4,12,13112.689481,0.0
2,4,13,11651.582885,0.0
3,4,24,28674.720866,0.0
4,4,41,30207.906231,0.0


### Functionality

In [13]:
spatial = df[['zone','residential','education','recreational','commercial','health']].drop_duplicates()
s = spatial[['residential','education','recreational','commercial','health']]
for i in s.columns:
    s.loc[:,i] = s[i]/s[i].max()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s.loc[:,i] = s[i]/s[i].max()


In [14]:
n = len(s)
s1 = np.transpose(np.tile(s, (n,1,1)),(1,0,2))
s2 = np.tile(s, (n,1,1))
s3 = np.sum(np.sqrt((s1-s2)*(s1-s2)), axis=2)
s4 = s3[(s3 > 0)].min()/s3
s4 = s4.flatten()
s4[~np.isfinite(s4)] = 0

  s4 = s3[(s3 > 0)].min()/s3


In [15]:
func_adj = pd.DataFrame(np.array([np.repeat(spatial['zone'].to_numpy(), n), 
                       np.tile(spatial['zone'].to_numpy(), n),
                       s4]).T, columns=['start_id','end_id','func'])

In [16]:
census_adj = pd.merge(census_adj, func_adj, on=['start_id', 'end_id'], how='left').fillna(0)

In [17]:
census_adj.head()

Unnamed: 0,start_id,end_id,euc,con,func
0,4,4,0.0,0.0,0.0
1,4,12,13112.689481,0.0,0.042759
2,4,13,11651.582885,0.0,0.037645
3,4,24,28674.720866,0.0,0.046002
4,4,41,30207.906231,0.0,0.046256


In [None]:
census_adj.to_csv('data/processed/adjlist.csv',index=False)