# Publishing TaxiCab Data of 2016 as a feature service

> 1. ArcGIS Pro (v 1.4 or higher)
> 2. 'arcgis' package (v 1.2.5) installed with ArcGIS Pro (follow the [instructions](https://developers.arcgis.com/python/guide/install-and-set-up/) under Step 2, "Install using ArcGIS Pro")

Once installed, locate proenv.bat within '\ArcGIS\Pro\bin\python\scripts\', launch a terminal/command prompt, and execute the following command:

__`path_to_this_file\proenv.bat`__

Then launch a Jupyter notebook instance. This ensures that the Python installation and the ArcGIS API for Python that are integrated with ArcGIS Pro are used to run the rest of this notebook.

In [1]:
#Import packages, setup environment
import getpass

import pandas as pd
from arcgis.features import *
from arcgis.gis import *

# Imported explicitly, and after the star imports, so that these two names are
# guaranteed to be the modules themselves: later cells call
# datetime.datetime.now() and arcgis.geometry.Geometry(...).
import datetime
import arcgis

# Prompt for the credentials instead of storing them in the notebook.
gis = GIS("http://dc.mapsqa.arcgis.com/", input("Username: "), getpass.getpass("Password: "))

In [2]:
# Show the current wall-clock time (taken just before the CSV is read)
datetime.datetime.now().time()

datetime.time(21, 39, 37, 510279)

In [3]:
#Load the 2016 taxi trips; the first CSV column is used as the row index
test = pd.read_csv(filepath_or_buffer='TaxiCab_2016/taxi2016.csv', index_col=0)

  interactivity=interactivity, compiler=compiler, result=result)
  mask |= (ar1 == a)


In [4]:
# Show the current wall-clock time (taken right after the CSV has been read)
datetime.datetime.now().time()

datetime.time(21, 47, 39, 340315)

In [5]:
#Column names to use for the dataframe, in file order (one name per column)
cols = [
    'OBJECTID', 'TRIPTYPE', 'PROVIDER',
    'METERFARE', 'TIP', 'SURCHARGE', 'EXTRAS', 'TOLLS', 'TOTALAMOUNT',
    'PAYMENTTYPE', 'PAYMENTCARDPROVIDER',
    'PICKUPCITY', 'PICKUPSTATE', 'PICKUPZIP',
    'DROPOFFCITY', 'DROPOFFSTATE', 'DROPOFFZIP',
    'TRIPMILEAGE', 'TRIPTIME',
    'PICKUP_BLOCK_LATITUDE', 'PICKUP_BLOCK_LONGITUDE', 'PICKUP_BLOCKNAME',
    'DROPOFF_BLOCK_LATITUDE', 'DROPOFF_BLOCK_LONGITUDE', 'DROPOFF_BLOCKNAME',
    'AIRPORT', 'PICKUPDATETIME_TR', 'DROPOFFDATETIME_TR',
]
#Overwrite the existing column labels with the names above
test.columns = cols

In [6]:
#Collect the names of the columns whose dtype is "object"
object_cols = [name for name in cols if test[name].dtype == "object"]

print(object_cols)

['TRIPTYPE', 'PROVIDER', 'METERFARE', 'TIP', 'SURCHARGE', 'EXTRAS', 'TOLLS', 'PAYMENTTYPE', 'PAYMENTCARDPROVIDER', 'PICKUPCITY', 'PICKUPSTATE', 'PICKUPZIP', 'DROPOFFCITY', 'DROPOFFSTATE', 'DROPOFFZIP', 'TRIPMILEAGE', 'TRIPTIME', 'PICKUP_BLOCK_LATITUDE', 'PICKUP_BLOCK_LONGITUDE', 'PICKUP_BLOCKNAME', 'DROPOFF_BLOCK_LATITUDE', 'DROPOFF_BLOCK_LONGITUDE', 'DROPOFF_BLOCKNAME', 'AIRPORT', 'PICKUPDATETIME_TR', 'DROPOFFDATETIME_TR']


In [7]:
#Cell-level formatter applied to every value of a column
def trimSpaces(x):
    '''Return the string form of x with leading and trailing whitespace removed.

    The value is converted with str() first, so non-string inputs
    (numbers, None, ...) come back as their trimmed text representation.
    '''
    text = str(x)
    return text.strip()

In [8]:
#Trim whitespace in every object-typed column. A column for which applying
#trimSpaces raises is recorded in numerical_cols so that it can be handled
#by the numerical formatting step instead.
numerical_cols = []
for i in object_cols:
    try:
        test.loc[:, i] = test[i].apply(trimSpaces)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit (e.g. interrupting the kernel while
        # this cell runs) propagate instead of silently mislabelling the
        # column as numerical.
        numerical_cols.append(i)

In [9]:
# Preview the first five rows after whitespace trimming
test.head(5)

Unnamed: 0,OBJECTID,TRIPTYPE,PROVIDER,METERFARE,TIP,SURCHARGE,EXTRAS,TOLLS,TOTALAMOUNT,PAYMENTTYPE,...,TRIPTIME,PICKUP_BLOCK_LATITUDE,PICKUP_BLOCK_LONGITUDE,PICKUP_BLOCKNAME,DROPOFF_BLOCK_LATITUDE,DROPOFF_BLOCK_LONGITUDE,DROPOFF_BLOCKNAME,AIRPORT,PICKUPDATETIME_TR,DROPOFFDATETIME_TR
0,4897,PSP,United Ventures,11.89,3.65,0.25,0.25,0.0,15.79,CreditCard,...,,38.87648,-77.00005,400 - 499 BLOCK OF M STREET SE,38.900207,-77.010619,1 - 99 BLOCK OF H STREET NW,N,01/27/2016 17:00,01/27/2016 17:00
1,4899,PSP,United Ventures,8.11,0.0,0.25,1.25,0.0,9.36,Cash,...,,38.89632,-77.032789,1400 - 1499 BLOCK OF PENNSYLVANIA AVENUE NW,38.911315,-77.048257,2120 - 2199 BLOCK OF MASSACHUSETTS AVENUE NW,N,01/28/2016 22:00,01/28/2016 22:00
2,4900,PSP,United Ventures,5.68,2.0,0.25,1.25,0.0,8.93,CreditCard,...,,38.89632,-77.032789,1400 - 1499 BLOCK OF PENNSYLVANIA AVENUE NW,38.896671,-77.022954,500 - 599 BLOCK OF 8TH STREET NW,N,01/28/2016 20:00,01/28/2016 20:00
3,4901,PSP,United Ventures,12.16,3.3,0.25,4.25,0.0,19.71,CreditCard,...,,,,,38.900769,-77.033644,800 - 899 BLOCK OF 15TH STREET NW,Y,01/28/2016 21:00,01/28/2016 21:00
4,4902,PSP,United Ventures,23.5,0.0,0.25,0.25,0.0,23.75,Cash,...,,38.895433,-77.027047,400 - 499 BLOCK OF 11TH STREET NW,,,,N,01/28/2016 22:00,01/28/2016 23:00


In [10]:
# Show which columns raised while trimSpaces was applied (empty list = none did)
numerical_cols

[]

In [11]:
#Cell-level formatter for numerical columns
def trimSpaces_Numerical(x):
    '''Parse x as a float after stripping surrounding whitespace.

    x is converted with str() and trimmed on both sides; if the resulting
    text cannot be parsed by float() (ValueError), 0.0 is returned instead.
    '''
    cleaned = str(x).strip()
    try:
        number = float(cleaned)
    except ValueError:
        number = 0.0
    return number

In [12]:
#Convert every column listed in numerical_cols to floats, value by value
for column in numerical_cols:
    test.loc[:, column] = test[column].apply(trimSpaces_Numerical)

In [13]:
#Use the drop-off coordinates as the geometry: longitude becomes 'x', latitude becomes 'y'
test = test.rename(
    columns={
        'DROPOFF_BLOCK_LATITUDE': 'y',
        'DROPOFF_BLOCK_LONGITUDE': 'x',
    }
)
test.head()

Unnamed: 0,OBJECTID,TRIPTYPE,PROVIDER,METERFARE,TIP,SURCHARGE,EXTRAS,TOLLS,TOTALAMOUNT,PAYMENTTYPE,...,TRIPTIME,PICKUP_BLOCK_LATITUDE,PICKUP_BLOCK_LONGITUDE,PICKUP_BLOCKNAME,y,x,DROPOFF_BLOCKNAME,AIRPORT,PICKUPDATETIME_TR,DROPOFFDATETIME_TR
0,4897,PSP,United Ventures,11.89,3.65,0.25,0.25,0.0,15.79,CreditCard,...,,38.87648,-77.00005,400 - 499 BLOCK OF M STREET SE,38.900207,-77.010619,1 - 99 BLOCK OF H STREET NW,N,01/27/2016 17:00,01/27/2016 17:00
1,4899,PSP,United Ventures,8.11,0.0,0.25,1.25,0.0,9.36,Cash,...,,38.89632,-77.032789,1400 - 1499 BLOCK OF PENNSYLVANIA AVENUE NW,38.911315,-77.048257,2120 - 2199 BLOCK OF MASSACHUSETTS AVENUE NW,N,01/28/2016 22:00,01/28/2016 22:00
2,4900,PSP,United Ventures,5.68,2.0,0.25,1.25,0.0,8.93,CreditCard,...,,38.89632,-77.032789,1400 - 1499 BLOCK OF PENNSYLVANIA AVENUE NW,38.896671,-77.022954,500 - 599 BLOCK OF 8TH STREET NW,N,01/28/2016 20:00,01/28/2016 20:00
3,4901,PSP,United Ventures,12.16,3.3,0.25,4.25,0.0,19.71,CreditCard,...,,,,,38.900769,-77.033644,800 - 899 BLOCK OF 15TH STREET NW,Y,01/28/2016 21:00,01/28/2016 21:00
4,4902,PSP,United Ventures,23.5,0.0,0.25,0.25,0.0,23.75,Cash,...,,38.895433,-77.027047,400 - 499 BLOCK OF 11TH STREET NW,,,,N,01/28/2016 22:00,01/28/2016 23:00


In [14]:
#Substitute 0 for every null value in the dataframe
test = test.fillna(value=0)

In [15]:
#Clean up geometry fields
#Empty strings (left behind by the whitespace trimming) become '0' before the
#cast to float. Any value that turns into NaN during the cast (e.g. the text
#'nan' / 'NaN') is then set to 0 with fillna. The previous second pass,
#.replace('NaN', '0') on an already-float column, could never match anything,
#so NaN coordinates would have survived.
for axis_col in ('x', 'y'):
    test[axis_col] = test[axis_col].replace('', '0').astype(float).fillna(0)

In [16]:
# Preview the first ten cleaned coordinate pairs
test[['x', 'y']].head(10)

Unnamed: 0,x,y
0,-77.010619,38.900207
1,-77.048257,38.911315
2,-77.022954,38.896671
3,-77.033644,38.900769
4,0.0,0.0
5,-76.995567,38.897333
6,-77.054339,38.923379
7,0.0,0.0
8,-77.042365,38.921623
9,-77.00471,38.89201


In [17]:
#Give the dataset a fresh 0..n-1 index to avoid duplicated index values;
#the previous index is kept as a regular column
test = test.reset_index()

In [18]:
#Build the SHAPE column: one geometry per row, made from that row's x and y
def _row_to_geometry(row):
    '''Return an arcgis Geometry built from the row's 'x' and 'y' values.'''
    return arcgis.geometry.Geometry({'x': row['x'], 'y': row['y']})

test['SHAPE'] = test.apply(_row_to_geometry, axis=1)

In [19]:
# Preview the first five geometries
test['SHAPE'].head(5)

0    {'x': -77.010619, 'y': 38.900207}
1    {'x': -77.048257, 'y': 38.911315}
2    {'x': -77.022954, 'y': 38.896671}
3    {'x': -77.033644, 'y': 38.900769}
4                 {'x': 0.0, 'y': 0.0}
Name: SHAPE, dtype: object

In [20]:
from arcgis import SpatialDataFrame

In [23]:
# Show the current wall-clock time (taken just before the spatial dataframe is built)
datetime.datetime.now().time()

datetime.time(22, 24, 7, 172579)

In [24]:
#Create an ArcGIS Spatial DataFrame with specified coordinate system to publish and project the taxi data in
# Wrap the pandas frame, which already carries the SHAPE column built earlier
new_sdf = SpatialDataFrame(test)
# Declare SHAPE as the geometry column, passing spatial reference 4326
new_sdf.set_geometry('SHAPE', inplace=True, sr=4326)
# NOTE(review): this projects to the same WKID (4326) that was passed to
# set_geometry above, so it looks redundant. However, the SHAPE values only show
# a 'spatialReference' entry in the preview after this cell - confirm whether
# this call is what attaches it before removing the step.
new_sdf['SHAPE'] = new_sdf.geometry.project_as(4326)

In [25]:
# Show the current wall-clock time (taken right after the spatial dataframe was built)
datetime.datetime.now().time()

datetime.time(23, 21, 14, 765725)

In [26]:
# Show the (rows, columns) size of the spatial dataframe
new_sdf.shape

(10679050, 30)

In [27]:
# Preview the coordinates next to the geometry for the first five rows
new_sdf[['x', 'y', 'SHAPE']].head(5)

Unnamed: 0,x,y,SHAPE
0,-77.010619,38.900207,"{'x': -77.010619, 'y': 38.900207, 'spatialRefe..."
1,-77.048257,38.911315,"{'x': -77.048257, 'y': 38.911315, 'spatialRefe..."
2,-77.022954,38.896671,"{'x': -77.022954, 'y': 38.896671, 'spatialRefe..."
3,-77.033644,38.900769,"{'x': -77.033644, 'y': 38.900769, 'spatialRefe..."
4,0.0,0.0,"{'x': 0, 'y': 0, 'spatialReference': {'wkid': ..."


In [48]:
# Remove the name `test` from the namespace now that new_sdf has been built from it
del test

In [28]:
# Show the current wall-clock time (taken just before publishing)
datetime.datetime.now().time()

datetime.time(23, 24, 19, 900879)

In [29]:
# Show the class of new_sdf before it is handed to import_data
type(new_sdf)

arcgis.features._data.geodataset.geodataframe.SpatialDataFrame

In [30]:
#Publish the spatial dataframe through the connected GIS and keep the returned item
mv_layer = gis.content.import_data(
    new_sdf,
    title='taxi_all_18',
    target_sr=4326,
    capabilities="Query,Editing",
)

In [31]:
# Show the current wall-clock time (taken right after import_data returned)
datetime.datetime.now().time()

datetime.time(2, 20, 13, 373663)

In [57]:
# Remove the name `new_sdf` from the namespace; the import_data result is held in mv_layer
del new_sdf

In [32]:
# Display the object returned by gis.content.import_data
mv_layer