#### Details  
This notebook does the following:
1. Extracts California businesses from the `ca_business` table of the `nourish` Postgres database.
2. Keeps only fast food restaurants and convenience stores.
3. Converts the latitude/longitude pairs to point shapes.
4. Publishes the final spatially enabled DataFrame as a Feature Layer Collection item.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sqlalchemy as sal
import psycopg2
import arcgis
from arcgis.gis import GIS
from arcgis.features import FeatureLayer, FeatureLayerCollection
import sys
sys.path.append('../../')
sys.path.append('../../../')
from gis_resources import san_diego_county_zips
import os
from utils import get_config
from arcgis.features import GeoAccessor

In [2]:
# Sign in to the UCSD ArcGIS Online organisation. Only a client id is supplied
# (read from the config via get_config), so the login is interactive: the
# recorded output shows a browser being opened and a code being pasted back.
# NOTE(review): because of that prompt this cell cannot run unattended.
gis = GIS("https://ucsdonline.maps.arcgis.com/home", client_id=get_config("arcgis","clientid"))

<configparser.ConfigParser object at 0x10dacb5e0>
Please sign in to your GIS and paste the code that is obtained below.
If a web browser does not automatically open, please navigate to the URL below yourself instead.
Opening web browser to navigate to: https://ucsdonline.maps.arcgis.com/sharing/rest/oauth2/authorize?response_type=code&client_id=<REDACTED>&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&state=<REDACTED>&allow_verification=false
Enter code obtained on signing in using SAML: ········




In [3]:
# Read the nourish database credentials from the config instead of
# hard-coding them in the notebook.
# NOTE(review): the `<configparser.ConfigParser ...>` lines in this cell's
# output are presumably a leftover debug print inside get_config - confirm
# and remove it there.
nourish_user = get_config("nourish_db","username")
nourish_pswd = get_config("nourish_db","passkey")

<configparser.ConfigParser object at 0x134579370>
<configparser.ConfigParser object at 0x134562fd0>


In [4]:
# Open the connection to the `nourish` Postgres database using the
# credentials loaded from the config. The parameters are collected in one
# place so the target host/database is easy to spot.
nourish_conn_params = {
    "host": "awesome-hw.sdsc.edu",
    "database": "nourish",
    "user": nourish_user,
    "password": nourish_pswd,
}
conn = psycopg2.connect(**nourish_conn_params)

In [5]:
# Fetch every row of `ca_business` whose `categories` contains either
# 'Fast food restaurant' or 'Convenience store' (`= any(...)` matches against
# the elements of the `categories` column).
#
# The cursor is used as a context manager so that it is closed even when the
# query or the fetch raises; previously a failure left the cursor open.
# NOTE(review): `conn` itself is left open here; close it once no later cell
# needs the database.
# NOTE(review): `select *` makes the positional indexing in the next cell
# depend on the table's column order - consider listing the columns explicitly.
with conn.cursor() as cur:
    cur.execute("""select * from ca_business 
                where ('Fast food restaurant' = any(categories) OR
                     'Convenience store' = any(categories))""")

    # Materialise the full result set as a list of row tuples.
    ca_business_result = cur.fetchall()

In [6]:
# Keep only the six fields needed downstream, selected by their position in
# each fetched row. The positions are named once so the projection is easy to
# audit against the column names assigned when the DataFrame is built.
_KEPT_ROW_POSITIONS = (1, 4, 5, 7, 8, 9)
ca_business_result_list = [
    tuple(row[position] for position in _KEPT_ROW_POSITIONS)
    for row in ca_business_result
]

In [7]:
# Build the working DataFrame from the projected rows.
# The categories field is deliberately exposed as `biz_type` rather than
# `categories`: ArcGIS was mangling the data under the original name
# (possibly because it is treated as a keyword).
_BUSINESS_COLUMNS = ("biz_name", "latitude", "longitude", "zip", "biz_type", "city")
dataFrame = pd.DataFrame(ca_business_result_list, columns=_BUSINESS_COLUMNS)

In [8]:
# Quick sanity check of the first two rows and the column layout.
dataFrame.head(2)

Unnamed: 0,biz_name,latitude,longitude,zip,biz_type,city
0,Chipotle Mexican Grill,34.0132844,-118.336625,90008.0,"[Mexican restaurant, Caterer, Fast food restau...",Los Angeles
1,Champion Grill Buffet,37.3197598,-120.4759953,95348.0,"[Buffet restaurant, Asian restaurant, Chinese ...",Merced


In [9]:
# Some rows in ca_business have no zip code; keep only the rows that do.
has_zip = dataFrame['zip'].notna()
dataFrame = dataFrame[has_zip]

In [10]:
# The preview above shows zip as a float (e.g. 90008.0); now that rows with a
# missing zip have been dropped, cast the column to int to lose the ".0".
dataFrame = dataFrame.astype({"zip": int})

In [11]:
# Pick the data for only San Diego County
# san_diego_county_zips_lst = san_diego_county_zips()
# san_diego_county_zips_lst[:10]

In [12]:
# dataFrame = dataFrame[dataFrame["zip"].isin(san_diego_county_zips_lst)]
# dataFrame.head(2)

In [13]:
# Build a spatially enabled DataFrame from a copy of the table, using
# `longitude` as the x column and `latitude` as the y column; the info()
# output further down shows the resulting `SHAPE` geometry column.
# NOTE(review): no spatial reference is passed, so the library default is
# used - presumably WGS84 (wkid 4326); confirm against the arcgis docs.
ca_business_sgdf = pd.DataFrame.spatial.from_xy(dataFrame.copy(), 'longitude', 'latitude')

In [14]:
# Converting the dataframe with lat/long column to geo dataframe using geopandas
#gdf = geopandas.GeoDataFrame(dataFrame, geometry=geopandas.points_from_xy(dataFrame.longitude, dataFrame.latitude))

In [15]:
#ca_business_sgdf = GeoAccessor.from_geodataframe(gdf, inplace=False, column_name='SHAPE')

In [16]:
#ca_business_sgdf.info()

In [18]:
# Flatten each business's list of categories into one ';'-delimited string so
# the column can be published as a plain text field.
def _join_categories(categories):
    """Return `categories` as a single ';'-separated string.

    - Missing values (None / NaN) become the empty string.
    - A value that is already a string is returned unchanged, so re-running
      this cell does not split an already-joined value into characters.
    - Any other iterable of strings is joined with ';'.
    """
    if categories is None or isinstance(categories, float):
        # NaN is a float; treat it like None.
        return ''
    if isinstance(categories, str):
        return categories
    return ';'.join(categories)


# Single explicit assignment instead of a chained `fillna(..., inplace=True)`,
# which is deprecated and has no effect under pandas Copy-on-Write.
ca_business_sgdf['biz_type'] = ca_business_sgdf['biz_type'].apply(_join_categories)

In [19]:
# The raw coordinate columns are dropped now that the frame has a SHAPE
# geometry column (see the info() output below).
ca_business_sgdf = ca_business_sgdf.drop(columns=['latitude', 'longitude'])

In [20]:
# Store zip codes as text rather than as numbers; they are identifiers, not
# quantities.
ca_business_sgdf['zip'] = ca_business_sgdf['zip'].astype(str)

In [21]:
# Final schema check before publishing: column names, dtypes and null counts.
ca_business_sgdf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22649 entries, 0 to 22654
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   biz_name  22649 non-null  object  
 1   zip       22649 non-null  object  
 2   biz_type  22649 non-null  object  
 3   city      22603 non-null  object  
 4   SHAPE     22649 non-null  geometry
dtypes: geometry(1), object(4)
memory usage: 1.0+ MB


In [22]:
# Report (rows, columns) of the frame that is about to be published.
print(f"Shape of Dataframe: {ca_business_sgdf.shape}")

Shape of Dataframe: (22649, 5)


In [23]:
%%time
# Publish the spatially enabled DataFrame to the signed-in GIS (ArcGIS Online)
# as a hosted feature layer, placed in the 'nourish_gis' folder with the given
# title and tags. The cell is timed because publishing is slow (about 51 s
# wall time in the recorded run).
# NOTE(review): re-running this cell presumably publishes another item with
# the same title rather than updating the existing one - confirm before
# re-executing.
feature_layer_collection_item = ca_business_sgdf.spatial.to_featurelayer(title="Fast Food Restaurants And Convenience Stores in California", 
                                                         gis=gis, 
                                                         folder='nourish_gis',
                                                         tags=['FastFood','ConvenienceStores'],
                                                        )

CPU times: user 15 s, sys: 651 ms, total: 15.6 s
Wall time: 51.2 s


In [24]:
# Show the published item as the cell's result.
feature_layer_collection_item