In [61]:
import pandas as pd
import re
import numpy as np

In [62]:
# Specify path and load the csv file
path = '/Users/Dustin/Desktop/Food Deserts/csv_files/'

business_csv = path + 'Listing_of_Active_Businesses.csv'

pd.set_option('display.max_columns', 500)

df_business = pd.read_csv(business_csv)


In [63]:
# Return a dataframe that contains market or food.
df_business_market = df_business[df_business['PRIMARY NAICS DESCRIPTION'].str.contains('market|grocery', flags=re.IGNORECASE, regex=True) == True]

In [64]:
# Return a dataframe that doesn't contain 'telephone|business|research'
df_business_market_clean = df_business_market[~df_business_market['PRIMARY NAICS DESCRIPTION'].str.contains('telephone|business|research') == True]

In [65]:
# Extract Latitude and put it in a column
df_business_market_clean['LATITUDE'] = df_business_market_clean['LOCATION'].str.extract('\((.*),').astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [66]:
# Extract Longitude and put it in a column
df_business_market_clean['LONGITUDE'] = df_business_market_clean['LOCATION'].str.extract(',(.*)\)').astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [67]:
# Remove coordinates outside of LA
df_business_market_clean = df_business_market_clean[(df_business_market_clean['LONGITUDE'] < -117.7) & (df_business_market_clean['LONGITUDE'] > -118.9)]

In [68]:
# Dataframe with no Nulls for location
df_test = df_business_market_clean.dropna(subset=['LOCATION'])

In [74]:
# Export
df_test.to_csv('clean.csv', index=False)

# Change to a different Map Projection

In [70]:
import pyproj

project_projection = pyproj.Proj("+init=EPSG:3857") # Output map projections
google_projection = pyproj.Proj("+init=EPSG:4326") # Input map projections

# Save Coordinates to a list
longitude = df_test['LONGITUDE'].values
latitude = df_test['LATITUDE'].values

# Output mercator map projections where x=longitude and y=latitude
x, y = pyproj.transform(google_projection, project_projection, longitude, latitude)

# Bokeh Plot

In [155]:
from bokeh.plotting import figure, show, output_file
from bokeh.tile_providers import get_provider, Vendors
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool, WheelZoomTool, PanTool, BoxZoomTool, ResetTool, TapTool, SaveTool
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label

In [72]:
# HTML file
output_file('market_location.html')
# Name of tile used for plotting
tile = get_provider(Vendors.STAMEN_TERRAIN)

# Instantiate the figure
p = figure(plot_width=1000, plot_height=1000, x_axis_type = "mercator", y_axis_type = "mercator")

# Define the data to a dictionary
source = ColumnDataSource(data = dict(lat=y,lon=x))

# Add the tile/map
p.add_tile(tile)
# Plot it to the tile
p.circle(x='lon', y='lat', source=source, alpha=0.1)

# Display the map
show(p)

# GEOPY - Get the distance of 'n' miles

In [14]:
import geopy
import geopy.distance

def travel(lat, lon, bearing=0, miles=0.5):
    # Starting point
    start = geopy.Point(lat, lon)

    # Initialized with a distance of 2 mi.
    d = geopy.distance.geodesic(miles=miles)

    # Destination method which takes the starting point and bearings(North=0).
    # Return coordinates from start to distance,d, traveled
    coord = d.destination(point=start, bearing=bearing)
    return coord
    # printing(coord) gives degrees, min, and sec
    # Slice it coord gets you google mapping

# SHAPELY - Make a circle around the target and the groceries within the circle

In [15]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [16]:
def make_circle(lat, lon, miles=0.5):
    circle_points = []  
    for i in range(360):
        new_point = travel(lat, lon, bearing=i, miles=miles)
        new_point = new_point[0], new_point[1]
        circle_points.append(new_point)
    circle = Polygon(circle_points)
    return circle

In [17]:
def get_groceries_within_circle(df, circle):
    df_test_copy = df.copy()
    in_circle = []
    for i in range(len(df_test_copy)):
        lat = df_test_copy.iloc[i]['LATITUDE']
        lon = df_test_copy.iloc[i]['LONGITUDE']
        point = Point(lat, lon)
        
        if circle.contains(point):
            # In Circle
            in_circle.append(True)
        else:
            # Not In Circle
            in_circle.append(False)

    df_test_copy['in_circle'] = in_circle

    # Returns a data frame containing all grocery stores within the circle
    return df_test_copy[df_test_copy['in_circle'] == True]

In [18]:
circle = make_circle(34.0401, -118.2106, 0.25)

In [76]:
available_markets = get_groceries_within_circle(df_test, circle)
available_markets

Unnamed: 0,LOCATION ACCOUNT #,BUSINESS NAME,DBA NAME,STREET ADDRESS,CITY,ZIP CODE,LOCATION DESCRIPTION,MAILING ADDRESS,MAILING CITY,MAILING ZIP CODE,NAICS,PRIMARY NAICS DESCRIPTION,COUNCIL DISTRICT,LOCATION START DATE,LOCATION END DATE,LOCATION,LATITUDE,LONGITUDE,in_circle
2148,0002583451-0001-3,LAING SING HUANG / KARIE KHENG ING,SILVER MOON MARKET,2501 E 4TH STREET,LOS ANGELES,90033-4418,2501 4TH 90033-4418,,,,424400.0,Grocery & related products,14,10/24/2011,,"(34.0401, -118.2106)",34.0401,-118.2106,True
128700,0002087138-0003-1,NORTHGATE GONZALEZ LLC,NORTHGATE MARKET # 37,425 S SOTO STREET,LOS ANGELES,90033-4315,425 SOTO 90033-4315,1201 N MAGNOLIA AVENUE,ANAHEIM,92801-2609,445100.0,Grocery stores (including supermarkets & conve...,14,09/18/2012,,"(34.0401, -118.212)",34.0401,-118.212,True
142971,0002456504-0001-4,TORREZ ENTERPRISE INC,MI BARRIO MARKET,2432 E 1ST STREET,LOS ANGELES,90033-3502,2432 1ST 90033-3502,POST OFFICE BOX #30,WHITTIER,90608-0030,445100.0,Grocery stores (including supermarkets & conve...,14,08/04/2009,,"(34.0432, -118.2087)",34.0432,-118.2087,True
239466,0002884462-0001-5,WILFRIDO ROMERO,,2615 E 2ND STREET APARTMENT #3,LOS ANGELES,90033-4139,2615 2ND 90033-4139,,,,445230.0,Fruit & vegetable markets,14,01/10/2015,,"(34.041, -118.2077)",34.041,-118.2077,True
331885,0000032242-0004-7,SMART & FINAL STORES LLC,SMART/FINAL #345 | SMART/FINAL IRIS CO DIV #345,2308 E 4TH STREET,LOS ANGELES,90033-4306,2308 4TH 90033-4306,POST OFFICE BOX #512377,LOS ANGELES,90051-0377,445100.0,Grocery stores (including supermarkets & conve...,14,01/01/1953,,"(34.0411, -118.2129)",34.0411,-118.2129,True


# Plot the avaialble stores if there's any

In [180]:
import pyproj
def transform_df(df):
    # Dataframe from get_groceries_within_circle
    project_projection = pyproj.Proj("+init=EPSG:3857") # Output map projections
    google_projection = pyproj.Proj("+init=EPSG:4326") # Input map projections

    # Save Coordinates to a list
    longitude = df['LONGITUDE'].values
    latitude = df['LATITUDE'].values

    # Output mercator map projections where x=longitude and y=latitude
    x, y = pyproj.transform(google_projection, project_projection, longitude, latitude)
    return x,y

In [181]:
def transform_input(input_x, input_y):
    #input_x = latitude value & input_y = longitude
    project_projection = pyproj.Proj("+init=EPSG:3857") # Output map projections
    google_projection = pyproj.Proj("+init=EPSG:4326") # Input map projections

    # Save Coordinates to a list
    longitude = input_y
    latitude = input_x

    # Output mercator map projections where x=longitude and y=latitude
    x, y = pyproj.transform(google_projection, project_projection, longitude, latitude)
    return x, y

In [182]:
# Save list of latitude and longitude values
df_x, df_y = transform_df(available_markets)

In [183]:
# Transform the input location to the correct coordinates
input_x, input_y =transform_input(34.0401, -118.2106)
input_x, input_y

(-13159143.798367346, 4034187.7481705165)

In [184]:
# Function to plot the list of stores returned from get_groceries_within_circle
def plot_location(df_x, df_y, input_x, input_y):
    # HTML file
    output_file('market_location.html')
    # Name of tile used for plotting
    tile = get_provider(Vendors.STAMEN_TERRAIN)

    # Instantiate the figure
    p = figure(plot_width=1000, plot_height=1000, x_axis_type = "mercator", y_axis_type = "mercator")

    # Define the data to a dictionary
    source = ColumnDataSource(data = dict(lat=df_y,lon=df_x))
    
    # Add the tile/map
    p.add_tile(tile)
    
    # Plot it to the tile
    p.circle(x='lon', y='lat', source=source, size=15)

    # Plot the center
    p.circle(x=input_x, y=input_y, size=15, color='red')
    
    # Display the map
    return show(p)

In [185]:
# Returns html file of a bokeh plot
plot_location(df_x, df_y, input_x, input_y)

# TEST PLOTS

In [178]:
# Function to plot the list of stores returned from get_groceries_within_circle
def plot_test(df, df_x, df_y, input_x, input_y):
    # HTML file
    output_file('market_location.html')
    # Name of tile used for plotting
    tile = get_provider(Vendors.STAMEN_TERRAIN)

    # Instantiate the figure
    p = figure(plot_width=1000, plot_height=1000, x_axis_type = "mercator", y_axis_type = "mercator")

    # Define the data to a dictionary
    source = ColumnDataSource(data = dict(lat=df_y,lon=df_x, business=df['BUSINESS NAME']))
    
    p.circle(x='lat', y='lon', size=8, source=source)
    labels = LabelSet(x='lat', y='lon', text='business',
                      level='glyph', x_offset=5, y_offset=5,
                      source=source, render_mode='canvas')

    p.add_layout(labels)
    
    # Add the tile/map
    p.add_tile(tile)

    
    # Display the map
    return show(p)

In [179]:
plot_test(available_markets, df_x, df_y, input_x, input_y)