In [205]:
import pandas as pd
import re
import numpy as np

In [206]:
# Specify path and load the csv file
path = '/Users/Dustin/Desktop/Food Deserts/csv_files/'

business_csv = path + 'Listing_of_Active_Businesses.csv'

pd.set_option('display.max_columns', 500)

df_business = pd.read_csv(business_csv)


In [207]:
# Return a dataframe that contains market or food.
df_business_market = df_business[df_business['PRIMARY NAICS DESCRIPTION'].str.contains('market|grocery', flags=re.IGNORECASE, regex=True) == True]

In [208]:
# Return a dataframe that doesn't contain 'telephone|business|research'
df_business_market_clean = df_business_market[~df_business_market['PRIMARY NAICS DESCRIPTION'].str.contains('telephone|business|research') == True]

In [209]:
# Extract Latitude and put it in a column
df_business_market_clean['LATITUDE'] = df_business_market_clean['LOCATION'].str.extract('\((.*),').astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [210]:
# Extract Longitude and put it in a column
df_business_market_clean['LONGITUDE'] = df_business_market_clean['LOCATION'].str.extract(',(.*)\)').astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [211]:
# Remove coordinates outside of LA
df_business_market_clean = df_business_market_clean[(df_business_market_clean['LONGITUDE'] < -117.7) & (df_business_market_clean['LONGITUDE'] > -118.9)]

In [212]:
# Dataframe with no Nulls for location
df_test = df_business_market_clean.dropna(subset=['LOCATION'])

In [74]:
# Export
#df_test.to_csv('clean.csv', index=False)

# Change to a different Map Projection

In [213]:
import pyproj

project_projection = pyproj.Proj("+init=EPSG:3857") # Output map projections
google_projection = pyproj.Proj("+init=EPSG:4326") # Input map projections

# Save Coordinates to a list
longitude = df_test['LONGITUDE'].values
latitude = df_test['LATITUDE'].values

# Output mercator map projections where x=longitude and y=latitude
x, y = pyproj.transform(google_projection, project_projection, longitude, latitude)

# Bokeh Plot

In [214]:
from bokeh.plotting import figure, show, output_file
from bokeh.tile_providers import get_provider, Vendors
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool, WheelZoomTool, PanTool, BoxZoomTool, ResetTool, TapTool, SaveTool
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label

In [215]:
# HTML file
output_file('market_location.html')
# Name of tile used for plotting
tile = get_provider(Vendors.STAMEN_TERRAIN)

# Instantiate the figure
p = figure(plot_width=1000, plot_height=1000, x_axis_type = "mercator", y_axis_type = "mercator")

# Define the data to a dictionary
source = ColumnDataSource(data = dict(lat=y,lon=x))

# Add the tile/map
p.add_tile(tile)
# Plot it to the tile
p.circle(x='lon', y='lat', source=source, alpha=0.1)

# Display the map
show(p)

# GEOPY - Get the distance of 'n' miles

In [216]:
import geopy
import geopy.distance

def travel(lat, lon, bearing=0, miles=0.5):
    # Starting point
    start = geopy.Point(lat, lon)

    # Initialized with a distance of 2 mi.
    d = geopy.distance.geodesic(miles=miles)

    # Destination method which takes the starting point and bearings(North=0).
    # Return coordinates from start to distance,d, traveled
    coord = d.destination(point=start, bearing=bearing)
    return coord
    # printing(coord) gives degrees, min, and sec
    # Slice it coord gets you google mapping

# SHAPELY - Make a circle around the target and the groceries within the circle

In [217]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [218]:
def make_circle(lat, lon, miles=0.5):
    circle_points = []  
    for i in range(360):
        new_point = travel(lat, lon, bearing=i, miles=miles)
        new_point = new_point[0], new_point[1]
        circle_points.append(new_point)
    circle = Polygon(circle_points)
    return circle

In [219]:
def get_groceries_within_circle(df, circle):
    df_test_copy = df.copy()
    in_circle = []
    for i in range(len(df_test_copy)):
        lat = df_test_copy.iloc[i]['LATITUDE']
        lon = df_test_copy.iloc[i]['LONGITUDE']
        point = Point(lat, lon)
        
        if circle.contains(point):
            # In Circle
            in_circle.append(True)
        else:
            # Not In Circle
            in_circle.append(False)

    df_test_copy['in_circle'] = in_circle

    # Returns a data frame containing all grocery stores within the circle
    return df_test_copy[df_test_copy['in_circle'] == True]

In [220]:
inputs = 34.0401, -118.2106, 0.25

In [221]:
circle = make_circle(input[0], input[1])

In [222]:
get_groceries_within_circle(df_test, circle)

Unnamed: 0,LOCATION ACCOUNT #,BUSINESS NAME,DBA NAME,STREET ADDRESS,CITY,ZIP CODE,LOCATION DESCRIPTION,MAILING ADDRESS,MAILING CITY,MAILING ZIP CODE,NAICS,PRIMARY NAICS DESCRIPTION,COUNCIL DISTRICT,LOCATION START DATE,LOCATION END DATE,LOCATION,LATITUDE,LONGITUDE,in_circle


# Plot the avaialble stores if there's any

In [223]:
import pyproj
def transform_df(df):
    # Dataframe from get_groceries_within_circle
    project_projection = pyproj.Proj("+init=EPSG:3857") # Output map projections
    google_projection = pyproj.Proj("+init=EPSG:4326") # Input map projections

    # Save Coordinates to a list
    longitude = df['LONGITUDE'].values
    latitude = df['LATITUDE'].values

    # Output mercator map projections where x=longitude and y=latitude
    x, y = pyproj.transform(google_projection, project_projection, longitude, latitude)
    return x,y

In [224]:
def transform_input(input_x, input_y):
    #input_x = latitude value & input_y = longitude
    project_projection = pyproj.Proj("+init=EPSG:3857") # Output map projections
    google_projection = pyproj.Proj("+init=EPSG:4326") # Input map projections

    # Save Coordinates to a list
    longitude = input_y
    latitude = input_x

    # Output mercator map projections where x=longitude and y=latitude
    x, y = pyproj.transform(google_projection, project_projection, longitude, latitude)
    return x, y

In [225]:
# Function to plot the list of stores returned from get_groceries_within_circle
def plot_location(df_x, df_y, input_x, input_y):
    # HTML file
    output_file('market_location.html')
    # Name of tile used for plotting
    tile = get_provider(Vendors.STAMEN_TERRAIN)

    # Instantiate the figure
    p = figure(plot_width=1000, plot_height=1000, x_axis_type = "mercator", y_axis_type = "mercator")

    # Define the data to a dictionary
    source = ColumnDataSource(data = dict(lat=df_y,lon=df_x))
    
    # Add the tile/map
    p.add_tile(tile)
    
    # Plot it to the tile
    p.circle(x='lon', y='lat', source=source, size=15)

    # Plot the center
    p.circle(x=input_x, y=input_y, size=15, color='red')
    
    # Display the map
    return show(p)

# Test plots

In [231]:
# Lat, long, and miles inputs
input = 34., -118.2550

In [232]:
# Make circle around input
circle = make_circle(input[0], input[1])

In [233]:
# Get Groceries 
available_markets = get_groceries_within_circle(df_test, circle)

In [234]:
# Transform the input location to the correct coordinates
input_x, input_y =transform_input(input[0], input[1])

In [235]:
# Save list of latitude and longitude values
# Will give an error if there's no dataframe
df_x, df_y = transform_df(available_markets)

In [236]:
# Returns html file of a bokeh plot - Red is the input location, Blue is the available markets
plot_location(df_x, df_y, input_x, input_y)