In [9]:
import warnings
warnings.filterwarnings("ignore")

%run imports_and_functions.ipynb
%run Yelp_GetBusinessList_Functions.ipynb
%run Xmatrix_Pipeline_Functions.ipynb

gmaps.configure(api_key=joblib.load('GooglemapsAPI_KEY.joblib')) # Jason's API key
# Lookup table read by UserSearch._update_business_cat: it is indexed by the category
# display name and the value is matched against the 'main_category' column.
# The context manager guarantees the file handle is closed after parsing.
with open('cat_alias_dict.json', 'r') as alias_file:
    cat_alias_dict = json.load(alias_file)

In [2]:
def getzipcodecoords(state_abrev, chosen_zip) :
    """Return the boundary of one zip code as a list of (lat, long) tuples.

    Reads './zipcodes_geojson/<state_abrev>.json' and scans its 'features' for
    the entry whose 'ZCTA5CE10' property equals the requested zip code.

    Parameters
    ----------
    state_abrev : str
        File stem of the state's geojson file (callers in this notebook pass
        the lower-cased two-letter state code, e.g. 'ma').
    chosen_zip : str or int
        Zip code to look up. It is converted to text and left-padded with
        zeros to five characters, so 2420 and '02420' are equivalent.

    Returns
    -------
    list of (lat, long) tuples for the first ring of the matching feature, or
    the string 'error zipcode not found' when no feature matches (kept as-is
    for backward compatibility with existing callers).

    Raises
    ------
    OSError if the state file cannot be opened; KeyError / IndexError if the
    matching feature does not have the expected geojson layout.
    """
    # Only the parse needs the file; close it before walking the features.
    with open('./zipcodes_geojson/' + state_abrev + '.json') as f :
        jsondict = json.load(f)

    chosen_zip = str(chosen_zip).zfill(5)
    for rawdict in jsondict['features'] :
        if rawdict['properties']['ZCTA5CE10'] != chosen_zip :
            continue
        coordlist = rawdict['geometry']['coordinates'][0]
        # One extra nesting level means the ring is wrapped once more
        # (multi-polygon layout). Test for "is a number" rather than
        # "is exactly float" so integer-valued coordinates are not mistaken
        # for another nesting level.
        if not isinstance(coordlist[0][0], (int, float)) :
            coordlist = coordlist[0]
        result = []
        for item in coordlist :
            try :
                long, lat = item[0], item[1]  # geojson stores (long, lat)
            except (TypeError, IndexError, KeyError) :
                continue  # skip malformed points, keep the rest of the ring
            result.append((lat, long))
        return result
    return 'error zipcode not found'

# getzipcodecoords('ma', '02420')

In [9]:
# zipcode.to_dict() -> {'housing_units': 12476, 'post_office_city': u'New York, NY', 
# 'county': u'New York County', 'population_density': 33959.0, 'radius_in_miles': 0.9090909090909091,
# 'timezone': u'Eastern', 'lng': -73.99, 'common_city_list': [u'New York'], 'zipcode': u'10001', 
# 'state': u'NY', 'major_city': u'New York', 'population': 21102, 'land_area_in_sqmi': 0.62, 
# 'lat': 40.75, 'median_household_income': 81671, 'occupied_housing_units': 11031, 
# 'median_home_value': 650200, 'water_area_in_sqmi': 0.0}

def addzipcodepolygon(lat, long, fig) :
    """Outline the zip code nearest to (lat, long) on a gmaps figure.

    Looks up the closest zip code within a 20 mile radius with uszipcode,
    fetches its boundary through getzipcodecoords and adds it to `fig` as a
    blue polygon on a new drawing layer (drawing controls hidden).

    Parameters
    ----------
    lat, long : float
        Coordinates used for the zip code lookup.
    fig : gmaps figure
        Figure that receives the new drawing layer.
    """
    from uszipcode import SearchEngine
    engine = SearchEngine(simple_zipcode=True)
    nearest = engine.by_coordinates(lat, long, radius=20, returns=1)[0].to_dict()
    # The state code is lower-cased before being handed to getzipcodecoords.
    outline = getzipcodecoords(nearest['state'].lower(), nearest['zipcode'])

    zip_layer = gmaps.drawing_layer(
        features=[gmaps.Polygon( outline, stroke_color='blue', fill_color='blue' )],
        show_controls=False)
    fig.add_layer(zip_layer)

def addbusinesscat(category, fig, lat, long, df, color) :
    """Add one symbol layer to `fig` with the businesses of a single category.

    Keeps the rows of `df` whose 'main_category' text contains `category`,
    drops those whose calcDistances value is not below twice `search_radius`,
    and plots the rest with an info box listing the `displaycols` values.

    NOTE(review): `search_radius`, `displaycols` and `calcDistances` are read
    from the notebook namespace and are not defined in this cell - confirm
    they exist before calling.

    Parameters
    ----------
    category : str
        Substring looked for in each 'main_category' value.
    fig : gmaps figure
        Figure that receives the layer.
    lat, long : float
        Reference point handed to calcDistances.
    df : DataFrame
        Needs 'main_category', 'latitude', 'longitude' and the display columns.
    color : str
        Fill and stroke colour of the symbols.

    Returns
    -------
    The gmaps symbol layer that was added to `fig`.
    """
    in_category = [category in str(label) for label in df['main_category'].values]
    nearby = df[in_category].copy()
    nearby['temp_dist'] = calcDistances(lat, long, nearby)
    nearby = nearby[nearby['temp_dist'] < search_radius*2]

    # One "Col: value, Col: value, ..." line per remaining business.
    infoboxlist = []
    for idx in nearby.index :
        values = nearby.loc[idx,displaycols]
        infoboxlist.append(', '.join(
            name.capitalize() + ': ' + str(value) for name, value in zip(displaycols, values)))

    address_layer = gmaps.symbol_layer(
        nearby[['latitude','longitude']], info_box_content=infoboxlist,
        fill_color = color, stroke_color=color, scale=2)
    fig.add_layer(address_layer)
    return address_layer

In [268]:
# lat, long = 35.223695, -80.841573 # Charlotte, NC

# figure_layout = { 'width': '100%', 'height': '700px'}
# fig = gmaps.figure(center = (lat, long), zoom_level = 14, layout=figure_layout) # zoom should be 13-15
# addzipcodepolygon(lat, long, fig)

In [35]:
# addbusinesscat('food', fig, lat, long, df, 'blue')
# addbusinesscat('active', fig, lat, long, df, 'green')
# # fig

In [32]:
class UserSearch(object) :
    """Interactive map widget: drop a marker, see nearby businesses, get an income bracket.

    The widget wraps a gmaps figure. Placing a marker on the map triggers a
    business search around it, outlines the surrounding zip code, plots the
    businesses per category and writes a predicted income bracket into a
    read-only text box. One button per category toggles that category's
    symbols, and a 'Clear Map' button removes everything drawn.

    Two sets of per-category layers and two polygon layers are kept so that
    the current and the previous search stay visible at the same time.

    Relies on names from the notebook namespace that are not defined in this
    class: gmaps, widgets, joblib, pd, CountVectorizer, AreaData, make_Xmatrix,
    makecorpus, calcDistances, getzipcodecoords, cat_alias_dict and display.
    """

    # Start point used when no location is supplied (Boston, MA).
    _DEFAULT_LOCATION = (42.349397, -71.092480)

    def __init__(self, fig, location) :
        """Build all layers and widgets and run a first search.

        Parameters
        ----------
        fig : gmaps figure
            Figure the layers are added to; it is embedded in the widget.
        location : (lat, long) tuple or None
            Start point of the first search; None falls back to Boston, MA.
        """
        gmaps.configure(api_key=joblib.load('GooglemapsAPI_KEY.joblib'))
        if location is None :
            location = self._DEFAULT_LOCATION
        self._figure = fig
        self._df = None
        self._last_error = None  # last exception swallowed while drawing a category
        self.latitude = location[0]
        self.longitude = location[1]
        self.location = location
        self._checkboxlist = []
        self._max_radius = 1000
        # _drawing holds the user's marker; _drawing1/_drawing2 hold the
        # current and the previous zip code polygon.
        self._drawing = gmaps.drawing_layer( show_controls=False )
        self._drawing1 = gmaps.drawing_layer( show_controls=False )
        self._drawing2 = gmaps.drawing_layer( show_controls=False )
        self._figure.add_layer(self._drawing)
        self._figure.add_layer(self._drawing1)
        self._figure.add_layer(self._drawing2)
        self._drawing.on_new_feature(self._marker_search_callback)
        self._displaycols = ['name','price','main_category','stars','review_count']

        # Values are 1-based ranks; use _layer_index() to turn them into
        # positions in the per-category lists below.
        self.searchable_cats = {'Restaurants':1,'Shopping':2,'Food':3,'Beauty & Spas':4,
                                'Home Services':5,'Health & Medical':6,'Local Services':7,
                                'Automotive':8,'Nightlife':9,'Event Planning & Services':10,
                                'Active Life':11,'Fashion':12}
        self._color_dict = {
            'Restaurants':'blue','Shopping':'#EE82EE','Food':'#A52A2A','Beauty & Spas':'red',
            'Home Services':'#FFA500','Health & Medical':'yellow','Local Services':'green',
            'Automotive':'gray','Nightlife':'#FFB6C1','Event Planning & Services':'black',
            'Active Life':'aqua','Fashion':'#228B22'
        }

        self._search1_layers = []
        self._search2_layers = []
        self._is_current_layer1 = True  # which of the two layer sets the next draw goes to
        self._cat_on_off_list = [True for _ in self.searchable_cats]
        for _ in self.searchable_cats :
            _symbol_layer1 =  gmaps.drawing_layer( show_controls=False )
            _symbol_layer2 =  gmaps.drawing_layer( show_controls=False )
            self._figure.add_layer(_symbol_layer1)
            self._figure.add_layer(_symbol_layer2)
            self._search1_layers.append(_symbol_layer1)
            self._search2_layers.append(_symbol_layer2)

        self._get_yelp_businesses(location)

        self._search_box = widgets.Text(
            description='Model result: ', disabled=True,
            layout={'width': '95%', 'margin': '10px 0 0 0'} )
        self._search_box.value = 'Place marker to find businesses around it and predict its income!'
        self._title_widget = widgets.HTML(
            '<h3>Explore City Businesses and Predict Area Income Level</h3>')
        self._controls = self._render_controls()
        self._container = widgets.VBox( [self._title_widget, self._controls[0], self._controls[1],
                                         self._controls[2], self._search_box, self._figure,
                                        ])

    def _layer_index(self, business_cat) :
        """Return the 0-based list position of a category (its rank minus one)."""
        return self.searchable_cats[business_cat] - 1

    def _current_layers(self) :
        """Return the layer set that the current search draws into."""
        if self._is_current_layer1 :
            return self._search1_layers
        return self._search2_layers

    def _render_controls(self) :
        """Create the category buttons and the clear button.

        Returns a tuple (first row of six buttons, row with the remaining
        buttons, 'Clear Map' button).
        """
        self._checkboxlist = []  # rebuilt so a second call cannot duplicate buttons
        for cat in self.searchable_cats :
            # Button has no 'value' trait, so only the description is passed.
            widget = widgets.Button( description=cat)
            widget.on_click( self._button_press)
            self._checkboxlist.append(widget)
        clear_widget = widgets.Button( description='Clear Map')
        clear_widget.on_click( self._clear_map)
        controls = (widgets.HBox( [*self._checkboxlist[0:6]]),
                    widgets.HBox( [*self._checkboxlist[6:]]),
                    clear_widget
                   )
        return controls

    def _clear_map(self, button) :
        """Button handler: empty the status box and remove everything drawn.

        Clears the marker layer, both polygon layers and every category
        layer of both layer sets. `button` is unused.
        """
        self._search_box.value = ''
        self._drawing.features = []
        self._drawing1.features = []
        self._drawing2.features = []
        for layer in self._search1_layers :
            layer.features = []
        for layer in self._search2_layers :
            layer.features = []

    def _marker_search_callback(self, feature) :
        """Drawing-layer handler: search around a new marker and show the prediction.

        Any failure is reported in the status box instead of leaving the
        'computing' message in place.
        """
        self._search_box.value = "Extracting, Modeling and Computing..."
        try :
            self._drawing.features = [feature]  # keep only the newest marker
            location = feature.location
            self._get_yelp_businesses(location)
            zipcode = self.latlong_tozipcode(location)
            # latlong_tozipcode returns an int (0 on failure); pad so that
            # zip codes with leading zeros are displayed in full.
            zip_text = '{:05d}'.format(zipcode) if zipcode else 'unknown'
            results = 'Prediction of 4 income bracket Logistic Model for zipcode, '
            results += zip_text
            results += '. Average income: ' + self.predict_income(location)
            self._search_box.value = results
        except Exception as error :
            self._search_box.value = 'Search failed ({}): {}'.format(type(error).__name__, error)

    def predict_income(self, location) :
        """Predict the income bracket for `location` from the last search result.

        Builds a text corpus from self._df via make_Xmatrix/makecorpus, counts
        the vocabulary stored in 'X_features.joblib', then applies the stored
        scaler, PCA and logistic model.

        Returns one of '$0-$20.99k', '$21-$40.99k', '$41-$60.99k', '$61+k'.

        NOTE: joblib files are pickle-based; only load artifacts you trust.
        """
        df = make_Xmatrix(self._df)
        corpus = makecorpus(df, location[0], location[1])

        X_features = joblib.load('X_features.joblib')
        vect = CountVectorizer(dtype='uint16',min_df=1, vocabulary=X_features,
                               decode_error='ignore', lowercase=True)
        X_sparse = vect.transform([corpus])
        dfvect = pd.DataFrame(X_sparse.toarray(),dtype='uint16',columns=X_features)
        standardscaler = joblib.load('4brackets_StandardScaler.joblib')
        pca = joblib.load('4brackets_PCA.joblib')
        X_test_scaled = standardscaler.transform(dfvect)
        Z_test = pca.transform(X_test_scaled)

        logreg_pca = joblib.load('mediocre_logistic_4brackets.joblib')
        pred = logreg_pca.predict(Z_test)
        brackets = {0:'$0-$20.99k',1:'$21-$40.99k',2:'$41-$60.99k',3:'$61+k'}
        return brackets[pred[0]]

    def _get_yelp_businesses(self, location) :
        """Run a search around `location` and redraw polygon and symbols.

        `location` is a (lat, long) tuple; None falls back to Boston, MA.
        Stores the search result in self._df and records the searched point
        in self.location / self.latitude / self.longitude so that later
        category toggles measure distances from the same point.
        """
        if location is None :
            location = self._DEFAULT_LOCATION
        startlat, startlong = location
        self.addzipcodepolygon(location)
        area_search = AreaData('User_Search', coords=(startlat, startlong), radius=self._max_radius)
        self._df = area_search.extract_yelp_businesses(coords=(startlat, startlong))
        self.location = location
        self.latitude, self.longitude = startlat, startlong
        self._update_symbol_layers(location)

    def _update_symbol_layers(self, location) :
        """Switch to the other layer set and redraw every category into it."""
        self._is_current_layer1 = not self._is_current_layer1
        for business_cat in self.searchable_cats :
            self._update_business_cat(business_cat, location)

    def _button_press(self, button):
        """Button handler: toggle the category named by the button's description.

        Turning a category off empties its layer in both layer sets; turning
        it on redraws it from the last search result.
        """
        category = button.description
        if category not in self.searchable_cats :
            return
        cat_index = self._layer_index(category)
        was_on = self._cat_on_off_list[cat_index]
        self._cat_on_off_list[cat_index] = not was_on
        if was_on :
            self._search1_layers[cat_index].features = []
            self._search2_layers[cat_index].features = []
        else :
            self._update_business_cat(category, self.location)

    def addzipcodepolygon(self, location) :
        """Outline the zip code nearest to `location` (a (lat, long) tuple).

        The previous polygon moves to the second polygon layer. When no zip
        code is found within 20 miles, or its boundary is unavailable, the
        first polygon layer is left empty instead of raising.
        """
        lat, long = location
        from uszipcode import SearchEngine
        search = SearchEngine(simple_zipcode=True)
        matches = search.by_coordinates(lat, long, radius=20, returns=1)

        new_features = []
        if matches :
            info = matches[0].to_dict()
            coordlist = getzipcodecoords(info['state'].lower(), info['zipcode'])
            # getzipcodecoords returns an error string when the zip is unknown.
            if isinstance(coordlist, list) and coordlist :
                new_features = [gmaps.Polygon( coordlist, stroke_color='blue', fill_color='blue' )]
        self._drawing2.features = self._drawing1.features
        self._drawing1.features = new_features

    def _update_business_cat(self, business_cat, location) :
        """Redraw one category's symbols in the current layer set.

        Selects the rows of self._df whose 'main_category' contains the
        category's alias, keeps those whose calcDistances value is below
        twice the search radius and plots them. The layer is emptied when
        nothing matches or when drawing fails; a swallowed exception is kept
        in self._last_error for inspection.
        """
        lat, long = location
        alias = cat_alias_dict[business_cat]
        symbol_layer = self._current_layers()[self._layer_index(business_cat)]
        symbols = []
        try :
            # na=False keeps missing categories out of the mask (a mask with
            # NaN cannot be used for boolean indexing); regex=False makes the
            # alias a plain substring.
            mask = self._df['main_category'].str.contains(alias, na=False, regex=False)
            gmapdf = self._df[mask].copy()
            if gmapdf.shape[0] != 0 :
                gmapdf['temp_dist'] = calcDistances(lat, long, gmapdf)
                gmapdf = gmapdf[gmapdf['temp_dist'] < self._max_radius*2]
                # reindex tolerates display columns missing from the search
                # result (shown as nan) where .loc with a list would not.
                display_rows = gmapdf.reindex(columns=self._displaycols)
                infoboxlist = [
                    ', '.join(col.capitalize() + ': ' + str(value)
                              for col, value in zip(self._displaycols, row))
                    for row in display_rows.itertuples(index=False, name=None)
                ]
                symbols = self._make_gmap_symbols(gmapdf[['latitude','longitude']],
                                                  infoboxlist, self._color_dict[business_cat])
        except Exception as error :
            # Best effort: one bad category must not abort the whole redraw.
            self._last_error = error
            symbols = []
        symbol_layer.features = symbols

    def _make_gmap_symbols(self, df, infoboxlist, color) :
        """Return one gmaps.Symbol per row of `df`, paired in order with `infoboxlist`."""
        return [
            gmaps.Symbol(location=(latitude, longitude), fill_color=color, stroke_color=color,
                         info_box_content=info, scale=2)
            for latitude, longitude, info in zip(df['latitude'], df['longitude'], infoboxlist)
        ]

    def latlong_tozipcode(self, tupl) :
        """Return the zip code nearest to the (lat, long) tuple as an int, or 0 if none is found."""
        from uszipcode import SearchEngine
        search = SearchEngine(simple_zipcode=True)
        try :
            (lat, long) = tupl
            result = search.by_coordinates(lat, long, radius=10, returns=1)
            return int(result[0].to_dict()['zipcode'])
        except Exception :
            return 0

    def render(self) :
        """Display the assembled widget container."""
        display(self._container)


In [33]:
# Sample Test Cases
# Exactly one start point is active; swap the comment marker to try the other.
# lat, long = 25.786686, -80.237440 # Miami, FL
lat, long = 35.223695, -80.841573 # Charlotte, NC

fig = gmaps.figure(center = (lat, long), zoom_level = 13, layout={ 'width': '100%', 'height': '700px'}) # zoom should be 13-15
test = UserSearch(fig, (lat, long))

# test.addzipcodepolygon((lat-.01, long-.1))

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return getattr(section, self.name)[new_key]


In [34]:
# Show the assembled widget: title, category buttons, clear button, status box and map.
test.render()

VBox(children=(HTML(value='<h3>Explore City Businesses and Predict Area Income Level</h3>'), HBox(children=(Bu…