In [181]:
import sys
import os
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import Normalizer, StandardScaler
from sklearn.externals import joblib

import folium
from folium import plugins

import matplotlib.pyplot as plt

home_folder = os.environ["home_folder"]
sys.path.append(os.path.join(home_folder, "airbnb_app/airbnb/web_app/flaskexample/"))
import airbnb_pipeline

#sql stuff:
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [68]:
listings.head()

Unnamed: 0,id,neighborhood_overview,price,diff,listing_url,name
0,685006,The apartment is right across the street from ...,75.0,3.206529,https://www.airbnb.com/rooms/685006,Best Studio on Prospect Park
1,9461238,"Prime Williamsburg, the hot South side: Steps ...",117.0,57.554158,https://www.airbnb.com/rooms/9461238,1 bedrms Williamsburg 1st Stop L
2,4873690,"Long Island City is the ultimate, in my mind, ...",89.0,2.726522,https://www.airbnb.com/rooms/4873690,"Private room, 10min to Times Square"
3,12738047,,80.0,19.614082,https://www.airbnb.com/rooms/12738047,clean and tidy separated livingroom
4,15359479,,200.0,9.892523,https://www.airbnb.com/rooms/15359479,Awesome One Bedroom Gramercy Park


In [63]:
train.head()

Unnamed: 0,id,neighborhood_overview,neighbourhood_cleansed,city,latitude,longitude
0,685006.0,The apartment is right across the street from ...,Prospect-Lefferts Gardens,Brooklyn,40.661408,-73.96175
1,9461238.0,"Prime Williamsburg, the hot South side: Steps ...",Williamsburg,Brooklyn,40.71632,-73.957255
2,4873690.0,"Long Island City is the ultimate, in my mind, ...",Long Island City,Queens,40.742824,-73.949939
3,14179829.0,"Located on a beautiful, quiet tree lined stree...",Sunnyside,Queens,40.741264,-73.925798
4,7364064.0,This spacious three bedroom is located in the ...,Stuyvesant Town,New York,40.733109,-73.979277


In [75]:
train = pd.read_sql_query("SELECT * FROM location_descriptions", con)

In [77]:
train.columns

Index(['id', 'neighborhood_overview', 'neighbourhood_cleansed', 'city',
       'latitude', 'longitude'],
      dtype='object')

In [78]:
train = pd.read_sql_query("""
                          SELECT id, neighborhood_overview, neighbourhood_cleansed, latitude, longitude,
                          price, diff, listing_url, name FROM listings_price
                          """, con)

nbd_counts = train["neighbourhood_cleansed"].value_counts()
descp = train[["id", "neighborhood_overview"]]
descp = descp.drop_duplicates()

In [87]:
print("loading models")
model = joblib.load(os.path.join(home_folder, 'airbnb_app/Data/tf_idf_model.pkl'))

knn = NearestNeighbors(500, metric = "cosine", algorithm = "brute")
X = descp["neighborhood_overview"].dropna()

#somewhat slow - could save the projections first here:
X_proj = model.transform(X)

#fast since there's no real fittting going on here
#should check how fast predicting is - should be fine for a few values.
knn.fit(X_proj)

loading models


NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=1, n_neighbors=500, p=2, radius=1.0)

In [88]:
descp = "jamaican cusine culture gritty"

In [89]:
nbd_score = airbnb_pipeline.get_nbds(descp, knn, model, train, nbd_counts)

In [98]:

def get_colors(n):
    """
    color scales based on the new matplotlib scales with slight modifications
    """

    scales = [["#f2eff1", "#f2eff1", "#451077", "#721F81", "#9F2F7F", "#CD4071",
    "#F1605D",  "#FD9567",  "#FEC98D", "#FCFDBF"],

    ["#f2eff1", "#f2eff1", "#3E4A89", "#31688E", "#26828E", "#1F9E89", "#35B779",
    "#6DCD59", "#B4DE2C", "#FDE725"],

    ["#f2eff1", "#f2eff1", "#4B0C6B", "#781C6D", "#A52C60", "#CF4446",
    "#ED6925", "#FB9A06", "#F7D03C", "#FCFFA4"]]

    return(scales[n-1])

def return_color_scale(n):
    df = pd.Series(get_colors(n))
    df.index = np.power(df.index/10, 1/1.75)
    return df.to_dict()

def locations_of_best_match(new_descp, knn, model, train):
    neighbors = knn.kneighbors(model.transform([new_descp]))
    closest_listings = neighbors[1][0]
    results = train.iloc[closest_listings]
    return results

def get_heat_map(descp, knn, model, train):
    map_osm = folium.Map(tiles='cartodbdark_matter',
                         location = [40.7831, -73.970], zoom_start=13)
    results = locations_of_best_match(descp, knn, model, train)
    temp = results[["latitude", "longitude"]].values.tolist()

    map_osm.add_children(plugins.HeatMap(temp, min_opacity = 0.45,
                                         radius = 30, blur = 30,
                                         gradient = return_color_scale(1),
                                         name = descp))
    
    for i in range(10):
        folium.Marker([results.iloc[i].latitude, results.iloc[i].longitude], 
                      popup=results.iloc[i].neighborhood_overview).add_to(map_osm)

    return map_osm

In [99]:
results = locations_of_best_match(descp, knn, model, train)

In [100]:
results = locations_of_best_match(descp, knn, model, train)

In [101]:
results

Unnamed: 0,id,neighborhood_overview,neighbourhood_cleansed,latitude,longitude,price,diff,listing_url,name
22205,1217046,"The apartment is in the heart of Williamsburg,...",Williamsburg,40.713168,-73.948511,250.0,-47.514673,https://www.airbnb.com/rooms/1217046,Sunny Williamsburg Apt w/ Deck
10736,6437014,Park Slope is considered one of New York City'...,South Slope,40.666316,-73.990132,120.0,17.797299,https://www.airbnb.com/rooms/6437014,Family-Friendly Park Slope Apt 1 BD
11561,15287313,,Upper East Side,40.770456,-73.952237,133.0,31.645662,https://www.airbnb.com/rooms/15287313,Cozy and cool 1BR in perfect UES
11558,15025178,"My neighborhood is up and coming, funky, diver...",Bushwick,40.686089,-73.915714,65.0,4.063596,https://www.airbnb.com/rooms/15025178,Large spacious room!
17904,9512844,Very safe neighborhood - quiet and clean. Grea...,East Harlem,40.792248,-73.944230,345.0,-66.895721,https://www.airbnb.com/rooms/9512844,Decorative 3BR * Sleep 8 *C. Park
14641,11211612,,Williamsburg,40.714995,-73.950001,50.0,38.514264,https://www.airbnb.com/rooms/11211612,Private Room in Williamsburg BK
19238,3463475,The neighborhood is a nightlife hub on weekend...,Lower East Side,40.718118,-73.986186,126.0,62.308755,https://www.airbnb.com/rooms/3463475,Enjoy Local Life in the Best NYC Neighborhood
22036,15513170,,Washington Heights,40.837788,-73.942184,55.0,23.273250,https://www.airbnb.com/rooms/15513170,Uptown Bedroom
16051,13961073,Hell's Kitchen is perfectly located for you to...,Civic Center,40.712502,-74.005241,135.0,-0.567293,https://www.airbnb.com/rooms/13961073,Queen of the Castle
11794,1523556,The neighborhood is unique as its right betwee...,Williamsburg,40.710411,-73.938474,178.0,-115.258575,https://www.airbnb.com/rooms/1523556,Simply brooklyn. Bedroom with office off L train.


In [182]:
descp

'jamaican cusine culture gritty'

In [183]:
map_osm = folium.Map(tiles='cartodbdark_matter',
                         location = [40.7831, -73.970], zoom_start=13)
results = locations_of_best_match(descp, knn, model, train)
temp = results[["latitude", "longitude"]].values.tolist()

map_osm.add_children(plugins.HeatMap(temp, min_opacity = 0.45,
                                     radius = 30, blur = 30,
                                     gradient = return_color_scale(1),
                                     name = descp))

In [121]:

html="""
    <h1> This is a big popup</h1><br>
    With a few lines of code...
    <p>
    <code>
        from numpy import *<br>
        exp(-2*pi)
    </code>
    </p>
    """
iframe = folium.element.IFrame(html=html, width=500, height=300)
popup = folium.Popup(iframe, max_width=2650)

In [155]:
train

Unnamed: 0,id,neighborhood_overview,neighbourhood_cleansed,latitude,longitude,price,diff,listing_url,name
0,685006,The apartment is right across the street from ...,Prospect-Lefferts Gardens,40.661408,-73.961750,75.0,3.206529,https://www.airbnb.com/rooms/685006,Best Studio on Prospect Park
1,9461238,"Prime Williamsburg, the hot South side: Steps ...",Williamsburg,40.716320,-73.957255,117.0,57.554158,https://www.airbnb.com/rooms/9461238,1 bedrms Williamsburg 1st Stop L
2,4873690,"Long Island City is the ultimate, in my mind, ...",Long Island City,40.742824,-73.949939,89.0,2.726522,https://www.airbnb.com/rooms/4873690,"Private room, 10min to Times Square"
3,12738047,,Upper West Side,40.799973,-73.965293,80.0,19.614082,https://www.airbnb.com/rooms/12738047,clean and tidy separated livingroom
4,15359479,,Gramercy,40.735916,-73.986296,200.0,9.892523,https://www.airbnb.com/rooms/15359479,Awesome One Bedroom Gramercy Park
5,14179829,"Located on a beautiful, quiet tree lined stree...",Sunnyside,40.741264,-73.925798,63.0,-26.414698,https://www.airbnb.com/rooms/14179829,"HUGE zen room, queen bed in beautiful Sunnyside"
6,810483,,East Village,40.729934,-73.979829,119.0,-9.921633,https://www.airbnb.com/rooms/810483,"Beautiful, central,sunny, quiet shared apt"
7,9854420,,SoHo,40.727673,-74.003439,200.0,12.590218,https://www.airbnb.com/rooms/9854420,Beautiful one bedroom in Soho
8,4349358,"The apartment is located in a beautiful, safe,...",Upper West Side,40.774744,-73.978863,125.0,-2.453585,https://www.airbnb.com/rooms/4349358,Spacious UWS Apt. by Central Park
9,9330135,,Brighton Beach,40.578082,-73.962530,85.0,46.160573,https://www.airbnb.com/rooms/9330135,Brand new 1 BR apt Steps from train


In [172]:
results

Unnamed: 0,id,neighborhood_overview,neighbourhood_cleansed,latitude,longitude,price,diff,listing_url,name
22205,1217046,"The apartment is in the heart of Williamsburg,...",Williamsburg,40.713168,-73.948511,250.0,-47.514673,https://www.airbnb.com/rooms/1217046,Sunny Williamsburg Apt w/ Deck
10736,6437014,Park Slope is considered one of New York City'...,South Slope,40.666316,-73.990132,120.0,17.797299,https://www.airbnb.com/rooms/6437014,Family-Friendly Park Slope Apt 1 BD
11561,15287313,,Upper East Side,40.770456,-73.952237,133.0,31.645662,https://www.airbnb.com/rooms/15287313,Cozy and cool 1BR in perfect UES
11558,15025178,"My neighborhood is up and coming, funky, diver...",Bushwick,40.686089,-73.915714,65.0,4.063596,https://www.airbnb.com/rooms/15025178,Large spacious room!
17904,9512844,Very safe neighborhood - quiet and clean. Grea...,East Harlem,40.792248,-73.944230,345.0,-66.895721,https://www.airbnb.com/rooms/9512844,Decorative 3BR * Sleep 8 *C. Park
14641,11211612,,Williamsburg,40.714995,-73.950001,50.0,38.514264,https://www.airbnb.com/rooms/11211612,Private Room in Williamsburg BK
19238,3463475,The neighborhood is a nightlife hub on weekend...,Lower East Side,40.718118,-73.986186,126.0,62.308755,https://www.airbnb.com/rooms/3463475,Enjoy Local Life in the Best NYC Neighborhood
22036,15513170,,Washington Heights,40.837788,-73.942184,55.0,23.273250,https://www.airbnb.com/rooms/15513170,Uptown Bedroom
16051,13961073,Hell's Kitchen is perfectly located for you to...,Civic Center,40.712502,-74.005241,135.0,-0.567293,https://www.airbnb.com/rooms/13961073,Queen of the Castle
11794,1523556,The neighborhood is unique as its right betwee...,Williamsburg,40.710411,-73.938474,178.0,-115.258575,https://www.airbnb.com/rooms/1523556,Simply brooklyn. Bedroom with office off L train.


In [179]:
for i in range(10):
    
    html = """<h1> <a href = {0} target="_blank"> View on AirBnB </a> </h1>
              <p> {1} </p>
              
           """.format(results.iloc[i].listing_url ,results.iloc[i]["name"])
    iframe = folium.element.IFrame(html=html, width=500, height=300)
    popup = folium.Popup(iframe, max_width=2650)
    folium.Marker([results.iloc[i].latitude,
                   results.iloc[i].longitude],
                   popup=popup).add_to(map_osm)


In [157]:
results.iloc[10].neighborhood_overview

'The apartment is located in Stuyvestant Heights, a quiet and historic Brooklyn neighborhood, filled with beautiful brownstone buildings. The apartment is only a block away from Saraghina, a delicious pizza place and bakery. There are a ton of other restaurants nearby including Peaches, Georges Andre, Tepache Mexican, and Il Lunatico.'

In [180]:
map_osm