In [33]:
import pandas as pd
import numpy as np
import geopandas as gpd

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
# nltk.download('stopwords')
from collections import Counter

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

import pickle

In [2]:
df = pd.read_csv('data/airbnb.csv.zip')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
df.shape

(48377, 106)

In [4]:
df[['id','neighborhood_overview','neighbourhood_cleansed']].isnull().sum()

id                            0
neighborhood_overview     16946
neighbourhood_cleansed        0
dtype: int64

In [5]:
print(list(df.columns))

['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'summary', 'space', 'description', 'experiences_offered', 'neighborhood_overview', 'notes', 'transit', 'access', 'interaction', 'house_rules', 'thumbnail_url', 'medium_url', 'picture_url', 'xl_picture_url', 'host_id', 'host_url', 'host_name', 'host_since', 'host_location', 'host_about', 'host_response_time', 'host_response_rate', 'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url', 'host_picture_url', 'host_neighbourhood', 'host_listings_count', 'host_total_listings_count', 'host_verifications', 'host_has_profile_pic', 'host_identity_verified', 'street', 'neighbourhood', 'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'city', 'state', 'zipcode', 'market', 'smart_location', 'country_code', 'country', 'latitude', 'longitude', 'is_location_exact', 'property_type', 'room_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds', 'bed_type', 'amenities', 'square_feet', 'price', 'weekly_price', 'monthly_price', '

# Remap Neighborhoods to Unique IDs

In [6]:
neighborhood_map = {
'Allerton': 1,
'Pelham Gardens': 1,
'Arden Heights': 2,
'Astoria': 3,
'Bath Beach': 4,
'Battery Park City': 5,
'Financial District': 5,
'Bay Terrace': 6,
'Bayside': 7,
'Bayswater': 8,
'Far Rockaway': 8,
'Bedford-Stuyvesant': 9,
'Bellerose': 10,
'Belmont': 11,
'Bensonhurst': 12,
'Borough Park': 13,
'Breezy Point': 14,
'Neponsit': 14,
'Belle Harbor': 14,
'Briarwood': 15,
'Jamaica Hills': 15,
'Brighton Beach': 16,
'Brooklyn Heights': 17,
'Cobble Hill': 17,
'Brownsville': 18,
'Bushwick': 19,
'Cambria Heights': 20,
'Canarsie': 21,
'Castle Hills': 22,
'Clason Point': 22,
'Castleton Corners': 23,
'Westerleigh': 23,
'Charleston': 24,
'Pleasant Plains': 24,
'Tottenville': 24,
'Chelsea': 25,
'Flatiron District': 25,
'Chelsea, Staten Island': 26,
'Bloomfield': 26,
"Bull's Head": 26,
'Willowbrook': 26,
'Emerson Hill': 26,
'Chinatown': 27,
'Two Bridges': 27,
'City Island': 28,
'Claremont Village': 29,
'Clinton Hill': 30,
'Co-op City': 31,
'College Point': 32,
'Concord': 33,
'Arrochar': 33,
'Fort Wadsworth': 33,
'Concourse': 34,
'Concourse Village': 35,
'Corona': 36,
'Crown Heights': 37,
'Cypress Hills': 38,
'Ditmars Steinway': 39,
'Dongan Hills': 40,
'South Beach': 40,
'Douglaston': 41,
'Little Neck': 41,
'DUMBO': 42,
'Vinegar Hills': 42,
'Downtown Brooklyn': 42,
'Boerum Hill': 42,
'Dyker Heights': 43,
'East Elmhurst': 44,
'East Flatbush': 45,
'East Harlem': 46,
'East Morrisania': 47,
'East New York': 48,
'East Village': 49,
'Eastchester': 50,
'Baychester': 50,
'Edenwald': 50,
'Elmhurst': 51,
'Eltingville': 52,
'Great Kills': 52,
'Bay Terrace, Staten Island': 52,
'Flatbush': 53,
'Flatlands': 54,
'Flushing': 55,
'Fordham': 56,
'Forest Hills': 57,
'Fort Greene': 58,
'Fort Hamilton': 59,
'Bay Ridge': 59,
'Fresh Meadows': 60,
'Glen Oaks': 61,
'Glendale': 62,
'Gowanus': 63,
'Park Slope': 63,
'South Slope': 63,
'Gramercy': 64,
'Gravesend': 65,
'Greenpoint': 66,
'Grymes Hills': 67,
'Tompkinsville': 67,
'Harlem': 68,
"Hell's Kitchen": 69,
'Highbridge': 70,
'Mount Eden': 70,
'Hollis': 71,
'Hollis Hills': 72,
'Howard Beach': 73,
'Howland Hook': 74,
'Port Ivory': 74,
'Mariners Harbor': 74,
'Graniteville': 74,
'Hunts Point': 75,
'Jackson Heights': 76,
'Jamaica': 77,
'Jamaica Estates': 78,
'Holliswood': 78,
'Kensington': 79,
'Kew Gardens': 80,
'Kew Gardens Hills': 81,
'Kingsbridge': 82,
'Spuyten Duyvil': 82,
'Kips Bay': 83,
'Murray Hill': 83,
'Laurelton': 84,
'Long Island City': 85,
'Longwood': 86,
'Lower East Side': 87,
'Alphabet City': 87,
'Manhattan Beach': 88,
'Sheepshead Bay': 88,
'Gerritsen Beach': 88,
'Marble Hill': 89,
'Inwood': 89,
'Melrose': 90,
'Middle Village': 91,
'Midtown': 92,
'Theater District': 92,
'Midwood': 93,
'Mill Basin': 94,
'Bergen Beach': 94,
'Morningside Heights': 95,
'Morrisania': 96,
'Mott Haven': 97,
'Port Morris': 97,
'Mount Hope': 98,
'Navy Yard': 99,
'New Dorp': 100,
'New Dorp Beach': 100,
'Grant City': 100,
'Midland Beach': 100,
'New Springville': 101,
'Lighthouse Hill': 101,
'Todt Hill': 101,
'North Riverdale': 102,
'Fieldston': 102,
'Riverdale': 102,
'Norwood': 103,
'Ozone Park': 104,
'Parkchester': 105,
'Pelham Bay': 106,
'Van Nest': 106,
'Bronxdale': 106,
'Morris Park': 106,
'Port Richmond': 107,
"Prince's Bay": 107,
'Huguenot': 108,
'Prospect Heights': 109,
'Prospect-Lefferts Gardens': 110,
'Queens Village': 111,
'Randall Manor': 112,
'New Brighton': 112,
'St. George': 112,
'Red Hook': 113,
'Columbia St': 113,
'Carroll Gardens': 113,
'Rego Park': 114,
'Richmond Hill': 115,
'Richmondtown': 116,
'Oakwood': 116,
'Ridgewood': 117,
'Rockaway Beach': 118,
'Arverne': 118,
'Edgemere': 118,
'Rosedale': 119,
'Rossville': 120,
'Woodrow': 120,
'Sea Gate': 121,
'Coney Island': 121,
'Soundview': 122,
'South Ozone Park': 123,
'Springfield Gardens': 124,
'St. Albans': 125,
'Stapleton': 126,
'Clifton': 126,
'Rosebank': 126,
'Shore Acres': 126,
'Stuyvesant Town': 127,
'Sunnyside': 128,
'Maspeth': 128,
'Sunset Park': 129,
'Throgs Neck': 130,
'Schuylerville': 130,
'Country Club': 130,
'Tremont': 131,
'West Farms': 131,
'Tribeca': 132,
'Civic Center': 132,
'Soho': 132,
'Little Italy': 132,
'Nolita': 132,
'Unionport': 133,
'University Heights': 134,
'Morris Heights': 134,
'Upper East Side': 135,
'Roosevelt Island': 135,
'Upper West Side': 136,
'Wakefield': 137,
'Woodlawn': 137,
'Washington Heights': 138,
'West Brighton': 139,
'Silver Lake': 139,
'West Village': 140,
'Greenwich Village': 140,
'Noho': 140,
'Westchester Square': 141,
'Whitestone': 142,
'Williamsbridge': 143,
'Olinville': 143,
'Williamsburg': 144,
'Windsor Terrace': 145,
'Woodhaven': 146,
'Woodside': 147
}

In [7]:
df['neighborhood_id'] = df['neighbourhood_cleansed'].map(neighborhood_map)
df.groupby('neighborhood_id').count()['id']

neighborhood_id
1.0        77
2.0         5
3.0       919
4.0        27
5.0       832
6.0         6
7.0        41
8.0        51
9.0      3785
10.0       11
11.0       27
12.0       80
13.0      132
14.0       14
15.0       60
16.0       80
17.0      241
18.0       65
19.0     2411
20.0       30
21.0      150
22.0       20
23.0        7
24.0        8
25.0     1162
26.0        8
27.0      433
28.0       15
29.0       29
30.0      561
         ... 
118.0     169
119.0      63
120.0       2
121.0      23
122.0      16
123.0      40
124.0      92
125.0      72
126.0      61
127.0      36
128.0     490
129.0     407
130.0      40
131.0      13
132.0     601
133.0      11
134.0      40
135.0    1830
136.0    1903
137.0      63
138.0     928
139.0      22
140.0    1122
141.0      10
142.0      10
143.0      47
144.0    3815
145.0     151
146.0      93
147.0     246
Name: id, Length: 145, dtype: int64

# Pull Quantitative Features

In [8]:
df2 = df[['id','neighborhood_id','neighbourhood_cleansed','accommodates','price']]
df2['price_norm'] = df2.price.str.replace('[\$\,\.]', '').astype(int)/100/df2['accommodates']
df2.drop(['accommodates','price'],axis=1,inplace=True)
neighborhood_quant = df2.groupby('neighborhood_id').agg({'neighbourhood_cleansed':'first','id':'count','price_norm':'mean'}).reset_index()
neighborhood_quant.sort_values(by='price_norm',ascending=False)
neighborhood_quant.columns=['neighborhood_id','neighborhood','num_listings','price_norm']
neighborhood_quant.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,neighborhood_id,neighborhood,num_listings,price_norm
0,1.0,Allerton,77,31.00637
1,2.0,Arden Heights,5,28.17
2,3.0,Astoria,919,59.324403
3,4.0,Bath Beach,27,40.018519
4,5.0,Financial District,832,84.430094


# Download Words (for tf-idf Vectorizer)

In [9]:
words_df = pd.DataFrame(columns=['neighborhood_id','descriptions'])
for neighborhood_id in range(1,148):
    descs = df[df['neighborhood_id']== neighborhood_id].dropna(subset=['neighborhood_overview']).neighborhood_overview.str.cat(sep=' ')
    words_df = words_df.append({'neighborhood_id': neighborhood_id, 
                               'descriptions': descs}, ignore_index=True)
words_df.to_csv('data/airbnb_words.csv',index=False)

# Generate Stopwords

In [10]:
neighborhoods = df.neighbourhood_cleansed.unique()
neighborhood_words = [words for segments in neighborhoods for words in str(segments).lower().split()]
print(neighborhood_words)

['harlem', 'clinton', 'hill', 'east', 'harlem', 'murray', 'hill', 'bedford-stuyvesant', "hell's", 'kitchen', 'upper', 'west', 'side', 'east', 'village', 'chinatown', 'south', 'slope', 'west', 'village', 'williamsburg', 'fort', 'greene', 'chelsea', 'crown', 'heights', 'park', 'slope', 'windsor', 'terrace', 'inwood', 'greenpoint', 'bushwick', 'flatbush', 'lower', 'east', 'side', 'prospect-lefferts', 'gardens', 'long', 'island', 'city', 'kips', 'bay', 'soho', 'upper', 'east', 'side', 'red', 'hook', 'prospect', 'heights', 'washington', 'heights', 'woodside', 'brooklyn', 'heights', 'carroll', 'gardens', 'gowanus', 'flatlands', 'cobble', 'hill', 'flushing', 'boerum', 'hill', 'sunnyside', 'dumbo', 'st.', 'george', 'highbridge', 'financial', 'district', 'noho', 'ridgewood', 'morningside', 'heights', 'jamaica', 'middle', 'village', 'ditmars', 'steinway', 'flatiron', 'district', 'roosevelt', 'island', 'midtown', 'little', 'italy', 'tompkinsville', 'gramercy', 'astoria', 'kensington', 'clason', '

In [11]:
sw = stopwords.words("english")
sw.extend(['rockaway','empire','state','new','york','city','nyc','neighborhoods','neighborhood','one',
           'walk','away','blocks','block','area','apartment','around','minutes','public','many',
           'street','streets','right','best','distance','located','also','building','close','within','minute',
           'avenue','bed','near','fidi','please','hidden','number','could','go','lot','set','upon','departure',
           'stuy', 'mi', 'hell','etc', 'grand', 'central', 'take', 'ave','ditmas','attractions',
           'plenty','th', 'location', 'st', 'airbnb', 'far', 'min', 'mins', 'everything', 'get','like','even',
           'along','dozen','would','return','hudson','pratt','yankees','yankee','several','atmosphere','https','www','com',
           'lots', 'nearby', 'find', 'it’s', 'place', 'lined','predominantly','easy','access',"you're",'two',
           'ny', 'high', 'miles', 'line','highlights','tompkins','highline','rockefeller','united','nations','bedford',
           'manhattan','brooklyn','queens','bronx','staten','island','times','broadway','blvd','hell’s','madison',
           'whole','foods','trader','joes','jfk','newark','intl','across','including','need','day','full','stop',
           'much','feel','tons','union','next','see','part','want','hour','less','major','top','website','still','offer',
           'offers','areas','really','years','spot','make','way','coming','check','things','always','end',
           'nd','lincoln','starbucks','back','surrounded','main','yet','excellent','quite','come'
           'a','b','c','d','e','f','g','l','m','n','q','r','s','w','1','2','3','4','5','6','7'])
sw.extend(neighborhood_words)

In [12]:
with open("stopwords.txt", "wb") as fp: 
      pickle.dump(sw, fp)

# Generate Neighborhood Datasets
Features are words that appear in >1% of all entries

In [13]:
def clean(text):
  for c in ",.!-/:;()*&[]_~`+=\|'?<>^%$#@•–0123456789":
    text = text.replace(c, " ")
  return text.lower()

In [14]:
all_words = clean(df.neighborhood_overview.str.cat(sep=' '))
all_top = pd.DataFrame(Counter(all_words.split()).most_common(), columns=['word','freq'])
all_top = all_top[~all_top.word.isin(sw)]

In [15]:
threshold = len(df.neighborhood_overview)*0.01

ny_top_words = all_top[all_top.freq > threshold].word.tolist()
print(ny_top_words)
print(len(ny_top_words))

['restaurants', 'bars', 'shops', 'walking', 'food', 'subway', 'coffee', 'train', 'stores', 'shopping', 'museum', 'safe', 'quiet', 'beautiful', 'cafes', 'well', 'grocery', 'home', 'art', 'corner', 'music', 'local', 'amazing', 'bar', 'restaurant', 'store', 'river', 'parks', 'station', 'short', 'historic', 'ride', 'live', 'people', 'night', 'world', 'diverse', 'friendly', 'heart', 'famous', 'places', 'market', 'house', 'community', 'love', 'residential', 'enjoy', 'steps', 'known', 'shop', 'bus', 'every', 'garden', 'vibrant', 'time', 'nightlife', 'bridge', 'favorite', 'transportation', 'galleries', 'good', 'nice', 'convenient', 'hip', 'pizza', 'trendy', 'tree', 'options', 'culture', 'fun', 'italian', 'trains', 'cafe', 'supermarkets', 'supermarket', 'dining', 'cool', 'family', 'spots', 'young', 'old', 'museums', 'boutiques', 'perfect', 'super', 'views', 'open', 'experience', 'stay', 'unique', 'clubs', 'history', 'life', 'popular', 'cultural', 'small', 'door', 'families', 'lines', 'brownston

In [16]:
nb_maps = {}
for neighborhood_id in range(1,148):
  words = df[df['neighborhood_id']==neighborhood_id].dropna(subset=['neighborhood_overview'])
  words_str = clean(words.neighborhood_overview.str.cat(sep=' '))
  words_top = pd.DataFrame(Counter(words_str.split()).most_common(), columns=['word','freq'])
  words_cleaned = words_top[~words_top.word.isin(sw)]
  words_cleaned['freq_norm'] = words_cleaned.freq/len(words)
  print(neighborhood_id,words_cleaned.word[:10].tolist())
  nb_maps[neighborhood_id] = words_cleaned

1 ['zoo', 'quiet', 'restaurants', 'safe', 'parks', 'mall', 'bus', 'walking', 'botanical', 'plaza']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


2 ['friendly', 'restaurants', 'parks', 'shopping', 'quiet', 'family', 'liberty', 'safest', 'entire', 'beautiful']
3 ['restaurants', 'bars', 'food', 'greek', 'museum', 'safe', 'shops', 'stores', 'walking', 'diverse']
4 ['restaurants', 'verrazano', 'bridge', 'good', 'water', 'path', 'residential', 'cultures', 'quiet', 'safe']
5 ['walkability', 'score', 'bar', 'tavern', 'grocery', 'dead', 'rabbit', 'live', 'grog', 'coffee']
6 ['shopping', 'museum', 'totten', 'large', 'book', 'parks', 'sound', 'marina', 'bike', 'boat']
7 ['restaurants', 'safe', 'quiet', 'walking', 'bell', 'bank', 'shopping', 'express', 'bars', 'path']
8 ['phone', 'boardwalk', 'walking', 'rockaways', 'transportation', 'beaches', 'shopping', 'airport', 'beautiful', 'available']
9 ['restaurants', 'bars', 'coffee', 'shops', 'historic', 'cafes', 'walking', 'train', 'brownstones', 'beautiful']
10 ['quiet', 'tree', 'restaurants', 'bars', 'parkway', 'family', 'field', 'mile', 'open', 'shops']
11 ['botanical', 'garden', 'zoo', 'art

83 ['restaurants', 'walking', 'bars', 'station', 'safe', 'quiet', 'subway', 'residential', 'score', 'walkability']
84 ['safe', 'quiet', 'beautiful', 'home', 'bus', 'food', 'mall', 'ride', 'community', 'another']
85 ['restaurants', 'bars', 'subway', 'museum', 'safe', 'train', 'food', 'station', 'walking', 'moma']
86 ['train', 'shopping', 'food', 'stadium', 'restaurants', 'store', 'ride', 'mexican', 'house', 'chinese']
87 ['restaurants', 'bars', 'galleries', 'shops', 'les', 'bridge', 'walking', 'art', 'museum', 'river']
88 ['restaurants', 'train', 'safe', 'walking', 'subway', 'stores', 'quiet', 'shopping', 'russian', 'shops']
89 ['restaurants', 'cloisters', 'parks', 'tryon', 'river', 'bars', 'museum', 'beautiful', 'train', 'stores']
90 ['restaurants', 'documentary', 'couple', 'cheap', 'growing', 'stores', 'bars', 'friendly', 'people', 'time']
91 ['beautiful', 'mall', 'restaurants', 'safe', 'quiet', 'walking', 'short', 'food', 'parks', 'house']
92 ['restaurants', 'shopping', 'station', 'h

In [17]:
def get_freq(word):
  test = pd.DataFrame(columns=['neighborhood','freq','word'])
  for key in nb_maps.keys():
    words = nb_maps[key]
    if len(words.loc[words['word']==word]['freq_norm'])>0:
      test = test.append({'word':word,
                          'neighborhood':key,
                          'freq':words.loc[words['word']==word]['freq_norm'].item()
                         },ignore_index=True)
  return test.sort_values(by='freq',ascending=False)

In [18]:
all_nbs = list(nb_maps.keys())
clrs = sns.color_palette('hls', n_colors=len(all_nbs)) 
colors = {}
for i in range(len(all_nbs)):
  colors[all_nbs[i]] = clrs[i]

In [19]:
all_words = pd.DataFrame(columns=['neighborhood','freq','word'])
for word in ny_top_words:
  freqs = get_freq(word)
#   freqs.plot(x='neighborhood',y='freq',title=word,kind='bar',legend=False,
#              color=freqs['neighborhood'].apply(lambda x: colors[x]))
#   plt.savefig('images/barchart_'+word+'.png',bbox_inches = "tight")
  all_words = all_words.append(freqs)

In [20]:
neighborhood_words = pd.pivot_table(all_words,values=['freq'],index='neighborhood',columns=['word'],fill_value=0).reset_index()
neighborhood_words.columns = neighborhood_words.columns.droplevel()
neighborhood_words.rename(columns={'':'neighborhood_id'},inplace=True)
neighborhood_words.head()

word,neighborhood_id,accessible,airport,amazing,american,anywhere,architecture,art,artists,authentic,...,vintage,visit,walking,waterfront,well,wine,wonderful,world,young,zoo
0,1,0.035714,0.017857,0.017857,0.0,0.017857,0.0,0.0,0.0,0.0,...,0.0,0.017857,0.25,0.0,0.232143,0.0,0.0,0.035714,0.0,0.428571
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,0.008475,0.042373,0.110169,0.037288,0.008475,0.00339,0.020339,0.011864,0.032203,...,0.0,0.028814,0.177966,0.013559,0.147458,0.018644,0.057627,0.1,0.076271,0.00339
3,4,0.052632,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.052632,0.0,0.0
4,5,0.003311,0.029801,0.024834,0.048013,0.006623,0.019868,0.001656,0.0,0.02649,...,0.028146,0.019868,0.087748,0.02649,0.092715,0.009934,0.008278,0.240066,0.004967,0.0


# Merge Features

In [40]:
merged = pd.merge(neighborhood_words,neighborhood_quant)
merged.head()

Unnamed: 0,neighborhood_id,accessible,airport,amazing,american,anywhere,architecture,art,artists,authentic,...,waterfront,well,wine,wonderful,world,young,zoo,neighborhood,num_listings,price_norm
0,1,0.035714,0.017857,0.017857,0.0,0.017857,0.0,0.0,0.0,0.0,...,0.0,0.232143,0.0,0.0,0.035714,0.0,0.428571,Allerton,77,31.00637
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Arden Heights,5,28.17
2,3,0.008475,0.042373,0.110169,0.037288,0.008475,0.00339,0.020339,0.011864,0.032203,...,0.013559,0.147458,0.018644,0.057627,0.1,0.076271,0.00339,Astoria,919,59.324403
3,4,0.052632,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,Bath Beach,27,40.018519
4,5,0.003311,0.029801,0.024834,0.048013,0.006623,0.019868,0.001656,0.0,0.02649,...,0.02649,0.092715,0.009934,0.008278,0.240066,0.004967,0.0,Financial District,832,84.430094


In [42]:
merged.columns

Index(['neighborhood_id', 'accessible', 'airport', 'amazing', 'american',
       'anywhere', 'architecture', 'art', 'artists', 'authentic',
       ...
       'waterfront', 'well', 'wine', 'wonderful', 'world', 'young', 'zoo',
       'neighborhood', 'num_listings', 'price_norm'],
      dtype='object', length=218)

In [43]:
merged.to_csv('cleaned_data/airbnb.csv',index=False)