# Airbnb Rate Analysis 

- Find trends in Airbnb rates in different neighbourhoods in San Francisco

### Importing Dependencies

In [1]:
# Dependencies
import pandas as pd
import numpy as np

### Creating Dataframe from the csv file

In [2]:
#Create a dataframe from the csv file
data = pd.read_csv("Data/Airbnb_listings.csv")
data = data.rename(columns = {"annual rate (USD)": "Average annual rate"})

data.head()

Unnamed: 0,date,neighbourhood,property type,daily rate,Average annual rate
0,2016-10,Seacliff,House,$105.00,38325.0
1,2016-10,Seacliff,House,$300.00,109500.0
2,2016-10,Seacliff,Apartment,$175.00,63875.0
3,2016-10,Seacliff,House,$90.00,32850.0
4,2016-10,Seacliff,Condominium,$400.00,146000.0


### Cleaning Data and Creating new DataFrame

In [3]:
# Remove the month from the date by using the list comprehension
#for each in data['date']:
  #  return each[:4]
data['date'] = [e[:4] for e in data['date']]

data.head()

Unnamed: 0,date,neighbourhood,property type,daily rate,Average annual rate
0,2016,Seacliff,House,$105.00,38325.0
1,2016,Seacliff,House,$300.00,109500.0
2,2016,Seacliff,Apartment,$175.00,63875.0
3,2016,Seacliff,House,$90.00,32850.0
4,2016,Seacliff,Condominium,$400.00,146000.0


In [4]:
# Get the number of listings per year and per neighbourhood
data1 = data.groupby(["date", "neighbourhood"])["daily rate"].nunique()
data1 = data1.reset_index()
data1 = data1.rename(columns = {"daily rate": "No. of listings",
                                "date": "year"})
# Use years as headers
data1 = data1.pivot(index = "neighbourhood", columns = "year")
data1.columns = data1.columns.droplevel(0)
data1 = data1.rename(columns = {"2015": "Listings_2015", 
                                "2016": "Listings_2016", 
                                "2017": "Listings_2017"})
data1 = data1.reset_index()

data1.head()

year,neighbourhood,Listings_2015,Listings_2016,Listings_2017
0,Bayview,84,126,117
1,Bernal Heights,156,191,222
2,Castro/Upper Market,191,246,231
3,Chinatown,78,123,129
4,Crocker Amazon,46,51,52


In [5]:
# Calculate mean annual rental rates based on years and neighbourhoods
data2 = round(data.groupby(["date", "neighbourhood"]).mean(), 2)
data2 = data2.reset_index()
data2 = data2.rename(columns = {"date": "year"})

# Use years as headers
data2 = data2.pivot(index = "neighbourhood", columns = "year")
data2.columns = data2.columns.droplevel(0)
data2 = data2.rename(columns = {"2015": "STR_2015", 
                                "2016": "STR_2016", 
                                "2017": "STR_2017"})
data2 = data2.reset_index()

data2.head()

year,neighbourhood,STR_2015,STR_2016,STR_2017
0,Bayview,46374.21,58118.04,45354.45
1,Bernal Heights,68704.42,77447.15,72724.98
2,Castro/Upper Market,84268.13,97370.54,90733.81
3,Chinatown,91821.37,108057.51,99129.42
4,Crocker Amazon,47207.84,43735.51,35399.72


In [6]:
# Merge the two dataframes based on 
Airbnb_avg = pd.merge(data1, data2, on = "neighbourhood")
Airbnb_avg.head()

year,neighbourhood,Listings_2015,Listings_2016,Listings_2017,STR_2015,STR_2016,STR_2017
0,Bayview,84,126,117,46374.21,58118.04,45354.45
1,Bernal Heights,156,191,222,68704.42,77447.15,72724.98
2,Castro/Upper Market,191,246,231,84268.13,97370.54,90733.81
3,Chinatown,78,123,129,91821.37,108057.51,99129.42
4,Crocker Amazon,46,51,52,47207.84,43735.51,35399.72


In [7]:
# List districts and neighbourhoods
Downtown = ["Chinatown", "Civic Center", "Financial District", "French Quarter", "Mid-Market", "Nob Hill", 
            "North Beach", "Mission Bay", "South of Market", "Telegraph Hill", "Tenderloin", "Union Square", 
            "Downtown","Van Ness/ Civic Center", "Downtown/Civic Center", "Financial District North", 
            "Financial District South", "South Beach"]
North_Downtown = ["Cow Hollow", "Fisherman's Wharf", "Marina", "Pacific Heights", "Presidio", "Russian Hill",
                  "Treasure Island", "Yerba Buena Island", "Treasure Island/YBI", "Presidio Heights", 
                  "Lake --The Presidio", "Yerba Buena", "North Waterfront"]
Outside_Lands = ["Forest Hill", "Ingleside", "Ingleside Terrace", "Ocean View", "Parkside", "Outer Richmond",  
                 "Seacliff", "Sea Cliff", "St. Francis Wood", "Inner Sunset", "West Portal", "Westwood Highlands", 
                 "Westwood Park", "Golden Gate Park", "Inner Richmond","Oceanview", "Outer Sunset", "Balboa Terrace",
                 "Central Richmond", "Lakeshore", "Lake Shore", "Monterey Heights", "Central Sunset", 
                 "Forest Hill Extension", "Golden Gate Heights", "Ingleside Heights", "Pine Lake Park", 
                 "Stonestown", "Lakeside", "Lake", "Inner Parkside", "Merced Heights", "Forest Knolls",
                 "Clarendon Heights", "Merced Manor", "Mount Davidson Manor", "Outer Parkside", "Park North",
                 "Sunnyside"]
Western_Addition = ["Alamo Square", "Anza Vista", "Cathedral Hill", "Cole Valley", "Corona Heights", "Duboce Triangle",
                    "Fillmore", "Haight Ashbury", "Hayes Valley", "Japantown", "Jordan Park/Laurel Heights", 
                    "Laurel Heights", "Lower Haight", "Western Addition", "Haight", "Parnassus/Ashbury Heights",
                    "Lower Pacific Heights", "Lone Mountain", "Panhandle"]
Southern = ["Bayview", "Hunters Point", "Bernal Heights", "Castro", "Croker Amazon", "Crocker Amazon", 
            "Diamond Heights", "Dogpatch", "Eureka Valley", "Excelsior", "Glen Park", "Mission", "Noe Valley", 
            "Mission Dolores","Outer Mission", "Portola", "Potrero Hill", "Visitacion Valley", "Castro/Upper Market", 
            "Upper Market", "Mission Terrace", "Bayview Heights", "Buena Vista", "Buena Vista Park", 
            "Eureka Valley/Dolores Heights", "Inner Mission", "Twin Peaks", "West of Twin Peaks", "Little Hollywood",
            "Midtown Terrace", "Miraloma Park", "Sherwood Forest", "Silver Terrace"]

In [8]:
# Create a new column called District
Airbnb_avg["District"] = np.where(Airbnb_avg["neighbourhood"].isin(Downtown),"Downtown", 
                             (np.where(Airbnb_avg["neighbourhood"].isin(North_Downtown),"North Downtown",
                             (np.where(Airbnb_avg["neighbourhood"].isin(Outside_Lands), "Outside Lands",
                             (np.where(Airbnb_avg["neighbourhood"].isin(Western_Addition), "Western Addition",
                              np.where(Airbnb_avg["neighbourhood"].isin(Southern), "Southern", ""))))))))

Airbnb_avg.head()

year,neighbourhood,Listings_2015,Listings_2016,Listings_2017,STR_2015,STR_2016,STR_2017,District
0,Bayview,84,126,117,46374.21,58118.04,45354.45,Southern
1,Bernal Heights,156,191,222,68704.42,77447.15,72724.98,Southern
2,Castro/Upper Market,191,246,231,84268.13,97370.54,90733.81,Southern
3,Chinatown,78,123,129,91821.37,108057.51,99129.42,Downtown
4,Crocker Amazon,46,51,52,47207.84,43735.51,35399.72,Southern


In [9]:
# Save the dataframe to csv
Airbnb_avg.to_csv('Data/airbnbdataanalysis.csv', index = False)