### Importing packages

In [5]:
import pandas as pd
import numpy as np

In [6]:
# Source CSV of Washington, D.C. Airbnb listings, fetched over HTTP.
DC_LISTINGS_URL = 'https://raw.githubusercontent.com/rushabh-mehta/Airbnbdataset/master/dc_airbnb.csv'

dc_listings = pd.read_csv(DC_LISTINGS_URL)
# Show the first five rows as a quick sanity check of the load.
dc_listings.head(n=5)

Unnamed: 0,host_response_rate,host_acceptance_rate,host_listings_count,accommodates,room_type,bedrooms,bathrooms,beds,price,cleaning_fee,security_deposit,minimum_nights,maximum_nights,number_of_reviews,latitude,longitude,city,zipcode,state
0,92%,91%,26,4,Entire home/apt,1.0,1.0,2.0,$160.00,$115.00,$100.00,1,1125,0,38.890046,-77.002808,Washington,20003,DC
1,90%,100%,1,6,Entire home/apt,3.0,3.0,3.0,$350.00,$100.00,,2,30,65,38.880413,-76.990485,Washington,20003,DC
2,90%,100%,2,1,Private room,1.0,2.0,1.0,$50.00,,,2,1125,1,38.955291,-76.986006,Hyattsville,20782,MD
3,100%,,1,2,Private room,1.0,1.0,1.0,$95.00,,,1,1125,0,38.872134,-77.019639,Washington,20024,DC
4,92%,67%,1,4,Entire home/apt,1.0,1.0,1.0,$50.00,$15.00,$450.00,7,1125,0,38.996382,-77.041541,Silver Spring,20910,MD


#### Feature engineering

In [7]:
# Convert the price strings (e.g. "$160.00") to floats.
# regex=False makes pandas treat ',' and '$' as literal characters: read as a
# regular expression, a bare '$' is the end-of-string anchor, not a dollar sign.
stripped_commas = dc_listings['price'].str.replace(',', '', regex=False)
stripped_dollars = stripped_commas.str.replace('$', '', regex=False)
dc_listings['price'] = stripped_dollars.astype('float')

# Shuffle the rows so that ties between equally distant neighbours are not
# always resolved in the original file order. The permutation is sized from
# dc_listings itself (no `df` is defined in this notebook) and is used as
# labels for .loc, so this relies on the default 0..n-1 integer index.
# NOTE: the permutation is unseeded, so the row order differs between runs.
dc_listings = dc_listings.loc[np.random.permutation(len(dc_listings))]

  stripped_dollars = stripped_commas.str.replace('$', '')


In [8]:
# Preview the shuffled listings; the price column should now hold floats.
dc_listings.head(n=5)

Unnamed: 0,host_response_rate,host_acceptance_rate,host_listings_count,accommodates,room_type,bedrooms,bathrooms,beds,price,cleaning_fee,security_deposit,minimum_nights,maximum_nights,number_of_reviews,latitude,longitude,city,zipcode,state
3692,90%,100%,1,1,Private room,1.0,1.0,1.0,109.0,,,1,1125,3,38.8866,-77.001289,Washington,20003,DC
876,,,1,2,Entire home/apt,1.0,1.0,1.0,100.0,$50.00,$400.00,2,90,11,38.911169,-77.054828,Washington,20007,DC
485,60%,78%,1,2,Private room,1.0,1.0,1.0,85.0,,,1,1125,2,38.912154,-77.034291,Washington,20009,DC
2861,100%,78%,6,2,Private room,1.0,1.0,1.0,65.0,,,2,1125,113,38.936741,-77.019947,Washington,20011,DC
3178,,,1,4,Entire home/apt,1.0,1.0,2.0,115.0,$100.00,,3,1125,0,38.919826,-77.035559,Washington,20009,DC


We will write a function that can suggest the optimal price for any value of the _accommodates_ column. The _distance_ column is specific to the living space being priced, so the function computes it on a copy of the **dc_listings** DataFrame rather than on **dc_listings** itself.

The function will use the _k-nearest neighbors_ machine learning technique to calculate the suggested price for any value for _accommodates_. This function should:

 * Take in a single parameter, _new_listing_, that describes the number of people the living space accommodates.
 * Assign a copy of _dc_listings_ to a new DataFrame named _temp_df_. Use the **pandas.DataFrame.copy()** method so that a copy of the underlying DataFrame is assigned to _temp_df_, instead of just a reference to _dc_listings_.
 * Calculate the distance between each value in the _accommodates_ column and the _new_listing_ value that was passed in. Assign the resulting Series object to the _distance_ column in _temp_df_.
 * Sort _temp_df_ by the _distance_ column and select the first 5 values in the _price_ column.
 * Calculate the mean of these 5 values and use that as the return value for the entire _predict_price_ function.


In [12]:
#Function for new price prediction

def predict_price(new_listing, k=5, listings=None):
    """Suggest a price as the mean price of the k most similar listings.

    Similarity is the absolute difference between each listing's
    ``accommodates`` value and ``new_listing``.

    Parameters
    ----------
    new_listing : int or float
        Number of people the living space to be priced accommodates.
    k : int, optional
        Number of nearest neighbours whose prices are averaged. Defaults to 5.
    listings : pandas.DataFrame, optional
        Listings to search; needs an ``accommodates`` column and a numeric
        ``price`` column. Defaults to the notebook-level ``dc_listings``.

    Returns
    -------
    float
        Mean ``price`` of the ``k`` closest listings (of all listings when
        fewer than ``k`` are available).

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError('k must be at least 1, got {}'.format(k))
    if listings is None:
        listings = dc_listings

    # Work on a copy so the caller's DataFrame never gains a 'distance' column.
    temp_df = listings.copy()
    # Vectorised absolute difference instead of a per-row Python lambda.
    temp_df['distance'] = (temp_df['accommodates'] - new_listing).abs()
    temp_df = temp_df.sort_values('distance')
    nearest_neighbors = temp_df.iloc[:k]['price']
    return nearest_neighbors.mean()

# Predict prices for living spaces that accommodate 1, 2 and 4 people.
acc_one, acc_two, acc_four = (predict_price(guests) for guests in (1, 2, 4))

# One shared template keeps the three report lines identical in wording.
report_template = 'The price prediction for living space that accommodates {} person is {}'
for guests, predicted in ((1, acc_one), (2, acc_two), (4, acc_four)):
    print(report_template.format(guests, predicted))

The price prediction for living space that accommodates 1 person is 97.0
The price prediction for living space that accommodates 2 person is 101.6
The price prediction for living space that accommodates 4 person is 159.2


In [10]:
# Re-display the first rows: predict_price works on a copy, so dc_listings
# itself should carry no 'distance' column.
dc_listings.head(n=5)

Unnamed: 0,host_response_rate,host_acceptance_rate,host_listings_count,accommodates,room_type,bedrooms,bathrooms,beds,price,cleaning_fee,security_deposit,minimum_nights,maximum_nights,number_of_reviews,latitude,longitude,city,zipcode,state
3692,90%,100%,1,1,Private room,1.0,1.0,1.0,109.0,,,1,1125,3,38.8866,-77.001289,Washington,20003,DC
876,,,1,2,Entire home/apt,1.0,1.0,1.0,100.0,$50.00,$400.00,2,90,11,38.911169,-77.054828,Washington,20007,DC
485,60%,78%,1,2,Private room,1.0,1.0,1.0,85.0,,,1,1125,2,38.912154,-77.034291,Washington,20009,DC
2861,100%,78%,6,2,Private room,1.0,1.0,1.0,65.0,,,2,1125,113,38.936741,-77.019947,Washington,20011,DC
3178,,,1,4,Entire home/apt,1.0,1.0,2.0,115.0,$100.00,,3,1125,0,38.919826,-77.035559,Washington,20009,DC


We explored the problem of predicting the optimal price at which to list an Airbnb rental, based on the prices of similar listings on the site. We stepped through the entire machine learning workflow, from selecting a feature to testing the model. 