In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st

In [33]:
#reading the csv file
airbnb_data = pd.read_csv('AB_NYC_2019.csv')
#putting the csv file data in a df
airbnb_df = pd.DataFrame(airbnb_data)
airbnb_df.head()
#Creating a column for the minimum stay price per night
airbnb_df['min_price_to_stay'] = airbnb_df['price'] * airbnb_df['minimum_nights']
#cleaning data
airbnb_df = airbnb_df[['id', 'neighbourhood_group','neighbourhood',
 'room_type',
 'price',
 'minimum_nights',
 'min_price_to_stay',
 'number_of_reviews',
 'reviews_per_month',
 'calculated_host_listings_count',
 'availability_365',
 'latitude',
 'longitude']]
#Getting only City Data
bronx_airbnb_df = airbnb_df.loc[airbnb_df['neighbourhood_group'] == 'Bronx']
bronx_airbnb_df.head()

Unnamed: 0,id,neighbourhood_group,neighbourhood,room_type,price,minimum_nights,min_price_to_stay,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,latitude,longitude
171,44096,Bronx,Highbridge,Private room,40,1,40,219,2.04,3,353,40.83232,-73.93184
207,54626,Bronx,Highbridge,Private room,45,1,45,138,1.45,3,323,40.83075,-73.93058
260,63588,Bronx,Clason Point,Private room,90,2,180,0,,7,349,40.81309,-73.85514
261,63610,Bronx,Eastchester,Entire home/apt,105,2,210,38,0.5,13,365,40.88057,-73.83572
309,71812,Bronx,Kingsbridge,Entire home/apt,90,30,2700,4,0.35,2,346,40.87207,-73.90193


In [34]:
# Number of records in the data
bronx_airbnb_df.count()

id                                1091
neighbourhood_group               1091
neighbourhood                     1091
room_type                         1091
price                             1091
minimum_nights                    1091
min_price_to_stay                 1091
number_of_reviews                 1091
reviews_per_month                  876
calculated_host_listings_count    1091
availability_365                  1091
latitude                          1091
longitude                         1091
dtype: int64

In [28]:
#Filling in all NA values with 0 reviews
bronx_airbnb_df.loc[:, 'reviews_per_month'] = bronx_airbnb_df.loc[:, 'reviews_per_month'].fillna(0)
bronx_airbnb_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,min_price_to_stay,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
171,44096,Room with a View,190409,Waundell,Bronx,Highbridge,40.83232,-73.93184,Private room,40,1,40,219,2019-07-04,2.04,3,353
207,54626,Cozy bedroom by Yankee Stadium,190409,Waundell,Bronx,Highbridge,40.83075,-73.93058,Private room,45,1,45,138,2019-06-30,1.45,3,323
260,63588,LL3,295128,Carol Gloria,Bronx,Clason Point,40.81309,-73.85514,Private room,90,2,180,0,,0.0,7,349
261,63610,DOMINIQUE'S NY mini efficiency* wifi*metro*quiet,310670,Vie,Bronx,Eastchester,40.88057,-73.83572,Entire home/apt,105,2,210,38,2019-06-27,0.5,13,365
309,71812,Condo Apartment with laundry in unit,369015,Thai,Bronx,Kingsbridge,40.87207,-73.90193,Entire home/apt,90,30,2700,4,2019-01-02,0.35,2,346


In [50]:
#Creating Bins for prices
price_bins = [0,49.9,99.9,149.9,180.9]
price_bin_names = ['0-49','50-99','100-149','150-180'] #used quartiles to cut out anything higher than 180
#placed prices in bins
bronx_airbnb_df['price range'] = pd.cut(bronx_airbnb_df['price'],price_bins,labels=price_bin_names)

min_nights_bronx_quartiles = bronx_airbnb_df['minimum_nights'].quantile([.25,.5,.75])
min_nights_lowerq = min_nights_bronx_quartiles[.25]
min_nights_upperq = min_nights_bronx_quartiles[.75]
min_nights_iqr = (min_nights_upperq-min_nights_lowerq).round(2)
min_nights_low_b = (min_nights_lowerq - (1.5*min_nights_iqr)).round(2)
print(f'lower bound is {min_nights_low_b}')
min_nights_up_b = (min_nights_upperq + (1.5*min_nights_iqr)).round(2)
print(f'upper bound is {min_nights_up_b}')

price_bronx_quartiles = bronx_airbnb_df['price'].quantile([.25,.5,.75])
price_lowerq = price_bronx_quartiles[.25]
price_upperq = price_bronx_quartiles[.75]
price_iqr = (price_upperq-price_lowerq).round(2)
price_low_b = (price_lowerq - (1.5*price_iqr)).round(2)
print(f'lower bound is {price_low_b}')
price_up_b = (price_upperq + (1.5*price_iqr)).round(2)
print(f'upper bound is {price_up_b}')




lower bound is -2.0
upper bound is 6.0
lower bound is -36.0
upper bound is 180.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bronx_airbnb_df['price range'] = pd.cut(bronx_airbnb_df['price'],price_bins,labels=price_bin_names)


In [55]:
over_180 = bronx_airbnb_df.loc[(bronx_airbnb_df['price']<=180) & (bronx_airbnb_df['minimum_nights']<=6)]
popularity_df=over_180.groupby(['neighbourhood','price range']).agg(['count']).reset_index()
#popularity_df = popularity_df.loc[(popularity_df['id']= )]
popularity_df

Unnamed: 0_level_0,neighbourhood,price range,id,neighbourhood_group,room_type,price,minimum_nights,min_price_to_stay,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,latitude,longitude
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,count,count,count,count,count,count,count,count,count,count,count
0,Allerton,0-49,15,15,15,15,15,15,15,12,15,15,15,15
1,Allerton,50-99,12,12,12,12,12,12,12,12,12,12,12,12
2,Allerton,100-149,11,11,11,11,11,11,11,9,11,11,11,11
3,Allerton,150-180,0,0,0,0,0,0,0,0,0,0,0,0
4,Baychester,0-49,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Williamsbridge,150-180,0,0,0,0,0,0,0,0,0,0,0,0
188,Woodlawn,0-49,3,3,3,3,3,3,3,3,3,3,3,3
189,Woodlawn,50-99,8,8,8,8,8,8,8,8,8,8,8,8
190,Woodlawn,100-149,0,0,0,0,0,0,0,0,0,0,0,0


In [44]:
bronx_airbnb_df.loc[:,'price range'] = pd.cut(bronx_airbnb_df['price'],price_bins,labels=price_bin_names)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [None]:
bronx_neighbourhood_by_popularity = []

for row in neighbourhood_by_count.head(25).index:
    if row[0] not in manhattan_neighbourhood_by_popularity:
        manhattan_neighbourhood_by_popularity.append(row[0])

manhattan_neighbourhood_listing_count = []


for n in manhattan_neighbourhood_by_popularity:
    count = len(manhattan_airbnb_df_1000.loc[manhattan_airbnb_df_1000['neighbourhood'] == n])
    manhattan_neighbourhood_listing_count.append(count)

listing_count_by_neighbourhood_df = pd.DataFrame({
    'neighbourhood': manhattan_neighbourhood_by_popularity,
    'count': manhattan_neighbourhood_listing_count
})

listing_count_by_neighbourhood_df = listing_count_by_neighbourhood_df.sort_values(by='count', ascending=False)
listing_count_by_neighbourhood_df

In [None]:
manhattan_neighbourhood_by_popularity = []

for row in neighbourhood_by_count.head(25).index:
    if row[0] not in manhattan_neighbourhood_by_popularity:
        manhattan_neighbourhood_by_popularity.append(row[0])

manhattan_neighbourhood_listing_count = []


for n in manhattan_neighbourhood_by_popularity:
    count = len(manhattan_airbnb_df_1000.loc[manhattan_airbnb_df_1000['neighbourhood'] == n])
    manhattan_neighbourhood_listing_count.append(count)

listing_count_by_neighbourhood_df = pd.DataFrame({
    'neighbourhood': manhattan_neighbourhood_by_popularity,
    'count': manhattan_neighbourhood_listing_count
})

listing_count_by_neighbourhood_df = listing_count_by_neighbourhood_df.sort_values(by='count', ascending=False)
listing_count_by_neighbourhood_df

In [7]:
price_bins = [0,49.9,99.9,149.9,249.9,499.9,10001]
price_bin_names = ['0-49','50-99','100-149','150-249','250-499','>500']
airbnb_df['price range'] = pd.cut(airbnb_df['price'],price_bins,labels=price_bin_names)
neighbourhood_by_price = airbnb_df[['neighbourhood_group','price range','number_of_reviews','minimum_nights','availability_365']].groupby(['neighbourhood_group','price range']).mean()
neighbourhood_by_price

Unnamed: 0_level_0,Unnamed: 1_level_0,number_of_reviews,minimum_nights,availability_365
neighbourhood_group,price range,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bronx,0-49,25.153355,4.690096,151.897764
Bronx,50-99,28.552063,5.084479,168.787819
Bronx,100-149,26.736527,3.91018,169.520958
Bronx,150-249,15.048387,2.774194,154.129032
Bronx,250-499,16.333333,2.366667,255.366667
Bronx,>500,2.444444,2.444444,192.444444
Brooklyn,0-49,16.468248,7.787226,98.220803
Brooklyn,50-99,23.92336,5.472348,94.243286
Brooklyn,100-149,29.251578,6.199126,96.766634
Brooklyn,150-249,27.20118,5.760326,105.750492


In [9]:
bronx_df = airbnb_df.loc[airbnb_df['neighbourhood_group'] == 'Bronx']

bronx_df_room_type = bronx_df[['room_type', 'neighbourhood','price', 'minimum_nights', 'number_of_reviews']].groupby(['room_type','neighbourhood',]).agg(['mean','count', 'min', 'max'])
sorted_bronx_df = bronx_df_room_type.sort_values(by=('price', 'mean'), ascending=False)

# bronx_df.loc[bronx_df['neighbourhood'] == 'Allerton'][['room_type', 'neighbourhood','price', 'minimum_nights']].groupby(['room_type','neighbourhood',]).agg(['mean','count', 'min', 'max'])
sorted_bronx_df = sorted_bronx_df.loc[(sorted_bronx_df[('price', 'mean')] <= 400) & (sorted_bronx_df[('price', 'mean')] >= 100)]

sorted_bronx_df

Unnamed: 0_level_0,Unnamed: 1_level_0,price,price,price,price,minimum_nights,minimum_nights,minimum_nights,minimum_nights,number_of_reviews,number_of_reviews,number_of_reviews,number_of_reviews
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,count,min,max,mean,count,min,max,mean,count,min,max
room_type,neighbourhood,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Entire home/apt,City Island,235.272727,11,84,1000,1.909091,11,1,3,56.818182,11,0,227
Entire home/apt,Eastchester,233.166667,6,105,475,1.666667,6,1,2,12.0,6,0,38
Entire home/apt,Belmont,228.666667,3,140,299,1.666667,3,1,2,32.0,3,10,71
Entire home/apt,Westchester Square,221.75,4,70,670,1.5,4,1,2,27.25,4,2,67
Entire home/apt,Van Nest,216.4,5,100,425,2.0,5,1,5,34.8,5,0,98
Entire home/apt,Spuyten Duyvil,186.333333,3,79,360,63.333333,3,2,186,20.666667,3,7,47
Entire home/apt,Unionport,183.75,4,85,450,2.0,4,2,2,25.75,4,1,68
Entire home/apt,Clason Point,171.75,8,75,379,2.0,8,1,3,27.5,8,0,95
Entire home/apt,West Farms,165.0,1,165,165,2.0,1,2,2,4.0,1,4,4
Entire home/apt,Hunts Point,150.0,1,150,150,2.0,1,2,2,5.0,1,5,5


In [12]:
sorted_bronx_df.mean()

price              mean     141.112892
                   count      8.868421
                   min       74.605263
                   max      302.552632
minimum_nights     mean       7.009494
                   count      8.868421
                   min        1.157895
                   max       24.368421
number_of_reviews  mean      24.593938
                   count      8.868421
                   min        1.526316
                   max       92.605263
dtype: float64