In [1]:
import pandas as pd

## Top Amenities per Country
#### Goals for this step

- Find out which amenities are most common among the highest-rated listings in each country


In [2]:
# Load the listings frame; its "Country" column is attached to the amenities frames below.
# NOTE(review): absolute, machine-specific Windows path - this cell cannot run elsewhere
# as written; consider a configurable data directory.
# NOTE(review): pd.read_pickle can execute arbitrary code - only load trusted files.
df_countries = pd.read_pickle("C:/Users/Admin/Documents/ironhack/AirBnB_data/airbnb_listings_countries.pkl")

In [3]:
# Load the amenities frame (one column per amenity plus review_score and price, see head() below).
# NOTE(review): absolute, machine-specific Windows path - this cell cannot run elsewhere
# as written; consider a configurable data directory.
# NOTE(review): pd.read_pickle can execute arbitrary code - only load trusted files.
df_amenities = pd.read_pickle("C:/Users/Admin/Documents/ironhack/AirBnB_data/airbnb_amenities.pkl")

In [4]:
# Attach each listing's country to the amenities frame (modifies df_amenities in place).
# Assigning a Series aligns on index labels, so this assumes both frames share the same
# index - TODO confirm; labels missing from df_countries would silently become NaN.
df_amenities["country"] = df_countries["Country"]

In [41]:
# df_amenities.to_pickle("C:/Users/Admin/Documents/ironhack/AirBnB_data/airbnb_amenities_countries.pkl")

In [6]:
df_amenities.head()

amenity,wireless internet,kitchen,heating,essentials,washer,tv,smoke detector,internet,hangers,shampoo,...,accessible-height toilet,handheld shower head,fireplace guards,baby monitor,hot water kettle,wide clearance to shower & toilet,firm mattress,review_score,price,country
0,1,0,1,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,90.0,74.0,Italy
1,1,1,1,1,0,0,0,1,1,0,...,0,0,0,0,0,0,0,87.0,55.0,Austria
2,1,1,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,100.0,993.0,Denmark
3,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,100.0,697.0,Denmark
4,1,1,1,1,0,0,1,1,0,1,...,0,0,0,0,0,0,0,94.0,424.0,Denmark


In [7]:
df_amenities["review_score"].describe()

count    380224.000000
mean         93.088024
std           8.373247
min          20.000000
25%          90.000000
50%          95.000000
75%         100.000000
max         100.000000
Name: review_score, dtype: float64

We will first try to look at Italy as an example.

In [8]:
# Keep only the Italian listings and renumber the rows from 0.
# Boolean selection followed by reset_index already yields a new frame,
# so df_amenities is left untouched.
df_italy = (
    df_amenities
    .loc[lambda frame: frame["country"] == "Italy"]
    .reset_index(drop=True)
)

In [9]:
df_italy["review_score"].describe()

count    24336.000000
mean        91.827046
std          8.699237
min         20.000000
25%         89.000000
50%         94.000000
75%         98.000000
max        100.000000
Name: review_score, dtype: float64

We will only look at the amenities of listings with a review score equal to or higher than 98, which is the 75th percentile for Italy.

In [10]:
# Top-rated Italian listings: review score of 98 or more, rows renumbered from 0.
df_italy_top = (
    df_italy
    .loc[lambda frame: frame["review_score"] >= 98]
    .reset_index(drop=True)
)

We will drop the columns that aren't needed and get a sorted list by amenities with the highest counts.

In [11]:
# Count how many top-rated Italian listings offer each amenity and show the 30 most common.
(
    df_italy_top
    .drop(columns=["country", "review_score", "price"])  # keep amenity columns only
    .sum()
    .sort_values(ascending=False)
    .head(30)
)


amenity
heating                      6145
wireless internet            5918
essentials                   5646
tv                           5415
kitchen                      5162
family/kid friendly          4883
hair dryer                   4635
air conditioning             4540
shampoo                      4329
hangers                      4294
washer                       4020
iron                         3617
laptop friendly workspace    3579
internet                     3390
elevator in building         3259
first aid kit                3185
fire extinguisher            2733
breakfast                    1762
safety card                  1655
lock on bedroom door         1284
24-hour check-in             1272
pets allowed                 1221
free parking on premises     1094
buzzer/wireless intercom     1052
smoke detector               1045
cable tv                     1036
doorman                       918
carbon monoxide detector      861
smoking allowed               827
dryer 

We will then create a list of Italy's top 30 amenities for listings with review scores of 98 or higher.

In [12]:
# Names of the 30 most common amenities among top-rated Italian listings,
# ordered from most to least frequent.
italy_top_amenities = (
    df_italy_top
    .drop(columns=["country", "review_score", "price"])  # keep amenity columns only
    .sum()
    .sort_values(ascending=False)
    .head(30)
    .index
    .tolist()
)

In [13]:
italy_top_amenities[0:10]

['heating',
 'wireless internet',
 'essentials',
 'tv',
 'kitchen',
 'family/kid friendly',
 'hair dryer',
 'air conditioning',
 'shampoo',
 'hangers']

Now that the example works, we will write a function that does the same for any selected country.

In [14]:
def amenities_per_country(df, country, no=10, min_score=98):
    """Return the most common amenities among a country's top-rated listings.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per listing. Must contain the columns "country",
        "review_score" and "price"; every other column is treated as an
        amenity and summed (in this notebook's data they are 0/1 flags).
    country : str
        Value of the "country" column to select.
    no : int, default 10
        Number of amenities to return.
    min_score : float, default 98
        Lowest review score (inclusive) a listing needs in order to be
        counted. The default matches the cut-off used for the Italy example.

    Returns
    -------
    list
        Amenity column names ordered from the highest to the lowest count,
        at most ``no`` entries. ``df`` itself is not modified.
    """
    # A single combined mask replaces the two filter/copy/reset_index passes;
    # the row index does not influence the column sums.
    keep = (df["country"] == country) & (df["review_score"] >= min_score)
    amenity_counts = df[keep].drop(columns=["country", "review_score", "price"]).sum()
    return list(amenity_counts.sort_values(ascending=False).head(no).index)
        

In [15]:
# Distinct country names found in the amenities frame (order is not meaningful yet).
countries = df_amenities["country"].unique().tolist()

In [16]:
# Put the country names in alphabetical order.
countries = sorted(countries)

In [17]:
countries

['Australia',
 'Austria',
 'Belgium',
 'Canada',
 'Denmark',
 'France',
 'Germany',
 'Greece',
 'Hong Kong',
 'Ireland',
 'Italy',
 'Netherlands',
 'Spain',
 'Switzerland',
 'United Kingdom',
 'United States']

In [18]:
amenities_per_country(df_amenities, "Australia", 10)

['kitchen',
 'wireless internet',
 'essentials',
 'washer',
 'smoke detector',
 'tv',
 'heating',
 'hangers',
 'iron',
 'shampoo']

As stated in the Top Amenities notebook, there are basic amenities that every listing should have, so we will also run the function on the data frame that has the "extra" amenities to see what else could be added.

In [19]:
# Load the frame with the "extra" amenities mentioned in the text above.
# NOTE(review): absolute, machine-specific Windows path - this cell cannot run elsewhere
# as written; consider a configurable data directory.
# NOTE(review): pd.read_pickle can execute arbitrary code - only load trusted files.
df_extra_amenities = pd.read_pickle("C:/Users/Admin/Documents/ironhack/AirBnB_data/airbnb_amenities_extra.pkl")

In [20]:
# Attach each listing's country to the extra-amenities frame (modifies it in place).
# Assigning a Series aligns on index labels, so this assumes both frames share the same
# index - TODO confirm; labels missing from df_countries would silently become NaN.
df_extra_amenities["country"] = df_countries["Country"]

In [39]:
# df_extra_amenities.to_pickle("C:/Users/Admin/Documents/ironhack/AirBnB_data/airbnb_amenities_extra_countries.pkl")

In [22]:
# Map every country to its 10 most common "extra" amenities among top-rated listings.
countries_top_amenities = {
    country_name: amenities_per_country(df_extra_amenities, country_name, 10)
    for country_name in countries
}

In [23]:
countries_top_amenities["Australia"]

['kitchen',
 'washer',
 'family/kid friendly',
 'dryer',
 'buzzer/wireless intercom',
 'free parking on premises',
 'cable tv',
 '24-hour check-in',
 'elevator in building',
 'safety card']

In [24]:
countries_top_amenities["Austria"]

['kitchen',
 'washer',
 'family/kid friendly',
 'dryer',
 'elevator in building',
 'free parking on premises',
 'cable tv',
 '24-hour check-in',
 'buzzer/wireless intercom',
 'pool']

In [25]:
countries_top_amenities["Belgium"]

['kitchen',
 'washer',
 'dryer',
 'family/kid friendly',
 'free parking on premises',
 'cable tv',
 '24-hour check-in',
 'buzzer/wireless intercom',
 'elevator in building',
 'safety card']

In [26]:
countries_top_amenities["Canada"]

['kitchen',
 'washer',
 'dryer',
 'family/kid friendly',
 'cable tv',
 'free parking on premises',
 '24-hour check-in',
 'buzzer/wireless intercom',
 'safety card',
 'pets live on this property']

In [27]:
countries_top_amenities["Denmark"]

['kitchen',
 'washer',
 'dryer',
 'family/kid friendly',
 'free parking on premises',
 'cable tv',
 '24-hour check-in',
 'self check-in',
 'safety card',
 'pets live on this property']

In [28]:
countries_top_amenities["France"]

['kitchen',
 'washer',
 'family/kid friendly',
 'dryer',
 'cable tv',
 'free parking on premises',
 '24-hour check-in',
 'buzzer/wireless intercom',
 'smoking allowed',
 'elevator in building']

In [29]:
countries_top_amenities["Germany"]

['kitchen',
 'washer',
 'dryer',
 'family/kid friendly',
 'free parking on premises',
 'cable tv',
 '24-hour check-in',
 'buzzer/wireless intercom',
 'safety card',
 'self check-in']

In [30]:
countries_top_amenities["Greece"]

['kitchen',
 'washer',
 'elevator in building',
 'family/kid friendly',
 'buzzer/wireless intercom',
 'dryer',
 'smoking allowed',
 '24-hour check-in',
 'cable tv',
 'pets allowed']

In [31]:
countries_top_amenities["Hong Kong"]

['kitchen',
 'washer',
 'dryer',
 'family/kid friendly',
 'elevator in building',
 'free parking on premises',
 'buzzer/wireless intercom',
 '24-hour check-in',
 'cable tv',
 'pool']

In [32]:
countries_top_amenities["Ireland"]

['kitchen',
 'washer',
 'dryer',
 'free parking on premises',
 'family/kid friendly',
 'cable tv',
 '24-hour check-in',
 'pets live on this property',
 'pool',
 'safety card']

In [33]:
countries_top_amenities["Italy"]

['kitchen',
 'washer',
 'family/kid friendly',
 'dryer',
 'buzzer/wireless intercom',
 'cable tv',
 'elevator in building',
 'free parking on premises',
 '24-hour check-in',
 'safety card']

In [34]:
countries_top_amenities["Netherlands"]

['kitchen',
 'washer',
 'family/kid friendly',
 'dryer',
 'cable tv',
 'free parking on premises',
 '24-hour check-in',
 'buzzer/wireless intercom',
 'safety card',
 'elevator in building']

In [35]:
countries_top_amenities["Spain"]

['kitchen',
 'washer',
 'dryer',
 'family/kid friendly',
 'free parking on premises',
 'cable tv',
 'buzzer/wireless intercom',
 '24-hour check-in',
 'safety card',
 'elevator in building']

In [36]:
countries_top_amenities["Switzerland"]

['kitchen',
 'washer',
 'dryer',
 'elevator',
 'buzzer/wireless intercom',
 'family/kid friendly',
 'cable tv',
 '24-hour check-in',
 'doorman',
 'safety card']

In [37]:
countries_top_amenities["United Kingdom"]

['kitchen',
 'washer',
 'family/kid friendly',
 'elevator in building',
 'dryer',
 'buzzer/wireless intercom',
 'cable tv',
 '24-hour check-in',
 'free parking on premises',
 'safety card']

In [38]:
countries_top_amenities["United States"]

['kitchen',
 'washer',
 'family/kid friendly',
 'dryer',
 'free parking on premises',
 'cable tv',
 'buzzer/wireless intercom',
 '24-hour check-in',
 'elevator in building',
 'safety card']

## Conclusions

- All countries seem to share largely the same top amenities.
- Austria, Ireland and Hong Kong have pool as a top amenity.
- Canada, Denmark and Ireland have pets living on the property, and Greece allows pets.
- The top amenities for each country include:
    1. Kitchen
    2. Washer
    3. Dryer
    4. Family/kid friendly
    5. Cable TV
    6. Buzzer/wireless intercom