In [41]:
import pandas as pd
import os.path
import plotly.express as px

import matplotlib.pyplot as plt


# Load the Airbnb dataset (assuming you have it already)
airbnb_data_path = os.path.join(os.path.expanduser('~'), 'Downloads', 'listings.csv')
bnb_df = pd.read_csv(airbnb_data_path)

print('Columns in file:\n', bnb_df.columns)
bnb_df.head()

neighborhood_counts = bnb_df['neighbourhood'].value_counts().reset_index()
neighborhood_counts.columns = ['Neighbourhood', 'Number of Airbnb Locations']

fig = px.bar(neighborhood_counts, x='Neighbourhood', y='Number of Airbnb Locations',
             title="Number of Airbnb Locations per Neighborhood in Amsterdam")
fig.update_xaxes(tickangle=90)
fig.show()


Columns in file:
 Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365', 'number_of_reviews_ltm', 'license'],
      dtype='object')


In [42]:
unique_licenses = bnb_df['license'].nunique()

print(f"The number of different licenses issued is: {unique_licenses}")

The number of different licenses issued is: 7288


In [43]:
split_name = bnb_df['name'].str.split('·', expand=True)

split_name.columns = ['Property', 'Rating', 'Bedrooms', 'Beds', 'Baths']

bnb_df = pd.concat([bnb_df, split_name], axis=1)

print(bnb_df)



                      id                                               name  \
0                 761411  Condo in Amsterdam · ★4.74 · 1 bedroom · 1 bed...   
1                 768274  Rental unit in Amsterdam · ★4.83 · 1 bedroom ·...   
2                 768737  Boat in Amsterdam · ★4.82 · 1 bedroom · 1 bed ...   
3                 771217  Houseboat in Amsterdam · ★5.0 · 3 bedrooms · 3...   
4                 771343  Rental unit in Amsterdam · ★4.89 · 1 bedroom ·...   
...                  ...                                                ...   
8381  971403658453358540  Rental unit in Amsterdam · ★New · 1 bedroom · ...   
8382  971600466535392582  Home in Amsterdam · ★New · 3 bedrooms · 3 beds...   
8383  971686543447319533  Rental unit in Amsterdam · ★New · 1 bedroom · ...   
8384  971758168481363169  Hotel in Amsterdam · ★New · 1 bedroom · 1 bed ...   
8385  972218247170836285  Condo in Amsterdam · ★New · 2 bedrooms · 1 bed...   

        host_id host_name  neighbourhood_group     

In [36]:
print(bnb_df['Beds'])

0         1 bed 
1         1 bed 
2         1 bed 
3        3 beds 
4         1 bed 
          ...   
8381      1 bed 
8382     3 beds 
8383      1 bed 
8384      1 bed 
8385      1 bed 
Name: Beds, Length: 8386, dtype: object


In [44]:
bnb_df['Beds'] = bnb_df['Beds'].str.extract('([\d.]+)').astype(float)

# Show the resulting DataFrame
print(bnb_df['Beds'])

0       1.0
1       1.0
2       1.0
3       3.0
4       1.0
       ... 
8381    1.0
8382    3.0
8383    1.0
8384    1.0
8385    1.0
Name: Beds, Length: 8386, dtype: float64


In [45]:
print(bnb_df['Beds'])

0       1.0
1       1.0
2       1.0
3       3.0
4       1.0
       ... 
8381    1.0
8382    3.0
8383    1.0
8384    1.0
8385    1.0
Name: Beds, Length: 8386, dtype: float64


In [46]:
total_beds = bnb_df['Beds'].sum()

# Toon het totaal aantal bedden
print("Totaal aantal bedden:", total_beds)

Totaal aantal bedden: 14713.5


In [47]:
'availability_365'
count_not_equal_365 = len(bnb_df.loc[bnb_df['availability_365'] != 365])

# Toon het aantal
print("Aantal rijen waarbij availability_365 niet gelijk is aan 365:", count_not_equal_365)

Aantal rijen waarbij availability_365 niet gelijk is aan 365: 8350


In [48]:
bnb_df.describe()

Unnamed: 0,id,host_id,neighbourhood_group,latitude,longitude,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,Beds
count,8386.0,8386.0,0.0,8386.0,8386.0,8386.0,8386.0,8386.0,7556.0,8386.0,8386.0,8386.0,8368.0
mean,2.973884e+17,107787800.0,,52.366603,4.889834,254.489864,5.046625,45.437753,1.175696,1.831624,82.825066,10.84808,1.758305
std,3.905819e+17,144831600.0,,0.017191,0.035403,395.468275,34.710395,107.352805,2.253706,2.857336,113.568083,30.815465,1.318676
min,2818.0,3159.0,,52.29034,4.75587,18.0,1.0,0.0,0.01,1.0,0.0,0.0,0.0
25%,18514640.0,10255360.0,,52.35564,4.865501,150.0,2.0,3.0,0.27,1.0,0.0,0.0,1.0
50%,42427610.0,36423180.0,,52.365769,4.88758,207.0,3.0,10.0,0.58,1.0,18.0,3.0,1.0
75%,7.227894e+17,144433400.0,,52.37644,4.908708,300.0,4.0,36.0,1.24,1.0,142.0,8.0,2.0
max,9.722182e+17,534954400.0,,52.425159,5.02643,27857.0,1001.0,3199.0,120.11,27.0,365.0,1689.0,33.0


In [49]:
total_price_per_night = bnb_df['price'].sum()

# Toon het totaal aantal bedden
print("Totale prijs per nacht geheel amsterdam is:", total_price_per_night)

Totale prijs per nacht geheel amsterdam is: 2134152
