In [1]:
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Listings data 
The listings data seems to be the most informative, so let's start by reading it and doing a preliminary analysis.

In [3]:
# Load the listings table from the local CSV file and preview its first three rows.
listings_df = pd.read_csv(filepath_or_buffer='data/listings.csv')
listings_df.head(n=3)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,...,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,241032,https://www.airbnb.com/rooms/241032,20160104002432,2016-01-04,Stylish Queen Anne Apartment,,Make your self at home in this charming one-be...,Make your self at home in this charming one-be...,none,,...,10.0,f,,WASHINGTON,f,moderate,f,f,2,4.07
1,953595,https://www.airbnb.com/rooms/953595,20160104002432,2016-01-04,Bright & Airy Queen Anne Apartment,Chemically sensitive? We've removed the irrita...,"Beautiful, hypoallergenic apartment in an extr...",Chemically sensitive? We've removed the irrita...,none,"Queen Anne is a wonderful, truly functional vi...",...,10.0,f,,WASHINGTON,f,strict,t,t,6,1.48
2,3308979,https://www.airbnb.com/rooms/3308979,20160104002432,2016-01-04,New Modern House-Amazing water view,New modern house built in 2013. Spectacular s...,"Our house is modern, light and fresh with a wa...",New modern house built in 2013. Spectacular s...,none,Upper Queen Anne is a charming neighborhood fu...,...,10.0,f,,WASHINGTON,f,strict,f,f,2,1.15


In [4]:
# Total number of rows (one per listing record) in the listings table.
n_rows = len(listings_df)
n_rows

3818

In [5]:
# Summarize every column: its dtype and its count of non-null entries.
# A column whose non-null count is lower than n_rows contains missing (NaN) values.
listings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3818 entries, 0 to 3817
Data columns (total 92 columns):
id                                  3818 non-null int64
listing_url                         3818 non-null object
scrape_id                           3818 non-null int64
last_scraped                        3818 non-null object
name                                3818 non-null object
summary                             3641 non-null object
space                               3249 non-null object
description                         3818 non-null object
experiences_offered                 3818 non-null object
neighborhood_overview               2786 non-null object
notes                               2212 non-null object
transit                             2884 non-null object
thumbnail_url                       3498 non-null object
medium_url                          3498 non-null object
picture_url                         3818 non-null object
xl_picture_url                      3498

We need to convert the string data in the 'price' column into integers (whole-dollar amounts).
Because of the dollar sign ($) and the comma (,) in these strings, a plain .astype() call won't work.
So we first strip '$' and ',' from the strings, then apply the .astype() transform
(recipe from https://stackoverflow.com/questions/32464280/converting-currency-with-to-numbers-in-python-pandas).

In [20]:
# Strip the currency formatting ('$' and ',') from 'price' and store whole dollars.
# - regex=True is required: since pandas 2.0, Series.str.replace treats the pattern
#   as a literal string by default, so "[$,]" would match nothing and the float
#   cast would fail on values such as "$85.00".
# - astype(str) first keeps this cell re-runnable: once 'price' is already int,
#   the .str accessor would otherwise raise.
# - astype("int") truncates any fractional part; it does not round.
listings_df['price'] = (
    listings_df['price']
    .astype(str)
    .str.replace(r"[$,]", "", regex=True)
    .astype("float")
    .astype("int")
)


# Reviews data

In [21]:
# Load the reviews table from the local CSV file and preview its first five rows.
reviews_df = pd.read_csv(filepath_or_buffer='data/reviews.csv')
reviews_df.head(n=5)

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,7202016,38917982,2015-07-19,28943674,Bianca,Cute and cozy place. Perfect location to every...
1,7202016,39087409,2015-07-20,32440555,Frank,Kelly has a great room in a very central locat...
2,7202016,39820030,2015-07-26,37722850,Ian,"Very spacious apartment, and in a great neighb..."
3,7202016,40813543,2015-08-02,33671805,George,Close to Seattle Center and all it has to offe...
4,7202016,41986501,2015-08-10,34959538,Ming,Kelly was a great host and very accommodating ...


In [22]:
# Show the full text of the first review's comment (row label 0).
reviews_df.loc[0, "comments"]

'Cute and cozy place. Perfect location to everything! '

# Estimating Property Revenue

https://towardsdatascience.com/airbnb-in-seattle-data-analysis-8222207579d7

In [23]:
# Give the listings key the same name it has in reviews_df ('listing_id'),
# so the two tables can be joined on a single shared column name.
listings_df.rename({'id': 'listing_id'}, axis='columns', inplace=True)

In [24]:
# Join every review to its listing's attributes (default inner join on
# 'listing_id'), then add an estimated revenue per row computed as the
# listing's price multiplied by its minimum_nights.
bookings_df = reviews_df.merge(listings_df, on='listing_id').assign(
    estimated_revenue=lambda merged: merged['price'] * merged['minimum_nights']
)
bookings_df['estimated_revenue']

0        150
1        150
2        150
3        150
4        150
        ... 
84844    525
84845    525
84846    525
84847    525
84848     92
Name: estimated_revenue, Length: 84849, dtype: int64