# <font color="salmon">Airbnb Analysis</font>

In [32]:
#Parameters
use_cleaned_data = False

#Number of columns to show for each df
pd.set_option('display.max_columns', None)

## Loading Data and Modules

In [33]:
import pandas as pd
from glob2 import glob
import re

if use_cleaned_data:
    file_paths = glob("clean_data/*.csv")
else:
    file_paths = glob("airbnb-sep-2017/*.csv")

calendar = pd.read_csv(file_paths[0], low_memory=False)
calendar_available_only = pd.read_csv(file_paths[1], low_memory=False)
listings = pd.read_csv(file_paths[2], low_memory=False)
neighbourhoods = pd.read_csv(file_paths[3], low_memory=False)
reviews = pd.read_csv(file_paths[4], low_memory=False)

## <font color = "salmon">Data Cleaning</font>

### What I've Done in Data Cleaning
- encode cancellation_policy to one-hot in listings.csv
- convert all binary classes (1 is True and 0 is False)

In [34]:
#Regex currency pattern
currency_pattern = re.compile(r"\$|\,")

def one_hot_concat(df, col_nm):
    one_hot_response  = pd.get_dummies(df[col_nm])
    df = pd.concat([df, one_hot_response], axis=1, join="inner")
    del df[col_nm]
    return df
    
if not use_cleaned_data:       
    #calendar: "prices" to float, encode "available" to binary class
    calendar["price"] = calendar["price"].apply(lambda x: float(re.sub(currency_pattern, "", str(x))))
    calendar["available"] = calendar["available"].apply(lambda x: 0 if x == "f" else 1)

    #calendar_available_only: "prices" to float, encode "available" to binary class
    calendar_available_only["price"] = calendar_available_only["price"].apply(lambda x: float(re.sub(currency_pattern, "", str(x))))
    calendar_available_only["available"] = calendar_available_only["available"].apply(lambda x: 0 if x == "f" else 1)

    #listings: convert cols in listings_binary_cols to float, encode "cancellation_policy" to one-hot and concat,
    listings_binary_cols = ["requires_license", "instant_bookable", "require_guest_profile_picture", 
                            "require_guest_phone_verification", "host_is_superhost", "host_has_profile_pic",
                           "host_identity_verified", "price", "weekly_price",
                           "monthly_price", "security_deposit", "cleaning_fee",
                           "extra_people"]
    for col in listings_binary_cols:
        listings.loc[:,col] = listings.loc[:,col].apply(lambda x: 0 if x == "f" else 1)
    #cancellation policy - one hot
    listings = one_hot_concat(listings, "cancellation_policy")
    #host_response_time - one hot
    listings = one_hot_concat(listings, "host_response_time")
    #room type - one hot
    listings = one_hot_concat(listings, "room_type")
    listings = one_hot_concat(listings, "bed_type")
    

## <font color="salmon">EDA</font>

### Initial notes from data cleaning
- "prices" columns consist of prices on the dollar
- "listing_id" is the primary key

Listings.csv
- experiences_offerred, has_availability contain only Nones

neighborhoods.csv
- neighbourhood_groups contains NaNs

### calendar summary

In [35]:
calendar.head()

Unnamed: 0,listing_id,date,available,price
0,10988680,2018-04-01,0,
1,10988680,2018-03-31,0,
2,10988680,2018-03-30,0,
3,10988680,2018-03-29,0,
4,10988680,2018-03-28,0,


### calendar_available_only summary

In [36]:
calendar_available_only.head()

Unnamed: 0,listing_id,date,available,price
0,8053481,2017-07-31,1,220.0
1,8053481,2017-07-30,1,220.0
2,8053481,2017-07-21,1,230.0
3,8053481,2017-07-20,1,220.0
4,8053481,2017-07-19,1,220.0


### listings summary

In [37]:
listings.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,accommodates,bathrooms,bedrooms,beds,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month,flexible,moderate,strict,super_strict_30,super_strict_60,a few days or more,within a day,within a few hours,within an hour,Entire home/apt,Private room,Shared room,Airbed,Couch,Futon,Pull-out Sofa,Real Bed
0,10988680,https://www.airbnb.com/rooms/10988680,20170401232214,2017-04-02,"Simple, new, clean accommodation",I signed a new lease and moving out of my plac...,"This is clean, simple, affordable accommodatio...",I signed a new lease and moving out of my plac...,none,Place is very conveniently located in the Miss...,,,"You get 24/7 access to your bedroom,bathroom a...","I won't be there, but two my roommates live in...",,https://a0.muscache.com/im/pictures/2c6eb3ad-a...,https://a0.muscache.com/im/pictures/2c6eb3ad-a...,https://a0.muscache.com/im/pictures/2c6eb3ad-a...,https://a0.muscache.com/im/pictures/2c6eb3ad-a...,2448592,https://www.airbnb.com/users/show/2448592,Georgy,2012-05-23,"San Francisco, California, United States",I am originally from Armenia. I moved to US 5 ...,,,0,https://a0.muscache.com/im/users/2448592/profi...,https://a0.muscache.com/im/users/2448592/profi...,Mission District,1.0,1.0,"['email', 'phone', 'reviews', 'kba', 'work_ema...",1,1,"Mission District, San Francisco, CA 94110, Uni...",Mission District,Mission,,San Francisco,CA,94110,San Francisco,"San Francisco, CA",US,United States,37.754184,-122.406514,t,Apartment,2,1.0,1.0,1.0,"{Internet,""Wireless Internet"",""Free parking on...",,1,1,1,1,1,1,1,1,1125,13 months ago,,0,0,0,0,2017-04-02,1,2016-02-23,2016-02-23,100.0,10.0,10.0,10.0,10.0,10.0,10.0,0,,SAN FRANCISCO,0,0,0,1,0.07,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
1,8053481,https://www.airbnb.com/rooms/8053481,20170401232214,2017-04-02,Spacious 2BR/2BATH w/Shared Kitchen,Enjoy the whole main floor of our sunny 2-leve...,"We love our big, bright home and we know you w...",Enjoy the whole main floor of our sunny 2-leve...,none,Our Mission District neighborhood is among the...,Parking: Street parking in our neighborhood i...,It is easy to get to most places in San Franci...,"We have lived in this home for over 20 years, ...",We are now retired and have decided to make ou...,* Please treat our home and personal belonging...,https://a0.muscache.com/im/pictures/102820109/...,https://a0.muscache.com/im/pictures/102820109/...,https://a0.muscache.com/im/pictures/102820109/...,https://a0.muscache.com/im/pictures/102820109/...,14419919,https://www.airbnb.com/users/show/14419919,David & Barbara,2014-04-17,"San Francisco, California, United States",We enjoy traveling and prefer staying in B&Bs ...,100%,,0,https://a0.muscache.com/im/users/14419919/prof...,https://a0.muscache.com/im/users/14419919/prof...,Mission District,1.0,1.0,"['email', 'phone', 'manual_online', 'reviews',...",1,1,"Mission District, San Francisco, CA 94110, Uni...",Mission District,Mission,,San Francisco,CA,94110,San Francisco,"San Francisco, CA",US,United States,37.754166,-122.421534,t,Apartment,4,2.0,2.0,3.0,"{TV,""Cable TV"",""Wireless Internet"",Kitchen,Hea...",,1,1,1,1,1,2,1,3,30,4 days ago,,0,15,22,45,2017-04-02,9,2015-09-26,2016-07-27,96.0,10.0,10.0,10.0,10.0,9.0,9.0,0,,SAN FRANCISCO,0,0,0,1,0.49,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1
2,14011173,https://www.airbnb.com/rooms/14011173,20170401232214,2017-04-02,Private room - Victorian House in the Mission,Awesome location in the Mission District of SF...,,Awesome location in the Mission District of SF...,none,,,,,,,https://a0.muscache.com/im/pictures/bdb8981f-8...,https://a0.muscache.com/im/pictures/bdb8981f-8...,https://a0.muscache.com/im/pictures/bdb8981f-8...,https://a0.muscache.com/im/pictures/bdb8981f-8...,4901696,https://www.airbnb.com/users/show/4901696,Donald,2013-01-30,"San Francisco, California, United States","I'm originally from Belgium, but I've studied ...",,,0,https://a0.muscache.com/im/pictures/50d3580d-4...,https://a0.muscache.com/im/pictures/50d3580d-4...,Mission District,3.0,3.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",1,1,"Mission District, San Francisco, CA 94110, Uni...",Mission District,Mission,,San Francisco,CA,94110,San Francisco,"San Francisco, CA",US,United States,37.758506,-122.406152,t,House,1,1.0,1.0,1.0,"{TV,""Wireless Internet"",Kitchen,Heating,Washer...",,1,1,1,1,1,1,1,7,1125,2 months ago,,0,0,0,0,2017-04-02,2,2016-08-20,2016-08-27,90.0,10.0,8.0,10.0,10.0,8.0,9.0,0,,SAN FRANCISCO,0,0,0,3,0.27,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
3,16142131,https://www.airbnb.com/rooms/16142131,20170401232214,2017-04-02,Spacious 1BR in the Mission District,"Quiet 1 bedroom apartment, with a spacious, we...",,"Quiet 1 bedroom apartment, with a spacious, we...",none,,,,,,,https://a0.muscache.com/im/pictures/ef31cf3f-5...,https://a0.muscache.com/im/pictures/ef31cf3f-5...,https://a0.muscache.com/im/pictures/ef31cf3f-5...,https://a0.muscache.com/im/pictures/ef31cf3f-5...,50491424,https://www.airbnb.com/users/show/50491424,Avishai,2015-12-04,"San Francisco, California, United States",,100%,,0,https://a0.muscache.com/im/pictures/b38912d2-d...,https://a0.muscache.com/im/pictures/b38912d2-d...,Mission District,1.0,1.0,"['email', 'phone', 'reviews', 'kba']",1,1,"Mission District, San Francisco, CA 94110, Uni...",Mission District,Mission,,San Francisco,CA,94110,San Francisco,"San Francisco, CA",US,United States,37.756549,-122.422025,t,Apartment,4,1.0,1.0,2.0,"{TV,""Wireless Internet"",Kitchen,""Elevator in b...",,1,1,1,1,1,1,1,4,14,6 weeks ago,,0,0,0,0,2017-04-02,2,2016-11-30,2017-03-06,100.0,10.0,10.0,10.0,10.0,10.0,8.0,0,,SAN FRANCISCO,0,0,0,1,0.48,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1
4,9080622,https://www.airbnb.com/rooms/9080622,20170401232214,2017-04-02,Edwardian flat in heart of Mission,Bedroom with queen bed available. The condo i...,,Bedroom with queen bed available. The condo i...,none,,,,,,,https://a0.muscache.com/im/pictures/923ac520-5...,https://a0.muscache.com/im/pictures/923ac520-5...,https://a0.muscache.com/im/pictures/923ac520-5...,https://a0.muscache.com/im/pictures/923ac520-5...,46158576,https://www.airbnb.com/users/show/46158576,Cindy,2015-10-09,"San Francisco, California, United States",Holistic health care provider who loves to tra...,,,0,https://a0.muscache.com/im/users/46158576/prof...,https://a0.muscache.com/im/users/46158576/prof...,Mission District,1.0,1.0,"['email', 'phone', 'jumio']",1,0,"Mission District, San Francisco, CA 94110, Uni...",Mission District,Mission,,San Francisco,CA,94110,San Francisco,"San Francisco, CA",US,United States,37.760051,-122.421352,t,Apartment,1,1.0,1.0,1.0,"{TV,""Cable TV"",Internet,""Wireless Internet"",Ki...",,1,1,1,1,1,1,1,2,3,6 weeks ago,,0,0,0,0,2017-04-02,0,,,,,,,,,,0,,SAN FRANCISCO,0,0,0,1,,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1


### neighbourhoods summary

In [38]:
neighbourhoods["neighbourhood_group"].value_counts()

Series([], Name: neighbourhood_group, dtype: int64)

In [39]:
neighbourhoods.head()

Unnamed: 0,neighbourhood_group,neighbourhood
0,,Bayview
1,,Bernal Heights
2,,Castro/Upper Market
3,,Chinatown
4,,Crocker Amazon


### reviews summary

In [40]:
reviews.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,7025250,36355262,2015-06-27,49913,Sean,Kevin was quick to respond and very coordinati...
1,7025250,36495849,2015-06-28,17344420,Terry,"Kevin, my host, was not home but stayed in tou..."
2,7025250,40263555,2015-07-29,38552007,Jon,I have to say that after staying in 3 hotels i...
3,7025250,55069111,2015-11-27,30440816,Sean,Clean room. The room I stayed in had a privat...
4,7025250,55166030,2015-11-28,33218898,Gregg,Very relaxed and casual atmosphere. The space ...
