# Airbnb Analytics

<img src="../assets/logo.png">
<img src="../assets/ERD.png">

## Connectin to Database

In [1]:
import pandas as pd

import psycopg2

import warnings
warnings.filterwarnings("ignore")

In [2]:
hostname = 'localhost'
database = 'BNB'
username = 'postgres'
pwd = 'Seriously'
port_id = 5432

In [3]:
try:
    conn = psycopg2.connect(host= hostname,
                            dbname = database,
                            user = username,
                            password = pwd,
                            port = port_id
                            )
except Exception as error:
    print(error) 

In [4]:
cur = conn.cursor()

## Establishing & Filling the Required Tables

airbnb_search_details

In [5]:
drop_tbL_airbnb_search_details = """
DROP TABLE IF EXISTS airbnb_search_details
"""
cur.execute(drop_tbL_airbnb_search_details)
conn.commit()

In [6]:
make_tbl_airbnb_search_details = """
CREATE TABLE airbnb_search_details (
    id INT PRIMARY KEY,
    price NUMERIC(6,2),
    property_type VARCHAR(20),
    room_type VARCHAR(30),
    amenities TEXT,
    accommodates SMALLINT,
    bathrooms SMALLINT,
    bed_type VARCHAR(30),
    cancellation_policy VARCHAR(30),
    cleaning_fee BOOLEAN,
    city VARCHAR(20),
    host_identity_verified VARCHAR(1),
    host_response_rate VARCHAR(4),
    host_since DATE,
    neighbourhood VARCHAR(30),
    number_of_reviews SMALLINT,
    review_scores_rating NUMERIC(5,2),
    postalcode VARCHAR(8),
    bedrooms SMALLINT,
    beds SMALLINT);
"""

cur.execute(make_tbl_airbnb_search_details)
conn.commit()

In [7]:
fill_tbl_airbnb_search_details = """
COPY airbnb_search_details
FROM 'C:\\Users\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_search_details.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_search_details)
conn.commit()

In [8]:
show_tbl_airbnb_search_details = """
SELECT * FROM airbnb_search_details;
"""
pd.read_sql(show_tbl_airbnb_search_details,conn).head()

Unnamed: 0,id,price,property_type,room_type,amenities,accommodates,bathrooms,bed_type,cancellation_policy,cleaning_fee,city,host_identity_verified,host_response_rate,host_since,neighbourhood,number_of_reviews,review_scores_rating,postalcode,bedrooms,beds
0,12513361,555.68,Apartment,Entire home/apt,"{TV,""Wireless Internet"",""Air conditioning"",""Sm...",2,1,Real Bed,flexible,False,NYC,t,89%,2015-11-18,East Harlem,3,87.0,10029,0,1
1,7196412,366.36,Cabin,Private room,"{""Wireless Internet"",Kitchen,Washer,Dryer,""Smo...",2,3,Real Bed,moderate,False,LA,f,100%,2016-10-09,Valley Glen,14,91.0,91606,1,1
2,16333776,482.83,House,Private room,"{TV,""Cable TV"",Internet,""Wireless Internet"",Ki...",2,1,Real Bed,strict,True,SF,t,100%,2013-12-26,Richmond District,117,96.0,94118,1,1
3,1786412,448.86,Apartment,Private room,"{""Wireless Internet"",""Air conditioning"",Kitche...",2,1,Real Bed,strict,True,NYC,t,93%,2010-11-05,Williamsburg,8,86.0,11211,1,1
4,14575777,506.89,Villa,Private room,"{TV,Internet,""Wireless Internet"",""Air conditio...",6,2,Real Bed,strict,True,LA,t,70%,2015-10-22,,2,100.0,90703,3,3


airbnb_apartments

In [9]:
drop_tbL_airbnb_apartments = """
DROP TABLE IF EXISTS airbnb_apartments
"""
cur.execute(drop_tbL_airbnb_apartments)
conn.commit()

In [10]:
make_tbl_airbnb_apartments = """
CREATE TABLE airbnb_apartments(
    host_id SMALLINT,
    apartment_id VARCHAR(4),
    apartment_type VARCHAR(10),
    n_beds SMALLINT,
    n_bedrooms SMALLINT,
    country VARCHAR(10),
    city VARCHAR(20),
    CONSTRAINT student_key PRIMARY KEY (host_id, apartment_id));
"""
cur.execute(make_tbl_airbnb_apartments)
conn.commit()

In [11]:
fill_tbl_airbnb_apartments = """
COPY airbnb_apartments
FROM 'C:\\Users\\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_apartments.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_apartments)
conn.commit()

In [12]:
show_tbl_airbnb_apartments = """
SELECT * FROM airbnb_apartments;
"""
pd.read_sql(show_tbl_airbnb_apartments,conn).head()

Unnamed: 0,host_id,apartment_id,apartment_type,n_beds,n_bedrooms,country,city
0,0,A1,Room,1,1,USA,New York
1,0,A2,Room,1,1,USA,New Jersey
2,0,A3,Room,1,1,USA,New Jersey
3,1,A4,Apartment,2,1,USA,Houston
4,1,A5,Apartment,2,1,USA,Las Vegas


airbnb_searches

In [13]:
drop_tbL_airbnb_searches = """
DROP TABLE IF EXISTS airbnb_searches
"""
cur.execute(drop_tbL_airbnb_searches)
conn.commit()

In [14]:
make_tbl_airbnb_searches = """
CREATE TABLE airbnb_searches(
    ds DATE,
    id_user VARCHAR(40),
    ds_checkin DATE,
    ds_checkout DATE,
    n_searches SMALLINT,
    n_nights SMALLINT,
    n_guests_min SMALLINT,
    n_guests_max SMALLINT,
    origin_country VARCHAR(20),
    filter_price_min NUMERIC,
    filter_price_max NUMERIC,
    filter_room_types VARCHAR(140),
    filter_neighborhoods VARCHAR(100));
"""
cur.execute(make_tbl_airbnb_searches)
conn.commit()

In [15]:
fill_tbl_airbnb_searches = """
COPY airbnb_searches
FROM 'C:\\Users\\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_searches.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_searches)
conn.commit()

In [16]:
show_tbl_airbnb_searches = """
SELECT * FROM airbnb_searches;
"""
pd.read_sql(show_tbl_airbnb_searches,conn).head()

Unnamed: 0,ds,id_user,ds_checkin,ds_checkout,n_searches,n_nights,n_guests_min,n_guests_max,origin_country,filter_price_min,filter_price_max,filter_room_types,filter_neighborhoods
0,2014-11-10,67aece73-e112-4e9e-9e05-8a2a94b003b9,,,5,,1,1,IT,0.0,99.0,",Private room",
1,2014-01-10,6cbb33d1-6ecc-4f74-8b6a-a43d07d484b6,2014-04-10,2014-07-10,11,3.0,3,3,ES,0.0,567.0,",Entire home/apt,Entire home/apt,Private room,...",
2,2014-03-10,aa9cf5bf-5667-4212-8018-1cb8beee530e,2014-11-14,2014-11-16,17,2.0,2,2,GB,0.0,171.0,",Entire home/apt",
3,2014-09-10,3e6c2466-74fe-44c0-a6f3-dda79755d30a,2015-02-26,2015-02-03,9,4.0,1,4,GB,0.0,240.0,",Entire home/apt",
4,2014-10-13,a09bf912-b21d-4859-b194-8512c30695f6,2014-10-18,2014-10-22,7,4.0,1,2,GB,,,",Entire home/apt",


airbnb_reviews

In [17]:
drop_tbL_airbnb_reviews = """
DROP TABLE IF EXISTS airbnb_reviews
"""
cur.execute(drop_tbL_airbnb_reviews)
conn.commit()

In [18]:
make_tbl_airbnb_reviews = """
CREATE TABLE airbnb_reviews(
    from_user SMALLINT,
    to_user SMALLINT,
    from_type VARCHAR(5),
    to_type VARCHAR(5),
    review_score SMALLINT);
"""
cur.execute(make_tbl_airbnb_reviews)
conn.commit()

In [19]:
fill_tbl_airbnb_reviews = """
COPY airbnb_reviews
FROM 'C:\\Users\\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_reviews.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_reviews)
conn.commit()

In [20]:
show_tbl_airbnb_reviews = """
SELECT * FROM airbnb_reviews;
"""
pd.read_sql(show_tbl_airbnb_reviews,conn).head()

Unnamed: 0,from_user,to_user,from_type,to_type,review_score
0,4,2,guest,host,3
1,1,2,host,guest,5
2,8,8,guest,host,9
3,4,0,guest,host,6
4,7,1,host,guest,2


airbnb_hosts

In [21]:
drop_tbL_airbnb_hosts = """
DROP TABLE IF EXISTS airbnb_hosts
"""
cur.execute(drop_tbL_airbnb_hosts)
conn.commit()

In [22]:
make_tbl_airbnb_hosts = """
CREATE TABLE airbnb_hosts(
    host_id SMALLINT,
    nationality VARCHAR(10),
    gender VARCHAR(1),
    age SMALLINT);
"""
cur.execute(make_tbl_airbnb_hosts)
conn.commit()

In [23]:
fill_tbl_airbnb_hosts = """
COPY airbnb_hosts
FROM 'C:\\Users\\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_hosts.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_hosts)
conn.commit()

In [24]:
show_tbl_airbnb_hosts = """
SELECT * FROM airbnb_hosts;
"""
pd.read_sql(show_tbl_airbnb_hosts,conn).head()

Unnamed: 0,host_id,nationality,gender,age
0,0,USA,M,28
1,1,USA,F,29
2,2,China,F,31
3,3,China,M,24
4,4,Mali,M,30


airbnb_guests

In [25]:
drop_tbL_airbnb_guests = """
DROP TABLE IF EXISTS airbnb_guests
"""
cur.execute(drop_tbL_airbnb_guests)
conn.commit()

In [26]:
make_tbl_airbnb_guests = """
CREATE TABLE airbnb_guests(
    guest_id SMALLINT,
    nationality VARCHAR(10),
    gender VARCHAR(1),
    age SMALLINT);
"""
cur.execute(make_tbl_airbnb_guests)
conn.commit()

In [27]:
fill_tbl_airbnb_guests = """
COPY airbnb_guests
FROM 'C:\\Users\\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_guests.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_guests)
conn.commit()

In [28]:
show_tbl_airbnb_guests = """
SELECT * FROM airbnb_guests;
"""
pd.read_sql(show_tbl_airbnb_guests,conn).head()

Unnamed: 0,guest_id,nationality,gender,age
0,0,Mali,M,21
1,1,China,F,23
2,2,Mali,F,27
3,3,Australia,F,24
4,4,Luxembourg,M,19


airbnb_contacts

In [29]:
drop_tbL_airbnb_contacts = """
DROP TABLE IF EXISTS airbnb_contacts
"""
cur.execute(drop_tbL_airbnb_contacts)
conn.commit()

In [30]:
make_tbl_airbnb_contacts = """
CREATE TABLE airbnb_contacts(
    id_guest VARCHAR(45),
    id_host VARCHAR(45),
    id_listing VARCHAR(45),
    ts_contact_at TIMESTAMP,
    ts_reply_at TIMESTAMP,
    ts_accepted_at TIMESTAMP,
    ts_booking_at TIMESTAMP,
    ds_checkin DATE,
    ds_checkout DATE,
    n_guests SMALLINT,
    n_messages SMALLINT);
"""
cur.execute(make_tbl_airbnb_contacts)
conn.commit()

In [31]:
fill_tbl_airbnb_contacts = """
COPY airbnb_contacts
FROM 'C:\\Users\\ADMIN\\Desktop\\Datasets\\bnb_data\\airbnb_contacts.csv'
WITH (FORMAT CSV, HEADER);
"""
cur.execute(fill_tbl_airbnb_contacts)
conn.commit()

In [32]:
show_tbl_airbnb_contacts = """
SELECT * FROM airbnb_contacts;
"""
pd.read_sql(show_tbl_airbnb_contacts,conn).head()

Unnamed: 0,id_guest,id_host,id_listing,ts_contact_at,ts_reply_at,ts_accepted_at,ts_booking_at,ds_checkin,ds_checkout,n_guests,n_messages
0,86b39b70-965b-479d-a0b0-719b195acea2,1dfb22ec-c20e-4bf9-b161-1607afa25c5a,d668de42-122a-45cd-b91f-91a70895f902,2014-04-18 09:32:00,2014-04-18 09:39:00,NaT,NaT,2014-12-31,2015-02-01,7,5
1,14f943bb-74e9-458b-be55-203dc7220688,3347390d-8670-4870-9dab-da30f3700141,14c47fb8-e831-4044-9674-9b3fd0499193,2014-06-10 06:55:00,2014-06-10 10:06:00,2014-06-10 10:06:00,2014-06-10 10:06:00,2014-03-11,2014-07-11,2,8
2,425aa1ed-82ab-4ecf-b62f-d61e1848706d,02cafb86-5445-45cc-80f2-405291578356,c5a4a913-a094-4a9d-82e2-0b2d4f9d9eeb,2014-04-10 05:02:00,2014-04-10 23:10:00,NaT,NaT,2014-02-11,2014-09-11,2,2
3,bb490ede-8a70-4d61-a2e8-625855a393e2,f49c3095-58de-4b8d-9d5b-3bfceceb47d8,27f4b429-d544-464f-b4b5-3c09fd5992e7,2014-08-31 11:46:00,2014-08-31 16:48:00,NaT,NaT,2014-03-11,2014-07-11,2,5
4,b2fda15a-89bb-4e6e-ae81-8b21598e2482,71f1d49e-2ff4-4d72-b8e6-fd4c67feaa74,95fb78ca-8e6e-436a-9830-949d995ad14f,2014-08-10 15:07:00,2014-08-10 15:32:00,2014-08-10 15:32:00,2014-08-10 22:21:00,2014-06-11,2014-09-11,2,10


## Analysis

### Q1. Find neighborhoods where you can sleep on a real bed in a villa with internet while paying the lowest price possible?

<img src='../assets/search_details.png'>

In [33]:
a1_script = """
SELECT distinct neighbourhood
FROM airbnb_search_details
WHERE price IN
    (SELECT min(price)
     FROM airbnb_search_details
     WHERE bed_type = 'Real Bed'
       AND property_type = 'Villa'
       AND amenities ILIKE '%internet%' )
    AND bed_type = 'Real Bed'
    AND property_type = 'Villa'
    AND amenities ILIKE '%internet%'
"""

a1_sql = pd.read_sql(a1_script,conn)
a1_sql

Unnamed: 0,neighbourhood
0,Long Beach


### Q2. For each guest reviewer, find the nationality of the reviewer’s favorite host based on the guest’s highest review score given to a host. Output the user ID of the guest along with their favorite host’s nationality. In case there is more than one favorite host from the same country, list that country only once (remove duplicates).
<img src='../assets/reviews.png'>
<img src='../assets/hosts.png'>

In [34]:
a2_script = """
SELECT 
    DISTINCT guest AS from_user,  host_natinality AS nationality
FROM (
    SELECT 
        DISTINCT ar.from_user AS guest,
        ar.review_score,
        DENSE_RANK() OVER (PARTITION BY ar.from_user ORDER BY ar.review_score DESC) AS score_rank,
        ar.to_user AS host,
        ah.nationality AS host_natinality
    FROM airbnb_reviews AS ar
    JOIN airbnb_hosts ah
        ON ar.to_user = ah.host_id AND ar.from_type = 'guest' AND ar.to_type = 'host'
    ORDER BY guest, review_score DESC) t
WHERE score_rank = 1
"""
a2_sql = pd.read_sql(a2_script,conn)
a2_sql

Unnamed: 0,from_user,nationality
0,0,Luxembourg
1,1,Brazil
2,2,Australia
3,3,Australia
4,4,Brazil
5,5,Australia
6,5,Mali
7,5,USA
8,6,China
9,6,USA


### Q3. Find the distinct room types (filter_room_types column). Output each unique room types in its own row.
<img src='../assets/searches.png'>

In [35]:
a3_script = """
SELECT DISTINCT unnest(regexp_split_to_array(ltrim(filter_room_types, ','), ',')) AS property_type
FROM airbnb_searches;
"""
a3_sql = pd.read_sql(a3_script,conn)
a3_sql

Unnamed: 0,property_type
0,Shared room
1,Private room
2,Entire home/apt


### Q4. Rank each host based on the number of beds they have listed. The host with the most beds should be ranked 1 and the host with the least number of beds should be ranked last. Hosts that have the same number of beds should have the same rank but there should be no gaps between ranking values. A host can also own multiple properties. Output the host ID, number of beds, and rank from highest rank to lowest.
<img src='../assets/apartments.png'>

In [36]:
a4_script = """
SELECT 
    host_id, 
    SUM(n_beds) AS number_of_beds,
    DENSE_RANK() OVER(ORDER BY sum(n_beds) DESC) AS rank
FROM airbnb_apartments
GROUP BY host_id
ORDER BY number_of_beds desc;
"""
a4_sql = pd.read_sql(a4_script,conn)
a4_sql


Unnamed: 0,host_id,number_of_beds,rank
0,10,16,1
1,3,8,2
2,6,6,3
3,5,5,4
4,1,4,5
5,7,4,5
6,9,4,5
7,0,3,6
8,2,3,6
9,11,2,7


### Q5. To better understand the effect of the review count on the price of accommodation, categorize the number of reviews into the following groups along with the price. 
#### - 0 reviews: NO 
#### - 1 to 5 reviews: FEW 
#### - 6 to 15 reviews: SOME 
#### - 16 to 40 reviews: MANY 
#### - more than 40 reviews: A LOT 
### Output the price and it's categorization. Perform the categorization on accommodation level? (save the output to a csv file with the name of a5)
<img src='../assets/search_details.png'>

In [37]:
a5_script = """
select 
    price,
    CASE
        WHEN number_of_reviews = 0 THEN 'NO'
        WHEN number_of_reviews < 6 THEN 'FEW'
        WHEN number_of_reviews < 16 THEN 'SOME'
        WHEN number_of_reviews < 41 THEN 'MANY'
        ELSE ' A LOT'
    END AS review_category
FROM airbnb_search_details;
"""

a5_sql = pd.read_sql(a5_script,conn)
a5_sql.to_csv('../data/a5.csv')
a5_sql

Unnamed: 0,price,review_category
0,555.68,FEW
1,366.36,SOME
2,482.83,A LOT
3,448.86,SOME
4,506.89,FEW
...,...,...
155,478.75,NO
156,510.59,FEW
157,368.89,SOME
158,409.43,MANY


### Q6. Find the total number of searches for each room type (apartments, private, shared) by city.
<img src='../assets/search_details.png'>

In [38]:
a6_script = """
WITH cte AS (
SELECT 
    city,
    CASE
        WHEN room_type = 'Entire home/apt' THEN 1
        ELSE 0
    END AS entire_home_or_apt,
    CASE
        WHEN room_type = 'Shared room' THEN 1
        ELSE 0
    END AS shared_room,
    CASE
        WHEN room_type = 'Private room' THEN 1
        ELSE 0
    END AS private_room
FROM airbnb_search_details)
SELECT 
    city,
    SUM(entire_home_or_apt) AS entire_homes_or_apts,
    SUM(shared_room) AS private_rooms,
    SUM(private_room) AS private_rooms
FROM cte
GROUP BY 1;
"""
a6_sql = pd.read_sql(a6_script,conn)
a6_sql

Unnamed: 0,city,entire_homes_or_apts,private_rooms,private_rooms.1
0,LA,34,4,25
1,DC,0,0,1
2,SF,3,0,4
3,Chicago,3,0,1
4,NYC,43,3,35
5,Boston,1,0,3


### Q7. You're given a dataset of searches for properties on Airbnb. For simplicity, let's say that each search result (i.e., each row) represents a unique host. Find the city with the most amenities across all their host's properties. Output the name of the city.
<img src='../assets/search_details.png'>

In [39]:
a7_script = """
SELECT city,
    SUM(array_length(string_to_array(amenities,','),1)) AS sum_len
FROM airbnb_search_details
GROUP by city
order by sum_len desc
limit 1;
"""
a7_sql = pd.read_sql(a7_script,conn)
a7_sql

Unnamed: 0,city,sum_len
0,NYC,1416


### 8. Estimate the growth of Airbnb each year using the number of hosts registered as the growth metric. The rate of growth is calculated by taking ((number of hosts registered in the current year - number of hosts registered in the previous year) / the number of hosts registered in the previous year) * 100. Output the year, number of hosts in the current year, number of hosts in the previous year, and the rate of growth. Round the rate of growth to the nearest percent and order the result in the ascending order based on the year. Assume that the dataset consists only of unique hosts, meaning there are no duplicate hosts listed.
<img src='../assets/search_details.png'>

In [40]:
a8_script = """
WITH cte AS (
SELECT 
    date_part ('year',host_since) AS year,
    COUNT(*) AS num_hosts
FROM airbnb_search_details
GROUP BY year)
SELECT 
    year AS current_year,
    num_hosts,
    LAG(num_hosts,1) OVER(ORDER BY year) AS previous_year_num_hosts,
    ROUND(((num_hosts - LAG(num_hosts,1) OVER(ORDER BY year))::numeric)/LAG(num_hosts,1) OVER(ORDER BY year)*100,0) AS rate
FROM cte;
"""
a8_sql = pd.read_sql(a8_script,conn)
a8_sql

Unnamed: 0,current_year,num_hosts,previous_year_num_hosts,rate
0,2009.0,2,,
1,2010.0,4,2.0,100.0
2,2011.0,9,4.0,125.0
3,2012.0,10,9.0,11.0
4,2013.0,30,10.0,200.0
5,2014.0,33,30.0,10.0
6,2015.0,33,33.0,0.0
7,2016.0,28,33.0,-15.0
8,2017.0,11,28.0,-61.0


### 9. Find the average number of beds in each neighborhood that has at least 3 beds in total. Output results along with the neighborhood name and sort the results based on the number of average beds in descending order.
<img src='../assets/search_details.png'>

In [41]:
a9_script = """
SELECT neighbourhood, AVG(beds) AS n_beds_avg
FROM airbnb_search_details
GROUP BY 1
HAVING sum(beds) >= 3;
"""
a9_sql = pd.read_sql(a9_script,conn)
a9_sql

Unnamed: 0,neighbourhood,n_beds_avg
0,,2.2
1,West Los Angeles,3.0
2,Malibu,2.5
3,Mid-Wilshire,1.666667
4,Rancho Palos Verdes,4.0
5,Loop,2.0
6,Dorchester,1.0
7,Harlem,1.576923
8,East Village,2.0
9,Cow Hollow,3.0


### 10. Find the average accommodates-to-beds ratio for "shared rooms" in each city. Sort your results by listing cities with the highest ratios first.
<img src='../assets/search_details.png'>

In [42]:
a10_script = """
SELECT city, AVG(accommodates:: numeric/beds) AS avg_accommodates_to_beds
FROM airbnb_search_details
WHERE room_type = 'Shared room'
GROUP BY 1
ORDER BY 2 DESC;
"""
a10_sql = pd.read_sql(a10_script, conn)
a10_sql

Unnamed: 0,city,avg_accommodates_to_beds
0,LA,1.6
1,NYC,1.5


### 11. Find the average number of bathrooms and bedrooms for each city’s property types. Output the result along with the city name and the property type.
<img src='../assets/search_details.png'>

In [43]:
a11_script = """
SELECT 
    city,
    property_type,
    AVG(bathrooms) AS avg_bathrooms,
    AVG(bedrooms) AS avg_bedrooms
FROM airbnb_search_details
GROUP BY 1,2;
"""
a11_sql = pd.read_sql(a11_script,conn)
a11_sql

Unnamed: 0,city,property_type,avg_bathrooms,avg_bedrooms
0,NYC,Townhouse,1.0,0.0
1,LA,Condominium,1.5,1.5
2,NYC,Loft,1.0,0.5
3,NYC,Apartment,1.116667,1.25
4,Boston,Apartment,1.0,1.0
5,LA,House,1.5,1.558824
6,Chicago,Apartment,2.0,2.0
7,SF,Apartment,1.0,2.0
8,LA,Villa,2.066667,2.533333
9,NYC,House,1.4,1.8


### Q12. You’re given a table of rental property searches by users. The table consists of search results and outputs host information for searchers. Find the minimum, average, maximum rental prices for each host’s popularity rating. The host’s popularity rating is defined as below: 
### - 0 reviews: New 
### - 1 to 5 reviews: Rising 
### - 6 to 15 reviews: Trending Up 
### - 16 to 40 reviews: Popular 
### - more than 40 reviews: Hot
<img src='../assets/search_details.png'>

In [44]:
a12_script = """
SELECT
    CASE
        WHEN number_of_reviews = 0 THEN 'New'
        WHEN number_of_reviews BETWEEN 1 AND 5 THEN 'Rising'
        WHEN number_of_reviews BETWEEN 6 AND 15 THEN 'Trending Up'
        WHEN number_of_reviews BETWEEN 16 AND 40 THEN 'Popular'
        WHEN number_of_reviews > 40 THEN 'Hot'
    END AS host_popularity,
    min(price) AS min_price,
    avg(price) AS avg_price,
    max(price) AS max_price
FROM airbnb_search_details
GROUP BY 1;
"""
a12_sql = pd.read_sql(a12_script,conn)
a12_sql

Unnamed: 0,host_popularity,min_price,avg_price,max_price
0,Rising,355.53,503.846585,717.01
1,Popular,270.81,472.815,667.83
2,New,313.55,515.919714,741.76
3,Trending Up,361.09,476.277179,685.65
4,Hot,340.12,464.233158,633.51


### Q13. Display the average number of times a user performed a search which led to a successful booking and the average number of times a user performed a search but did not lead to a booking. The output should have a column named action with values 'does not book' and 'books' as well as a 2nd column named average_searches with the average number of searches per action. Consider that the booking did not happen if the booking date is null. Be aware that search is connected to the booking only if their check-in dates match.
<img src='../assets/search_details.png'>
<img src='../assets/contacts.png'>

In [45]:
a13_script = """
SELECT 
    CASE
        WHEN c.ts_booking_at IS NOT NULL THEN 'books'
        ELSE 'does not book'
    END AS action,
    avg(n_searches) AS average_searches
FROM airbnb_searches s
LEFT JOIN airbnb_contacts c 
    ON s.id_user = c.id_guest AND s.ds_checkin = c.ds_checkin
GROUP BY 1;"""
a13_sql = pd.read_sql(a13_script,conn)
a13_sql

Unnamed: 0,action,average_searches
0,does not book,22.707965
1,books,23.333333


### Q14. Find the average host response rate with a cleaning fee for each zipcode. Present the results as a percentage along with the postal code value. Convert the column 'host_response_rate' from TEXT to NUMERIC using type casts and string processing (take missing values as NULL). Order the result in ascending order based on the average host response rater after cleaning. (Save the output to a csv file with the name a14)
<img src='../assets/search_details.png'>

In [46]:
a14_script = """
SELECT
    postalcode,
    avg(split_part(host_response_rate,'%',1)::Numeric) AS avg_reponse_rate
FROM airbnb_search_details
WHERE cleaning_fee = True and split_part(host_response_rate,'%',1) is not null
GROUP BY postalcode
ORDER BY avg_reponse_rate asc"""
a14_sql = pd.read_sql(a14_script,conn)
a14_sql.to_csv('../data/a14.csv')
a14_sql

Unnamed: 0,postalcode,avg_reponse_rate
0,91324,0.0
1,90028,25.0
2,10035,67.0
3,90703,70.0
4,10039,76.0
...,...,...
60,11231,100.0
61,90064,100.0
62,90290,100.0
63,20005,100.0


In [47]:
cur.close()
conn.close()