# Airbnb - Stays Listing Earnings Insights

```SQL
-- Listing dimension: one row per listing, identified by listing_id.
CREATE TABLE dim_listings (
    listing_id integer NOT NULL,
    -- Comma-separated amenity names (e.g. 'pool, wifi'); queries match on substrings.
    amenities text,
    location text,
    CONSTRAINT dim_listings_pk PRIMARY KEY (listing_id)
);

-- Booking fact table: one row per booking, each pointing at an existing listing.
CREATE TABLE fct_bookings (
    booking_id integer NOT NULL,
    listing_id integer NOT NULL,
    booking_date date NOT NULL,
    nightly_price decimal NOT NULL,
    -- Nullable on purpose: NULL marks a booking without a cleaning fee.
    cleaning_fee decimal,
    booked_nights integer NOT NULL,
    CONSTRAINT fct_bookings_pk PRIMARY KEY (booking_id),
    -- Rejects orphan bookings, which an inner join to dim_listings would silently drop.
    CONSTRAINT fct_bookings_listing_id_fk FOREIGN KEY (listing_id)
        REFERENCES dim_listings (listing_id)
);

-- Seed the listing dimension with twelve sample listings (ids 1-12).
INSERT INTO dim_listings
    (listing_id, amenities, location)
VALUES
    ( 1, 'pool, wifi, kitchen',       'Miami Beach'),
    ( 2, 'ocean view, balcony, wifi', 'Santa Monica'),
    ( 3, 'wifi, kitchen',             'New York'),
    ( 4, 'pool, garden, wifi',        'Los Angeles'),
    ( 5, 'ocean view, pool, gym',     'San Diego'),
    ( 6, 'wifi, parking',             'Chicago'),
    ( 7, 'kitchen, ocean view',       'Boston'),
    ( 8, 'pool, wifi',                'Orlando'),
    ( 9, 'balcony, garden',           'Austin'),
    (10, 'ocean view, spa',           'Malibu'),
    (11, 'wifi, kitchen',             'Denver'),
    (12, 'pool, ocean view, rooftop', 'Las Vegas');

-- Seed fifteen sample bookings: fourteen dated July 2024 and one dated 2024-08-01.
-- Bookings 2, 3 and 6 carry a NULL cleaning fee.
INSERT INTO fct_bookings
    (booking_id, listing_id, booking_date, nightly_price, cleaning_fee, booked_nights)
VALUES
    ( 1,  1, '2024-07-03', 200,   50, 3),
    ( 2,  2, '2024-07-10', 240, NULL, 4),
    ( 3,  2, '2024-07-20', 250, NULL, 2),
    ( 4,  4, '2024-07-11', 220,   40, 4),
    ( 5,  5, '2024-07-12', 300,   30, 7),
    ( 6,  7, '2024-07-18', 270, NULL, 2),
    ( 7,  8, '2024-07-25', 190,   20, 2),
    ( 8, 10, '2024-07-22', 350,   70, 6),
    ( 9, 12, '2024-07-19', 400,   80, 3),
    (10, 12, '2024-07-21', 410,   80, 5),
    (11,  3, '2024-07-15', 180,   30, 1),
    (12,  6, '2024-07-14', 155,   20, 2),
    (13,  9, '2024-07-05', 210,   45, 4),
    (14, 11, '2024-07-08', 160,   25, 3),
    (15,  1, '2024-08-01', 205,   50, 5);

-- Preview both tables; columns are listed explicitly in table-definition order.
SELECT
    listing_id,
    amenities,
    location
FROM dim_listings;

SELECT
    booking_id,
    listing_id,
    booking_date,
    nightly_price,
    cleaning_fee,
    booked_nights
FROM fct_bookings;
```

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the two exported tables; both paths are relative to the working directory.
listing_csv = 'Data/019/dim_listings.csv'
booking_csv = 'Data/019/fct_bookings.csv'
df_listing, df_booking = (pd.read_csv(path) for path in (listing_csv, booking_csv))

# Preview the first rows of the listings table.
df_listing.head()

Unnamed: 0,listing_id,amenities,location
0,1,"pool, wifi, kitchen",Miami Beach
1,2,"ocean view, balcony, wifi",Santa Monica
2,3,"wifi, kitchen",New York
3,4,"pool, garden, wifi",Los Angeles
4,5,"ocean view, pool, gym",San Diego


In [3]:
# Preview the first five booking rows (five is also the default for head()).
df_booking.head(n=5)

Unnamed: 0,booking_id,listing_id,booking_date,nightly_price,cleaning_fee,booked_nights
0,1,1,2024-07-03,200,50.0,3
1,2,2,2024-07-10,240,,4
2,3,2,2024-07-20,250,,2
3,4,4,2024-07-11,220,40.0,4
4,5,5,2024-07-12,300,30.0,7


# Pregunta 1

### ¿Cuál es el precio por noche promedio general para los alojamientos que tienen 'pool' (piscina) o 'ocean view' (vista al mar) en julio de 2024? Considere únicamente los alojamientos que hayan sido reservados al menos una vez durante este periodo.

In [None]:
# Attach each booking to its listing. The default inner merge drops listings
# that have no bookings at all.
df_merge = df_listing.merge(df_booking, on='listing_id')

# Bookings dated in July 2024 (ISO date strings compare correctly as text).
is_july = df_merge['booking_date'].between('2024-07-01', '2024-07-31')

# Listings advertising a pool or an ocean view, matched case-insensitively.
# na=False treats a missing amenities value as "no match" so the mask stays
# strictly boolean, the same way SQL excludes a row when LIKE meets NULL.
has_amenity = df_merge['amenities'].str.contains('pool|ocean view', case=False, na=False)

df_july = df_merge[is_july & has_amenity]

# Average nightly price across the qualifying bookings.
repuesta1 = df_july['nightly_price'].mean()

repuesta1

```SQL
-- Average nightly price over July 2024 bookings of listings that offer a pool
-- or an ocean view. The inner join keeps only listings booked in the period.
SELECT
    AVG(b.nightly_price) AS avg_night_price
FROM dim_listings AS l
INNER JOIN fct_bookings AS b
    ON b.listing_id = l.listing_id
WHERE b.booking_date BETWEEN '2024-07-01' AND '2024-07-31'
    -- ILIKE matches case-insensitively, so 'Pool' or 'Ocean View' also qualify.
    AND (l.amenities ILIKE '%pool%' OR l.amenities ILIKE '%ocean view%');
```

# Pregunta 2

### Para los alojamientos que no tienen tarifa de limpieza (es decir, valores NULL en la columna 'cleaning_fee'), ¿cuál es la diferencia promedio en el precio por noche en comparación con los alojamientos que sí tienen una tarifa de limpieza en julio de 2024?

In [8]:
# Restrict to bookings dated in July 2024, then split them by whether the
# cleaning fee is missing.
july_bookings = df_booking[df_booking['booking_date'].between('2024-07-01', '2024-07-31')]
fee_missing = july_bookings['cleaning_fee'].isna()

# Mean nightly price of each group.
avg_price_no_fee = july_bookings.loc[fee_missing, 'nightly_price'].mean()
avg_price_with_fee = july_bookings.loc[~fee_missing, 'nightly_price'].mean()

# Positive means bookings without a cleaning fee are priced higher on average.
dfiferencia = avg_price_no_fee - avg_price_with_fee

dfiferencia

np.float64(1.0606060606060623)

```SQL
-- Compare the average nightly price of July 2024 bookings without a cleaning
-- fee (NULL or 0) against those with a positive cleaning fee.
WITH july_averages AS (
    -- Each average is computed once here and reused for the difference below.
    SELECT
        AVG(CASE WHEN COALESCE(cleaning_fee, 0) = 0 THEN nightly_price END) AS avg_no_fee,
        AVG(CASE WHEN cleaning_fee > 0 THEN nightly_price END) AS avg_with_fee
    FROM fct_bookings
    WHERE booking_date BETWEEN '2024-07-01' AND '2024-07-31'
)
SELECT
    avg_no_fee,
    avg_with_fee,
    avg_no_fee - avg_with_fee AS difference
FROM july_averages;
```

# Pregunta 3

### Basándose en el 50% superior de los alojamientos con mayores ingresos en julio de 2024, ¿qué porcentaje de estos alojamientos tienen 'ocean view' (vista al mar) entre sus servicios? Para este análisis, considere las reservas realizadas en julio de 2024.

In [9]:
# Attach listing attributes to every booking and keep those dated in July 2024.
bookings_with_listing = df_booking.merge(df_listing, on='listing_id')
in_july = bookings_with_listing['booking_date'].between('2024-07-01', '2024-07-31')

# Booking revenue = nightly price * nights + cleaning fee (a missing fee counts as 0).
july = bookings_with_listing.loc[in_july].assign(
    total_rev=lambda d: (d['nightly_price'] * d['booked_nights']) + d['cleaning_fee'].fillna(0)
)

# Total July revenue per listing; amenities is carried along as a grouping key.
revenue_by_listing = july.groupby(['listing_id', 'amenities'], as_index=False)['total_rev'].sum()

# Top 50% = listings whose revenue is at or above the median revenue.
revenue_median = revenue_by_listing['total_rev'].median()
top_half = revenue_by_listing.loc[revenue_by_listing['total_rev'] >= revenue_median]

# Share of those listings whose amenities mention an ocean view.
ocean_view_count = top_half['amenities'].str.contains('ocean view', case=False).sum()
porcentaje = (ocean_view_count / len(top_half)) * 100

porcentaje

np.float64(66.66666666666666)

```SQL
-- Share of ocean-view listings among the top half of listings by July 2024 revenue.
WITH listing_revenue AS (
    -- Revenue per listing: nightly price * nights plus the cleaning fee (NULL counts as 0).
    SELECT
        l.listing_id,
        l.amenities,
        SUM((b.nightly_price * b.booked_nights) + COALESCE(b.cleaning_fee, 0)) AS total_revenue
    FROM fct_bookings AS b
    INNER JOIN dim_listings AS l
        ON b.listing_id = l.listing_id
    WHERE b.booking_date BETWEEN '2024-07-01' AND '2024-07-31'
    GROUP BY l.listing_id, l.amenities
),
ranked_listings AS (
    -- Tile 1 is the higher-revenue half. listing_id breaks revenue ties so the
    -- split does not depend on the executor's row order.
    SELECT
        listing_id,
        amenities,
        NTILE(2) OVER (ORDER BY total_revenue DESC, listing_id) AS tile
    FROM listing_revenue
)
SELECT
    -- NULLIF yields NULL instead of a division-by-zero error when no listing
    -- was booked in the period.
    (COUNT(CASE WHEN amenities ILIKE '%ocean view%' THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0)) AS pct_ocean_view_in_top_half
FROM ranked_listings
WHERE tile = 1;
```