# Airbnb - Work Travel Expense Tracking and Optimization

```SQL
-- Dimension table: one row per company.
-- company_id is the column the bookings fact table joins on, so it is declared
-- as the primary key; a duplicated id would otherwise multiply rows in that join.
CREATE TABLE dim_companies (
    company_id   integer PRIMARY KEY,
    company_name varchar NOT NULL
);

-- Fact table: one row per booking. An employee who takes several trips
-- appears once per booking.
CREATE TABLE fct_corporate_bookings (
    booking_id   integer PRIMARY KEY,
    company_id   integer NOT NULL,         -- joins to dim_companies.company_id
    employee_id  integer NOT NULL,
    -- Explicit precision/scale: a bare DECIMAL has scale 0 in standard SQL
    -- (and in MySQL / SQL Server), which would round costs such as 750.5.
    booking_cost decimal(10, 2) NOT NULL,
    booking_date date NOT NULL,            -- day the reservation was made
    travel_date  date NOT NULL             -- day the trip takes place
);

-- Seed the company dimension with the ten sample companies.
INSERT INTO dim_companies
    (company_id, company_name)
VALUES
    ( 1, 'Acme Corp'),
    ( 2, 'Beta Technologies'),
    ( 3, 'Global Insights'),
    ( 4, 'Zenith Solutions'),
    ( 5, 'Innovatech'),
    ( 6, 'Prime Systems'),
    ( 7, 'Nexus Holdings'),
    ( 8, 'Vertex LLC'),
    ( 9, 'Synergy Inc'),
    (10, 'Quantum Dynamics');

-- Seed the bookings fact table with the fifteen sample bookings
-- (booked between January and March 2024).
INSERT INTO fct_corporate_bookings
    (booking_id, company_id, employee_id, booking_cost, booking_date, travel_date)
VALUES
    ( 1, 1, 101,  500,    '2024-01-05', '2024-02-05'),
    ( 2, 2, 201,  750.5,  '2024-01-15', '2024-01-20'),
    ( 3, 1, 102,  450,    '2024-01-20', '2024-02-15'),
    ( 4, 3, 301, 1200,    '2024-01-22', '2024-03-01'),
    ( 5, 4, 401,  650.75, '2024-01-10', '2024-01-25'),
    ( 6, 5, 501,  900,    '2024-02-05', '2024-03-10'),
    ( 7, 2, 202,  850,    '2024-02-08', '2024-03-05'),
    ( 8, 6, 601, 1100,    '2024-02-15', '2024-03-20'),
    ( 9, 7, 701,  780.25, '2024-02-20', '2024-03-25'),
    (10, 8, 801,  660,    '2024-02-22', '2024-03-15'),
    (11, 3, 302,  950,    '2024-03-05', '2024-03-12'),
    (12, 1, 103,  530,    '2024-03-10', '2024-03-15'),
    (13, 9, 901, 1250,    '2024-03-15', '2024-03-22'),
    (14, 4, 402,  700,    '2024-03-20', '2024-04-01'),
    (15, 2, 203,  800,    '2024-03-25', '2024-04-10');

-- Sanity check: list the seeded companies.
-- Columns are named explicitly and rows are ordered by key so the output
-- stays stable if the schema grows or the engine returns rows in another order.
SELECT
    company_id,
    company_name
FROM dim_companies
ORDER BY company_id;

-- Sanity check: list the seeded bookings, in booking_id order.
SELECT
    booking_id,
    company_id,
    employee_id,
    booking_cost,
    booking_date,
    travel_date
FROM fct_corporate_bookings
ORDER BY booking_id;
```

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the two exported tables, in order: company dimension, bookings fact.
df_companies, df_corporate = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/018/dim_companies.csv',
        'Data/018/fct_corporate_bookings.csv',
    )
)

# Preview the first five companies.
df_companies.head(n=5)

Unnamed: 0,company_id,company_name
0,1,Acme Corp
1,2,Beta Technologies
2,3,Global Insights
3,4,Zenith Solutions
4,5,Innovatech


In [3]:
# Preview the first five bookings.
df_corporate.head(n=5)

Unnamed: 0,booking_id,company_id,employee_id,booking_cost,booking_date,travel_date
0,1,1,101,500.0,2024-01-05,2024-02-05
1,2,2,201,750.5,2024-01-15,2024-01-20
2,3,1,102,450.0,2024-01-20,2024-02-15
3,4,3,301,1200.0,2024-01-22,2024-03-01
4,5,4,401,650.75,2024-01-10,2024-01-25


# Pregunta 1

### ¿Cuál es el costo promedio de reserva para los viajeros corporativos? Para esta pregunta, observemos únicamente los viajes que fueron reservados en enero de 2024.

In [4]:
# Bookings made in January 2024 (both bounds inclusive).
# ISO-formatted dates compare correctly whether the column holds strings or datetimes.
booked_in_jan = (
    (df_corporate['booking_date'] >= '2024-01-01')
    & (df_corporate['booking_date'] <= '2024-01-31')
)
df_enero = df_corporate.loc[booked_in_jan]

# Average cost of those bookings.
respuesta1 = df_enero['booking_cost'].mean()

respuesta1

np.float64(710.25)

```SQL
-- Average booking cost over bookings whose booking_date falls in January 2024.
-- The month is written as a half-open range; on a DATE column this selects
-- exactly the rows from 2024-01-01 through 2024-01-31.
SELECT AVG(b.booking_cost) AS avg_booking_cost
FROM fct_corporate_bookings AS b
WHERE b.booking_date >= '2024-01-01'
  AND b.booking_date < '2024-02-01';
```

# Pregunta 2

### Identifique las 5 empresas con el costo promedio de reserva más alto por empleado para los viajes realizados durante el primer trimestre de 2024. Tenga en cuenta que si un empleado realiza varios viajes, cada reserva aparecerá como una fila independiente en `fct_corporate_bookings`.

In [7]:
# Trips whose travel date falls in Q1 2024 (both bounds inclusive).
in_q1 = (
    (df_corporate['travel_date'] >= '2024-01-01')
    & (df_corporate['travel_date'] <= '2024-03-31')
)
df_q1 = df_corporate.loc[in_q1]

# Total spend of each employee within each company.
gasto_empleado = df_q1.groupby(
    ['company_id', 'employee_id'], as_index=False
)['booking_cost'].sum()

# Average of those per-employee totals for each company.
ranking_empresas = gasto_empleado.groupby(
    'company_id', as_index=False
)['booking_cost'].mean()

# Attach the company names and keep the five highest averages.
resultado = (
    ranking_empresas
    .merge(df_companies, on='company_id')
    .sort_values(by='booking_cost', ascending=False)
    .head(5)
)

resultado

Unnamed: 0,company_id,booking_cost,company_name
8,9,1250.0,Synergy Inc
5,6,1100.0,Prime Systems
2,3,1075.0,Global Insights
4,5,900.0,Innovatech
1,2,800.25,Beta Technologies


```SQL
-- Top 5 companies by average spend per employee, for trips taken in Q1 2024.
WITH employee_totals AS (
    -- One row per (company, employee): total cost of that employee's Q1 trips.
    SELECT
        company_id,
        employee_id,
        SUM(booking_cost) AS employee_total
    FROM fct_corporate_bookings
    WHERE travel_date BETWEEN '2024-01-01' AND '2024-03-31'
    GROUP BY company_id, employee_id
)
SELECT
    c.company_name,
    AVG(et.employee_total) AS avg_cost_per_employee
FROM employee_totals AS et
INNER JOIN dim_companies AS c
    ON c.company_id = et.company_id
-- Group on the key as well as the name: grouping on the name alone would merge
-- two different companies that happen to share a name.
GROUP BY c.company_id, c.company_name
-- company_id breaks ties so LIMIT 5 always returns the same rows.
ORDER BY avg_cost_per_employee DESC, c.company_id
LIMIT 5;
```

# Pregunta 3

### Para las reservas realizadas en febrero de 2024, ¿qué porcentaje de las reservas se hicieron con más de 30 días de antelación? Utilice este dato para recomendar estrategias destinadas a reducir los costos de reserva.

In [12]:
# Parse both date columns so the lead time can be computed with date arithmetic.
# Note: this converts the columns on the shared df_corporate frame in place.
df_corporate['booking_date'] = pd.to_datetime(df_corporate['booking_date'])
df_corporate['travel_date'] = pd.to_datetime(df_corporate['travel_date'])

# Bookings made in February 2024. The range is half-open (>= 1 Feb, < 1 Mar)
# so that 29 February is included: 2024 is a leap year, and an upper bound of
# '2024-02-28' would silently drop bookings made on the last day of the month.
booked_in_feb = (
    (df_corporate['booking_date'] >= '2024-02-01')
    & (df_corporate['booking_date'] < '2024-03-01')
)
df_feb = df_corporate[booked_in_feb].copy()

# Days between making the booking and travelling.
df_feb['lead_time'] = (df_feb['travel_date'] - df_feb['booking_date']).dt.days

# Share of February bookings made strictly more than 30 days ahead.
mas_de_30 = (df_feb['lead_time'] > 30).sum()
total = len(df_feb)
porcentaje = (mas_de_30 / total) * 100

porcentaje


np.float64(60.0)

```SQL
-- Percentage of bookings made in February 2024 whose travel date is strictly
-- more than 30 days after the booking date.
SELECT
    -- date - date yields a whole number of days; CASE without ELSE yields NULL,
    -- which COUNT skips, so only the early bookings are counted.
    COUNT(CASE WHEN (travel_date - booking_date) > 30 THEN 1 END) * 100.0
        -- NULLIF turns an empty month into NULL instead of a division-by-zero error.
        / NULLIF(COUNT(*), 0) AS pct_advance_bookings
FROM fct_corporate_bookings
-- Half-open range: covers all of February without hard-coding the leap day.
WHERE booking_date >= '2024-02-01'
  AND booking_date < '2024-03-01';
```