# Meta - Engagement with Facebook Events

```SQL
CREATE TABLE dim_users (
    user_id INTEGER,
    first_name VARCHAR,
    last_name VARCHAR,
    preferred_category VARCHAR
);

CREATE TABLE dim_events (
    event_id INTEGER,
    category_name VARCHAR
);

CREATE TABLE fct_event_clicks (
    click_id INTEGER,
    user_id INTEGER,
    event_id INTEGER,
    click_date DATE
);

INSERT INTO dim_users (user_id, first_name, last_name, preferred_category)
VALUES
    (101, 'Alice', 'Smith', 'Music'),
    (102, 'Bob', 'Johnson', 'Sports'),
    (103, 'Charlie', 'Brown', 'Food'),
    (104, 'Diana', 'Adams', 'Travel'),
    (105, 'Evan', 'Morris', 'Art'),
    (106, 'Fiona', 'Davis', 'Fitness'),
    (107, 'George', 'Clark', 'Business'),
    (108, 'Hannah', 'Evans', 'Comedy'),
    (109, 'Ian', 'Fisher', 'Technology'),
    (110, 'Julia', 'Walker', 'Literature');

INSERT INTO dim_events (event_id, category_name)
VALUES
    (201, 'Music'),
    (202, 'Sports'),
    (203, 'Food'),
    (204, 'Technology'),
    (205, 'Travel'),
    (206, 'Art'),
    (207, 'Fitness'),
    (208, 'Literature'),
    (209, 'Business'),
    (210, 'Comedy');

INSERT INTO fct_event_clicks (click_id, user_id, event_id, click_date)
VALUES
    (1, 101, 201, '2024-03-01'),
    (2, 102, 202, '2024-03-02'),
    (3, 103, 203, '2024-03-03'),
    (4, 101, 204, '2024-03-05'),
    (5, 104, 205, '2024-03-07'),
    (6, 102, 201, '2024-03-10'),
    (7, 105, 206, '2024-03-12'),
    (8, 106, 207, '2024-03-15'),
    (9, 103, 208, '2024-03-20'),
    (10, 107, 209, '2024-03-25');

SELECT * FROM dim_users;

SELECT * FROM dim_events;

SELECT * FROM fct_event_clicks;
```

In [2]:
import pandas as pd
import numpy as np

In [14]:
df_events = pd.read_csv('Data/012/dim_events.csv')
df_users = pd.read_csv('Data/012/dim_users.csv')
df_clicks = pd.read_csv('Data/012/fct_event_clicks.csv')

df_events.head()

Unnamed: 0,event_id,category_name
0,201,Music
1,202,Sports
2,203,Food
3,204,Technology
4,205,Travel


In [15]:
df_users.head()

Unnamed: 0,user_id,first_name,last_name,preferred_category
0,101,Alice,Smith,Music
1,102,Bob,Johnson,Sports
2,103,Charlie,Brown,Food
3,104,Diana,Adams,Travel
4,105,Evan,Morris,Art


In [16]:
df_clicks.head()

Unnamed: 0,click_id,user_id,event_id,click_date
0,1,101,201,2024-03-01
1,2,102,202,2024-03-02
2,3,103,203,2024-03-03
3,4,101,204,2024-03-05
4,5,104,205,2024-03-07


# Pregunta 1

### ¿Cuántas veces hicieron clic los usuarios en las recomendaciones de eventos para cada categoría de evento en marzo de 2024? Muestra el nombre de la categoría y el total de clics.

In [17]:
df_merge = pd.merge(df_clicks, df_events, on="event_id")

df_marzo = df_merge[
    df_merge['click_date'].between('2024-03-01', '2024-03-31')
]

df_total_click = df_marzo.groupby('category_name')['click_id'].count().reset_index()

df_total_click.columns = ['category_name', 'total_clicks']
df_total_click = df_total_click.sort_values(by='total_clicks', ascending=False)

df_total_click

Unnamed: 0,category_name,total_clicks
5,Music,2
0,Art,1
1,Business,1
3,Food,1
2,Fitness,1
4,Literature,1
6,Sports,1
7,Technology,1
8,Travel,1


```SQL
SELECT 
    d.category_name,
    COUNT(e.click_id) AS total_clicks
FROM fct_event_clicks e
JOIN dim_events d ON e.event_id = d.event_id
WHERE e.click_date BETWEEN '2024-03-01' AND '2024-03-31'
GROUP BY d.category_name
ORDER BY total_clicks DESC;
```

# Pregunta 2

### Para los clics en eventos en marzo de 2024, identifica si cada usuario hizo clic en un evento de su categoría preferida. Devuelve el ID de usuario, la categoría del evento y una etiqueta que indique si era su categoría preferida ('Yes' o 'No').

In [18]:
df_full = df_clicks.merge(df_users, on='user_id').merge(df_events, on='event_id')

df_march = df_full[df_full['click_date'].between('2024-03-01','2024-03-31')]

df_march['is_preferred_category'] = np.where(
    df_march['preferred_category'] == df_march['category_name'],
    'Yes',
    'No'
)

resultado = df_march[['user_id','category_name','is_preferred_category']]

resultado

Unnamed: 0,user_id,category_name,is_preferred_category
0,101,Music,Yes
1,102,Sports,Yes
2,103,Food,Yes
3,101,Technology,No
4,104,Travel,Yes
5,102,Music,No
6,105,Art,Yes
7,106,Fitness,Yes
8,103,Literature,No
9,107,Business,Yes


```SQL
SELECT
    u.user_id,
    e.category_name AS event_category,
    CASE
        WHEN u.preferred_category = e.category_name THEN 'Yes'
        ELSE 'No'
    END AS is_preferred_category
FROM fct_event_clicks c
JOIN dim_users u ON c.user_id = u.user_id
JOIN dim_events e ON c.event_id = e.event_id
WHERE c.click_date BETWEEN '2024-03-01' AND '2024-03-31';
```

# Pregunta 3

### Genera un reporte que combine el ID de usuario, su nombre completo (nombre y apellido) y el total de clics en eventos con los que interactuaron en marzo de 2024. Ordena el reporte por ID de usuario en orden ascendente.

In [19]:
df_report = pd.merge(df_users, df_clicks, on='user_id')

df_report = df_report[df_report['click_date'].between('2024-03-01','2024-03-31')]

df_report['full_name'] = df_report['first_name'] + ' ' + df_report['last_name']

resultado = df_report.groupby(['user_id','full_name'])['click_id'].count().reset_index()

resultado = resultado.rename(columns={'click_id':'total_clicks'})
resultado = resultado.sort_values(by='user_id', ascending=False)

resultado

Unnamed: 0,user_id,full_name,total_clicks
6,107,George Clark,1
5,106,Fiona Davis,1
4,105,Evan Morris,1
3,104,Diana Adams,1
2,103,Charlie Brown,2
1,102,Bob Johnson,2
0,101,Alice Smith,2


```SQL
SELECT
    u.user_id,
    u.first_name || ' ' || u.last_name AS full_name,
    COUNT(c.click_id) AS total_clicks
FROM dim_users u
LEFT JOIN fct_event_clicks c ON u.user_id = c.user_id
WHERE c.click_date BETWEEN '2024-03-01' AND '2024-03-31'
GROUP BY u.user_id, u.first_name, u.last_name
ORDER BY u.user_id ASC;
```