# Meta - Creators Growth: Engagement and Follower Metrics

```SQL
DROP TABLE IF EXISTS dim_creator;
DROP TABLE IF EXISTS fct_creator_content;

CREATE TABLE dim_creator (
    creator_id INTEGER,
    creator_name VARCHAR,
    category VARCHAR
);

CREATE TABLE fct_creator_content (
    content_id INTEGER,
    creator_id INTEGER,
    published_date DATE,
    content_type VARCHAR,
    impressions_count INTEGER,
    likes_count INTEGER,
    comments_count INTEGER,
    shares_count INTEGER,
    new_followers_count INTEGER
);

INSERT INTO dim_creator (creator_id, creator_name, category)
VALUES
    (1, 'Alice', 'Gaming'),
    (2, 'Bob', 'Music'),
    (3, 'Carol', 'Cooking'),
    (4, 'Dave', 'Travel'),
    (5, 'Eve', 'Fashion'),
    (6, 'Frank', 'Technology'),
    (7, 'Grace', 'Sports'),
    (8, 'Heidi', 'Lifestyle'),
    (9, 'Ivan', 'Education'),
    (10, 'Judy', 'Comedy');

INSERT INTO fct_creator_content (
    content_id,
    creator_id,
    published_date,
    content_type,
    impressions_count,
    likes_count,
    comments_count,
    shares_count,
    new_followers_count
)
VALUES
    (1, 1, '2024-05-05', 'Video', 1000, 150, 20, 10, 50),
    (2, 2, '2024-05-15', 'Article', 800, 80, 15, 5, 70),
    (3, 3, '2024-05-20', 'Video', 1200, 200, 30, 12, 80),
    (4, 4, '2024-04-10', 'Podcast', 500, 60, 10, 3, 30),
    (5, 5, '2024-04-15', 'Article', 700, 90, 12, 8, 40),
    (6, 6, '2024-04-20', 'Image', 600, 75, 5, 4, 35),
    (7, 7, '2024-04-05', 'Video', 1100, 140, 25, 6, 45),
    (8, 8, '2024-06-01', 'Podcast', 550, 70, 22, 4, 55),
    (9, 9, '2024-06-15', 'Video', 1300, 210, 35, 15, 90),
    (10, 10, '2024-06-20', 'Image', 750, 80, 18, 5, 60),
    (11, 1, '2024-04-22', 'Article', 950, 110, 19, 7, 65),
    (12, 2, '2024-05-25', 'Podcast', 640, 85, 16, 6, 75),
    (13, 3, '2024-06-05', '1280', 1280, 190, 28, 10, 88),
    (14, 4, '2024-05-30', 'Image', 820, 105, 13, 9, 52),
    (15, 5, '2024-04-12', 'Article', 670, 95, 14, 6, 42);

SELECT * FROM dim_creator;
SELECT * FROM fct_creator_content;
```


In [1]:
import pandas as pd
import numpy as np

In [3]:
df_creator = pd.read_csv('Data/003/dim_creator.csv')
df_content = pd.read_csv('Data/003/fct_creator_content.csv', parse_dates=['published_date'])

df_creator.head()

Unnamed: 0,creator_id,creator_name,category
0,1,Alice,Gaming
1,2,Bob,Music
2,3,Carol,Cooking
3,4,Dave,Travel
4,5,Eve,Fashion


In [4]:
df_content.head()

Unnamed: 0,content_id,creator_id,published_date,content_type,impressions_count,likes_count,comments_count,shares_count,new_followers_count
0,1,1,2024-05-05,Video,1000,150,20,10,50
1,2,2,2024-05-15,Article,800,80,15,5,70
2,3,3,2024-05-20,Video,1200,200,30,12,80
3,4,4,2024-04-10,Podcast,500,60,10,3,30
4,5,5,2024-04-15,Article,700,90,12,8,40


# Pregunta 1

### Para el contenido publicado en mayo de 2024, ¿qué IDs de creadores muestran el mayor crecimiento de nuevos seguidores dentro de cada tipo de contenido? Si un creador publicó varias veces el mismo tipo de contenido, queremos considerar el crecimiento total de nuevos seguidores para ese tipo de contenido.

```SQL
SELECT creator_id, content_type, sum_new_follower
FROM (SELECT f.creator_id,
             f.content_type,
             SUM(f.new_followers_count) AS sum_new_follower,
             RANK() OVER (PARTITION BY f.content_type ORDER BY SUM(f.new_followers_count) DESC) as rnk
      FROM fct_creator_content f
      WHERE f.published_date BETWEEN '2024-05-01' AND '2024-05-31'
      GROUP BY f.creator_id, f.content_type) ranked_stats
WHERE rnk = 1;
```

# Pregunta 2

### Tu Gerente de Producto (Product Manager) solicita un informe que muestre las impresiones, likes, comentarios y compartidos (shares) para cada tipo de contenido entre el 8 y el 21 de abril de 2024. Ella solicita específicamente que las métricas de interacción sean 'despivotadas' (unpivoted) en una sola columna llamada 'tipo de métrica' (metric type).

In [6]:
df_periodo = df_content[df_content['published_date'].between('2024-04-08','2024-04-21')]

resumen = df_periodo.groupby('content_type').agg({
    'impressions_count': 'sum',
    'likes_count': 'sum',
    'comments_count': 'sum',
    'shares_count': 'sum'
}).reset_index()

respuesta_unpivoted = pd.melt(
    resumen,
    id_vars=['content_type'],
    value_vars=['impressions_count','likes_count','comments_count','shares_count'],
    var_name = 'metric_type',
    value_name = 'metric_value'
)

respuesta_unpivoted

Unnamed: 0,content_type,metric_type,metric_value
0,Article,impressions_count,1370
1,Image,impressions_count,600
2,Podcast,impressions_count,500
3,Article,likes_count,185
4,Image,likes_count,75
5,Podcast,likes_count,60
6,Article,comments_count,26
7,Image,comments_count,5
8,Podcast,comments_count,10
9,Article,shares_count,14


```SQL
SELECT
    content_type,
    'impressions' AS mentric_type,
    SUM(impressions_count) AS metric_value
FROM fct_creator_content
WHERE published_date BETWEEN '2024-04-08' AND '2024-04-21'
GROUP BY content_type

UNION ALL

SELECT
    content_type,
    'likes' AS metric_type,
    SUM(likes_count) AS metric_value
FROM fct_creator_content
WHERE published_date BETWEEN '2024-04-08' AND '2024-04-21'
GROUP BY content_type

UNION ALL

SELECT
    content_type,
    'comments' as metric_type,
    SUM(comments_count) AS metric_value
FROM fct_creator_content
WHERE published_date BETWEEN '2024-04-08' AND '2024-04-21'
GROUP BY content_type

UNION ALL

SELECT
    content_type,
    'shares' AS metric_value,
    SUM(shares_count) AS metric_value
FROM fct_creator_content
WHERE published_date BETWEEN '2024-04-08' AND '2024-04-21'
GROUP BY content_type;
```

# Pregunta 3

### Para el contenido publicado entre abril y junio de 2024, ¿puedes calcular para cada creador qué porcentaje de sus nuevos seguidores provino de cada tipo de contenido?

In [8]:
df_merge = df_creator.merge(df_content, on='creator_id', how='inner')

df_q2 = df_merge[df_merge['published_date'].between('2024-04-01','2024-06-30')]
stats = df_q2.groupby(['creator_name','content_type'])['new_followers_count'].sum().reset_index()

total_per_creator = stats.groupby('creator_name')['new_followers_count'].transform('sum')

stats['percentage'] = (stats['new_followers_count'] / total_per_creator * 100).round(2)

stats = stats.sort_values(['creator_name','percentage'], ascending=[True, False])

stats

Unnamed: 0,creator_name,content_type,new_followers_count,percentage
0,Alice,Article,65,56.52
1,Alice,Video,50,43.48
3,Bob,Podcast,75,51.72
2,Bob,Article,70,48.28
4,Carol,1280,88,52.38
5,Carol,Video,80,47.62
6,Dave,Image,52,63.41
7,Dave,Podcast,30,36.59
8,Eve,Article,82,100.0
9,Frank,Image,35,100.0


```SQL
WITH stats AS (
    SELECT
        d.creator_name,
        f.content_type,
        SUM(f.new_followers_count) AS follower_per_type
    FROM dim_creator d
    INNER JOIN fct_creator_content f ON d.creator_id = f.creator_id
    WHERE f.published_date BETWEEN '2024-04-01' AND '2024-06-30'
    GROUP BY d.creator_name, f.content_type)
SELECT
    creator_name,
    content_type,
    follower_per_type,
    ROUND(
    100.0 * follower_per_type / SUM(follower_per_type) OVER(PARTITION BY creator_name),
    2
    ) as percentage_of_total_followers
FROM stats
ORDER BY creator_name, percentage_of_total_followers DESC;
```