# Netflix - Subscriber Growth in Emerging Markets

```SQL
CREATE TABLE dimension_country (
    country_id int,
    country_name varchar
);

CREATE TABLE fact_marketing_spend (
    spend_id int,
    country_id int,
    campaign_date date,
    amount_spent decimal
);

CREATE TABLE fact_daily_subscriptions (
    subscription_id int,
    country_id int,
    signup_date date,
    num_new_subscribers int
);

INSERT INTO dimension_country (country_id, country_name)
VALUES
    (1, 'India'),
    (2, 'Brazil'),
    (3, 'South Africa'),
    (4, 'Indonesia');

INSERT INTO fact_marketing_spend (spend_id, country_id, amount_spent, campaign_date)
VALUES
    (1, 1, 150000.5, '2024-01-15'),
    (2, 2, 200000.75, '2024-02-10'),
    (3, 3, 175000, '2024-03-05'),
    (4, 1, 80000, '2024-01-25'),
    (5, 2, 95000.5, '2024-02-20'),
    (6, 3, 120000, '2024-03-15'),
    (7, 4, 50000, '2024-01-30'),
    (8, 4, 70000, '2024-03-10'),
    (9, 1, 60000, '2024-02-05'),
    (10, 2, 110000, '2024-03-25');

INSERT INTO fact_daily_subscriptions (subscription_id, country_id, signup_date, num_new_subscribers)
VALUES
    (1, 1, '2024-01-16', 3000),
    (2, 2, '2024-02-11', 4000),
    (3, 3, '2024-03-06', 3500),
    (4, 1, '2024-01-26', 2000),
    (5, 2, '2024-02-21', 2500),
    (6, 3, '2024-03-16', 2800),
    (7, 4, '2024-01-31', 1500),
    (8, 4, '2024-03-11', 1800),
    (9, 1, '2024-02-06', 1700),
    (10, 2, '2024-03-26', 3000);
```

In [1]:
import pandas as pd
import numpy as np

In [4]:
df_country = pd.read_csv('Data/010/dimension_country.csv')
df_subscription = pd.read_csv('Data/010/fact_daily_subscriptions.csv')
df_marketing = pd.read_csv('Data/010/fact_marketing_spend.csv')

df_country.head()

Unnamed: 0,country_id,country_name
0,1,India
1,2,Brazil
2,3,South Africa
3,4,Indonesia


In [5]:
df_subscription.head()

Unnamed: 0,subscription_id,country_id,signup_date,num_new_subscribers
0,1,1,2024-01-16,3000
1,2,2,2024-02-11,4000
2,3,3,2024-03-06,3500
3,4,1,2024-01-26,2000
4,5,2,2024-02-21,2500


In [6]:
df_marketing.head()

Unnamed: 0,spend_id,country_id,campaign_date,amount_spent
0,1,1,2024-01-15,150000.5
1,2,2,2024-02-10,200000.75
2,3,3,2024-03-05,175000.0
3,4,1,2024-01-25,80000.0
4,5,2,2024-02-20,95000.5


# Pregunta 1

### Obtén el gasto total de marketing en cada país para el primer trimestre (Q1) de 2024 para ayudar a informar la distribución del presupuesto entre las regiones.

In [12]:
df_merge = pd.merge(df_country, df_marketing, on=('country_id'))

df_filter = df_merge[
    (df_merge['campaign_date'].between('2024-01-01','2024-03-31'))
]

df_total = df_filter.groupby('country_name')['amount_spent'].sum()

df_total_order = df_total.sort_values(ascending=False).reset_index()

df_total_order

Unnamed: 0,country_name,amount_spent
0,Brazil,405001.25
1,South Africa,295000.0
2,India,290000.5
3,Indonesia,120000.0


```SQL
SELECT
    d.country_name,
    SUM(f.amount_spent) AS total_q1_spend
FROM dimension_country d
JOIN fact_marketing_spend f
    ON d.country_id = f.country_id
WHERE f.campaign_date BETWEEN '2024-01-01' AND '2024-03-31'
GROUP BY d.country_name
ORDER BY total_q1_spend DESC;
```

# Pregunta 2

### Enumera la cantidad de nuevos suscriptores adquiridos en cada país (con su nombre) durante enero de 2024, renombrando la columna del conteo de suscriptores como 'new_subscribers' para que el reporte sea más claro.

In [15]:
df_merge2 = pd.merge(df_country, df_subscription, on=('country_id'))

df_filter2 = df_merge2[
    (df_merge2['signup_date'].between('2024-01-01','2024-01-31'))
]

df_new = df_filter2.groupby('country_name')['num_new_subscribers'].sum().reset_index()

df_new_subscriber = df_new.rename(columns={'num_new_subscribers': 'new_subscribers'})

df_new_subscriber

Unnamed: 0,country_name,new_subscribers
0,India,5000
1,Indonesia,1500


```SQL
SELECT
    c.country_name,
    SUM(s.num_new_subscribers) AS new_subscribers
FROM fact_daily_subscriptions s
JOIN dimension_country c
    ON c.country_id = s.country_id
WHERE s.signup_date BETWEEN '2024-01-01' AND '2024-01-31'
GROUP BY c.country_name;
```

# Pregunta 3

### Determina el gasto promedio de marketing por cada nuevo suscriptor para cada país en el primer trimestre (Q1) de 2024, redondeando el resultado al número entero superior más cercano para evaluar la eficiencia de las campañas

In [21]:
mask_mkt = df_marketing['campaign_date'].between('2024-01-01','2024-01-31')
mask_subs = df_subscription['signup_date'].between('2024-01-01','2024-01-31')

gasto_total = df_marketing[mask_mkt].groupby('country_id')['amount_spent'].sum()
subs_totatles = df_subscription[mask_subs].groupby('country_id')['num_new_subscribers'].sum()

df_cac = pd.concat([gasto_total, subs_totatles], axis=1).dropna()
df_cac['avg_spend_per_subcriber'] = np.ceil(df_cac['amount_spent'] / df_cac['num_new_subscribers']) 

df_final = df_cac.reset_index().merge(df_country, on='country_id')
df_final = df_final[['country_name','avg_spend_per_subcriber']]

df_final

Unnamed: 0,country_name,avg_spend_per_subcriber
0,India,47.0
1,Indonesia,34.0


```SQL
WITH gasto_q1 AS (
    SELECT
        country_id,
        SUM(amount_spent) AS total_spend
    FROM fact_marketing_spend
    WHERE campaign_date BETWEEN '2024-01-01' AND '2024-03-31'
    GROUP BY country_id
),
subs_q1 AS (
    SELECT
        country_id,
        SUM(num_new_subscribers) AS total_subs
    FROM fact_daily_subscriptions
    WHERE signup_date BETWEEN '2024-01-01' AND '2024-03-31'
    GROUP BY country_id
)
SELECT
    c.country_name,
    CEIL(g.total_spend / s.total_subs) AS avg_spend_per_subscriber
FROM dimension_country c
JOIN gasto_q1 g ON c.country_id = g.country_id
JOIN subs_q1 s ON c.country_id = s.country_id;
```