# LinkedIn Skill Endorsements User Engagement

```SQL
-- User dimension: one row per user.
-- user_id is declared as the primary key so that counting rows of this table
-- is the same as counting distinct users; the remaining attributes are
-- mandatory (every seeded row supplies them).
CREATE TABLE dim_users (
    user_id integer PRIMARY KEY,
    user_name varchar NOT NULL,
    profile_creation_date date NOT NULL
);

-- Skill dimension: one row per skill.
-- skill_id is the primary key, so a join on skill_id can never multiply
-- endorsement rows. skill_category holds the label the analysis queries
-- filter on (values seen in the seed data: TECHNICAL, MANAGEMENT, SOFT).
CREATE TABLE dim_skills (
    skill_id integer PRIMARY KEY,
    skill_name varchar NOT NULL,
    skill_category varchar NOT NULL
);

-- Endorsement fact table: one row per endorsement received.
-- endorsement_id identifies the event; user_id and skill_id carry the same
-- identifiers used by dim_users and dim_skills. All columns are mandatory:
-- a NULL user, skill or date would silently drop out of the DISTINCT counts
-- and date filters used by the analysis queries.
CREATE TABLE fct_skill_endorsements (
    endorsement_id integer PRIMARY KEY,
    user_id integer NOT NULL,
    skill_id integer NOT NULL,
    endorsement_date date NOT NULL
);

-- Seed the user dimension with ten sample users (ids 1-10) whose profiles
-- were created on consecutive days, 2024-07-01 through 2024-07-10.
INSERT INTO dim_users (user_id, user_name, profile_creation_date)
VALUES
    (1, 'Alice', '2024-07-01'),
    (2, 'Bob', '2024-07-02'),
    (3, 'Charlie', '2024-07-03'),
    (4, 'Dawn''s', '2024-07-04'),  -- '' is an escaped single quote: the stored name is Dawn's
    (5, 'Eve', '2024-07-05'),
    (6, 'Frank', '2024-07-06'),
    (7, 'Grace', '2024-07-07'),
    (8, 'Heidi', '2024-07-08'),
    (9, 'Ivan', '2024-07-09'),
    (10, 'Judy', '2024-07-10');

-- Seed the skill dimension with ten skills (ids 101-110) spread over three
-- categories: TECHNICAL (5), MANAGEMENT (3) and SOFT (2).
INSERT INTO dim_skills (skill_id, skill_name, skill_category)
VALUES
    (101, 'Python', 'TECHNICAL'),
    (102, 'Java', 'TECHNICAL'),
    (103, 'Project Management', 'MANAGEMENT'),
    (104, 'Leadership', 'MANAGEMENT'),
    (105, 'Data Analysis', 'TECHNICAL'),
    (106, 'Communication', 'SOFT'),
    (107, 'Networking', 'TECHNICAL'),
    (108, 'Strategic Planning', 'MANAGEMENT'),
    (109, 'SQL', 'TECHNICAL'),
    (110, 'Teamwork', 'SOFT');

-- Seed twenty endorsements dated July-September 2024.
-- Columns are listed in the same order as the table definition
-- (endorsement_id first); rows stay in ascending endorsement_id order.
INSERT INTO fct_skill_endorsements
    (endorsement_id, user_id, skill_id, endorsement_date)
VALUES
    ( 1,  1, 101, '2024-07-05'),
    ( 2,  1, 105, '2024-08-10'),
    ( 3,  1, 103, '2024-09-15'),
    ( 4,  2, 103, '2024-07-20'),
    ( 5,  2, 104, '2024-08-22'),
    ( 6,  3, 102, '2024-07-25'),
    ( 7,  3, 102, '2024-08-05'),
    ( 8,  3, 109, '2024-09-10'),
    ( 9,  4, 104, '2024-09-12'),
    (10,  5, 101, '2024-09-14'),
    (11,  5, 102, '2024-08-18'),
    (12,  7, 106, '2024-07-07'),
    (13,  8, 107, '2024-08-12'),
    (14,  8, 109, '2024-08-20'),
    (15,  9, 108, '2024-09-18'),
    -- NOTE(review): dated before user 9's profile_creation_date (2024-07-09) -- confirm this is intentional.
    (16,  9, 109, '2024-07-03'),
    (17, 10, 110, '2024-08-25'),
    (18, 10, 103, '2024-09-22'),
    (19,  4, 108, '2024-07-15'),
    (20,  2, 105, '2024-08-30');

-- Inspect the three loaded tables.
-- Columns are named explicitly (instead of SELECT *) and rows are ordered by
-- each table's identifier so the output is stable from run to run.
SELECT
    user_id,
    user_name,
    profile_creation_date
FROM dim_users
ORDER BY user_id;

SELECT
    skill_id,
    skill_name,
    skill_category
FROM dim_skills
ORDER BY skill_id;

SELECT
    endorsement_id,
    user_id,
    skill_id,
    endorsement_date
FROM fct_skill_endorsements
ORDER BY endorsement_id;
```

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the three CSV extracts. The date columns are parsed on read so they can
# be filtered by date range later on.
df_users = pd.read_csv(
    'Data/025/dim_users.csv',
    parse_dates=['profile_creation_date'],
)
df_endorsement = pd.read_csv(
    'Data/025/fct_skill_endorsements.csv',
    parse_dates=['endorsement_date'],
)
df_skills = pd.read_csv('Data/025/dim_skills.csv')

# Last expression of the cell: preview the first rows of the skills table.
df_skills.head()

Unnamed: 0,skill_id,skill_name,skill_category
0,101,Python,TECHNICAL
1,102,Java,TECHNICAL
2,103,Project Management,MANAGEMENT
3,104,Leadership,MANAGEMENT
4,105,Data Analysis,TECHNICAL


In [3]:
# Preview the first rows of the users table (head() shows five rows by default).
df_users.head()

Unnamed: 0,user_id,user_name,profile_creation_date
0,1,Alice,2024-07-01
1,2,Bob,2024-07-02
2,3,Charlie,2024-07-03
3,4,Dawn's,2024-07-04
4,5,Eve,2024-07-05


In [4]:
# Preview the first rows of the endorsements table (head() shows five rows by default).
df_endorsement.head()

Unnamed: 0,endorsement_id,user_id,skill_id,endorsement_date
0,1,1,101,2024-07-05
1,2,1,105,2024-08-10
2,3,1,103,2024-09-15
3,4,2,103,2024-07-20
4,5,2,104,2024-08-22


# Pregunta 1

### ¿Qué porcentaje de usuarios tiene al menos una habilidad validada por otros durante julio de 2024?

In [6]:
# Question 1: percentage of all users who received at least one endorsement
# in July 2024.

# Keep endorsements dated within July; both bounds are inclusive.
endorsement_dates = df_endorsement['endorsement_date']
df_july = df_endorsement.loc[
    (endorsement_dates >= '2024-07-01') & (endorsement_dates <= '2024-07-31')
]

# Distinct endorsed users in July versus distinct users overall.
df_nunique = df_july['user_id'].nunique()
total_users = df_users['user_id'].nunique()

respuesta1 = (df_nunique / total_users) * 100

# Last expression of the cell: display the percentage.
respuesta1

60.0

```SQL
-- Question 1: percentage (0-100) of all users who received at least one
-- endorsement in July 2024.
--   * Numerator: distinct endorsed users in July.
--   * Denominator: distinct users in dim_users, so a duplicated user row
--     cannot deflate the percentage (same definition as the pandas answer,
--     which uses nunique()).
--   * NULLIF turns an empty dim_users into a NULL result instead of a
--     division-by-zero error.
-- The date filter is a half-open range [2024-07-01, 2024-08-01): for a DATE
-- column it selects exactly the days of July.
SELECT
    COUNT(DISTINCT e.user_id) * 100.0
        / NULLIF((SELECT COUNT(DISTINCT u.user_id) FROM dim_users AS u), 0)
        AS porcentaje_at_least_one_skill
FROM fct_skill_endorsements AS e
WHERE e.endorsement_date >= '2024-07-01'
  AND e.endorsement_date < '2024-08-01';
```

# Pregunta 2

### ¿Cuál es el promedio de validaciones recibidas por usuario para habilidades categorizadas como 'TECHNICAL' durante agosto de 2024?

In [14]:
# Question 2: average number of TECHNICAL endorsements per user in August 2024.

# Attach the skill attributes to every endorsement (inner join on skill_id).
# df_merge is reused by later cells.
df_merge = pd.merge(df_endorsement, df_skills, on='skill_id')

# Row filters: endorsement dated in August (bounds inclusive) and skill
# categorised as TECHNICAL.
in_august = df_merge['endorsement_date'].between('2024-08-01', '2024-08-31')
is_technical = df_merge['skill_category'] == 'TECHNICAL'
df_aug_tech = df_merge[in_august & is_technical]

# Endorsements per user, averaged over the users that have at least one
# qualifying endorsement (users with none are not part of the mean).
resultado2 = df_aug_tech.groupby('user_id').size().mean()

# Last expression of the cell: display the average.
resultado2

np.float64(1.2)

```SQL
-- Question 2: average number of TECHNICAL endorsements per user in August 2024.
-- Only users with at least one qualifying endorsement enter the average.
-- The output alias keeps its original spelling so the result column name is
-- unchanged.
WITH technical_august_counts AS (
    -- One row per user: how many TECHNICAL endorsements they received in August.
    -- "* 1.0" makes the count a decimal so the average is not integer-truncated.
    SELECT
        e.user_id,
        COUNT(e.endorsement_id) * 1.0 AS conteo_por_usuario
    FROM fct_skill_endorsements AS e
    INNER JOIN dim_skills AS s
        ON s.skill_id = e.skill_id
    WHERE s.skill_category = 'TECHNICAL'
      AND e.endorsement_date BETWEEN '2024-08-01' AND '2024-08-31'
    GROUP BY e.user_id
)
SELECT
    AVG(conteo_por_usuario) AS avg_endorsements_techincal
FROM technical_august_counts;
```

# Pregunta 3

### Para la categoría de habilidades de 'MANAGEMENT' (Gestión), ¿qué porcentaje de los usuarios que han recibido alguna vez una validación en esa categoría, recibieron al menos una validación en septiembre de 2024?

In [16]:
# Question 3: of the users who ever received a MANAGEMENT endorsement, what
# percentage received at least one in September 2024?

# All endorsements for MANAGEMENT skills (df_merge comes from the Question 2 cell).
is_management = df_merge['skill_category'] == 'MANAGEMENT'
df_mgmt = df_merge[is_management]

# Users endorsed in September (bounds inclusive) over users ever endorsed.
in_september = df_mgmt['endorsement_date'].between('2024-09-01', '2024-09-30')
numerador = df_mgmt.loc[in_september, 'user_id'].nunique()
denominador = df_mgmt['user_id'].nunique()

repuesta = (numerador / denominador) * 100

# Last expression of the cell: display the percentage.
repuesta

80.0

```SQL
-- Question 3: of the users who ever received a MANAGEMENT endorsement, the
-- percentage (0-100) who received at least one in September 2024.
WITH management_endorsements AS (
    -- Every endorsement whose skill belongs to the MANAGEMENT category.
    SELECT
        e.user_id,
        e.endorsement_date
    FROM fct_skill_endorsements AS e
    INNER JOIN dim_skills AS s
        ON s.skill_id = e.skill_id
    WHERE s.skill_category = 'MANAGEMENT'
)
SELECT
    -- The CASE yields NULL outside September, and COUNT(DISTINCT ...) ignores
    -- NULLs, so the numerator only counts users endorsed in September.
    COUNT(DISTINCT CASE
        WHEN m.endorsement_date BETWEEN '2024-09-01' AND '2024-09-30'
        THEN m.user_id
    END) * 100.0
        / COUNT(DISTINCT m.user_id) AS percentage_management_active
FROM management_endorsements AS m;
```