# Electroninc Arts - Star Wars Game Player Storyline Engagement Analysis

```SQL
CREATE TABLE dim_storyline_components (
    storyline_component_id INTEGER,
    component_name VARCHAR
);

CREATE TABLE fct_storyline_interactions (
    interaction_id INTEGER,
    player_id INTEGER,
    storyline_component_id INTEGER,
    interaction_date DATE
);

INSERT INTO dim_storyline_components (storyline_component_id, component_name)
VALUES
    (1, 'Light Side Redemption'),
    (2, 'Dark Side Temptation'),
    (3, 'Force Awakening'),
    (4, 'Sith Prophecy'),
    (5, 'Rebel Alliance'),
    (6, 'Jedi Resurrection'),
    (7, 'Bounty Hunter Intrigue'),
    (8, 'Galactic Conspiracy'),
    (9, 'Droid Uprising'),
    (10, 'Empire Rebellion');

INSERT INTO fct_storyline_interactions (interaction_id, player_id, storyline_component_id, interaction_date)
VALUES
    (1, 1, 1, '2024-05-03'),
    (2, 1, 2, '2024-05-10'),
    (3, 2, 2, '2024-05-05'),
    (4, 2, 3, '2024-05-12'),
    (5, 3, 3, '2024-05-15'),
    (6, 3, 3, '2024-05-20'),
    (7, 3, 1, '2024-05-25'),
    (8, 4, 4, '2024-05-07'),
    (9, 4, 4, '2024-05-28'),
    (10, 5, 5, '2024-05-08'),
    (11, 5, 2, '2024-05-18'),
    (12, 6, 5, '2024-05-22'),
    (13, 1, 1, '2024-05-02'),
    (14, 2, 3, '2024-05-30'),
    (15, 6, 1, '2024-05-30'),
    (16, 7, 6, '2024-05-11'),
    (17, 7, 6, '2024-05-19'),
    (18, 8, 7, '2024-05-14'),
    (19, 8, 8, '2024-05-21'),
    (20, 9, 9, '2024-05-17');

SELECT * FROM dim_storyline_components;

SELECT * FROM fct_storyline_interactions;
```

In [1]:
import pandas as pd
import numpy as np

In [7]:
df_components = pd.read_csv('Data/031/dim_storyline_components.csv')
df_interaction = pd.read_csv('Data/031/fct_toryline_interactions.csv', parse_dates=['interaction_date'])

df_components.head()

Unnamed: 0,storyline_component_id,component_name
0,1,Light Side Redemption
1,2,Dark Side Temptation
2,3,Force Awakening
3,4,Sith Prophecy
4,5,Rebel Alliance


In [8]:
df_interaction.head()

Unnamed: 0,interaction_id,player_id,storyline_component_id,interaction_date
0,1,1,1,2024-05-03
1,2,1,2,2024-05-10
2,3,2,2,2024-05-05
3,4,2,3,2024-05-12
4,5,3,3,2024-05-15


# Pregunta 1

### Para cada componente de la historia, ¿cuántos jugadores únicos interactuaron con dicho componente durante todo el mes de mayo de 2024? Si un componente de la historia no tuvo interacciones, devuelva el nombre del componente con un conteo de jugadores de 0.

In [10]:
df_mayo = df_interaction[
    (df_interaction['interaction_date'].between('2024-05-01','2024-05-31'))
]

df_res = df_mayo.merge(df_components, on='storyline_component_id', how='left')

respuesta = df_res.groupby('component_name')['player_id'].nunique().reset_index(name='count_player')

respuesta

Unnamed: 0,component_name,count_player
0,Bounty Hunter Intrigue,1
1,Dark Side Temptation,3
2,Droid Uprising,1
3,Force Awakening,2
4,Galactic Conspiracy,1
5,Jedi Resurrection,1
6,Light Side Redemption,3
7,Rebel Alliance,2
8,Sith Prophecy,1


```SQL
SELECT
    c.component_name,
    COUNT(DISTINCT i.player_id) AS count_player
FROM dim_storyline_components c
LEFT JOIN fct_storyline_interactions i ON c.storyline_component_id = i.storyline_component_id
AND i.interaction_date BETWEEN '2024-05-01' AND '2024-05-31'
GROUP BY c.component_name;
```

# Pregunta 2

### ¿Cuál es el número total de interacciones de historia para cada combinación de componente y jugador durante mayo de 2024? Considere únicamente a aquellos jugadores que hayan interactuado con al menos dos componentes de historia diferentes.

In [11]:
df_mayo = df_interaction[
    df_interaction['interaction_date'].between('2024-05-01','2024-05-31')
]

valid_players = df_mayo.groupby('player_id')['storyline_component_id'].nunique()
valid_players = valid_players[valid_players >= 2].index

df_filtered = df_mayo[df_mayo['player_id'].isin(valid_players)]

resutlado = df_filtered.groupby(['player_id','storyline_component_id']).size().reset_index(name='total_interactions')

resutlado


Unnamed: 0,player_id,storyline_component_id,total_interactions
0,1,1,2
1,1,2,1
2,2,2,1
3,2,3,2
4,3,1,1
5,3,3,2
6,5,2,1
7,5,5,1
8,6,1,1
9,6,5,1


```SQL
WITH players_at_least_2 AS (
    SELECT player_id
    FROM fct_storyline_interactions
    WHERE interaction_date BETWEEN '2024-05-01' AND '2024-05-31'
    GROUP BY player_id
    HAVING COUNT(DISTINCT storyline_component_id) >= 2
)
SELECT 
    player_id, 
    storyline_component_id, 
    COUNT(*) AS total_interactions
FROM fct_storyline_interactions
WHERE player_id IN (SELECT player_id FROM players_at_least_2)
  AND interaction_date BETWEEN '2024-05-01' AND '2024-05-31'
GROUP BY player_id, storyline_component_id;
```

# Pregunta 3

### ¿Puedes clasificar (rankear) los componentes de la historia según el número promedio de interacciones por jugador durante mayo de 2024? Proporcione una lista de los nombres de los componentes de la historia y su clasificación.

```SQL
SELECT
    c.component_name,
    CAST(COUNT(i.interaction_id) AS DECIMAL) / COUNT(DISTINCT i.player_id) AS avg_interactions_per_player,
    RANK() OVER(ORDER BY CAST(COUNT(i.interaction_id) AS DECIMAL) / COUNT(DISTINCT i.player_id) DESC) AS storyline_rank
FROM dim_storyline_components c
JOIN fct_storyline_interactions i ON c.storyline_component_id = i.storyline_component_id
WHERE i.interaction_date BETWEEN '2024-05-01' AND '2024-05-31'
GROUP BY c.component_name
```