# Exemplos NPS e Bolsa de valores

## Módulos

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from scipy.stats import f_oneway

## Dados

In [5]:
dados_nps = pd.read_csv('nps_example.csv', sep = ';')

In [8]:
dados_bolsa = pd.read_csv('dados_bolsa.csv', sep = ';', decimal = ',')

## Análise NPS

- Verificar se todas as respostas estão completas

In [13]:
dados_nps.groupby('response_status') \
         .size() \
         .to_frame('n') \
         .reset_index()

Unnamed: 0,response_status,n
0,Complete,2281
1,Incomplete,265
2,Terminated,33


In [14]:
dados_nps[dados_nps['nps_score'].isnull()]

Unnamed: 0,id,response_status,how_long_listening,age,nps_score,gender
17,11706467,Incomplete,Less than 6 months,18-24,,
31,11706938,Incomplete,1 year to less than 3 years,25-34,,
32,11706979,Incomplete,6 months to less than a year,25-34,,
43,11707426,Incomplete,6 months to less than a year,25-34,,
48,11707719,Incomplete,3 years to less than 5 years,35-44,,
...,...,...,...,...,...,...
2546,13093216,Incomplete,6 months to less than a year,35-44,,
2556,13278063,Incomplete,3 years to less than 5 years,18-24,,
2570,13565327,Complete,1 year to less than 3 years,45-54,,Female
2572,13601847,Incomplete,3 years to less than 5 years,25-34,,


- Filtrar somente respostas completas

In [19]:
dados_nps_filtrados = dados_nps[(dados_nps['response_status'] == 'Complete') & \
                                (dados_nps['nps_score'].notna())]

- Análise descritiva

In [21]:
dados_nps_filtrados.groupby('age') \
                   .agg(media_nps = pd.NamedAgg('nps_score', 'mean'),
                        dp_nps = pd.NamedAgg('nps_score', 'std'),
                        n = pd.NamedAgg('nps_score', 'size')) \
                   .reset_index()

Unnamed: 0,age,media_nps,dp_nps,n
0,18-24,9.464539,1.116275,282
1,25-34,9.694828,0.957639,580
2,35-44,9.707612,0.979501,578
3,45-54,9.719039,0.928254,541
4,55-64,9.733871,0.92302,248
5,65-74,9.423077,1.36156,26
6,75+,8.0,0.0,2


- Filtrar os 75+, pq somente possui 2 linhas

In [23]:
dados_nps_filtrados_aj = dados_nps_filtrados[dados_nps_filtrados['age'] != '75+']

In [24]:
dados_nps_filtrados_aj.groupby('age') \
                      .agg(media_nps = pd.NamedAgg('nps_score', 'mean'),
                           dp_nps = pd.NamedAgg('nps_score', 'std'),
                           n = pd.NamedAgg('nps_score', 'size')) \
                      .reset_index()

Unnamed: 0,age,media_nps,dp_nps,n
0,18-24,9.464539,1.116275,282
1,25-34,9.694828,0.957639,580
2,35-44,9.707612,0.979501,578
3,45-54,9.719039,0.928254,541
4,55-64,9.733871,0.92302,248
5,65-74,9.423077,1.36156,26


- Teste f

In [27]:
dados_18_24 = dados_nps_filtrados_aj[dados_nps_filtrados_aj['age'] == '18-24']['nps_score']
dados_25_34 = dados_nps_filtrados_aj[dados_nps_filtrados_aj['age'] == '25-34']['nps_score']
dados_35_44 = dados_nps_filtrados_aj[dados_nps_filtrados_aj['age'] == '35-44']['nps_score']
dados_45_54 = dados_nps_filtrados_aj[dados_nps_filtrados_aj['age'] == '45-54']['nps_score']
dados_55_64 = dados_nps_filtrados_aj[dados_nps_filtrados_aj['age'] == '55-64']['nps_score']
dados_65_74 = dados_nps_filtrados_aj[dados_nps_filtrados_aj['age'] == '65-74']['nps_score']

In [28]:
f_oneway(dados_18_24, 
         dados_25_34, 
         dados_35_44, 
         dados_45_54,
         dados_55_64,
         dados_65_74)

F_onewayResult(statistic=3.5221660981040768, pvalue=0.0035606861304276695)

## Análise correlação bolsa

- Filtrar missings

In [31]:
dados_bolsa[dados_bolsa.isnull()]

Unnamed: 0,data,petr4,bbdc3,vale5,ambv4,itub4
0,,,,,,
1,,,,,,
2,,,,,,
3,,,,,,
4,,,,,,
...,...,...,...,...,...,...
1773,,,,,,
1774,,,,,,
1775,,,,,,
1776,,,,,,


In [32]:
dados_bolsa_filtrados = dados_bolsa.dropna()

- Correlação

In [33]:
dados_bolsa_filtrados.corr()

Unnamed: 0,petr4,bbdc3,vale5,ambv4,itub4
petr4,1.0,0.539247,0.724023,0.392074,0.593834
bbdc3,0.539247,1.0,0.592143,0.470529,0.778506
vale5,0.724023,0.592143,1.0,0.482919,0.642838
ambv4,0.392074,0.470529,0.482919,1.0,0.488886
itub4,0.593834,0.778506,0.642838,0.488886,1.0


In [35]:
corr = dados_bolsa_filtrados.corr()

In [36]:
corr.style.background_gradient(cmap='coolwarm').set_precision(2)

Unnamed: 0,petr4,bbdc3,vale5,ambv4,itub4
petr4,1.0,0.54,0.72,0.39,0.59
bbdc3,0.54,1.0,0.59,0.47,0.78
vale5,0.72,0.59,1.0,0.48,0.64
ambv4,0.39,0.47,0.48,1.0,0.49
itub4,0.59,0.78,0.64,0.49,1.0
