In [23]:
import altair as alt
import pandas as pd
import seaborn as sns

In [29]:
# Read the tab-separated data file into the working DataFrame.
df = pd.read_csv(
    filepath_or_buffer='depress.txt',
    sep='\t',
)

In [30]:
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,id,sex,age,marital,educat,employ,income,relig,c1,c2,...,c20,cesd,cases,drink,health,regdoc,treat,beddays,acuteill,chronill
0,1,1,68,Widowed,Some HS,Retired,4,1.0,0,0,...,0,0,0,0,2,1,1,0,0,1
1,2,0,58,Divorced,Some college,FT,15,1.0,0,0,...,0,4,0,1,1,1,1,0,0,1
2,3,1,45,Married,HS Grad,FT,28,1.0,0,0,...,0,4,0,1,2,1,1,0,0,0
3,4,1,50,Divorced,HS Grad,Unemp,9,1.0,0,0,...,0,5,0,0,1,1,0,0,0,1
4,5,1,33,Separated,HS Grad,FT,35,1.0,0,0,...,0,6,0,1,1,1,1,1,1,0


In [4]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the numeric columns.
df.describe()

Unnamed: 0,id,sex,age,income,relig,c1,c2,c3,c4,c5,...,c20,cesd,cases,drink,health,regdoc,treat,beddays,acuteill,chronill
count,294.0,294.0,294.0,294.0,292.0,294.0,294.0,294.0,294.0,294.0,...,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0
mean,147.5,0.622449,44.414966,20.57483,1.955479,0.363946,0.568027,0.544218,0.193878,0.55102,...,0.248299,8.884354,0.170068,0.795918,1.772109,0.812925,0.503401,0.214286,0.295918,0.506803
std,85.014705,0.485601,18.085441,15.290124,1.184698,0.757348,0.809794,0.891609,0.589872,0.81942,...,0.586754,8.823655,0.376333,0.403716,0.837947,0.390636,0.500841,0.411026,0.457233,0.500806
min,1.0,0.0,18.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
25%,74.25,0.0,28.0,9.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
50%,147.5,1.0,42.5,15.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,7.0,0.0,1.0,2.0,1.0,1.0,0.0,0.0,1.0
75%,220.75,1.0,59.0,28.0,3.0,0.0,1.0,1.0,0.0,1.0,...,0.0,12.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,1.0
max,294.0,1.0,89.0,65.0,4.0,3.0,3.0,3.0,3.0,3.0,...,3.0,47.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0


In [69]:
# Translate the numeric codes into readable labels, following the data dictionary.
#
# The result of replace() is assigned back to the column instead of calling
# replace(..., inplace=True) on df[col]: the in-place call on a selected column
# is chained assignment, which recent pandas versions warn about and which does
# not update df at all once copy-on-write is active. Re-running this cell is
# harmless because already-translated labels no longer match the numeric keys.

df['sex'] = df['sex'].replace({0: "Masculino", 1: "Feminino"})
df['cases'] = df['cases'].replace({0: "Não", 1: "Sim"})
df['relig'] = df['relig'].replace(
    {1: "Protestante", 2: "Católico", 3: "Judeu", 4: "Nenhuma", 5: "Outra"}
)

In [71]:
# Scatter plot of income against age.
# X axis = age
# Y axis = income
# Color  = sex
# Size   = whether the respondent is flagged as depressed ("cases")
#
# Author's observation: no depressed cases show up in the highest income
# ranges, starting at roughly the US$ 45 thousand band.

# Two-colour palette for the sex encoding; later cells reuse this name.
range_ = ['#C74CED', '#46ACF0']

(
    alt.Chart(df)
    .mark_point(filled=True, size=70)
    .encode(
        alt.X('age', title='Idade'),
        alt.Y('income', title='Renda (em milhares de USD)'),
        alt.Color('sex:N', title='Sexo', scale=alt.Scale(range=range_)),
        alt.Size('cases:N', title='Deprimido', scale=alt.Scale(range=[100, 400])),
        tooltip=['cases', 'income', 'age'],
    )
    .properties(width=700, height=400)
)

In [76]:
# Box plot of income, one box per sex, coloured with the palette (range_)
# defined in the scatter-plot cell.
# The y channel is nominal, so no `zero` scale option is passed: that setting
# only applies to quantitative scales and had no effect on this axis.
alt.Chart(df).mark_boxplot(size=50).encode(
    x=alt.X('income', title='Renda'),
    y=alt.Y('sex:N', title='Sexo'),
    color=alt.Color('sex:N', title='Sexo', scale=alt.Scale(range=range_)),
).properties(
    width=400,
    height=250)

In [75]:
# Box plot of age, one box per sex, coloured with the palette (range_)
# defined in the scatter-plot cell.
# The y channel is nominal, so no `zero` scale option is passed: that setting
# only applies to quantitative scales and had no effect on this axis.
alt.Chart(df).mark_boxplot(size=50).encode(
    x=alt.X('age', title='Idade'),
    y=alt.Y('sex:N', title='Sexo'),
    color=alt.Color('sex:N', title='Sexo', scale=alt.Scale(range=range_)),
).properties(
    width=400,
    height=250)

In [7]:
# Number of depressed / non-depressed respondents, one panel per sex.

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('cases:N', title='Deprimido'),
        alt.Y('count(cases)', title='Número de casos'),
        alt.Color('cases:N', title='Deprimido'),
        alt.Column('sex:N', title='Sexo'),
        tooltip=['count(cases)', 'sex'],
    )
    .properties(width=60, height=300)
)

In [8]:
# Depressed / non-depressed respondents by sex - PROPORTIONAL
# (bars are stacked and normalized, so each row sums to the same length).

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('count(cases)', stack="normalize", title='% de casos'),
        alt.Y('sex:N', title='Sexo'),
        alt.Color('cases:N', title='Deprimido'),
        tooltip=['count(cases)', 'sex'],
    )
    .properties(width=400, height=100)
)

In [9]:
# Number of depressed / non-depressed respondents, one panel per education level.

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('cases:N', title='Deprimido'),
        alt.Y('count(cases)', title='Número de casos'),
        alt.Color('cases:N', title='Deprimido'),
        alt.Column('educat:N', title='Nível de educação'),
        tooltip=['count(cases)', 'educat'],
    )
    .properties(width=60, height=300)
)

In [10]:
# Depressed / non-depressed respondents by education level - PROPORTIONAL
# (bars are stacked and normalized, so each row sums to the same length).

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('count(cases)', stack="normalize", title='% de casos'),
        alt.Y('educat:N', title='Nível de educação'),
        alt.Color('cases:N', title='Deprimido'),
        tooltip=['count(cases)', 'educat'],
    )
    .properties(width=400, height=250)
)

In [11]:
# Number of depressed / non-depressed respondents, one panel per occupation
# status (the original note lists full-time, part-time and unemployed).

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('cases:N', title='Deprimido'),
        alt.Y('count(cases)', title='Número de casos'),
        alt.Color('cases:N', title='Deprimido'),
        alt.Column('employ:N', title='Ocupação'),
        tooltip=['count(cases)', 'employ'],
    )
    .properties(width=60, height=300)
)

In [12]:
# Depressed / non-depressed respondents by occupation status - PROPORTIONAL
# (bars are stacked and normalized, so each row sums to the same length).

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('count(cases)', stack="normalize", title='% de casos'),
        alt.Y('employ:N', title='Ocupação'),
        alt.Color('cases:N', title='Deprimido'),
        tooltip=['count(cases)', 'employ'],
    )
    .properties(width=400, height=250)
)

In [13]:
# Number of depressed / non-depressed respondents, one panel per marital status.

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('cases:N', title='Deprimido'),
        alt.Y('count(cases)', title='Número de casos'),
        alt.Color('cases:N', title='Deprimido'),
        alt.Column('marital:N', title='Estado civil'),
        tooltip=['count(cases)', 'marital'],
    )
    .properties(width=60, height=300)
)

In [14]:
# Depressed / non-depressed respondents by marital status - PROPORTIONAL
# (bars are stacked and normalized, so each row sums to the same length).

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('count(cases)', stack="normalize", title='% de casos'),
        alt.Y('marital:N', title='Estado civil'),
        alt.Color('cases:N', title='Deprimido'),
        tooltip=['count(cases)', 'marital'],
    )
    .properties(width=400, height=250)
)

In [15]:
# Number of depressed / non-depressed respondents, one panel per religion.

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('cases:N', title='Deprimido'),
        alt.Y('count(cases)', title='Número de casos'),
        alt.Color('cases:N', title='Deprimido'),
        alt.Column('relig:N', title='Religião'),
        tooltip=['count(cases)', 'relig'],
    )
    .properties(width=60, height=300)
)

In [16]:
# Depressed / non-depressed respondents by religion - PROPORTIONAL
# (bars are stacked and normalized, so each row sums to the same length).

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('count(cases)', stack="normalize", title='% de casos'),
        alt.Y('relig:N', title='Religião'),
        alt.Color('cases:N', title='Deprimido'),
        tooltip=['count(cases)', 'relig'],
    )
    .properties(width=400, height=250)
)