In [38]:
from sklearn.model_selection import train_test_split
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, '../src')

from pathlib import Path
import pandas as pd
from zipfile import ZipFile
import numpy as np
import matplotlib.pyplot as plt
import copy


PNS_DATA = Path('../data/processed/PNS_2019/all.zip').resolve()
zip_pns = ZipFile(PNS_DATA)
df = pd.read_csv(zip_pns.open(zip_pns.namelist()[0]), sep=",", low_memory=False)

PATH_GEN_DATA = Path('generated_data/pns_data_models').resolve()

def process_df(df_i, remove_na=True):
    df_i = copy.deepcopy(df_i)
    if remove_na:
        df_i = df_i.dropna()
    return df_i.drop(['r_norte', 'r_nordeste', 'r_sudeste', 'r_sul', 'r_centro_oeste'], axis=1)



# Target
df['depression'] = np.where(df['phq9_total'].isna(), np.nan, np.where(df['phq9_total'] >= 10, 1, 0))
df = df.dropna(subset=['depression'])
df['depression'].value_counts()



# Alimentos in natura ou minimamente processado
innatura = ['P00601','P00602', 'P00603', 'P00604', 'P00605', 'P00607', 'P00608', 'P00609', 'P00610', 'P00611', 'P00612', 'P00613']
df_innatura = df[innatura]
df_innatura['qst_innatura'] = (df_innatura == 1.0).sum(axis=1)
# Alimentos ultraprocessados
ultraprocessados = ['P00614', 'P00615', 'P00616', 'P00617', 'P00618', 'P00619', 'P00620', 'P00621', 'P00622', 'P00623']
df_upf = df[ultraprocessados]
df_upf['qst_upf'] = (df_upf == 1.0).sum(axis=1)

# A literatura não usa a porcentagem de ultraprocessados do prato como análise. O artigo 'Food consumption and depression among Brazilian adults: results from the Brazilian National Health Survey, 2013'
# Usa como base o número de perguntas respondidas 'Sim' nos alimentos ultraprocessados das ultimas 24hrs
df['per_innatura'] = df_innatura['qst_innatura'] / (df_innatura['qst_innatura'] + df_upf['qst_upf']) * 100
df['per_upf'] = df_upf['qst_upf'] / (df_innatura['qst_innatura'] + df_upf['qst_upf']) * 100
df['qst_upf'] = df_upf['qst_upf'] # Baseado no artigo

def classify_upf(row):
    value = row['qst_upf']

    if  0 <= value < 2:
        return 1
    elif value < 4:
        return 2
    elif value < 6:
        return 3
    elif value < 8:
        return 4
    elif value <= 10:
        return 5

    return np.nan # questionario não respondido

df['upf_severity'] = df.apply(classify_upf, axis=1)


def count_imc(row):
    height = row['altura']
    weight = row['peso']

    imc = weight/np.square(height/100)   # Peso está em kg e altura em cm, no calculo do imc é kg/m^2
    return imc

df['imc'] = df.apply(count_imc, axis=1)


def classify_imc(row):
    value = row['imc']

    if  0 <= value < 18.5:
        return 1  # Baixo peso
    elif value < 24.9:
        return 2 # Eutrofia/peso adequado
    elif value < 29.9:
        return 3   # Sobrepeso
    elif value >= 30:
        return 4  # Obesidade

    return np.nan # questionario não respondido

df['class_imc'] = df.apply(classify_imc, axis=1)


def classify_idade(row):
    value = row['idade']
    if  18 <= value <= 29:
        return 1
    elif value <= 39:
        return 2
    elif value <= 49:
        return 3
    elif value <= 59:
        return 4

    return np.nan

df['class_idade'] = df.apply(classify_idade, axis=1)


# Essa variável foi criada a partir de um índice utilizado pela OMS. Na literatura, usam a pergunta 'P034' relacionada à prática de exercícios físicos nos ultimos 3 meses
df_activity = pd.DataFrame()
df_activity['minutes_per_week'] = df['P03702'] * df['P035']

vigorous_activities = [3.0, 4.0, 5.0, 6.0, 12.0, 13.0, 15.0]
moderated_activities = [1.0, 2.0, 7.0, 8.0, 9.0, 10.0, 11.0, 14.0, 16.0, 17.0]
df_activity['time_vigorous'] = df_activity['minutes_per_week'].loc[df['P036'].isin(vigorous_activities)]
df_activity['time_moderated'] = df_activity['minutes_per_week'].loc[df['P036'].isin(moderated_activities)]

df_activity['activity_class'] = 0
df_activity.loc[df_activity['minutes_per_week'].isna(), 'activity_class'] = np.nan
df_activity.loc[(df_activity['time_vigorous'] >= 75) | (df_activity['time_moderated'] >= 150), 'activity_class'] = 1

df['activity_class'] = df_activity['activity_class']
df['exerc_fisico'] = np.where(df['P034'].isin([1.0]), 1, np.where(df['P034'] == 2.0, 0, np.nan))
df['tabagismo'] = np.where(df['P050'].isin([1.0, 2.0]), 1, np.where(df['P050'] == 3.0, 0, np.nan))

# Na literatura eles usam outra variável para analisar o consumo alcoolico
# df['cons_alcool'] = np.where(df['P02801'] >= 1, 1, 0)
# df['cons_alcool'] = np.where(df['P02801'].isna(), np.nan, df['cons_alcool'])

# Olhar artigo 'Food consumption and depression among Brazilian adults: results from the Brazilian National Health Survey, 2013'
df['cons_alcool'] = np.where(df['P027'].isin([2.0, 3.0]), 1, np.where(df['P027'] == 1.0, 0, np.nan))

df = df[df['P005'] != 1.0] #Retirando as grávidas da análise

df['obesidade'] = np.where(df['class_imc'].isna(), np.nan, np.where(df['class_imc'] >= 4, 1, 0))

# Limitação da análise por idades que se adequam ao PHQ9 baseado no artigo 'Food consumption and depression among Brazilian adults: results from the Brazilian National Health Survey, 2013'
df = df[(df['idade'] >= 18) & (df['idade'] <= 59)]


regiao_mapping = {
    'r_norte': [11, 12, 13, 14, 15, 16, 17],
    'r_nordeste': [21, 22, 23, 24, 25, 26, 27, 28, 29],
    'r_sudeste': [31, 32, 33, 35],
    'r_sul': [41, 42, 43],
    'r_centro_oeste': [50, 51, 52, 53]
}

for regiao, estados in regiao_mapping.items():
    df[regiao] = df['uf'].apply(lambda x: int(x in estados))


df['class_renda'] = np.where(df['renda_pc'].isin([4.0, 5.0, 6.0, 7.0]), 1, np.where(df['renda_pc'].isin([1.0, 2.0, 3.0]), 0, np.nan))





df_total = pd.DataFrame()

df_total['exerc_fisico'] = df['exerc_fisico']
df_total['tabagismo'] = df['tabagismo']
df_total['cons_alcool'] = df['cons_alcool']
df_total['cancer'] = df['Q120']
df_total['hipertensao'] = df['Q00201']
df_total['diabetes'] = df['Q03001']
df_total['cardiovascular'] = df['Q06306']
df_total['hipercolesterolemia'] = df['Q060']
df_total['avc'] = df['Q068']
df_total['artrite'] = df['Q079']
df_total['obesidade'] = df['obesidade']
df_total['depression'] = df['depression']
df_total['sexo'] = df['sexo']
df_total['estado_civil'] = df['vive_conjugue']
df_total = df_total.replace(2.0, 0.0)
df_total['class_renda'] = df['class_renda']

df_total['r_norte'] = df['r_norte']
df_total['r_nordeste'] = df['r_nordeste']
df_total['r_sudeste'] = df['r_sudeste']
df_total['r_sul'] = df['r_sul']
df_total['r_centro_oeste'] = df['r_centro_oeste']

df_total['escolaridade'] = df['escolaridade_agregada']
df_total['upf'] = df['qst_upf']
df_total['class_idade'] = df['class_idade']

df_total['perc_saude'] = df['N00101'] # Variável não utilizada na literatura, meio duvidosa

df_train_total, df_test_total = train_test_split(df_total, test_size=0.3, random_state=42)

df_total_t = copy.deepcopy(df_total)

print("Número de dados totais: ", len(df_total_t))
print("Número de dados filtrados: ", len(df_total_t.dropna()))
print("Porcentagem de dados faltantes: {:.2f}".format(100*(1 - (len(df_total_t.dropna()) / len(df_total_t)))))

print()

df_total_t = copy.deepcopy(df_train_total)
print("Número de dados totais (treino): ", len(df_total_t))
print("Número de dados filtrados (treino): ", len(df_total_t.dropna()))
print("Porcentagem de dados faltantes (treino): {:.2f}".format(100*(1 - (len(df_total_t.dropna()) / len(df_total_t)))))

print()

df_total_t = copy.deepcopy(df_test_total)
print("Número de dados totais (teste): ", len(df_total_t))
print("Número de dados filtrados (teste): ", len(df_total_t.dropna()))
print("Porcentagem de dados faltantes (teste): {:.2f}".format(100*(1 - (len(df_total_t.dropna()) / len(df_total_t)))))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_innatura['qst_innatura'] = (df_innatura == 1.0).sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_upf['qst_upf'] = (df_upf == 1.0).sum(axis=1)


Número de dados totais:  64664
Número de dados filtrados:  49843
Porcentagem de dados faltantes: 22.92

Número de dados totais (treino):  45264
Número de dados filtrados (treino):  34932
Porcentagem de dados faltantes (treino): 22.83

Número de dados totais (teste):  19400
Número de dados filtrados (teste):  14911
Porcentagem de dados faltantes (teste): 23.14


In [8]:
df_total['sexo'].value_counts()

sexo
0.0    33439
1.0    31225
Name: count, dtype: int64

In [25]:
selected = pd.DataFrame()
selected['sexo'] = df_total['sexo']
selected['depression'] = df['depression']
selected = selected.dropna()
contingency_table = pd.crosstab(selected['sexo'], selected['depression'])
chi2, p_value, _, _ = chi2_contingency(contingency_table, correction=False)
contingency_table = round(pd.crosstab(selected['sexo'], selected['depression'], normalize='columns') * 100, 3)
contingency_table['n'] = selected.groupby('sexo')['depression'].count()
contingency_table['p_value'] = p_value


# Print the contingency table
display(contingency_table)

depression,0.0,1.0,n,p_value
sexo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,49.083,73.905,33439,0.0
1.0,50.917,26.095,31225,0.0


In [5]:
from scipy.stats import chi2_contingency
from IPython.display import display_markdown

In [42]:
def gen_chi2_statistics(df_i, columns_dict, key_dep_d='depression'):
    selected_g = copy.deepcopy(df_i)
    data = []

    for c in columns_dict:

        # display(Markdown("### {}".format(c['name'])))

        display_markdown('''### {}'''.format(c['name']), raw=True)

        selected = copy.deepcopy(selected_g)

        key_dep = key_dep_d
        if 'dep' in c:
            key_dep = c['dep']

        selected = selected.dropna(subset=[c['key'], key_dep])
        contingency_table = pd.crosstab(selected[c['key']], selected[key_dep])
        chi2, p_value, dof, _ = chi2_contingency(contingency_table, correction=False)
        c1 = selected[c['key']]
        c2 = selected[key_dep]
        selected_min = pd.DataFrame({c['key']: c1, key_dep: c2})
        # Para printar
        contingency_table = pd.crosstab(selected_min[c['key']], selected_min[key_dep], normalize=False)
        #contingency_table_n = pd.crosstab(selected[c['key']], selected[key_dep], normalize='index')
        contingency_table_c = round(pd.crosstab(selected_min[c['key']], selected_min[key_dep], normalize='columns') * 100, 3)
        contingency_table_c['n'] = selected_min.groupby(c['key'])[key_dep].count()
        contingency_table_c['p_value'] = round(p_value, 4)
        display(contingency_table)
        #print("Normalizado pelas linhas: ")
        #display(contingency_table_n * 100)

        print()
        #print("Normalizado pelas colunas: ")
        display(contingency_table_c)

        #print("chi2: {}, p_value: {}, dof: {}".format(chi2, p_value, dof))


        print("-------------------------------------------------------------")
        print()

        data.append([c['key'], key_dep, c['name'], chi2, p_value, dof])

    return pd.DataFrame(data, columns=['variavel1', 'variavel2', 'titulo', 'chi2', 'p_value', 'dof'])


selected_columns = [ {'key': c, 'name': c} for c in df_total.columns]
df_chi = gen_chi2_statistics(df_total, selected_columns)
df_chi.to_excel(PATH_GEN_DATA / 'chi2_variables.xlsx')

### exerc_fisico

depression,0.0,1.0
exerc_fisico,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,32263,4451
1.0,25553,2397





depression,0.0,1.0,n,p_value
exerc_fisico,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,55.803,64.997,36714,0.0
1.0,44.197,35.003,27950,0.0


-------------------------------------------------------------



### tabagismo

depression,0.0,1.0
tabagismo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,50525,5644
1.0,7291,1204





depression,0.0,1.0,n,p_value
tabagismo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,87.389,82.418,56169,0.0
1.0,12.611,17.582,8495,0.0


-------------------------------------------------------------



### cons_alcool

depression,0.0,1.0
cons_alcool,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,31537,4104
1.0,26279,2744





depression,0.0,1.0,n,p_value
cons_alcool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,54.547,59.93,35641,0.0
1.0,45.453,40.07,29023,0.0


-------------------------------------------------------------



### cancer

depression,0.0,1.0
cancer,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,57134,6648
1.0,682,200





depression,0.0,1.0,n,p_value
cancer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,98.82,97.079,63782,0.0
1.0,1.18,2.921,882,0.0


-------------------------------------------------------------



### hipertensao

depression,0.0,1.0
hipertensao,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,47122,4821
1.0,9270,1956





depression,0.0,1.0,n,p_value
hipertensao,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,83.561,71.138,51943,0.0
1.0,16.439,28.862,11226,0.0


-------------------------------------------------------------



### diabetes

depression,0.0,1.0
diabetes,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,50366,5889
1.0,2393,606





depression,0.0,1.0,n,p_value
diabetes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,95.464,90.67,56255,0.0
1.0,4.536,9.33,2999,0.0


-------------------------------------------------------------



### cardiovascular

depression,0.0,1.0
cardiovascular,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,56385,6303
1.0,1431,545





depression,0.0,1.0,n,p_value
cardiovascular,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,97.525,92.041,62688,0.0
1.0,2.475,7.959,1976,0.0


-------------------------------------------------------------



### hipercolesterolemia

depression,0.0,1.0
hipercolesterolemia,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,45843,5003
1.0,6047,1402





depression,0.0,1.0,n,p_value
hipercolesterolemia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,88.347,78.111,50846,0.0
1.0,11.653,21.889,7449,0.0


-------------------------------------------------------------



### avc

depression,0.0,1.0
avc,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,57341,6627
1.0,475,221





depression,0.0,1.0,n,p_value
avc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,99.178,96.773,63968,0.0
1.0,0.822,3.227,696,0.0


-------------------------------------------------------------



### artrite

depression,0.0,1.0
artrite,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,55536,5977
1.0,2280,871





depression,0.0,1.0,n,p_value
artrite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,96.056,87.281,61513,0.0
1.0,3.944,12.719,3151,0.0


-------------------------------------------------------------



### obesidade

depression,0.0,1.0
obesidade,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,45888,4970
1.0,11640,1821





depression,0.0,1.0,n,p_value
obesidade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,79.766,73.185,50858,0.0
1.0,20.234,26.815,13461,0.0


-------------------------------------------------------------



### depression

depression,0.0,1.0
depression,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,57816,0
1.0,0,6848





depression,0.0,1.0,n,p_value
depression,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,100.0,0.0,57816,0.0
1.0,0.0,100.0,6848,0.0


-------------------------------------------------------------



### sexo

depression,0.0,1.0
sexo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,28378,5061
1.0,29438,1787





depression,0.0,1.0,n,p_value
sexo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,49.083,73.905,33439,0.0
1.0,50.917,26.095,31225,0.0


-------------------------------------------------------------



### estado_civil

depression,0.0,1.0
estado_civil,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,22556,3383
1.0,35260,3465





depression,0.0,1.0,n,p_value
estado_civil,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,39.013,49.401,25939,0.0
1.0,60.987,50.599,38725,0.0


-------------------------------------------------------------



### class_renda

depression,0.0,1.0
class_renda,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,32900,4412
1.0,24898,2436





depression,0.0,1.0,n,p_value
class_renda,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,56.922,64.428,37312,0.0
1.0,43.078,35.572,27334,0.0


-------------------------------------------------------------



### r_norte

depression,0.0,1.0
r_norte,Unnamed: 1_level_1,Unnamed: 2_level_1
0,45808,5652
1,12008,1196





depression,0.0,1.0,n,p_value
r_norte,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,79.231,82.535,51460,0.0
1,20.769,17.465,13204,0.0


-------------------------------------------------------------



### r_nordeste

depression,0.0,1.0
r_nordeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,37739,4355
1,20077,2493





depression,0.0,1.0,n,p_value
r_nordeste,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,65.274,63.595,42094,0.0058
1,34.726,36.405,22570,0.0058


-------------------------------------------------------------



### r_sudeste

depression,0.0,1.0
r_sudeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,45973,5308
1,11843,1540





depression,0.0,1.0,n,p_value
r_sudeste,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,79.516,77.512,51281,0.0001
1,20.484,22.488,13383,0.0001


-------------------------------------------------------------



### r_sul

depression,0.0,1.0
r_sul,Unnamed: 1_level_1,Unnamed: 2_level_1
0,50750,6074
1,7066,774





depression,0.0,1.0,n,p_value
r_sul,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,87.778,88.697,56824,0.0276
1,12.222,11.303,7840,0.0276


-------------------------------------------------------------



### r_centro_oeste

depression,0.0,1.0
r_centro_oeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,50994,6003
1,6822,845





depression,0.0,1.0,n,p_value
r_centro_oeste,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,88.2,87.661,56997,0.1913
1,11.8,12.339,7667,0.1913


-------------------------------------------------------------



### escolaridade

depression,0.0,1.0
escolaridade,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,18260,2392
2.0,20809,2278
3.0,8755,1002
4.0,2815,309





depression,0.0,1.0,n,p_value
escolaridade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,36.059,39.993,20652,0.0
2.0,41.093,38.087,23087,0.0
3.0,17.289,16.753,9757,0.0
4.0,5.559,5.166,3124,0.0


-------------------------------------------------------------



### upf

depression,0.0,1.0
upf,Unnamed: 1_level_1,Unnamed: 2_level_1
0,9318,1038
1,11888,1377
2,11988,1382
3,9873,1183
4,6876,824
5,3936,499
6,2019,262
7,1034,148
8,463,77
9,251,40





depression,0.0,1.0,n,p_value
upf,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,16.117,15.158,10356,0.0056
1,20.562,20.108,13265,0.0056
2,20.735,20.181,13370,0.0056
3,17.077,17.275,11056,0.0056
4,11.893,12.033,7700,0.0056
5,6.808,7.287,4435,0.0056
6,3.492,3.826,2281,0.0056
7,1.788,2.161,1182,0.0056
8,0.801,1.124,540,0.0056
9,0.434,0.584,291,0.0056


-------------------------------------------------------------



### class_idade

depression,0.0,1.0
class_idade,Unnamed: 1_level_1,Unnamed: 2_level_1
1,13464,1435
2,16063,1692
3,14600,1853
4,13689,1868





depression,0.0,1.0,n,p_value
class_idade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,23.288,20.955,14899,0.0
2,27.783,24.708,17755,0.0
3,25.253,27.059,16453,0.0
4,23.677,27.278,15557,0.0


-------------------------------------------------------------



### perc_saude

depression,0.0,1.0
perc_saude,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,10622,361
2.0,36313,2473
3.0,9819,2756
4.0,923,992
5.0,139,266





depression,0.0,1.0,n,p_value
perc_saude,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,18.372,5.272,10983,0.0
2.0,62.808,36.113,38786,0.0
3.0,16.983,40.245,12575,0.0
4.0,1.596,14.486,1915,0.0
5.0,0.24,3.884,405,0.0


-------------------------------------------------------------



In [7]:
df_chi

Unnamed: 0,variavel1,variavel2,titulo,chi2,p_value,dof
0,exerc_fisico,depression,exerc_fisico,210.90533,8.716665e-48,1
1,tabagismo,depression,tabagismo,132.591516,1.110669e-30,1
2,cons_alcool,depression,cons_alcool,71.711059,2.4913230000000002e-17,1
3,cancer,depression,cancer,137.937857,7.519344e-32,1
4,hipertensao,depression,hipertensao,639.024747,5.445572e-141,1
5,diabetes,depression,diabetes,276.661533,4.009787e-62,1
6,cardiovascular,depression,cardiovascular,621.453083,3.611749e-137,1
7,hipercolesterolemia,depression,hipercolesterolemia,535.932766,1.4462950000000002e-118,1
8,avc,depression,avc,332.787048,2.375594e-74,1
9,artrite,depression,artrite,1017.190964,3.293074e-223,1
