In [8]:
from sklearn.model_selection import train_test_split
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, '../src')

from pathlib import Path
import pandas as pd
from zipfile import ZipFile
import numpy as np
import matplotlib.pyplot as plt
import copy


PNS_DATA = Path('../data/processed/PNS_2019/all.zip').resolve()
zip_pns = ZipFile(PNS_DATA)
df = pd.read_csv(zip_pns.open(zip_pns.namelist()[0]), sep=",", low_memory=False)

PATH_GEN_DATA = Path('generated_data/pns_data_models').resolve()

def process_df(df_i, remove_na=True):
    df_i = copy.deepcopy(df_i)
    if remove_na:
        df_i = df_i.dropna()
    return df_i.drop(['r_norte', 'r_nordeste', 'r_sudeste', 'r_sul', 'r_centro_oeste'], axis=1)



# Target
df['depression'] = np.where(df['phq9_total'].isna(), np.nan, np.where(df['phq9_total'] >= 10, 1, 0))
df = df.dropna(subset=['depression'])
df['depression'].value_counts()



# Alimentos in natura ou minimamente processado
innatura = ['P00601','P00602', 'P00603', 'P00604', 'P00605', 'P00607', 'P00608', 'P00609', 'P00610', 'P00611', 'P00612', 'P00613']
df_innatura = df[innatura]
df_innatura['qst_innatura'] = (df_innatura == 1.0).sum(axis=1)
# Alimentos ultraprocessados
ultraprocessados = ['P00614', 'P00615', 'P00616', 'P00617', 'P00618', 'P00619', 'P00620', 'P00621', 'P00622', 'P00623']
df_upf = df[ultraprocessados]
df_upf['qst_upf'] = (df_upf == 1.0).sum(axis=1)

# A literatura não usa a porcentagem de ultraprocessados do prato como análise. O artigo 'Food consumption and depression among Brazilian adults: results from the Brazilian National Health Survey, 2013'
# Usa como base o número de perguntas respondidas 'Sim' nos alimentos ultraprocessados das ultimas 24hrs
df['per_innatura'] = df_innatura['qst_innatura'] / (df_innatura['qst_innatura'] + df_upf['qst_upf']) * 100
df['per_upf'] = df_upf['qst_upf'] / (df_innatura['qst_innatura'] + df_upf['qst_upf']) * 100
df['qst_upf'] = df_upf['qst_upf'] # Baseado no artigo

def classify_upf(row):
    value = row['qst_upf']

    if  0 <= value < 2:
        return 1
    elif value < 4:
        return 2
    elif value < 6:
        return 3
    elif value < 8:
        return 4
    elif value <= 10:
        return 5

    return np.nan # questionario não respondido

df['upf_severity'] = df.apply(classify_upf, axis=1)


def count_imc(row):
    height = row['altura']
    weight = row['peso']

    imc = weight/np.square(height/100)   # Peso está em kg e altura em cm, no calculo do imc é kg/m^2
    return imc

df['imc'] = df.apply(count_imc, axis=1)


def classify_imc(row):
    value = row['imc']

    if  0 <= value < 18.5:
        return 1  # Baixo peso
    elif value < 24.9:
        return 2 # Eutrofia/peso adequado
    elif value < 29.9:
        return 3   # Sobrepeso
    elif value >= 30:
        return 4  # Obesidade

    return np.nan # questionario não respondido

df['class_imc'] = df.apply(classify_imc, axis=1)


def classify_idade(row):
    value = row['idade']
    if  18 <= value <= 29:
        return 1
    elif value <= 39:
        return 2
    elif value <= 49:
        return 3
    elif value <= 59:
        return 4

    return np.nan

df['class_idade'] = df.apply(classify_idade, axis=1)


# Essa variável foi criada a partir de um índice utilizado pela OMS. Na literatura, usam a pergunta 'P034' relacionada à prática de exercícios físicos nos ultimos 3 meses
df_activity = pd.DataFrame()
df_activity['minutes_per_week'] = df['P03702'] * df['P035']

vigorous_activities = [3.0, 4.0, 5.0, 6.0, 12.0, 13.0, 15.0]
moderated_activities = [1.0, 2.0, 7.0, 8.0, 9.0, 10.0, 11.0, 14.0, 16.0, 17.0]
df_activity['time_vigorous'] = df_activity['minutes_per_week'].loc[df['P036'].isin(vigorous_activities)]
df_activity['time_moderated'] = df_activity['minutes_per_week'].loc[df['P036'].isin(moderated_activities)]

df_activity['activity_class'] = 0
df_activity.loc[df_activity['minutes_per_week'].isna(), 'activity_class'] = np.nan
df_activity.loc[(df_activity['time_vigorous'] >= 75) | (df_activity['time_moderated'] >= 150), 'activity_class'] = 1

df['activity_class'] = df_activity['activity_class']
df['exerc_fisico'] = np.where(df['P034'].isin([1.0]), 1, np.where(df['P034'] == 2.0, 0, np.nan))
df['tabagismo'] = np.where(df['P050'].isin([1.0, 2.0]), 1, np.where(df['P050'] == 3.0, 0, np.nan))

# Na literatura eles usam outra variável para analisar o consumo alcoolico
# df['cons_alcool'] = np.where(df['P02801'] >= 1, 1, 0)
# df['cons_alcool'] = np.where(df['P02801'].isna(), np.nan, df['cons_alcool'])

# Olhar artigo 'Food consumption and depression among Brazilian adults: results from the Brazilian National Health Survey, 2013'
df['cons_alcool'] = np.where(df['P027'].isin([2.0, 3.0]), 1, np.where(df['P027'] == 1.0, 0, np.nan))

df = df[df['P005'] != 1.0] #Retirando as grávidas da análise

df['obesidade'] = np.where(df['class_imc'].isna(), np.nan, np.where(df['class_imc'] >= 4, 1, 0))

# Limitação da análise por idades que se adequam ao PHQ9 baseado no artigo 'Food consumption and depression among Brazilian adults: results from the Brazilian National Health Survey, 2013'
df = df[(df['idade'] >= 18) & (df['idade'] <= 59)]


regiao_mapping = {
    'r_norte': [11, 12, 13, 14, 15, 16, 17],
    'r_nordeste': [21, 22, 23, 24, 25, 26, 27, 28, 29],
    'r_sudeste': [31, 32, 33, 35],
    'r_sul': [41, 42, 43],
    'r_centro_oeste': [50, 51, 52, 53]
}

for regiao, estados in regiao_mapping.items():
    df[regiao] = df['uf'].apply(lambda x: int(x in estados))


df['class_renda'] = np.where(df['renda_pc'].isin([4.0, 5.0, 6.0, 7.0]), 1, np.where(df['renda_pc'].isin([1.0, 2.0, 3.0]), 0, np.nan))





df_total = pd.DataFrame()

df_total['exerc_fisico'] = df['exerc_fisico']
df_total['tabagismo'] = df['tabagismo']
df_total['cons_alcool'] = df['cons_alcool']
df_total['cancer'] = df['Q120']
df_total['hipertensao'] = df['Q00201']
df_total['diabetes'] = df['Q03001']
df_total['cardiovascular'] = df['Q06306']
df_total['hipercolesterolemia'] = df['Q060']
df_total['avc'] = df['Q068']
df_total['artrite'] = df['Q079']
df_total['obesidade'] = df['obesidade']
df_total['depression'] = df['depression']
df_total['sexo'] = df['sexo']
df_total['estado_civil'] = df['vive_conjugue']
df_total = df_total.replace(2.0, 0.0)
df_total['class_renda'] = df['class_renda']

df_total['r_norte'] = df['r_norte']
df_total['r_nordeste'] = df['r_nordeste']
df_total['r_sudeste'] = df['r_sudeste']
df_total['r_sul'] = df['r_sul']
df_total['r_centro_oeste'] = df['r_centro_oeste']

df_total['escolaridade'] = df['escolaridade_agregada']
df_total['upf'] = df['qst_upf']
df_total['class_idade'] = df['class_idade']

df_total['perc_saude'] = df['N00101'] # Variável não utilizada na literatura, meio duvidosa

df_train_total, df_test_total = train_test_split(df_total, test_size=0.3, random_state=42)

df_total_t = copy.deepcopy(df_total)

print("Número de dados totais: ", len(df_total_t))
print("Número de dados filtrados: ", len(df_total_t.dropna()))
print("Porcentagem de dados faltantes: {:.2f}".format(100*(1 - (len(df_total_t.dropna()) / len(df_total_t)))))

print()

df_total_t = copy.deepcopy(df_train_total)
print("Número de dados totais (treino): ", len(df_total_t))
print("Número de dados filtrados (treino): ", len(df_total_t.dropna()))
print("Porcentagem de dados faltantes (treino): {:.2f}".format(100*(1 - (len(df_total_t.dropna()) / len(df_total_t)))))

print()

df_total_t = copy.deepcopy(df_test_total)
print("Número de dados totais (teste): ", len(df_total_t))
print("Número de dados filtrados (teste): ", len(df_total_t.dropna()))
print("Porcentagem de dados faltantes (teste): {:.2f}".format(100*(1 - (len(df_total_t.dropna()) / len(df_total_t)))))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_innatura['qst_innatura'] = (df_innatura == 1.0).sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_upf['qst_upf'] = (df_upf == 1.0).sum(axis=1)


Número de dados totais:  64664
Número de dados filtrados:  49843
Porcentagem de dados faltantes: 22.92

Número de dados totais (treino):  45264
Número de dados filtrados (treino):  34932
Porcentagem de dados faltantes (treino): 22.83

Número de dados totais (teste):  19400
Número de dados filtrados (teste):  14911
Porcentagem de dados faltantes (teste): 23.14


In [10]:
from scipy.stats import chi2_contingency
from IPython.display import display_markdown


def gen_chi2_statistics(df_i, columns_dict, key_dep_d='depression'):
    selected_g = copy.deepcopy(df_i)
    data = []

    for c in columns_dict:

        # display(Markdown("### {}".format(c['name'])))

        display_markdown('''### {}'''.format(c['name']), raw=True)

        selected = copy.deepcopy(selected_g)

        key_dep = key_dep_d
        if 'dep' in c:
            key_dep = c['dep']

        selected = selected.dropna(subset=[c['key'], key_dep])
        contingency_table = pd.crosstab(selected[c['key']], selected[key_dep])
        chi2, p_value, dof, _ = chi2_contingency(contingency_table, correction=False)

        # Para printar
        contingency_table = pd.crosstab(selected[c['key']], selected[key_dep], normalize=False)
        contingency_table_n = pd.crosstab(selected[c['key']], selected[key_dep], normalize='index')
        contingency_table_c = pd.crosstab(selected[c['key']], selected[key_dep], normalize='columns')

        display(contingency_table)
        print("Normalizado pelas linhas: ")
        display(contingency_table_n * 100)

        print()
        print("Normalizado pelas colunas: ")
        display(contingency_table_c * 100)

        print("chi2: {}, p_value: {}, dof: {}".format(chi2, p_value, dof))


        print("-------------------------------------------------------------")
        print()

        data.append([c['key'], key_dep, c['name'], chi2, p_value, dof])

    return pd.DataFrame(data, columns=['variavel1', 'variavel2', 'titulo', 'chi2', 'p_value', 'dof'])


selected_columns = [ {'key': c, 'name': c} for c in df_total.columns]
df_chi = gen_chi2_statistics(df_total, selected_columns)
df_chi.to_excel(PATH_GEN_DATA / 'chi2_variables.xlsx')

### exerc_fisico

depression,0.0,1.0
exerc_fisico,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,32263,4451
1.0,25553,2397


Normalizado pelas linhas: 


depression,0.0,1.0
exerc_fisico,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,87.876559,12.123441
1.0,91.423971,8.576029



Normalizado pelas colunas: 


depression,0.0,1.0
exerc_fisico,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,55.802892,64.997079
1.0,44.197108,35.002921


chi2: 210.9053304888035, p_value: 8.716664502188814e-48, dof: 1
-------------------------------------------------------------



### tabagismo

depression,0.0,1.0
tabagismo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,50525,5644
1.0,7291,1204


Normalizado pelas linhas: 


depression,0.0,1.0
tabagismo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,89.951753,10.048247
1.0,85.826957,14.173043



Normalizado pelas colunas: 


depression,0.0,1.0
tabagismo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,87.389304,82.418224
1.0,12.610696,17.581776


chi2: 132.5915159897349, p_value: 1.110668530662876e-30, dof: 1
-------------------------------------------------------------



### cons_alcool

depression,0.0,1.0
cons_alcool,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,31537,4104
1.0,26279,2744


Normalizado pelas linhas: 


depression,0.0,1.0
cons_alcool,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,88.485172,11.514828
1.0,90.545429,9.454571



Normalizado pelas colunas: 


depression,0.0,1.0
cons_alcool,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,54.547184,59.929907
1.0,45.452816,40.070093


chi2: 71.71105924520018, p_value: 2.4913230942413865e-17, dof: 1
-------------------------------------------------------------



### cancer

depression,0.0,1.0
cancer,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,57134,6648
1.0,682,200


Normalizado pelas linhas: 


depression,0.0,1.0
cancer,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,89.576997,10.423003
1.0,77.324263,22.675737



Normalizado pelas colunas: 


depression,0.0,1.0
cancer,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,98.820396,97.079439
1.0,1.179604,2.920561


chi2: 137.93785667441418, p_value: 7.519344241804209e-32, dof: 1
-------------------------------------------------------------



### hipertensao

depression,0.0,1.0
hipertensao,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,47122,4821
1.0,9270,1956


Normalizado pelas linhas: 


depression,0.0,1.0
hipertensao,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,90.718672,9.281328
1.0,82.576162,17.423838



Normalizado pelas colunas: 


depression,0.0,1.0
hipertensao,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,83.561498,71.137672
1.0,16.438502,28.862328


chi2: 639.0247471818652, p_value: 5.4455718213869e-141, dof: 1
-------------------------------------------------------------



### diabetes

depression,0.0,1.0
diabetes,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,50366,5889
1.0,2393,606


Normalizado pelas linhas: 


depression,0.0,1.0
diabetes,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,89.531597,10.468403
1.0,79.793264,20.206736



Normalizado pelas colunas: 


depression,0.0,1.0
diabetes,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,95.464281,90.669746
1.0,4.535719,9.330254


chi2: 276.661532854021, p_value: 4.009787018083911e-62, dof: 1
-------------------------------------------------------------



### cardiovascular

depression,0.0,1.0
cardiovascular,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,56385,6303
1.0,1431,545


Normalizado pelas linhas: 


depression,0.0,1.0
cardiovascular,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,89.945444,10.054556
1.0,72.419028,27.580972



Normalizado pelas colunas: 


depression,0.0,1.0
cardiovascular,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,97.524907,92.041472
1.0,2.475093,7.958528


chi2: 621.4530827848046, p_value: 3.6117487623674724e-137, dof: 1
-------------------------------------------------------------



### hipercolesterolemia

depression,0.0,1.0
hipercolesterolemia,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,45843,5003
1.0,6047,1402


Normalizado pelas linhas: 


depression,0.0,1.0
hipercolesterolemia,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,90.160485,9.839515
1.0,81.178682,18.821318



Normalizado pelas colunas: 


depression,0.0,1.0
hipercolesterolemia,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,88.346502,78.110851
1.0,11.653498,21.889149


chi2: 535.9327663147611, p_value: 1.4462945804487886e-118, dof: 1
-------------------------------------------------------------



### avc

depression,0.0,1.0
avc,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,57341,6627
1.0,475,221


Normalizado pelas linhas: 


depression,0.0,1.0
avc,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,89.640133,10.359867
1.0,68.247126,31.752874



Normalizado pelas colunas: 


depression,0.0,1.0
avc,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,99.178428,96.77278
1.0,0.821572,3.22722


chi2: 332.78704827743945, p_value: 2.3755935367410357e-74, dof: 1
-------------------------------------------------------------



### artrite

depression,0.0,1.0
artrite,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,55536,5977
1.0,2280,871


Normalizado pelas linhas: 


depression,0.0,1.0
artrite,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,90.283355,9.716645
1.0,72.357982,27.642018



Normalizado pelas colunas: 


depression,0.0,1.0
artrite,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,96.056455,87.280958
1.0,3.943545,12.719042


chi2: 1017.1909642401934, p_value: 3.2930740645958454e-223, dof: 1
-------------------------------------------------------------



### obesidade

depression,0.0,1.0
obesidade,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,45888,4970
1.0,11640,1821


Normalizado pelas linhas: 


depression,0.0,1.0
obesidade,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,90.227693,9.772307
1.0,86.47203,13.52797



Normalizado pelas colunas: 


depression,0.0,1.0
obesidade,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,79.766375,73.185098
1.0,20.233625,26.814902


chi2: 158.97761979927432, p_value: 1.892476258542428e-36, dof: 1
-------------------------------------------------------------



### depression

depression,0.0,1.0
depression,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,57816,0
1.0,0,6848


Normalizado pelas linhas: 


depression,0.0,1.0
depression,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,100.0,0.0
1.0,0.0,100.0



Normalizado pelas colunas: 


depression,0.0,1.0
depression,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,100.0,0.0
1.0,0.0,100.0


chi2: 64664.0, p_value: 0.0, dof: 1
-------------------------------------------------------------



### sexo

depression,0.0,1.0
sexo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,28378,5061
1.0,29438,1787


Normalizado pelas linhas: 


depression,0.0,1.0
sexo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,84.864978,15.135022
1.0,94.277022,5.722978



Normalizado pelas colunas: 


depression,0.0,1.0
sexo,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,49.083299,73.90479
1.0,50.916701,26.09521


chi2: 1510.686545030145, p_value: 0.0, dof: 1
-------------------------------------------------------------



### estado_civil

depression,0.0,1.0
estado_civil,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,22556,3383
1.0,35260,3465


Normalizado pelas linhas: 


depression,0.0,1.0
estado_civil,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,86.957863,13.042137
1.0,91.052292,8.947708



Normalizado pelas colunas: 


depression,0.0,1.0
estado_civil,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,39.013422,49.401285
1.0,60.986578,50.598715


chi2: 275.03131373157163, p_value: 9.086425443829038e-62, dof: 1
-------------------------------------------------------------



### class_renda

depression,0.0,1.0
class_renda,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,32900,4412
1.0,24898,2436


Normalizado pelas linhas: 


depression,0.0,1.0
class_renda,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,88.175386,11.824614
1.0,91.088022,8.911978



Normalizado pelas colunas: 


depression,0.0,1.0
class_renda,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,56.922385,64.42757
1.0,43.077615,35.57243


chi2: 141.31534805594964, p_value: 1.3727352689644443e-32, dof: 1
-------------------------------------------------------------



### r_norte

depression,0.0,1.0
r_norte,Unnamed: 1_level_1,Unnamed: 2_level_1
0,45808,5652
1,12008,1196


Normalizado pelas linhas: 


depression,0.0,1.0
r_norte,Unnamed: 1_level_1,Unnamed: 2_level_1
0,89.016712,10.983288
1,90.942139,9.057861



Normalizado pelas colunas: 


depression,0.0,1.0
r_norte,Unnamed: 1_level_1,Unnamed: 2_level_1
0,79.230663,82.535047
1,20.769337,17.464953


chi2: 41.14149473328134, p_value: 1.4159808292210626e-10, dof: 1
-------------------------------------------------------------



### r_nordeste

depression,0.0,1.0
r_nordeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,37739,4355
1,20077,2493


Normalizado pelas linhas: 


depression,0.0,1.0
r_nordeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,89.654107,10.345893
1,88.954364,11.045636



Normalizado pelas colunas: 


depression,0.0,1.0
r_nordeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,65.274319,63.59521
1,34.725681,36.40479


chi2: 7.597662418084701, p_value: 0.0058444025656387575, dof: 1
-------------------------------------------------------------



### r_sudeste

depression,0.0,1.0
r_sudeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,45973,5308
1,11843,1540


Normalizado pelas linhas: 


depression,0.0,1.0
r_sudeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,89.649188,10.350812
1,88.492864,11.507136



Normalizado pelas colunas: 


depression,0.0,1.0
r_sudeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,79.516051,77.511682
1,20.483949,22.488318


chi2: 14.987169196237247, p_value: 0.00010824467064907862, dof: 1
-------------------------------------------------------------



### r_sul

depression,0.0,1.0
r_sul,Unnamed: 1_level_1,Unnamed: 2_level_1
0,50750,6074
1,7066,774


Normalizado pelas linhas: 


depression,0.0,1.0
r_sul,Unnamed: 1_level_1,Unnamed: 2_level_1
0,89.310855,10.689145
1,90.127551,9.872449



Normalizado pelas colunas: 


depression,0.0,1.0
r_sul,Unnamed: 1_level_1,Unnamed: 2_level_1
0,87.77847,88.69743
1,12.22153,11.30257


chi2: 4.853108204503333, p_value: 0.027596361101835217, dof: 1
-------------------------------------------------------------



### r_centro_oeste

depression,0.0,1.0
r_centro_oeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,50994,6003
1,6822,845


Normalizado pelas linhas: 


depression,0.0,1.0
r_centro_oeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,89.467867,10.532133
1,88.97874,11.02126



Normalizado pelas colunas: 


depression,0.0,1.0
r_centro_oeste,Unnamed: 1_level_1,Unnamed: 2_level_1
0,88.200498,87.660631
1,11.799502,12.339369


chi2: 1.7075401708560198, p_value: 0.19130483203196977, dof: 1
-------------------------------------------------------------



### escolaridade

depression,0.0,1.0
escolaridade,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,18260,2392
2.0,20809,2278
3.0,8755,1002
4.0,2815,309


Normalizado pelas linhas: 


depression,0.0,1.0
escolaridade,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,88.417587,11.582413
2.0,90.132975,9.867025
3.0,89.73045,10.26955
4.0,90.108835,9.891165



Normalizado pelas colunas: 


depression,0.0,1.0
escolaridade,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,36.059164,39.993312
2.0,41.092834,38.087276
3.0,17.289046,16.753051
4.0,5.558957,5.16636


chi2: 36.93528801226495, p_value: 4.748747268297748e-08, dof: 3
-------------------------------------------------------------



### upf

depression,0.0,1.0
upf,Unnamed: 1_level_1,Unnamed: 2_level_1
0,9318,1038
1,11888,1377
2,11988,1382
3,9873,1183
4,6876,824
5,3936,499
6,2019,262
7,1034,148
8,463,77
9,251,40


Normalizado pelas linhas: 


depression,0.0,1.0
upf,Unnamed: 1_level_1,Unnamed: 2_level_1
0,89.976825,10.023175
1,89.619299,10.380701
2,89.663426,10.336574
3,89.299928,10.700072
4,89.298701,10.701299
5,88.748591,11.251409
6,88.51381,11.48619
7,87.478849,12.521151
8,85.740741,14.259259
9,86.254296,13.745704



Normalizado pelas colunas: 


depression,0.0,1.0
upf,Unnamed: 1_level_1,Unnamed: 2_level_1
0,16.116646,15.15771
1,20.561782,20.108061
2,20.734745,20.181075
3,17.076588,17.275117
4,11.892902,12.03271
5,6.807804,7.286799
6,3.492113,3.825935
7,1.788432,2.161215
8,0.800816,1.124416
9,0.434136,0.584112


chi2: 24.859684931385072, p_value: 0.0056179095011363, dof: 10
-------------------------------------------------------------



### class_idade

depression,0.0,1.0
class_idade,Unnamed: 1_level_1,Unnamed: 2_level_1
1,13464,1435
2,16063,1692
3,14600,1853
4,13689,1868


Normalizado pelas linhas: 


depression,0.0,1.0
class_idade,Unnamed: 1_level_1,Unnamed: 2_level_1
1,90.368481,9.631519
2,90.47029,9.52971
3,88.737616,11.262384
4,87.992544,12.007456



Normalizado pelas colunas: 


depression,0.0,1.0
class_idade,Unnamed: 1_level_1,Unnamed: 2_level_1
1,23.287671,20.955023
2,27.782967,24.707944
3,25.252525,27.058995
4,23.676837,27.278037


chi2: 76.40310894875896, p_value: 1.8127917247084137e-16, dof: 3
-------------------------------------------------------------



### perc_saude

depression,0.0,1.0
perc_saude,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,10622,361
2.0,36313,2473
3.0,9819,2756
4.0,923,992
5.0,139,266


Normalizado pelas linhas: 


depression,0.0,1.0
perc_saude,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,96.713102,3.286898
2.0,93.623988,6.376012
3.0,78.083499,21.916501
4.0,48.198433,51.801567
5.0,34.320988,65.679012



Normalizado pelas colunas: 


depression,0.0,1.0
perc_saude,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,18.372077,5.271612
2.0,62.807873,36.112734
3.0,16.983188,40.245327
4.0,1.596444,14.485981
5.0,0.240418,3.884346


chi2: 7782.8561661966405, p_value: 0.0, dof: 4
-------------------------------------------------------------



In [7]:
df_chi

Unnamed: 0,variavel1,variavel2,titulo,chi2,p_value,dof
0,exerc_fisico,depression,exerc_fisico,210.90533,8.716665e-48,1
1,tabagismo,depression,tabagismo,132.591516,1.110669e-30,1
2,cons_alcool,depression,cons_alcool,71.711059,2.4913230000000002e-17,1
3,cancer,depression,cancer,137.937857,7.519344e-32,1
4,hipertensao,depression,hipertensao,639.024747,5.445572e-141,1
5,diabetes,depression,diabetes,276.661533,4.009787e-62,1
6,cardiovascular,depression,cardiovascular,621.453083,3.611749e-137,1
7,hipercolesterolemia,depression,hipercolesterolemia,535.932766,1.4462950000000002e-118,1
8,avc,depression,avc,332.787048,2.375594e-74,1
9,artrite,depression,artrite,1017.190964,3.293074e-223,1
