In [1]:
# load important modules
import random
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from sklearn.feature_selection import VarianceThreshold, SelectKBest, chi2, f_classif
from sklearn.model_selection import train_test_split

%matplotlib inline

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# load dataset (read_excel already returns a DataFrame, no extra wrapping needed)
data = pd.read_excel('data/sarcopenia.xlsx')
# Genero 1: male, 2: female => change to 0: male, 1: female
# SI: 1, NO: 2 => change to NO: 0, YES: 1
data['Genero'] = data['Genero'].replace({1: 0, 2: 1})

# overview of data
print("Features: ", len(data.columns))
print("\"Misses\" in:")
print(data.loc[:,data.isna().any()].info())
print("Indexes of rows with misses:", data[data.isna().any(axis=1)].index)
#print(data['Marcha'].value_counts(dropna=False).sort_index())
#print(data['Hb'].value_counts(dropna=False).sort_index())
print("Columns:\n",data.columns)
# NOTE: this summary is not rendered, because it is not the last expression of the cell
data.describe()

# HB: meaning of 0? Guess 0 = NaN
# assign the result back instead of calling fillna(inplace=True) on a column
# selection: the in-place form does not update `data` under pandas Copy-on-Write
data['Hb'] = data['Hb'].fillna(0)
# treating NaNs
# Marcha: meaning of 0? If 0 = NaN: 8 NaN can be imputed with mean
# Under that assumption the zeros are missing values too, so they are excluded
# from the mean that is used to impute both the zeros and the real NaNs.
marcha_observed = data['Marcha'].replace({0: np.nan})
data['Marcha'] = marcha_observed.fillna(marcha_observed.mean())

Features:  84
"Misses" in:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Marcha  248 non-null    float64
 1   Hb      249 non-null    float64
dtypes: float64(2)
memory usage: 4.0 KB
None
Indexes of rows with misses: Int64Index([24, 191, 210], dtype='int64')
Columns:
 Index(['Folio', 'Genero', 'Edad', 'Escolaridad', 'Letrado', 'EdoCivil',
       'Cuidador', 'Religiòn', 'Residencia', 'Ocupacion', 'Economìa',
       'Manutencion', 'Visiòn', 'CorreccionVisual', 'Audiciòn',
       'CorreccionAuditiva', 'HAS', 'DMII', 'OA', 'OSTEOP', 'GASTRITIS',
       'DEPRESION', 'CARDIOOLOGIA', 'TNCM', 'HIPOTIROIDISMO', 'HIPERTENSION',
       'CANCER', 'EPOC', 'DISLIPIDEMIA', 'IRC', 'InsfHepatica', 'MED1', 'MED2',
       'MED3', 'MED4', 'MED5', 'MED6', 'MED7', 'MED8', 'MED9', 'MED10',
       'Tabaquismo', 'Alcoholismo', 'Drogas', 'ExpBiomasa', 'MMSE', 'MMSEx',
       'M

In [3]:
# useful functions and declaration of variables which will be used later
# drop_col collects the names of the columns to remove from `data`;
# later cells append to it before calling drop_feat
drop_col = []

# remove the columns named in feats from dataframe df (in place)
def drop_feat(df, feats):
    """Drop every column of ``df`` whose name is listed in ``feats``.

    Names that are not (or no longer) columns of ``df`` are skipped silently.
    ``df`` itself is modified and also returned.
    """
    for name in feats:
        if name not in df.columns:
            continue
        df.drop(columns=name, inplace=True)
    return df

# get name of all binary features from dataframe df
# returns list with names
def get_binary_features(df):
    """Return the names of the columns of ``df`` holding at most two distinct values.

    Missing values are not counted as a value (``value_counts`` skips NaN by
    default), and constant columns are included because the test is ``<= 2``.
    """
    # inspect the dataframe that was passed in, not the notebook-global `data`
    return [feature for feature in df.columns
            if len(df[feature].value_counts()) <= 2]

In [65]:

#### Remarks about encoding and features ####
# Dependencia (1-4): Need of help from someone for living/housing
# Charlson (2-10) : Not in excel, no understanding of meaning
# MM & IMM Meaning?
# FuerzaPrension => not binary, in excel as binary
# Vision described for 1-3, but data show 1-5 (thoughts: encoding from -2 to 2)
# Audicion described for 1-3, but data show 1-5
# GDS (0-5): Meaning/Encoding unclear, not in excel

#### NANs for every feature ####
# Escolaridad : ninguna = 0 : 53 cases
# Letrado : ninguno/no contesto = 3 : 5 cases
# Cuidador : Ninguno = 6 : 38 cases
# Religion : Ninguna = 5 : 5 cases
# Manutencion : otros = 5 : 14 cases
# CorreccionVisual : no utiliza = 0 : 49 cases
# CorreccionAuditiva : no utiliza = 0 : 231 cases
# MED 1 to 10: 0 = none, 8/23/35/63/97/142/179/200/229/241
# Drogas : 157 exdrogadictos!??!
# MMSE : 0 = none? : 7 cases (excel says below 14 DETERIORO GRAVE)
# GDS : not in excel, none = 0 in 60 cases?
# Barthel : encoding of excel does not match encoding in data
# Norton : encoding of excel does not match encoding in data
# Lawton : encoding of excel does not match encoding in data
# MNA : encoding of excel does not match encoding in data
# Hb: none = 0 : 130 cases
# urea: none = 0 : 144 cases
# Creatina: none = 0 : 128 cases
# Albumina: none = 0 : 188 cases
# Glucosa: none = 0 : 121 cases
# Sodio: none = 0 : 154 cases

# Charlson, IMM, MM, GDS, FuerzaPrension

In [4]:
# Folio = ID = index+1: no information
if "Folio" not in drop_col: drop_col.append("Folio")

# get all binary features
binary_features = get_binary_features(data)
# recode 2 -> 0 (SI: 1, NO: 2 => NO: 0, YES: 1) for all binary features but the first
# NOTE(review): the [1:] slice presumably skips 'Genero', which was already
# recoded to 0/1 when the data was loaded -- confirm that it is the first entry
data[binary_features[1:]] = data[binary_features[1:]].replace({2:0})

# apply variance threshold for binary
# threshold p*(1-p) with p = 0.95 is the variance of a binary feature that
# has the same value in 95% of the rows
sel_bin = VarianceThreshold(threshold=(0.95*(1-0.95)))
# only the support mask is needed, so fit without transforming
sel_bin.fit(data[binary_features])
mask = sel_bin.get_support()

# schedule every binary feature that the selector rejects for removal
for feature, keep in zip(binary_features, mask):
    if not keep and feature not in drop_col:
        drop_col.append(feature)

# drop_col also holds what was added before (e.g. "Folio"), so it is printed too
print("Binary features with too low variance:\n", drop_col)

# get features which are not binary and have the same value in more than 95% of the rows
for col in data.columns:
    if col in binary_features or col in drop_col:
        continue
    # value_counts sorts by frequency, so the first entry is the count of the most common value
    top_count = data[col].value_counts(dropna=False).iloc[0]
    if top_count > 0.95*data.shape[0]:
        drop_col.append(col)

print("To drop because of low variance or entropy:\n", drop_col)
# categorical features: 
# Escolaridad (0-16), Letrado (1-3), EdoCivil (1-5), Cuidador (1-6), 
# Religion (1-5), Residencia (1-5), Ocupacion(1-4), Economia (1-3),
# Manutencion (1-5), Vision (1-3/1-5), CorreccionVisual (0-5), 
# Audicion (1-3/1-4), CorreccionAuditiva (0-4), MED1, MED2, MED3, MED4,
# MED5, MED6, MED7, MED8, MED9, MED10, Tabaquismo (1-3), Alcoholismo (1-3)
# Drogas (1-3)

# continuous features, which are encoded as categoricals/binary: => delete 2 versions?
# MMSE/MMSEx/MMSEcodif, Barthel/BarthelX/BarthelAR, Norton/NortonX/NortonAlto, Lawton/LawtonX, MNA/MNAx


# LawtonAR: check relation to LawtonX, maybe some errors?
# margins expects a bool; the totals row/column is labelled "All" by default
print(pd.crosstab(data['LawtonX'], data['LawtonAR'], margins=True))
# MNAAR: check relation to MNAx, maybe some errors?
print(pd.crosstab(data['MNAx'], data['MNAAR'], margins=True))
# row indexes of the combinations that look inconsistent in the tables above
lawton_ar0_x3 = data[(data['LawtonAR']==0) & (data['LawtonX'] == 3)].index
lawton_ar1_x5 = data[(data['LawtonAR']==1) & (data['LawtonX']==5)].index
mna_ar0_x3 = data[(data['MNAAR']==0) & (data['MNAx'] == 3)].index
print(lawton_ar0_x3, lawton_ar1_x5, mna_ar0_x3, sep="\n")

Binary features with too low variance:
 ['Folio', 'GASTRITIS', 'CANCER', 'InsfHepatica', 'Ùlceras']
To drop because of low variance or entropy:
 ['Folio', 'GASTRITIS', 'CANCER', 'InsfHepatica', 'Ùlceras', 'Residencia', 'MED10']
LawtonAR    0    1  All
LawtonX                
1           0   31   31
2           0   31   31
3           1   38   39
4          51    0   51
5          96    2   98
All       148  102  250
MNAAR    0    1  All
MNAx                
1      114    0  114
2        0  120  120
3        2   14   16
All    116  134  250
Int64Index([118], dtype='int64')
Int64Index([36, 222], dtype='int64')
Int64Index([28, 214], dtype='int64')


In [66]:
# remove everything collected in drop_col so far
data = drop_feat(data, drop_col)

# pairwise Pearson correlation of the remaining columns
corr = data.corr('pearson')

# collect feature pairs whose correlation counts as very high (|r| from 0.7 to 1.0)
# in medicine/psychology, see
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6107969/
# Only the part below the diagonal is visited, so every pair is listed once;
# the stored value keeps its sign.
very_high_correlation = [
    (corr.columns[row], corr.columns[col], corr.iloc[row, col])
    for row in range(len(corr))
    for col in range(row)
    if np.abs(corr.iloc[row, col]) >= 0.7
]

pairs_table = pd.DataFrame(very_high_correlation, columns=["Feat1", "Feat2", "pearson"])
print(pairs_table)

# show the matrix as heatmap, without the rows/columns that are completely NaN
corr_defined = corr.dropna(how='all').T.dropna(how='all')
corr_defined.style.background_gradient('RdBu', vmax=1, vmin=-1)

# try building model from 3 features by definition

         Feat1       Feat2   pearson
0       Drogas       Folio -0.719860
1        MMSEx        MMSE -0.935184
2    MMSEcodif        MMSE -0.728306
3    MMSEcodif       MMSEx  0.808425
4    BarthelAR    BarthelX  0.822434
5      NortonX      Norton -0.767800
6   NortonAlto      Norton -0.764823
7   NortonAlto     NortonX  0.881799
8      LawtonX      Lawton  0.932580
9     LawtonAR      Lawton -0.821173
10    LawtonAR     LawtonX -0.871083
11        MNAx         MNA -0.872407
12       MNAAR         MNA  0.765359
13       MNAAR        MNAx -0.881624
14         IMM          MM  0.908897
15  Creatinina        Urea  0.801113
16     Glucosa          Hb  0.792030
17     Glucosa        Urea  0.725648
18     Glucosa  Creatinina  0.799406
19       Sodio        Urea  0.727220


Unnamed: 0,Folio,Genero,Edad,Escolaridad,Letrado,EdoCivil,Cuidador,Religiòn,Residencia,Ocupacion,Economìa,Manutencion,Visiòn,CorreccionVisual,Audiciòn,CorreccionAuditiva,HAS,DMII,OA,OSTEOP,GASTRITIS,DEPRESION,CARDIOOLOGIA,TNCM,HIPOTIROIDISMO,HIPERTENSION,EPOC,DISLIPIDEMIA,IRC,MED1,MED2,MED3,MED4,MED5,MED6,MED7,MED8,MED9,MED10,Tabaquismo,Alcoholismo,Drogas,ExpBiomasa,MMSE,MMSEx,MMSEcodif,GDS,Depresion,Barthel,BarthelX,BarthelAR,Caìdas,NùmeroDeCaìdas,Ùlceras,Norton,NortonX,NortonAlto,Lawton,LawtonX,LawtonAR,MNA,MNAx,MNAAR,Charlson,MM,IMM,AnguloF,FuerzaPrension,Pantorrilla,Marcha,Sarcopenia,Demencia,Congiciòn,EVC,Infecciòn,Dolor,Hb,Urea,Creatinina,Albumina,Glucosa,Sodio
Folio,1.0,0.036786,0.217858,0.549191,-0.16688,0.38991,0.396145,-0.178223,-0.01389,0.068119,-0.076542,0.454693,0.534672,0.510702,0.24033,-0.029667,0.462846,-0.066974,-0.613282,0.025434,0.008908,0.026185,-0.015447,0.022496,0.014674,-0.19498,0.05643,-0.064345,-0.065453,-0.332939,-0.08038,0.012431,-0.036305,0.06186,0.052064,0.130044,0.051047,0.055315,0.057251,-0.371918,-0.50385,-0.71986,-0.027476,0.003275,-0.009881,-0.000572,0.020964,-0.263803,0.004208,0.010245,0.032715,-0.00283,0.006121,-0.018832,-0.012183,-0.021741,-0.027768,-0.048054,-0.039851,0.040036,-0.006386,-0.009615,0.006224,-0.02077,0.02577,-0.001033,0.015715,0.021862,-0.026176,-0.021489,-0.058663,0.005515,0.036477,-0.007164,0.034016,-0.011408,0.008761,-0.000498,0.043304,0.035147,0.005496,-0.002279
Genero,0.036786,1.0,0.098943,-0.043074,-0.014258,-0.021758,-0.037562,-0.09493,0.108992,-0.032255,-0.136476,0.011749,0.158603,0.034774,0.068005,-0.097529,0.038318,0.056319,-0.150325,-0.162265,-0.025453,-0.153044,0.070034,-0.051978,0.057862,-0.090269,-0.091481,0.061639,0.198378,-0.009434,0.12311,0.103863,0.069017,0.133691,0.088834,-0.003172,0.067118,0.035472,0.063107,0.26466,0.240444,-0.04877,-0.105645,0.015727,0.065891,-0.024277,-0.005322,-0.013072,-0.122492,-0.095995,-0.162865,-0.041384,-0.100382,0.193251,-0.013777,-0.088216,-0.070034,-0.085568,-0.106002,0.137039,-0.135655,0.081837,-0.045017,-0.01636,-0.456352,-0.339812,-0.162851,-0.52692,-0.043519,-0.131337,-0.00619,-0.061042,-0.049637,-0.010966,-0.09767,-0.109077,-0.120361,-0.030306,-0.084106,0.01281,-0.043499,0.071117
Edad,0.217858,0.098943,1.0,0.032383,-0.037566,0.084889,0.094467,-0.043565,0.064063,-0.256261,0.063686,0.173635,0.159926,0.134206,0.264636,-0.022515,0.186309,0.124721,-0.09784,-0.040781,0.005193,0.00469,0.097524,-0.129094,-0.111452,-0.050768,-0.011706,0.029906,0.001109,-0.001893,0.004486,0.070847,-0.060076,0.084554,-0.06888,0.051105,0.007082,-0.011234,-0.067941,-0.182202,-0.108863,-0.192374,-0.189362,-0.235543,0.251781,0.283831,0.016683,-0.072879,-0.150111,-0.196116,-0.1612,-0.161571,0.12689,0.009143,-0.137985,0.133052,0.180863,-0.190507,-0.173625,0.131697,-0.174433,0.169785,-0.147923,0.235304,-0.137982,-0.124971,0.017269,-0.209707,-0.129571,-0.259257,-0.158103,-0.124063,-0.229536,-0.098446,-0.057151,0.058347,-0.014228,0.101121,0.020622,0.006833,-0.024909,0.05018
Escolaridad,0.549191,-0.043074,0.032383,1.0,-0.520196,0.394873,0.35827,-0.092679,-0.107081,0.091957,-0.098346,0.447256,0.545184,0.386357,0.184703,0.018547,0.472179,0.016707,-0.618805,-0.025692,-0.049538,0.116686,0.011794,0.109231,0.103653,-0.079721,0.052749,0.003583,0.02333,-0.104379,-0.001859,-0.009142,-0.036055,-0.040695,0.209933,0.084466,-0.087782,0.058096,-0.022196,-0.370139,-0.365563,-0.576119,0.229794,0.301043,-0.33694,-0.278403,-0.050796,-0.264546,0.133834,0.160668,0.15391,0.095403,-0.170015,0.138326,0.102147,-0.033829,-0.046348,0.148916,0.134823,-0.124924,0.185269,-0.172905,0.186437,-0.183838,0.067576,0.085516,0.092952,0.079916,0.083563,0.144635,0.101684,0.150735,0.315046,0.012211,0.012837,0.007698,0.00816,0.046906,0.040889,0.023208,0.030824,0.03831
Letrado,-0.16688,-0.014258,-0.037566,-0.520196,1.0,-0.142629,-0.150896,0.037232,0.240688,-0.187678,0.077619,-0.211344,-0.19129,-0.117877,-0.080616,-0.112492,-0.184289,-0.140312,0.318984,0.078088,0.06478,-0.088408,-0.042741,-0.14909,-0.110227,0.082247,-0.027599,-0.089045,-0.07459,-0.013948,-0.045332,-0.079946,0.027007,0.012052,-0.041634,0.021918,0.096576,-0.044479,-0.036314,0.090984,0.070746,0.226027,-0.173199,-0.364851,0.464061,0.333645,-0.000254,0.134015,-0.080464,-0.108156,-0.071414,-0.01289,0.134937,-0.219368,-0.140681,0.077877,0.071388,-0.140927,-0.153029,0.1663,-0.074499,-0.000118,-0.029668,0.165619,-0.071125,-0.12081,-0.091291,-0.04682,-0.144198,0.000522,-0.167604,-0.156878,-0.269819,-0.024078,-0.054109,0.062691,0.03112,0.015675,-0.047888,0.005786,-0.001914,-0.064703
EdoCivil,0.38991,-0.021758,0.084889,0.394873,-0.142629,1.0,0.163046,-0.031278,-0.043348,0.17336,0.002282,0.221686,0.298632,0.260011,-0.016452,-0.147882,0.274839,-0.014604,-0.416731,0.078452,-0.034406,0.001942,0.067016,0.034936,0.006609,-0.11287,0.019237,-0.065926,-0.154023,-0.28162,-0.095549,-0.07728,-0.102438,-0.03904,0.033364,0.001788,-0.075754,-0.012614,-0.041201,-0.266114,-0.286107,-0.428001,0.137954,-0.010222,-0.004005,-0.021681,0.025383,-0.136293,0.077385,0.162232,0.213577,-0.001331,-0.025868,-0.095391,0.053314,0.076262,0.06413,0.019406,0.040974,-0.060959,0.112542,-0.076399,0.139737,-0.102434,0.050773,0.014233,0.014928,0.072887,0.130279,0.124942,0.176066,0.03148,0.085406,-0.005428,0.05352,0.034801,-0.03427,-0.049271,0.029208,0.04608,0.029038,0.036343
Cuidador,0.396145,-0.037562,0.094467,0.35827,-0.150896,0.163046,1.0,-0.185135,0.073692,0.183884,0.001709,0.549846,0.357947,0.206222,0.204718,0.047751,0.432849,0.015183,-0.488919,-0.056557,0.060234,0.065849,-0.065206,0.131024,0.19646,-0.075799,-0.044281,-0.059842,0.161518,-0.175279,0.051028,0.031734,0.051858,0.000389,-0.017324,0.143249,-0.063671,0.035756,0.021367,-0.276036,-0.316253,-0.430372,-0.029713,0.082012,-0.085306,-0.070508,0.073649,-0.125452,0.095308,0.029465,-0.015526,0.087802,-0.13111,0.126726,0.044189,0.000735,0.004075,0.152341,0.160792,-0.113246,0.044341,-0.008359,-0.034664,-0.165816,-0.028187,-0.008253,0.012213,0.043668,0.047378,0.077261,0.021973,0.062666,0.086821,-0.071362,0.020098,-0.044504,-0.078269,-0.099885,-0.068531,-0.013599,-0.083599,-0.086317
Religiòn,-0.178223,-0.09493,-0.043565,-0.092679,0.037232,-0.031278,-0.185135,1.0,-0.008588,0.216484,0.094878,-0.234813,-0.214083,-0.140821,-0.099491,0.025181,-0.133495,0.126579,0.190663,-0.075909,-0.177471,0.003345,0.074943,0.044418,0.037501,0.007701,-0.042989,-0.150666,0.156544,0.122843,0.130666,0.056282,0.109543,-0.177908,-0.004996,-0.064701,-0.034704,-0.031268,-0.047697,-0.027102,0.080747,0.116228,-0.021965,0.037399,-0.050652,-0.03794,-0.008317,-0.016063,0.132404,0.011733,0.035446,0.003946,-0.046278,0.057099,0.097562,-0.093053,-0.109447,0.11182,0.094786,-0.062584,0.070424,-0.01189,-0.038139,-0.054592,0.034619,0.062165,0.126599,0.165974,0.041877,0.08214,0.091316,0.07187,0.093224,0.101505,-0.071809,0.01266,0.052581,-0.054379,0.00844,0.022424,0.033528,0.031974
Residencia,-0.01389,0.108992,0.064063,-0.107081,0.240688,-0.043348,0.073692,-0.008588,1.0,-0.128734,0.050749,-0.083861,0.073778,0.023063,-0.004378,-0.043357,-0.043218,-0.006088,0.039138,0.054291,0.024968,0.081886,-0.071679,-0.133529,0.054291,-0.038309,-0.080618,0.018878,0.07514,0.067033,-0.06536,0.012229,0.063581,0.004889,0.033806,0.063827,0.065255,-0.037152,-0.028417,-0.134613,-0.075267,-0.10264,0.058161,-0.009961,0.039623,0.00832,0.031056,0.06562,-0.076803,-0.026235,-0.027525,-0.039277,0.079533,0.027407,-0.101775,0.08976,0.071679,-0.014581,0.003826,-0.040499,-0.06273,0.011279,-0.038922,0.005072,-0.084131,-0.085292,-0.065437,-0.073637,-0.060164,0.053596,-0.16665,0.034497,0.096634,0.001265,0.047209,0.016928,0.004603,0.02127,0.012938,0.04169,-0.008722,0.008712
Ocupacion,0.068119,-0.032255,-0.256261,0.091957,-0.187678,0.17336,0.183884,0.216484,-0.128734,1.0,-0.129469,-0.018722,-0.037843,-0.000354,-0.034038,0.081297,0.036022,0.150209,-0.193988,0.023768,-0.074562,-0.109907,0.018361,0.169496,0.139953,-0.048106,-0.036982,-0.113909,0.065882,-0.181647,0.002718,0.076,0.088727,-0.110124,0.013262,-0.079313,-0.081813,-0.07325,-0.019712,-0.006726,-0.075835,-0.06117,0.029434,0.19159,-0.214376,-0.187703,0.146813,0.002338,0.259761,0.140511,0.142468,0.090045,-0.134269,0.182371,0.267319,-0.146439,-0.169856,0.211987,0.198791,-0.142671,0.0755,-0.083219,0.046238,-0.241449,-0.04867,0.017867,0.003571,0.057031,0.082865,0.315467,0.197853,0.126078,0.153321,0.096658,-0.118568,-0.096681,0.188202,0.031888,0.131357,0.076445,0.138789,0.154468


In [7]:
# the categorical/binary encodings of continuous scores are dropped for now
# (temporary): Check with prof
encoded_duplicates = ['MMSEx', 'MMSEcodif', 'BarthelX', 'BarthelAR', 'NortonX',
                      'NortonAlto', 'LawtonX', 'LawtonAR', 'MNAx', 'MNAAR']
drop_col.extend(encoded_duplicates)

# for every highly correlated pair of which no member is scheduled for removal
# yet, schedule the first member
for first, second, _ in very_high_correlation:
    if first not in drop_col and second not in drop_col:
        drop_col.append(first)

# drop unnecessary features
data = drop_feat(data, drop_col)

In [20]:
# overview nan:


#for feat in data.columns:
#    print(data[feat].

0.053    3
0.100    3
0.140    3
0.150    3
0.160    1
        ..
1.290    2
1.380    1
1.540    3
1.560    2
1.820    2
Name: Marcha, Length: 71, dtype: int64


In [9]:
random.seed(1)

# generate training and testset
X = data.drop("Sarcopenia", axis=1)
y = data['Sarcopenia']
# random.seed() only seeds Python's own generator; train_test_split does not
# use it, so the seed is passed explicitly to make the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [10]:
# get the k best features with chi2 and with f_classif for k = 5, 10, 20
# NOTE: the label "F1" below stands for f_classif (ANOVA F-value), it is not
# the F1 classification score
# NOTE(review): the selectors are fitted on the complete X/y, i.e. including the
# rows of the test split -- confirm this is intended for the exploration only
def _report_k_best(k):
    """Fit a chi2 and an f_classif ``SelectKBest`` on ``X``/``y`` and print the kept names.

    Returns the two fitted selectors as ``(chi2_selector, f_selector)``.
    """
    print(f"Best {k} features with:")
    chi2_selector = SelectKBest(chi2, k=k).fit(X, y)
    print("chi2:", chi2_selector.get_feature_names_out(X.columns))
    f_selector = SelectKBest(f_classif, k=k).fit(X, y)
    print("F1:", f_selector.get_feature_names_out(X.columns))
    return chi2_selector, f_selector


X_chi2_5, X_f1_5 = _report_k_best(5)
X_chi2_10, X_f1_10 = _report_k_best(10)
X_chi2_20, X_f1_20 = _report_k_best(20)

Best 5 features with:
chi2: ['MED3' 'MED4' 'MED5' 'MED9' 'MM']
F1: ['Ocupacion' 'TNCM' 'MM' 'Pantorrilla' 'Demencia']
Best 10 features with:
chi2: ['MED3' 'MED4' 'MED5' 'MED9' 'MM' 'FuerzaPrension' 'Pantorrilla'
 'Demencia' 'Hb' 'Urea']
F1: ['Edad' 'Letrado' 'EdoCivil' 'Ocupacion' 'Audiciòn' 'TNCM' 'MM'
 'Pantorrilla' 'Demencia' 'Congiciòn']
Best 20 features with:
chi2: ['Edad' 'Escolaridad' 'CorreccionVisual' 'CorreccionAuditiva' 'TNCM'
 'MED1' 'MED2' 'MED3' 'MED4' 'MED5' 'MED7' 'MED9' 'NùmeroDeCaìdas'
 'Lawton' 'MM' 'FuerzaPrension' 'Pantorrilla' 'Demencia' 'Hb' 'Urea']
F1: ['Edad' 'Letrado' 'EdoCivil' 'Ocupacion' 'CorreccionVisual' 'Audiciòn'
 'HAS' 'TNCM' 'Depresion' 'NùmeroDeCaìdas' 'Norton' 'Lawton' 'MNA'
 'Charlson' 'MM' 'FuerzaPrension' 'Pantorrilla' 'Demencia' 'Congiciòn'
 'Dolor']


In [11]:
# to do next:
# - hyperparameter tuning for RF, SVC, NN
# - afterwards recursive feature elimination