In [12]:
import os
import csv
import pandas as pd
import numpy as np
import skfeature.function.sparse_learning_based.MCFS as MCFS
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectFromModel, SelectKBest
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from keras.utils import to_categorical
from scipy import sparse

In [13]:
data = pd.read_csv("../processed_data.csv")

In [14]:
data = data.drop("Unnamed: 0", axis = 1)

In [15]:
data.columns

Index(['SEQN', 'CBD120', 'CBD130', 'DPQ010', 'DPQ020', 'DPQ030', 'DPQ040',
       'DPQ050', 'DPQ060', 'DPQ070', 'DPQ080', 'DPQ090', 'DUQ211', 'DUQ240',
       'FSQ012', 'FSQ162', 'HIQ011', 'HIQ031A', 'HIQ031B', 'HIQ031D',
       'HIQ031F', 'HIQ031G', 'HIQ031H', 'HIQ031I', 'OCQ180', 'OCD270',
       'PAD645', 'PAQ655', 'PAD660', 'PAQ665', 'SLD010H', 'SMQ040', 'SMD460',
       'RIAGENDR', 'RIDAGEYR', 'RIDRETH1', 'DMDEDUC3', 'DMDMARTL', 'RIDEXPRG',
       'DMDHHSIZ', 'INDHHIN2', 'INDFMPIR', 'DRQSDIET', 'DRQSDT1', 'BMXWT',
       'BMXBMI', 'SLQ050'],
      dtype='object')

In [16]:
data = data.loc[data["SLQ050"]!= 9,:]

In [17]:
X = data.drop(["SLQ050", "SEQN"], axis = 1)
y = data["SLQ050"]
X.replace("O", 0)

Unnamed: 0,CBD120,CBD130,DPQ010,DPQ020,DPQ030,DPQ040,DPQ050,DPQ060,DPQ070,DPQ080,...,DMDEDUC3,DMDMARTL,RIDEXPRG,DMDHHSIZ,INDHHIN2,INDFMPIR,DRQSDIET,DRQSDT1,BMXWT,BMXBMI
0,"(-2.142, 535.5]","(-1.028, 257.0]",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,4.0,0.0,"(1.5, 3.0]",4.0,"(-0.005, 1.25]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
1,"(-2.142, 535.5]","(-1.028, 257.0]",2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(3.0, 4.5]",0.0,"(1.25, 2.5]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
2,"(-2.142, 535.5]","(-1.028, 257.0]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(1.5, 3.0]",10.0,"(3.75, 5.0]",1.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
3,"(-2.142, 535.5]","(-1.028, 257.0]",2.0,1.0,0.0,3.0,3.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(1.5, 3.0]",15.0,"(3.75, 5.0]",1.0,0.0,"(-0.223, 55.65]","(-0.0829, 20.725]"
4,"(-2.142, 535.5]","(-1.028, 257.0]",3.0,3.0,3.0,3.0,3.0,1.0,2.0,1.0,...,0.0,3.0,0.0,"(-0.006, 1.5]",0.0,"(3.75, 5.0]",2.0,0.0,"(55.65, 111.3]","(41.45, 62.175]"
5,"(-2.142, 535.5]","(-1.028, 257.0]",0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,"(-0.006, 1.5]",10.0,"(3.75, 5.0]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
6,"(-2.142, 535.5]","(-1.028, 257.0]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(3.0, 4.5]",15.0,"(3.75, 5.0]",0.0,0.0,"(-0.223, 55.65]","(-0.0829, 20.725]"
7,"(-2.142, 535.5]","(-1.028, 257.0]",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,"(-0.006, 1.5]",4.0,"(-0.005, 1.25]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
8,"(-2.142, 535.5]","(-1.028, 257.0]",1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,2.0,0.0,"(-0.006, 1.5]",3.0,"(-0.005, 1.25]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
9,"(-2.142, 535.5]","(-1.028, 257.0]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,2.0,"(1.5, 3.0]",15.0,"(3.75, 5.0]",2.0,0.0,"(-0.223, 55.65]","(-0.0829, 20.725]"


In [18]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

In [19]:
for label, original_class in zip(encoded_y, y):
    print('Original Class: ' + str(original_class))
    print('Encoded Label: ' + str(label))
    print('-' * 12)

Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------


Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0


Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0


Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0


Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------


Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------


------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2


------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2


Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0


Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0


Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------


Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------


------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2


------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 1.0
Encoded Label: 1
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2
------------
Original Class: 2.0
Encoded Label: 2


In [20]:
X.head()


Unnamed: 0,CBD120,CBD130,DPQ010,DPQ020,DPQ030,DPQ040,DPQ050,DPQ060,DPQ070,DPQ080,...,DMDEDUC3,DMDMARTL,RIDEXPRG,DMDHHSIZ,INDHHIN2,INDFMPIR,DRQSDIET,DRQSDT1,BMXWT,BMXBMI
0,"(-2.142, 535.5]","(-1.028, 257.0]",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,4.0,0.0,"(1.5, 3.0]",4.0,"(-0.005, 1.25]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
1,"(-2.142, 535.5]","(-1.028, 257.0]",2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(3.0, 4.5]",0.0,"(1.25, 2.5]",2.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
2,"(-2.142, 535.5]","(-1.028, 257.0]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(1.5, 3.0]",10.0,"(3.75, 5.0]",1.0,0.0,"(55.65, 111.3]","(20.725, 41.45]"
3,"(-2.142, 535.5]","(-1.028, 257.0]",2.0,1.0,0.0,3.0,3.0,0.0,0.0,0.0,...,0.0,1.0,0.0,"(1.5, 3.0]",15.0,"(3.75, 5.0]",1.0,0.0,"(-0.223, 55.65]","(-0.0829, 20.725]"
4,"(-2.142, 535.5]","(-1.028, 257.0]",3.0,3.0,3.0,3.0,3.0,1.0,2.0,1.0,...,0.0,3.0,0.0,"(-0.006, 1.5]",0.0,"(3.75, 5.0]",2.0,0.0,"(55.65, 111.3]","(41.45, 62.175]"


In [28]:
for col in X:
    k =label_encoder.fit(X[col])
    X[col] = label_encoder.transform(X[col])
    print(col)

CBD120
CBD130
DPQ010
DPQ020
DPQ030
DPQ040
DPQ050
DPQ060
DPQ070
DPQ080
DPQ090
DUQ211
DUQ240
FSQ012
FSQ162
HIQ011
HIQ031A
HIQ031B
HIQ031D
HIQ031F
HIQ031G
HIQ031H
HIQ031I
OCQ180
OCD270
PAD645
PAQ655
PAD660
PAQ665
SLD010H
SMQ040
SMD460
RIAGENDR
RIDAGEYR
RIDRETH1
DMDEDUC3
DMDMARTL
RIDEXPRG
DMDHHSIZ
INDHHIN2
INDFMPIR
DRQSDIET
DRQSDT1
BMXWT
BMXBMI


In [29]:
X.head()

Unnamed: 0,CBD120,CBD130,DPQ010,DPQ020,DPQ030,DPQ040,DPQ050,DPQ060,DPQ070,DPQ080,...,DMDEDUC3,DMDMARTL,RIDEXPRG,DMDHHSIZ,INDHHIN2,INDFMPIR,DRQSDIET,DRQSDT1,BMXWT,BMXBMI
0,0,0,1,0,0,0,0,0,0,0,...,0,4,0,1,4,0,2,0,3,1
1,0,0,2,0,0,0,0,0,0,0,...,0,1,0,2,0,1,2,0,3,1
2,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,8,3,1,0,3,1
3,0,0,2,1,0,3,3,0,0,0,...,0,1,0,1,12,3,1,0,0,0
4,0,0,3,3,3,3,3,1,2,1,...,0,3,0,0,0,3,2,0,3,2


In [30]:
Xcat = to_categorical(X)

In [33]:
Xsp =sparse.coo_matrix(X )


In [37]:
feat_sel = SelectKBest(k=15)

In [38]:
step1 = feat_sel.fit(X, encoded_y)

In [39]:
step1.get_support(indices=True)

array([ 2,  3,  4,  5,  6,  7,  8,  9, 15, 17, 18, 23, 30, 32, 33],
      dtype=int64)

In [40]:
X = step1.transform(X)

In [41]:
X_train, X_tv, y_train, y_tv = train_test_split(X, encoded_y, train_size=.7, random_state = 42 )



In [42]:
X_test, X_val, y_test, y_val = train_test_split(X_tv, y_tv, train_size = .7, random_state = 42)



In [43]:
model1 = LogisticRegression()

In [44]:
model1 = model1.fit(X_train, y_train)

In [45]:
predict = model1.predict(X_test)

In [46]:
print(model1.score(X_train, y_train))

0.7756410256410257


In [47]:
print(model1.score(X_test, y_test))

0.7842415316642121


In [48]:
print(classification_report(y_test, predict))

             precision    recall  f1-score   support

          0       0.00      0.00      0.00         1
          1       0.63      0.24      0.35       324
          2       0.80      0.96      0.87      1033

avg / total       0.76      0.78      0.75      1358



  'precision', 'predicted', average, warn_for)


In [49]:
model2 = SVC(kernel="rbf")

In [None]:
model2 = model2.fit(X_train, y_train)

In [None]:
predict2 = model2.predict(X_test)

In [None]:
print(classification_report(y_test, predict2))

In [50]:
step1.get_support(indices=True)

array([ 2,  3,  4,  5,  6,  7,  8,  9, 15, 17, 18, 23, 30, 32, 33],
      dtype=int64)

In [58]:
data.iloc[:,30:34]

Unnamed: 0,SLD010H,SMQ040,SMD460,RIAGENDR
0,"(-0.012, 3.0]",3.0,1.0,1
1,"(-0.012, 3.0]",2.0,2.0,1
2,"(6.0, 9.0]",3.0,0.0,1
3,"(-0.012, 3.0]",0.0,0.0,2
4,"(3.0, 6.0]",3.0,0.0,1
5,"(-0.012, 3.0]",0.0,0.0,2
6,"(-0.012, 3.0]",3.0,0.0,1
7,"(3.0, 6.0]",1.0,2.0,2
8,"(-0.012, 3.0]",1.0,1.0,1
9,"(6.0, 9.0]",0.0,0.0,2
