In [24]:
import numpy as np
from sklearn.svm import SVC
import json
import itertools
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

# Data Loading

In [2]:
# Read both JSON datasets. The `with` blocks close each file even when
# reading or parsing raises, which the manual open()/close() pairs did not.
with open("testing_data.json", "r") as fileObject:
    jsonContent = fileObject.read()
aList = json.loads(jsonContent)

with open("training_data.json", "r") as fileObject:
    jsonContent = fileObject.read()
bList = json.loads(jsonContent)

In [3]:
# Training set: record ids, flattened feature vectors, and label rows.
train_records = bList['training_data']

train_ids = [record['id'] for record in train_records]
# Each record's 'data' is a sequence of sequences; flatten it by one level.
train_data = [list(itertools.chain.from_iterable(record['data'])) for record in train_records]
# One list of label values per record, in the label dict's own key order.
train_labels = [list(record['labels'].values()) for record in train_records]

train_lst = train_data
# Length of the longest training vector; shorter vectors are right-padded with zeros.
train_pad = max(map(len, train_lst))
X_train = np.array([row + [0] * (train_pad - len(row)) for row in train_lst])

In [4]:
# Test set: record ids, flattened feature vectors, and label rows.
test_ids = []
test_data = []
test_labels = []

for record in aList['testing_data']:
    test_ids.append(record['id'])
    # Each record's 'data' is a sequence of sequences; flatten it by one level.
    test_data.append(list(itertools.chain.from_iterable(record['data'])))
    test_labels.append(list(record['labels'].values()))

test_lst = test_data
# Longest test vector; kept for inspection only, since the feature width is
# dictated by the training set (train_pad), not by the test set.
test_pad = len(max(test_lst, key=len))
# X_test must have exactly train_pad columns to match X_train. Shorter rows are
# right-padded with zeros; rows longer than train_pad are truncated (previously
# they were left at full length, producing a ragged / mismatched array).
X_test = np.array([row[:train_pad] + [0] * (train_pad - len(row)) for row in test_lst])

# SVM 

In [15]:
# Class names; position i is used as the name of label column i.
classes = [
    'abdominal',
    'advanced-cad',
    'alcohol-abuse',
    'asp-for-mi',
    'creatinine',
    'dietsupp-2mos',
    'drug-abuse',
    'english',
    'hba1c',
    'keto-1yr',
    'major-diabetes',
    'makes-decisions',
    'mi-6mos',
]

In [16]:
# Empty mapping that will hold one entry per class name.
d = dict()

In [20]:
# Hyper-parameter candidates for an RBF-kernel SVC.
param_grid = dict(
    C=[0.1, 1, 10],
    gamma=['scale', 'auto'],
    kernel=['rbf'],
)

# NOTE(review): `grid` is only constructed here; no visible cell calls grid.fit().
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=3)

# Linear-kernel classifier; this is the estimator the per-class loop fits.
svm = SVC(kernel='linear')

In [21]:
# For each label column: fit the classifier on that column and store its
# predictions for X_test under the matching class name.
# The label matrix is built once here instead of on every loop iteration.
train_label_matrix = np.array(train_labels)

for i in range(len(train_labels[0])):
    # Assumes label column i corresponds to classes[i] -- TODO confirm against the JSON key order.
    y_train = train_label_matrix[:, i]

    # The same `svm` object is fit again for every column.
    svm.fit(X_train, y_train)

    y_pred = svm.predict(X_test)
    d[classes[i]] = y_pred

In [22]:
# Display the dictionary of per-class prediction arrays.
d

{'abdominal': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'advanced-cad': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 'alcohol-abuse': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'asp-for-mi': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

# Performance

In [25]:
# F1 score of the stored predictions, one line per class.
# The test label matrix is built once instead of on every loop iteration.
test_label_matrix = np.array(test_labels)

for i in range(len(train_labels[0])):
    class_ = classes[i]
    y_true = test_label_matrix[:, i]
    y_pred = d[class_]
    # zero_division=0 makes the policy explicit: when a class has no positive
    # true or predicted samples the score is reported as 0.0, without the
    # ill-defined F-score warning.
    f1 = f1_score(y_true, y_pred, zero_division=0)
    print(class_, f1)

abdominal 0.0
advanced-cad 0.676923076923077
alcohol-abuse 0.0
asp-for-mi 0.8831168831168831
creatinine 0.3673469387755102
dietsupp-2mos 0.0
drug-abuse 0.0
english 0.9182389937106918
hba1c 0.0
keto-1yr 0.0
major-diabetes 0.0
makes-decisions 0.9822485207100593
mi-6mos 0.0


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [26]:
# Accuracy of the stored predictions, one line per class.
true_label_matrix = np.array(test_labels)
n_label_columns = len(train_labels[0])

for i in range(n_label_columns):
    class_ = classes[i]
    # Column i of the test labels is compared with the predictions stored for that class.
    y_true, y_pred = true_label_matrix[:, i], d[class_]
    accuracy = accuracy_score(y_true, y_pred)
    print(class_, accuracy)

abdominal 0.6511627906976745
advanced-cad 0.5116279069767442
alcohol-abuse 0.9651162790697675
asp-for-mi 0.7906976744186046
creatinine 0.6395348837209303
dietsupp-2mos 0.4883720930232558
drug-abuse 0.9651162790697675
english 0.8488372093023255
hba1c 0.5930232558139535
keto-1yr 1.0
major-diabetes 0.5
makes-decisions 0.9651162790697675
mi-6mos 0.9069767441860465
