# Workload

## Import dependency

In [1]:
import pandas
import random
import numpy as np
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

## Prepare data

In [2]:
data_raw = pandas.read_csv("../data/workload.csv")
data = np.array(data_raw)

random.shuffle(data)

# 137 = 136 features + 1 label
data_features = data[:,:-1]
data_labels = np.concatenate(data[:,-1:])

for i in xrange(data_features.shape[1]):
    d_min = data_features[:, i].min()
    d_max = data_features[:, i].max()
    if d_min == d_max:
        data_features[:, i] = 1
        continue
    data_features[:, i] -= d_max
    data_features[:, i] /= d_max

data_labels *= 10

data_train = data_features[:480, :]
class_train = data_labels[:480]

data_test = data_features[480:, :]
class_test = data_labels[480:]

## Algorithm score

In [3]:
classifiers = [
    KNeighborsClassifier(4),
    svm.SVC(kernel="linear", C=0.025),
    svm.SVC(),
    DecisionTreeClassifier(max_depth=3),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
    ]
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
         "Random Forest"]

for name, clf in zip(names,classifiers):
    clf.fit(data_train,class_train)
    print name
#     print "train: {}".format(clf.score(data_train, class_train))
#     y_true = class_train
#     y_pred = clf.predict(data_train)
#     print classification_report(y_true, y_pred)
    
    print "test: {}".format(clf.score(data_test, class_test))
    y_true = class_test
    y_pred = clf.predict(data_test)
    print classification_report(y_true, y_pred)
    print '\n'

Nearest Neighbors
test: 0.758333333333
             precision    recall  f1-score   support

        5.0       0.66      1.00      0.79        27
       10.0       0.87      1.00      0.93        26
       15.0       1.00      1.00      1.00        27
       20.0       0.50      0.44      0.47        25
       25.0       0.00      0.00      0.00        15

avg / total       0.67      0.76      0.70       120



Linear SVM
test: 0.408333333333
             precision    recall  f1-score   support

        5.0       0.26      0.85      0.39        27
       10.0       0.00      0.00      0.00        26
       15.0       0.87      0.96      0.91        27
       20.0       0.00      0.00      0.00        25
       25.0       0.00      0.00      0.00        15

avg / total       0.25      0.41      0.29       120



RBF SVM
test: 0.408333333333
             precision    recall  f1-score   support

        5.0       0.26      0.85      0.39        27
       10.0       0.00      0.00      0.0

  'precision', 'predicted', average, warn_for)
