In [1]:
import numpy as np
from sklearn import model_selection
from sklearn import preprocessing
from sklearn.svm import SVC

In [2]:
# Reading the data
# Each record is one comma-separated line with the fields:
#   'Day of Week', 'Date', 'Time', 'Num Out', 'Num In', 'EventOrNot'
input_file = 'building_event_multiclass.txt'
X = []
with open(input_file, 'r') as f:
    # Iterate the file object directly; readlines() would load every line first
    for line in f:
        # Remove only the line terminator. Slicing with [:-1] would also chop
        # the last character of a final line that has no trailing newline.
        line = line.rstrip('\r\n')
        if not line.strip():
            # A blank line holds no record and would produce a ragged row
            continue
        data = line.split(',')
        # No need for the date (data[1])
        X.append([data[0]] + data[2:])
X = np.array(X)

In [3]:
# Convert string data to numeric data
# The first row decides how each column is handled: all-digit values are copied
# over as numbers, anything else gets its own LabelEncoder. Encoders are
# appended in column order, so the last one belongs to the last encoded column.
label_encoder = []
X_encoded = np.empty(X.shape)
for col, sample in enumerate(X[0]):
    column = X[:, col]
    if not sample.isdigit():
        encoder = preprocessing.LabelEncoder()
        label_encoder.append(encoder)
        column = encoder.fit_transform(column)
    X_encoded[:, col] = column
# The last column becomes the target; every column before it is a feature
X = X_encoded[:, :-1].astype(int)
y = X_encoded[:, -1].astype(int)

In [4]:
# Build the SVM (Support Vector Machine)
# Settings: RBF kernel, probability=True, class_weight='balanced'
params = dict(kernel='rbf', probability=True, class_weight='balanced')
classifier = SVC(**params)
# Kept as the cell's final expression so the notebook displays the estimator
classifier.fit(X, y)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight='balanced', coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [5]:
# Cross validation
# model_selection is imported with the other dependencies in the first cell.
# Score the classifier with cv=3 using the 'accuracy' metric, then report the
# mean score as a percentage rounded to two decimals.
accuracy = model_selection.cross_val_score(classifier, X, y, scoring='accuracy', cv=3)
print("Accuracy of the classifier:  {}%".format(round(100*accuracy.mean(),2)))

Accuracy of the classifier:  58.51%


In [6]:
# Testing with a new datapoint
# Same field order as the training features (date and label omitted)
input_data = ['Tuesday', '12:30:00', '21', '32']
input_data_encoded = [-1] * len(input_data)
# Position in label_encoder; advances once per non-numeric field because the
# encoders were appended in column order for the non-numeric columns
count = 0
for i, item in enumerate(input_data):
    if item.isdigit():
        input_data_encoded[i] = int(item)
    else:
        # transform() is given a one-item list, so take element 0 explicitly:
        # calling int() on the array itself is deprecated in recent NumPy
        input_data_encoded[i] = int(label_encoder[count].transform([item])[0])
        count = count + 1
input_data_encoded = np.array(input_data_encoded)

In [7]:
# Predict and print output for a particular datapoint
# reshape(1, -1) turns the encoded datapoint into a single-row 2-D array
sample = input_data_encoded.reshape(1, -1)
output_class = classifier.predict(sample)
# The last encoder in the list maps the numeric prediction back to its label
decoded = label_encoder[-1].inverse_transform(output_class)
print("Output class: ", decoded)

Output class:  ['eventB']
