In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


# --- Training data ---------------------------------------------------------
# Tab-separated file; the first row is skipped and columns are addressed by
# position: column 0 -> `subject`, column 1 -> `activity`, columns 2+ -> features.
df = pd.read_table('training_data.txt', sep='\t', skiprows=1, header=None)
subject = df.iloc[:, 0]

# Cap every label that is >= 7 at 7. This is done with `clip` on the Series
# rather than by writing into `activity.values` in place: that array aliases
# the DataFrame, so an in-place write either mutates the frame behind the
# scenes or, with pandas Copy-on-Write enabled, fails because the array is
# read-only.
# NOTE(review): presumably this merges several label codes into one class -
# confirm against the dataset description.
activity = df.iloc[:, 1].clip(upper=7)

df = df.iloc[:, 2:]
X = df.values
y = activity.to_numpy()

# Hold out 20% of the rows for validation; fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=123)

# --- Test data -------------------------------------------------------------
# Only column 0 is split off here (no label column is removed), so the feature
# columns start at position 1.
# `df` and `subject` are rebound to the test-file contents from here on.
df = pd.read_table('test_data.txt', sep='\t', skiprows=1, header=None)
subject = df.iloc[:, 0]
df = df.iloc[:, 1:]
X_test = df.values

In [None]:
from sklearn.svm import SVC

# Baseline: an SVC with library-default hyperparameters, trained on the
# training split. `fit` returns the estimator itself, so it can be chained.
clf = SVC().fit(X_train, y_train)

# Mean accuracy on the data the model was fitted on ...
accuracy = clf.score(X_train, y_train)
print(f"Train Data Accuracy: {accuracy}")

# ... and on the held-out validation split. `accuracy` is left holding this
# second value.
accuracy = clf.score(X_val, y_val)
print(f"Validation Data Accuracy: {accuracy}")

Train Data Accuracy: 0.9784323193304362
Validation Data Accuracy: 0.9742599742599742


In [None]:
# Tune the SVC hyperparameters with GridSearchCV

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import warnings

# Suppress all Python warnings from this point on.
warnings.filterwarnings("ignore")

# Candidate hyperparameters, searched as two independent sub-grids:
# an RBF kernel over C x gamma, and a linear kernel over C only.
rbf_grid = {'kernel': ['rbf'], 'gamma': [0.001, 0.0001], 'C': [1, 10, 100, 1000]}
linear_grid = {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}
tuned_parameters = [rbf_grid, linear_grid]

# Exhaustive search over every combination, scored by 5-fold
# cross-validated accuracy on the training split.
clf = GridSearchCV(SVC(), tuned_parameters, cv=5, scoring='accuracy')
clf.fit(X_train, y_train)

# Report the winning combination.
print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()

# Report one line per candidate: mean CV accuracy, twice the standard
# deviation across folds, and the parameter combination.
print("Grid scores on development set:")
print()
cv_results = clf.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
for mean, std, params in zip(means, stds, cv_results['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, 2 * std, params))


Best parameters set found on development set:

{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.923 (+/-0.025) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.748 (+/-0.007) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.975 (+/-0.009) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.924 (+/-0.025) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.984 (+/-0.008) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.975 (+/-0.009) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.985 (+/-0.004) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.982 (+/-0.009) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.982 (+/-0.006) for {'C': 1, 'kernel': 'linear'}
0.981 (+/-0.004) for {'C': 10, 'kernel': 'linear'}
0.978 (+/-0.003) for {'C': 100, 'kernel': 'linear'}
0.978 (+/-0.003) for {'C': 1000, 'kernel': 'linear'}


In [None]:
# Score the fitted search object on both splits up front. With
# GridSearchCV's default refit=True, `score` evaluates the best estimator
# found by the search.
train_score = clf.score(X_train, y_train)
val_score = clf.score(X_val, y_val)

# Report in the same order as before; `accuracy` ends up holding the
# validation score.
accuracy = train_score
print(f"Train Data Accuracy: {accuracy}")
accuracy = val_score
print(f"Validation Data Accuracy: {accuracy}")


Train Data Accuracy: 0.9966199903428296
Validation Data Accuracy: 0.989060489060489
