In [1]:
from __future__ import print_function

import os
import sys

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics as metrics
from IPython.display import display, HTML
from matplotlib import pyplot as plt
from scipy import stats
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

%matplotlib inline
np.set_printoptions(threshold=sys.maxsize)

In [2]:
# Notebook-wide display defaults, set once up front
pd.set_option('display.float_format', '{:.1f}'.format)
sns.set()  # default seaborn look and feel
plt.style.use('ggplot')

# Activity names reused throughout the notebook (e.g. as axis tick labels)
LABELS = [
    '[1]Sitting-on-Bed',
    '[2]Sitting-on-Chair',
    '[3]Lying-Down',
    '[4]Ambulating',
]

In [3]:
def read_data(file_path):
    """Load the headerless sensor CSV at ``file_path`` into a DataFrame.

    The ten columns are named on load and every row that contains at least
    one missing value is discarded.

    Side effect: sets pandas' global ``display.float_format`` option to four
    decimals with a thousands separator.

    Returns the cleaned DataFrame.
    """
    pd.set_option('display.float_format', '{:,.4f}'.format)

    loaded = pd.read_csv(
        file_path,
        header=None,
        names=[
            'experiment-id',
            'time',
            'frontal-acceleration',
            'vertical-acceleration',
            'lateral-acceleration',
            'antenna-id',
            'RSSI',
            'phase',
            'frequency',
            'label',
        ],
    )

    # Incomplete rows are removed so that no NaN reaches the model
    return loaded.dropna(axis=0, how='any')

def convert_to_float(x):
    """Convert ``x`` to a float, falling back to NaN when that is impossible.

    Parameters:
        x: any value accepted by ``float()`` (number, numeric string, ...).

    Returns:
        The float value of ``x``, or ``np.nan`` if ``x`` cannot be converted.
    """
    try:
        # The built-in float is used instead of np.float: np.float was only an
        # alias for it and has been removed from NumPy, where the resulting
        # AttributeError was swallowed and every input silently became NaN.
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures map to NaN; unrelated errors propagate.
        return np.nan
 
def show_basic_dataframe_info(dataframe):
    """Print the number of columns, then the number of rows, of ``dataframe``."""
    row_count, column_count = dataframe.shape[0], dataframe.shape[1]

    print('Number of columns in the dataframe: %i' % column_count)
    # The trailing newline in the format string leaves a blank line afterwards
    print('Number of rows in the dataframe: %i\n' % row_count)
    

# Load data set containing all the data.
# The location can be overridden through the ACTIVITY_DATA_FILE environment
# variable so the notebook is not tied to one machine's directory layout;
# when the variable is unset, the original absolute path is used.
DATA_FILE = os.environ.get(
    'ACTIVITY_DATA_FILE',
    '/Users/leono/Documents/MIEIC/IART/cart_tutorial/tutorials-master/test_file_S1.txt')

df = read_data(DATA_FILE)

FileNotFoundError: [Errno 2] File b'/Users/leono/Documents/MIEIC/IART/cart_tutorial/tutorials-master/test_file_S1.txt' does not exist: b'/Users/leono/Documents/MIEIC/IART/cart_tutorial/tutorials-master/test_file_S1.txt'

In [None]:
# Summarise the frame's dimensions, then preview its first 20 rows
show_basic_dataframe_info(df)
df.head(n=20)

In [None]:
# Human-readable column labels, in file order.
# Intended for labelling the tree when it is printed.
header = [
    "experiment-id",
    "time",
    "frontal-acceleration",
    "vertical-acceleration",
    "lateral-acceleration",
    "antenna-id",
    "RSSI",
    "phase",
    "frequency",
    "label",
]

In [None]:
# Bar chart of how many rows carry each activity label
label_counts = df['label'].value_counts()
label_counts.plot(kind='bar', title='Training Examples by Activity Type')
plt.show()

# List the activity names, one per line
for activity_name in LABELS:
    print(activity_name)

In [None]:
# Feature matrix: the first eight columns ('experiment-id' .. 'phase').
# NOTE(review): the 0:8 slice leaves column 8 ('frequency') out of the
# features — confirm this is intentional and not an off-by-one.
X = df.values[:, 0:8]
# Target vector: the 'label' column (index 9).
Y = df.values[:, 9]

print(Y)

# Split the dataset into train and test parts. With shuffle=False the rows
# are not shuffled before splitting, so the original row order is kept.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=False)

# Report the shapes of both splits. The names must match the upper-case
# Y_train / Y_test bound above; the lower-case spellings were undefined
# and raised NameError.
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

In [None]:
# Depth-limited decision tree split on Gini impurity; the fixed random_state
# keeps repeated runs reproducible.
clf_gini = DecisionTreeClassifier(
    criterion="gini",
    max_depth=3,
    min_samples_leaf=5,
    random_state=100,
)
clf_gini.fit(X_train, Y_train)

In [None]:
# Display the held-out true labels so they can be compared with the predictions
Y_test

In [None]:
# Predict a label for every held-out sample; the bare name on the last line
# displays the resulting array as the cell output.
Y_pred = clf_gini.predict(X_test)
Y_pred

In [None]:
# Test-set accuracy expressed as a percentage
100 * accuracy_score(Y_test, Y_pred)

In [None]:
def show_confusion_matrix(validations, predictions):
    """Draw the confusion matrix of ``predictions`` against ``validations``.

    The matrix is rendered as an annotated seaborn heatmap whose y-axis is
    titled 'True Label' and whose x-axis is titled 'Predicted Label'; both
    axes use the module-level LABELS as tick labels.

    NOTE(review): the tick labels presume the matrix has one row/column per
    entry of LABELS — confirm every class occurs in the evaluated data.
    """
    cm = metrics.confusion_matrix(validations, predictions)

    heatmap_options = dict(
        annot=True,
        fmt='d',
        cmap='coolwarm',
        linewidths=1,
        linecolor='white',
        xticklabels=LABELS,
        yticklabels=LABELS,
    )

    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, **heatmap_options)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

# Y_test and Y_pred are passed to the metrics as-is; no argmax step is applied.
show_confusion_matrix(Y_test, Y_pred)

# Text summary of the classification results on the test set
report = classification_report(Y_test, Y_pred)
print(report)