In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


#### Task 1. Load the kinematics dataset as measured on mobile sensors from the file “run_or_walk.csv.”

In [2]:
# Read the raw sensor log, then print its schema (column dtypes and non-null counts).
data_df = pd.read_csv(filepath_or_buffer="run_or_walk.csv")
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88588 entries, 0 to 88587
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   date            88588 non-null  object 
 1   time            88588 non-null  object 
 2   username        88588 non-null  object 
 3   wrist           88588 non-null  int64  
 4   activity        88588 non-null  int64  
 5   acceleration_x  88588 non-null  float64
 6   acceleration_y  88588 non-null  float64
 7   acceleration_z  88588 non-null  float64
 8   gyro_x          88588 non-null  float64
 9   gyro_y          88588 non-null  float64
 10  gyro_z          88588 non-null  float64
dtypes: float64(6), int64(2), object(3)
memory usage: 6.4+ MB


#### Task 2. List the columns in the dataset.

In [3]:
# Show every column name as a plain Python list.
list(data_df.columns)

['date',
 'time',
 'username',
 'wrist',
 'activity',
 'acceleration_x',
 'acceleration_y',
 'acceleration_z',
 'gyro_x',
 'gyro_y',
 'gyro_z']

#### Task 3. Let the target variable “y” be the activity, and assign all the columns after it to “x”.

In [4]:
# Preview five random rows; the fixed seed keeps the preview identical on every
# Restart & Run All (same seed value as the train/test splits in this notebook).
data_df.sample(n=5, random_state=5)

Unnamed: 0,date,time,username,wrist,activity,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
39063,2017-7-16,15:43:9:796065986,viktor,1,0,-0.2208,-1.2577,-0.2193,1.1632,-0.9508,-1.3319
62696,2017-7-2,19:59:20:40491998,viktor,0,0,0.4621,-0.9777,-0.1684,0.9861,0.879,2.3149
27803,2017-7-16,14:31:45:848020970,viktor,1,0,-0.4736,-1.5287,-0.3163,1.8043,0.3988,-2.0916
49500,2017-7-17,19:58:1:485530972,viktor,1,1,-0.6107,0.7065,-0.0779,-1.1297,-0.8804,-0.5607
31290,2017-7-16,14:50:22:437716007,viktor,1,0,-0.4173,-1.5657,-0.4488,-1.4357,0.0857,1.1656


In [5]:
# Column 4 ("activity") is the target; every column to its right is a predictor.
Y = data_df.iloc[:, 4]
X = data_df.iloc[:, 5:]

In [6]:
# First four predictor rows.
X.head(4)

Unnamed: 0,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z
0,0.265,-0.7814,-0.0076,-0.059,0.0325,-2.9296
1,0.6722,-1.1233,-0.2344,-0.1757,0.0208,0.1269
2,0.4399,-1.4817,0.0722,-0.9105,0.1063,-2.4367
3,0.3031,-0.8125,0.0888,0.1199,-0.4099,-2.9336


In [7]:
# First four target values.
Y.head(4)

0    0
1    0
2    0
3    0
Name: activity, dtype: int64

In [8]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=5,
)
# Display the shape of each partition as a sanity check.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((62011, 6), (26577, 6), (62011,), (26577,))

#### Task 4. Using Scikit-learn, fit a Gaussian Naive Bayes model and observe the accuracy.

In [9]:
# Model 1: Gaussian Naive Bayes trained on all six sensor columns.
# `fit` returns the estimator itself, so construction and training can be chained.
gnb = GaussianNB().fit(x_train, y_train)

# Predicted activity labels for the held-out rows.
y_predict = gnb.predict(x_test)

In [10]:
# Fraction of held-out rows whose predicted activity matches the true label.
gnb_acc = accuracy_score(y_true=y_test, y_pred=y_predict)
print("Model 1 Accuracy: {:0.2f}%.".format(100 * gnb_acc))

Model 1 Accuracy: 95.63%.


In [11]:
# Rows are true classes, columns are predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_predict)

array([[13111,   152],
       [ 1009, 12305]], dtype=int64)

#### Task 5. Generate a classification report using Scikit-learn.

In [12]:
# Per-class precision / recall / F1 for Model 1 on the held-out rows.
print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

           0       0.93      0.99      0.96     13263
           1       0.99      0.92      0.95     13314

    accuracy                           0.96     26577
   macro avg       0.96      0.96      0.96     26577
weighted avg       0.96      0.96      0.96     26577



#### Task 6.1. Repeat the model once using only the acceleration values as predictors.

In [13]:
# Re-list the columns to pick out the acceleration predictors by name.
data_df.columns.tolist()

['date',
 'time',
 'username',
 'wrist',
 'activity',
 'acceleration_x',
 'acceleration_y',
 'acceleration_z',
 'gyro_x',
 'gyro_y',
 'gyro_z']

In [14]:
# Target is unchanged (column 4, "activity"); predictors are the three accelerometer axes only.
y_acceleration = data_df.iloc[:, 4]
x_acceleration = data_df.loc[:, ['acceleration_x', 'acceleration_y', 'acceleration_z']]

In [15]:
# Print the first four predictor rows, a separator line, then the first four targets.
print(
    x_acceleration[0:4],
    "=============================================",
    y_acceleration[0:4],
    sep="\n",
)

   acceleration_x  acceleration_y  acceleration_z
0          0.2650         -0.7814         -0.0076
1          0.6722         -1.1233         -0.2344
2          0.4399         -1.4817          0.0722
3          0.3031         -0.8125          0.0888
0    0
1    0
2    0
3    0
Name: activity, dtype: int64


In [16]:
# Same 70/30 split and seed as Model 1, restricted to the acceleration columns.
x_train_acc, x_test_acc, y_train_acc, y_test_acc = train_test_split(
    x_acceleration,
    y_acceleration,
    test_size=0.3,
    random_state=5,
)
# Display the shape of each partition as a sanity check.
tuple(part.shape for part in (x_train_acc, x_test_acc, y_train_acc, y_test_acc))

((62011, 3), (26577, 3), (62011,), (26577,))

In [17]:
# Acceleration-only Gaussian Naive Bayes model.
# It gets its own name so it does not overwrite `gnb_acc`, which already holds
# the Model 1 accuracy score (a float) computed in an earlier cell.
gnb_accel = GaussianNB()
gnb_accel.fit(x_train_acc, y_train_acc)

# Predicted activity labels for the held-out acceleration rows.
y_pred_acc = gnb_accel.predict(x_test_acc)

In [18]:
# Accuracy of the acceleration-only model on its held-out rows.
gnb_acc_racy = accuracy_score(y_true=y_test_acc, y_pred=y_pred_acc)
print("Accuracy with only acceleration: {:0.2F}%.".format(100 * gnb_acc_racy))

Accuracy with only acceleration: 95.82%.


#### Task 6.2. Repeat the model once using only the gyro values as predictors.

In [19]:
# Re-list the columns to pick out the gyro predictors by name.
[*data_df.columns]

['date',
 'time',
 'username',
 'wrist',
 'activity',
 'acceleration_x',
 'acceleration_y',
 'acceleration_z',
 'gyro_x',
 'gyro_y',
 'gyro_z']

In [20]:
# Target is unchanged (column 4, "activity"); predictors are the three gyro axes only.
y_gyro = data_df.iloc[:, 4]
x_gyro = data_df.loc[:, ['gyro_x', 'gyro_y', 'gyro_z']]

In [21]:
# Same 70/30 split and seed as the other models, restricted to the gyro columns.
x_train_gyro, x_test_gyro, y_train_gyro, y_test_gyro = train_test_split(
    x_gyro,
    y_gyro,
    test_size=0.3,
    random_state=5,
)
# Display the shape of each partition as a sanity check.
tuple(part.shape for part in (x_train_gyro, x_test_gyro, y_train_gyro, y_test_gyro))

((62011, 3), (26577, 3), (62011,), (26577,))

In [22]:
# Gyro-only Gaussian Naive Bayes model.
# `fit` returns the estimator itself, so construction and training can be chained.
gnb_gyro = GaussianNB().fit(x_train_gyro, y_train_gyro)

# Predicted activity labels for the held-out gyro rows.
y_pred_gyro = gnb_gyro.predict(x_test_gyro)

In [23]:
# accuracy_score takes (y_true, y_pred): pass the held-out labels first so this
# call follows the documented signature and matches the other evaluation cells.
# The reported value is unchanged because accuracy is symmetric in its arguments.
gyro_acc = accuracy_score(y_test_gyro, y_pred_gyro)
print("Accuracy with only gyro: {:0.2F}%.".format(gyro_acc*100))

Accuracy with only gyro: 64.61%.


## Observations:
#### Accuracy with both acceleration and gyro readings: 95.63%.
#### Accuracy with only acceleration readings: 95.82%.
#### Accuracy with only gyro readings: 64.61%.

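### We have seen that the gyro (rotation) readings contribute little to predicting whether the activity is walking or running: on their own they reach only 64.61% accuracy. Using only the acceleration readings slightly improved the prediction (95.82% vs. 95.63% with all six features), which is easy to understand, since dropping the weakly informative gyro features plausibly removes noise from the model.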