#### Import required Python modules

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.manifold import TSNE

import matplotlib.pyplot as plt

#### Read the data from csv file into DataFrame 

In [2]:
# Load the raw data set.
# low_memory=False makes pandas parse the file in one pass so every column gets
# a single, consistently inferred dtype. The truncated message left in this
# cell's output looks like the tail of a mixed-type DtypeWarning, which is what
# chunked inference emits (NOTE(review): confirm the warning is gone on re-run).
df = pd.read_csv("data.csv", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


#### Understand the data

In [3]:
# Display the dtype pandas inferred for each column.
df.dtypes

user_name                   object
raw_timestamp_part_1         int64
raw_timestamp_part_2         int64
cvtd_timestamp              object
new_window                  object
num_window                   int64
roll_belt                  float64
pitch_belt                 float64
yaw_belt                   float64
total_accel_belt             int64
kurtosis_roll_belt          object
kurtosis_picth_belt         object
kurtosis_yaw_belt           object
skewness_roll_belt          object
skewness_roll_belt.1        object
skewness_yaw_belt           object
max_roll_belt              float64
max_picth_belt             float64
max_yaw_belt                object
min_roll_belt              float64
min_pitch_belt             float64
min_yaw_belt                object
amplitude_roll_belt        float64
amplitude_pitch_belt       float64
amplitude_yaw_belt          object
var_total_accel_belt       float64
avg_roll_belt              float64
stddev_roll_belt           float64
var_roll_belt       

In [4]:
# Display the (rows, columns) dimensions of the loaded frame.
df.shape

(19622, 159)

In [5]:
# Preview the first five rows.
df.head()

Unnamed: 0,user_name,raw_timestamp_part_1,raw_timestamp_part_2,cvtd_timestamp,new_window,num_window,roll_belt,pitch_belt,yaw_belt,total_accel_belt,...,gyros_forearm_x,gyros_forearm_y,gyros_forearm_z,accel_forearm_x,accel_forearm_y,accel_forearm_z,magnet_forearm_x,magnet_forearm_y,magnet_forearm_z,classe
0,carlitos,1323084231,788290,05-12-2011 11:23,no,11,1.41,8.07,-94.4,3,...,0.03,0.0,-0.02,192,203,-215,-17,654.0,476.0,A
1,carlitos,1323084231,808298,05-12-2011 11:23,no,11,1.41,8.07,-94.4,3,...,0.02,0.0,-0.02,192,203,-216,-18,661.0,473.0,A
2,carlitos,1323084231,820366,05-12-2011 11:23,no,11,1.42,8.07,-94.4,3,...,0.03,-0.02,0.0,196,204,-213,-18,658.0,469.0,A
3,carlitos,1323084232,120339,05-12-2011 11:23,no,12,1.48,8.05,-94.4,3,...,0.02,-0.02,0.0,189,206,-214,-16,658.0,469.0,A
4,carlitos,1323084232,196328,05-12-2011 11:23,no,12,1.48,8.07,-94.4,3,...,0.02,0.0,-0.02,189,206,-214,-17,655.0,473.0,A


##### Handling missing values

Count the number of missing elements (NaN) in each column

In [5]:
# Tally the missing (NaN) entries column by column.
counter_nan = pd.isnull(df).sum(axis=0)
counter_nan

user_name                      0
raw_timestamp_part_1           0
raw_timestamp_part_2           0
cvtd_timestamp                 0
new_window                     0
num_window                     0
roll_belt                      0
pitch_belt                     0
yaw_belt                       0
total_accel_belt               0
kurtosis_roll_belt         19216
kurtosis_picth_belt        19216
kurtosis_yaw_belt          19216
skewness_roll_belt         19216
skewness_roll_belt.1       19216
skewness_yaw_belt          19216
max_roll_belt              19216
max_picth_belt             19216
max_yaw_belt               19216
min_roll_belt              19216
min_pitch_belt             19216
min_yaw_belt               19216
amplitude_roll_belt        19216
amplitude_pitch_belt       19216
amplitude_yaw_belt         19216
var_total_accel_belt       19216
avg_roll_belt              19216
stddev_roll_belt           19216
var_roll_belt              19216
avg_pitch_belt             19216
          

In [6]:
# Keep only the entries for columns that have no missing values at all.
counter_without_nan = counter_nan.loc[counter_nan.eq(0)]

In [7]:
# Display the columns whose missing-value count is zero.
counter_without_nan

user_name               0
raw_timestamp_part_1    0
raw_timestamp_part_2    0
cvtd_timestamp          0
new_window              0
num_window              0
roll_belt               0
pitch_belt              0
yaw_belt                0
total_accel_belt        0
gyros_belt_x            0
gyros_belt_y            0
gyros_belt_z            0
accel_belt_x            0
accel_belt_y            0
accel_belt_z            0
magnet_belt_x           0
magnet_belt_y           0
magnet_belt_z           0
roll_arm                0
pitch_arm               0
yaw_arm                 0
total_accel_arm         0
gyros_arm_x             0
gyros_arm_y             0
gyros_arm_z             0
accel_arm_x             0
accel_arm_y             0
accel_arm_z             0
magnet_arm_x            0
magnet_arm_y            0
magnet_arm_z            0
roll_dumbbell           0
pitch_dumbbell          0
yaw_dumbbell            0
total_accel_dumbbell    0
gyros_dumbbell_x        0
gyros_dumbbell_y        0
gyros_dumbbe

In [8]:
# Display the names of the complete columns (the index of the filtered counts).
counter_without_nan.index

Index([u'user_name', u'raw_timestamp_part_1', u'raw_timestamp_part_2',
       u'cvtd_timestamp', u'new_window', u'num_window', u'roll_belt',
       u'pitch_belt', u'yaw_belt', u'total_accel_belt', u'gyros_belt_x',
       u'gyros_belt_y', u'gyros_belt_z', u'accel_belt_x', u'accel_belt_y',
       u'accel_belt_z', u'magnet_belt_x', u'magnet_belt_y', u'magnet_belt_z',
       u'roll_arm', u'pitch_arm', u'yaw_arm', u'total_accel_arm',
       u'gyros_arm_x', u'gyros_arm_y', u'gyros_arm_z', u'accel_arm_x',
       u'accel_arm_y', u'accel_arm_z', u'magnet_arm_x', u'magnet_arm_y',
       u'magnet_arm_z', u'roll_dumbbell', u'pitch_dumbbell', u'yaw_dumbbell',
       u'total_accel_dumbbell', u'gyros_dumbbell_x', u'gyros_dumbbell_y',
       u'gyros_dumbbell_z', u'accel_dumbbell_x', u'accel_dumbbell_y',
       u'accel_dumbbell_z', u'magnet_dumbbell_x', u'magnet_dumbbell_y',
       u'magnet_dumbbell_z', u'roll_forearm', u'pitch_forearm', u'yaw_forearm',
       u'total_accel_forearm', u'gyros_forearm_x'

Remove the columns with missing elements

In [9]:
# Restrict the frame to the columns that contain no missing values.
df = df.loc[:, counter_without_nan.index]

Remove the first 6 columns (user name, timestamps and window identifiers), which contain no discriminative information

In [10]:
# Drop the six leading bookkeeping columns by name instead of by position.
# A positional slice (iloc[:, 6:]) removes six *more* columns every time the
# cell is re-run; dropping by name with errors='ignore' makes the cell
# idempotent while giving the same result on the first run.
non_feature_columns = [
    'user_name', 'raw_timestamp_part_1', 'raw_timestamp_part_2',
    'cvtd_timestamp', 'new_window', 'num_window',
]
df = df.drop(non_feature_columns, axis=1, errors='ignore')

List of columns. The last column is the class label

In [12]:
# Display the remaining column names.
df.columns

Index(['roll_belt', 'pitch_belt', 'yaw_belt', 'total_accel_belt',
       'gyros_belt_x', 'gyros_belt_y', 'gyros_belt_z', 'accel_belt_x',
       'accel_belt_y', 'accel_belt_z', 'magnet_belt_x', 'magnet_belt_y',
       'magnet_belt_z', 'roll_arm', 'pitch_arm', 'yaw_arm', 'total_accel_arm',
       'gyros_arm_x', 'gyros_arm_y', 'gyros_arm_z', 'accel_arm_x',
       'accel_arm_y', 'accel_arm_z', 'magnet_arm_x', 'magnet_arm_y',
       'magnet_arm_z', 'roll_dumbbell', 'pitch_dumbbell', 'yaw_dumbbell',
       'total_accel_dumbbell', 'gyros_dumbbell_x', 'gyros_dumbbell_y',
       'gyros_dumbbell_z', 'accel_dumbbell_x', 'accel_dumbbell_y',
       'accel_dumbbell_z', 'magnet_dumbbell_x', 'magnet_dumbbell_y',
       'magnet_dumbbell_z', 'roll_forearm', 'pitch_forearm', 'yaw_forearm',
       'total_accel_forearm', 'gyros_forearm_x', 'gyros_forearm_y',
       'gyros_forearm_z', 'accel_forearm_x', 'accel_forearm_y',
       'accel_forearm_z', 'magnet_forearm_x', 'magnet_forearm_y',
       'magnet_forea

Extract all independent features (every column except the last) and convert them into a NumPy array

In [11]:
# Feature matrix: every column except the last one, as a NumPy array.
x = df[df.columns[:-1]].values

Standardize the data

In [12]:
# Standardize each feature: learn the per-column statistics from x, then
# apply them to the same data.
standard_scaler = StandardScaler()
standard_scaler.fit(x)
x_std = standard_scaler.transform(x)

Get class labels y 

In [13]:
# Target vector: the values of the last column.
y = df[df.columns[-1]].values

Encode the class labels as integers

In [14]:
# Learn the label -> integer mapping from y, then replace y with its codes.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)

##### Split the data into training set and test set

In [15]:
# Hold out a fraction of the standardized samples as the test set; the fixed
# random_state keeps the split reproducible across runs.
test_percentage = 0.1
x_train, x_test, y_train, y_test = train_test_split(
    x_std,
    y,
    random_state=0,
    test_size=test_percentage,
)

##### t-distributed Stochastic Neighbor Embedding (t-SNE) visualization

In [None]:
# Embed the test samples into two dimensions with t-SNE (seeded for
# repeatability).
tsne = TSNE(random_state=0, n_components=2)
x_test_2d = tsne.fit_transform(x_test)

Scatter-plot the embedded test samples, coloured and marked by class (5 classes)

In [None]:
# Marker shape and colour for each encoded class (0..4).
markers = ('s', 'd', 'o', '^', 'v')
color_map = {0: 'red', 1: 'blue', 2: 'lightgreen', 3: 'purple', 4: 'cyan'}

fig, ax = plt.subplots()
for cl in np.unique(y_test):
    in_class = (y_test == cl)
    ax.scatter(
        x_test_2d[in_class, 0],
        x_test_2d[in_class, 1],
        # Look styles up by the class code itself, not by enumeration position,
        # so a class keeps its colour/marker even if another class is absent
        # from the test split.
        color=color_map[cl],
        marker=markers[cl],
        # Show the original class name in the legend instead of its integer code.
        label=label_encoder.classes_[cl],
    )
ax.set_xlabel('X in t-SNE')
ax.set_ylabel('Y in t-SNE')
ax.legend(loc='upper left')
ax.set_title('t-SNE visualization of test data')
plt.show()

Ref

    https://github.com/llSourcell