In [1]:
#DTW calculation

import pandas as pd
import numpy as np
import os
import glob 
import pickle
from math import *
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from pylab import rcParams
from sklearn import metrics
from sklearn.model_selection import train_test_split
from scipy.ndimage import gaussian_filter
from scipy.fftpack import fft
from dtw import dtw
from numpy import array, zeros, argmin, inf, ndim
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import manhattan_distances
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import accuracy_score
# from quatrotate import qv_mult # this routine implements rotation via quaternion multiplication

%matplotlib inline



In [186]:
def find_contiguous_colors(colors):
    # finds the continuous segments of colors and returns those segments
    segs = []
    curr_seg = []
    prev_color = ''
    for c in colors:
        if c == prev_color or prev_color == '':
            curr_seg.append(c)
        else:
            segs.append(curr_seg)
            curr_seg = []
            curr_seg.append(c)
        prev_color = c
    segs.append(curr_seg) # the final one
    return segs
 
def plot_multicolored_lines(x,y,colors):
    segments = find_contiguous_colors(colors)
    plt.figure(figsize=(20,4))
    start= 0
    for seg in segments:
        end = start + len(seg)
        l, = plt.gca().plot(x[start-1:end+1],y[start-1:end+1],lw=2,c=seg[0]) 
        start = end

In [187]:
# Function to map the colors as a list from the input list of x variables
def color_mapping(lst):
    cols=[]
    colors = {'ST':'blue', 'LT':'red','RT':'green'}
    for l in lst:
        cols.append(colors[l])      
    return cols

In [188]:
def GaussianFilter(df, window_length):
    df1 = df
    
    df1['AX'] = gaussian_filter(df['AX'], window_length)
    df1['AY'] = gaussian_filter(df['AY'], window_length)
    df1['AZ'] = gaussian_filter(df['AZ'], window_length)

    df1['GX'] = gaussian_filter(df['GX'], window_length)
    df1['GY'] = gaussian_filter(df['GY'], window_length)
    df1['GZ'] = gaussian_filter(df['GZ'], window_length)
    
    df1['AX1'] = gaussian_filter(df['AX1'], window_length)
    df1['AY1'] = gaussian_filter(df['AY1'], window_length)
    df1['AZ1'] = gaussian_filter(df['AZ1'], window_length)

    df1['GX1'] = gaussian_filter(df['GX1'], window_length)
    df1['GY1'] = gaussian_filter(df['GY1'], window_length)
    df1['GZ1'] = gaussian_filter(df['GZ1'], window_length)
    return df1

In [189]:
def Amplitude(df, input_features, out_feature):
    df[out_feature] = (df[input_features[0]]**2 + df[input_features[1]]**2  + df[input_features[2]]**2)**(1/2)
  
    

In [190]:
def Yaw_Roll_Pitch(df):
    roll  = []
    pitch = []
    yaw   = []
    for i in range(len(df)):
        x = df['AX'][i]
        y = df['AY'][i]
        z = df['AZ'][i]
        roll1 = atan(y/z)*57.3
        pitch1= atan((-x/(y*y + z*z)**(1/2)))*57.3
        yaw1  = atan((z/((x*x + z*z)**(1/2))))*57.3
        
        roll.append(roll1)
        pitch.append(pitch1)
        yaw.append(yaw1)
    
    df['orientX'] = pitch
    df['orientY'] = roll
    df['orientZ'] = yaw

In [191]:
def getQuat(df):

        """ Given 3 orientation angles, compute the quaternion. """

        yaw   = df['orientZ'] / 2. * np.pi / 180
        roll  = df['orientX'] / 2. * np.pi / 180
        pitch = df['orientY'] / 2. * np.pi / 180

        w =  np.cos(roll) * np.cos(pitch) * np.cos(yaw) + \
                np.sin(roll) * np.sin(pitch) * np.sin(yaw)

        x =  np.sin(roll) * np.cos(pitch) * np.cos(yaw) - \
                np.cos(roll) * np.sin(pitch) * np.sin(yaw)

        y =  np.cos(roll) * np.sin(pitch) * np.cos(yaw) + \
                np.sin(roll) * np.cos(pitch) * np.sin(yaw)

        z =  np.cos(roll) * np.cos(pitch) * np.sin(yaw) - \
                np.sin(roll) * np.sin(pitch) * np.cos(yaw)

        return w, x, y, z

In [192]:
"""
Rotate a 3D vector using the axis-angle method (quaternions).
"""

import numpy as np


def normalize(v, tolerance=0.00001):
    mag2 = sum(n * n for n in v)
    if abs(mag2 - 1.0) > tolerance:
        mag = np.sqrt(mag2)
        v = tuple(n / mag for n in v)
    return v

def q_mult(q1, q2):
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2
    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
    z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
    return w, x, y, z

def q_conjugate(q):
    w, x, y, z = q
    return (w, -x, -y, -z)

def qv_mult(q1, v1):
    q2 = (0.0,) + v1
    return q_mult(q_mult(q1, q2), q_conjugate(q1))[1:]

def axisangle_to_q(v, theta):
    v = normalize(v)
    x, y, z = v
    theta /= 2
    w = np.cos(theta)
    x = x * np.sin(theta)
    y = y * np.sin(theta)
    z = z * np.sin(theta)
    return w, x, y, z

def q_to_axisangle(q):
    w, v = q[0], q[1:]
    theta = np.acos(w) * 2.0
    return normalize(v), theta

In [193]:
def get_rotation(quatern, vector):
    rotaedvector = []
    for i in range(vector.shape[0]):
        rotaedvector.append(qv_mult(tuple(quatern[i,:]), tuple(vector[i,:])))
    return np.array(rotaedvector)

In [194]:
def rotate(df):
    features = ['A', 'G']
    quaternion = df[['quaternionW', 'quaternionX',
                     'quaternionY','quaternionZ']].values
    
    for f in features:
        xyzlist = [f + 'X', f + 'Y', f + 'Z']
        xyz = df[xyzlist].values
        xyz_rotated = get_rotation(quaternion, xyz)
        df[f + 'X1'] = xyz_rotated[:,0]
        df[f + 'Y1'] = xyz_rotated[:,1]
        df[f + 'Z1'] = xyz_rotated[:,2]
        

In [195]:
#generate frequency domain data from time domain data usinf Fast Fourier transformation

def FFT(df, input_features, output_features):
    
    for i in range(len(input_features)):
        reals = np.real(np.fft.rfft(df[input_features[i]]))
        imagn = np.imag(np.fft.rfft(df[input_features[i]]))

        complexs = [reals[0]]
        n = len(reals)
        if(n%2 == 0):
            complexs.append(imagn[0])
        for j in range(1, n-1):
            complexs.append(reals[j])
            complexs.append(imagn[j])
        if( len(df) > len(complexs)):
            complexs.append(reals[j])
        if( len(df) > len(complexs)):
            complexs.append(imagn[j])
            
        df[output_features[i]] = complexs
    return df

In [196]:
def segmentation(df, N_TIME_STEPS):
    
    N_FEATURES = 1
    step = 50
    segments = []
    labels = []
    for i in range(0, len(df) - N_TIME_STEPS, step):
        gx = df['GX1'].values[i: i + N_TIME_STEPS]
        gy = df['GY1'].values[i: i + N_TIME_STEPS]
        gz = df['GZ1'].values[i: i + N_TIME_STEPS]
        
        ax = df['AX1'].values[i: i + N_TIME_STEPS]
        ay = df['AY1'].values[i: i + N_TIME_STEPS]
        az = df['AZ1'].values[i: i + N_TIME_STEPS]
        
        fax = df['fAX'].values[i: i + N_TIME_STEPS]
        fay = df['fAY'].values[i: i + N_TIME_STEPS]
        faz = df['fAZ'].values[i: i + N_TIME_STEPS]
        
        ma = df['mAcc'].values[i: i + N_TIME_STEPS]
        mz = df['mGyro'].values[i: i + N_TIME_STEPS]
        label = "ST"
        segments.append([gx, gy, gz, ax, ay, az, ma, mz, fax, fay, faz])
#         labels = stats.mode(df['activity'][i: i + N_TIME_STEPS])[0][0]
        labels.append(label)
    return segments, labels

In [197]:
def DTWDistance(s1, s2, w):
    DTW={}

    w = max(w, abs(len(s1)-len(s2)))

    for i in range(-1,len(s1)):
        for j in range(-1,len(s2)):
            DTW[(i, j)] = float('inf')
    DTW[(-1, -1)] = 0

    for i in range(len(s1)):
        for j in range(max(0, i-w), min(len(s2), i+w)):
            dist= (s1[i]-s2[j])**2
            DTW[(i, j)] = dist + min(DTW[(i-1, j)],DTW[(i, j-1)], DTW[(i-1, j-1)])

    return (DTW[len(s1)-1, len(s2)-1])**(1/2)

In [198]:
def loss_for_segments(segments, comp, feature):
    loss =[]
    labels=[]
    length = len(segments)
    for i in range(length):
        x = segments[i][feature]        
        dist = DTWDistance(x, comp, 50)
#         print(i , '=>', dist)
        loss.append(dist)
    
    return loss

In [199]:
def data_preprocessing(df):
    #mean feature of 3-axis accelerometer data
    input_features = ['AX', 'AY', 'AZ']
    output_feature = 'mAcc'
    Amplitude(df, input_features, output_feature)
    
    #mean feature of 3-axis gyroscope data
    input_features = ['GX', 'GY', 'GZ']
    output_feature = 'mGyro'
    Amplitude(df, input_features, output_feature)
    
    #Re-Orientation of 3-axis acc. and gyro sensor data
    Yaw_Roll_Pitch(df)
    qw, qx, qy, qz = getQuat(df)
    df['quaternionW'] = qw
    df['quaternionX'] = qx
    df['quaternionY'] = qy
    df['quaternionZ'] = qz
    rotate(df)
    #apply gaussian filter with window size 10
    df = GaussianFilter(df, 10)
    
    #Frequency domain feature generation from time series accelerometer data
    input_features = ['AX1', 'AY1', 'AZ1']
    output_feature = ['fAX', 'fAY', 'fAZ']
    df = FFT(df, input_features, output_feature)
    
    #Frequency domain feature generation from time series gyroscope data
    input_features = ['AX1', 'AY1', 'AZ1']
    output_feature = ['fGX', 'fGY', 'fGZ']
    df = FFT(df, input_features, output_feature)
    
    return df

In [200]:
#read csv file and return its data-frame
def read_csv_file(acc, gyro, mode):
    df_gyro = pd.read_csv(gyro, index_col=False)
    df      = pd.read_csv(acc,  index_col=False)
    df.rename(columns = {'X': 'AX', 'Y':'AY', 'Z':'AZ'}, inplace = True)
    df['GX'] = df_gyro['X']
    df['GY'] = df_gyro['Y']
    df['GZ'] = df_gyro['Z']
    
    df['activity'] = mode
    
    return df

In [201]:
def read_files(acc_file, gyro_file, activity):
    df = read_csv_file(acc_file, gyro_file, activity)
    df.reset_index(drop = True , inplace = True)
    if('Milliseconds' in df.columns):
        df = df.drop('Milliseconds', axis=1)
    if('Timestamp' in df.columns):
        df = df.drop('Timestamp', axis=1)

    df = data_preprocessing(df)
    
    return df
    

In [19]:
  
path_acc = os.getcwd() + '/data/Bike2308/*/*AccelerometerLinear.csv'  
path_gyro= os.getcwd() + '/data/Bike2308/*/*Gyroscope.csv' 
acc_files = glob.glob(path_acc)
gyro_files= glob.glob(path_gyro)
bike_df= read_files(acc_files[0], gyro_files[0], 'bike')
n = len(bike_df)
bike_df = bike_df[500:n-500]
for acc_file, gyro_file in zip(acc_files, gyro_files) :
    print(acc_file)
    print(gyro_file)
    df_this = read_files(acc_file, gyro_file, 'bike')
    n = len(df_this)
    print("len of this file ",df_this.shape)
    df_this = df_this[350 : n-500]
    print("len of this file ",df_this.shape)
    bike_df = pd.concat([bike_df, df_this])
    print(bike_df.shape)
bike_df.reset_index(drop = True, inplace = True)
bike_df.head()


/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-01-56/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-01-56/Gyroscope.csv
len of this file  (12548, 28)
len of this file  (11698, 28)
(23246, 28)
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-15-15/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-15-15/Gyroscope.csv
len of this file  (7601, 28)
len of this file  (6751, 28)
(29997, 28)
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-18-07/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-18-07/Gyroscope.csv
len of this file  (14413, 28)
len of this file  (13563, 28)
(43560, 28)
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-00-19/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Bike2308/2018-08-23_16-00-19/Gyroscope.csv
le

Unnamed: 0,AX,AY,AZ,GX,GY,GZ,activity,mAcc,mGyro,orientX,...,AZ1,GX1,GY1,GZ1,fAX,fAY,fAZ,fGX,fGY,fGZ
0,-0.0743,0.456554,-0.182508,-0.036865,0.003397,0.080608,bike,0.426444,0.145873,2.652757,...,-0.035259,-0.046475,0.020892,0.039588,-0.30836,-8.924048,38.414573,-0.30836,-8.924048,38.414573
1,-0.061872,0.457102,-0.17309,-0.033712,0.003523,0.068831,bike,1.002651,0.09754,-35.814092,...,-0.033809,-0.04457,0.020135,0.034127,-41.994016,-49.155563,-74.332197,-41.994016,-49.155563,-74.332197
2,-0.049648,0.457061,-0.163515,-0.030654,0.00406,0.057569,bike,0.689433,0.123884,-45.232244,...,-0.033229,-0.042532,0.019373,0.028992,-6.285454,-87.913096,-25.22401,-6.285454,-87.913096,-25.22401
3,-0.037555,0.456411,-0.15377,-0.027684,0.005004,0.046849,bike,0.856619,0.121199,42.974459,...,-0.033408,-0.040379,0.018621,0.024156,-33.357039,65.253064,-98.960931,-33.357039,65.253064,-98.960931
4,-0.025532,0.455134,-0.143828,-0.024786,0.006355,0.036678,bike,0.936104,0.086623,39.187794,...,-0.034188,-0.038124,0.017893,0.019589,49.216495,-117.272423,-89.515525,49.216495,-117.272423,-89.515525


In [82]:
print(bike_df.shape)
bike_df.columns

(75596, 28)


Index(['AX', 'AY', 'AZ', 'GX', 'GY', 'GZ', 'activity', 'mAcc', 'mGyro',
       'orientX', 'orientY', 'orientZ', 'quaternionW', 'quaternionX',
       'quaternionY', 'quaternionZ', 'AX1', 'AY1', 'AZ1', 'GX1', 'GY1', 'GZ1',
       'fAX', 'fAY', 'fAZ', 'fGX', 'fGY', 'fGZ'],
      dtype='object')

In [83]:
bike_segments, bike_labels = segmentation(bike_df, 400)
np.array(bike_segments).shape



(1504, 11, 400)

In [84]:
def DTWdistance(segments, ref):
    y = segments
    DTW_list = []
    for i in range(len(segments[1])):
        x = ref
        print("calculating loss for feature ", i, "....")
        diff = loss_for_segments(y, x, i)

        DTW_list.append(diff)
    
    return DTW_list  

In [85]:
ref  = [0.0]*400
len(ref)


400

In [86]:
bike_dtw_data = DTWdistance(bike_segments, ref)

calculating loss for feature  0 ....
calculating loss for feature  1 ....
calculating loss for feature  2 ....
calculating loss for feature  3 ....
calculating loss for feature  4 ....
calculating loss for feature  5 ....
calculating loss for feature  6 ....
calculating loss for feature  7 ....
calculating loss for feature  8 ....
calculating loss for feature  9 ....
calculating loss for feature  10 ....


In [157]:
# print(np.array(bike_dtw_data).shape)
# bike_dtw_data = [*zip(*bike_dtw_data)]
# print(np.array(bike_dtw_data).shape)



In [42]:
# df = pd.DataFrame({"name1" :bike_dtw_data[0], "name2" :bike_dtw_data[1],"name3" :bike_dtw_data[2],
#                    "name4" :bike_dtw_data[3], "name5" :bike_dtw_data[4],"name6" :bike_dtw_data[5],})
# df.to_csv("bike_dtw.csv", index=False)

In [87]:
print(np.array(bike_dtw_data).shape)
bike_dtw_data = [*zip(*bike_dtw_data)]
print(np.array(bike_dtw_data).shape)

(11, 1504)
(1504, 11)


In [88]:
import csv
with open("train_bike.csv","w+") as my_csv:
    csvWriter = csv.writer(my_csv,delimiter=',')
    csvWriter.writerows(bike_dtw_data)

In [34]:
# for i in range(6):
#     loss_series_st = pd.Series(dtw_data[i])
#     loss_series_st[0:200].plot(figsize =(20,4))
#     plt.xlabel('Segment numbers')
#     plt.ylabel('Diff. with ref. segm.')


In [35]:
# for i in range(6):
#     loss_series_st = pd.Series(dtw_data_for_car[i])
#     loss_series_st[0:200].plot(figsize =(20,4))
#     plt.xlabel('Segment numbers')
#     plt.ylabel('Diff. with ref. segm.')

In [36]:
path_acc = os.getcwd() + '/data/Car1109out/*/*AccelerometerLinear.csv'  
path_gyro= os.getcwd() + '/data/Car1109out/*/*Gyroscope.csv' 
acc_files = glob.glob(path_acc)
gyro_files= glob.glob(path_gyro)
car_df= read_files(acc_files[0], gyro_files[0], 'car')
n = len(car_df)
car_df = car_df[350:n-500]
for acc_file, gyro_file in zip(acc_files, gyro_files) :
    print(acc_file)
    print(gyro_file)
    df_this = read_files(acc_file, gyro_file, 'car')
    n = len(df_this)
    print("len of this file ",df_this.shape)
    df_this = df_this[350 : n-500]
    print("len of this file ",df_this.shape)
    car_df = pd.concat([car_df, df_this])
    print(car_df.shape)
car_df.reset_index(drop = True, inplace = True)
car_df.head()

/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_19-14-12/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_19-14-12/Gyroscope.csv
len of this file  (3622, 28)
len of this file  (2772, 28)
(5544, 28)
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_18-46-25/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_18-46-25/Gyroscope.csv
len of this file  (6310, 28)
len of this file  (5460, 28)
(11004, 28)
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_18-55-22/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_18-55-22/Gyroscope.csv
len of this file  (2872, 28)
len of this file  (2022, 28)
(13026, 28)
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_19-17-04/AccelerometerLinear.csv
/home/vicky/Desktop/Dhananjay/human-activity/data/Car1109out/2018-09-11_19-17-04/Gyros

Unnamed: 0,AX,AY,AZ,GX,GY,GZ,activity,mAcc,mGyro,orientX,...,AZ1,GX1,GY1,GZ1,fAX,fAY,fAZ,fGX,fGY,fGZ
0,0.151147,-0.548212,0.384174,0.014104,0.032599,0.212953,car,5.896712,0.918298,-2.105947,...,0.468213,-0.102314,-0.065251,0.19436,0.390241,0.242471,1.450791,0.390241,0.242471,1.450791
1,0.145558,-0.549001,0.315767,0.022043,0.024087,0.209872,car,6.891665,0.890564,-10.407979,...,0.398035,-0.088283,-0.070148,0.191415,-0.956881,0.785667,1.839521,-0.956881,0.785667,1.839521
2,0.13164,-0.550599,0.237944,0.029397,0.016481,0.20564,car,5.505644,1.006707,-33.960476,...,0.318826,-0.073928,-0.073003,0.186786,-0.691534,-0.422401,-0.150374,-0.691534,-0.422401,-0.150374
3,0.109519,-0.552682,0.153242,0.035762,0.010158,0.200435,car,4.220319,1.314102,-59.080153,...,0.233326,-0.059636,-0.073536,0.180548,-0.849753,0.600769,1.43938,-0.849753,0.600769,1.43938
4,0.07965,-0.554942,0.064491,0.040765,0.005447,0.194477,car,4.9587,1.252351,-33.272546,...,0.144593,-0.045799,-0.07155,0.172831,-0.610689,-0.188359,1.171801,-0.610689,-0.188359,1.171801


In [89]:
print(car_df.shape)
car_df.columns

(70345, 28)


Index(['AX', 'AY', 'AZ', 'GX', 'GY', 'GZ', 'activity', 'mAcc', 'mGyro',
       'orientX', 'orientY', 'orientZ', 'quaternionW', 'quaternionX',
       'quaternionY', 'quaternionZ', 'AX1', 'AY1', 'AZ1', 'GX1', 'GY1', 'GZ1',
       'fAX', 'fAY', 'fAZ', 'fGX', 'fGY', 'fGZ'],
      dtype='object')

In [90]:
car_segments, car_labels = segmentation(car_df, 400)
np.array(car_segments).shape

(1399, 11, 400)

In [91]:
car_dtw_data = DTWdistance(car_segments, ref)

calculating loss for feature  0 ....
calculating loss for feature  1 ....
calculating loss for feature  2 ....
calculating loss for feature  3 ....
calculating loss for feature  4 ....
calculating loss for feature  5 ....
calculating loss for feature  6 ....
calculating loss for feature  7 ....
calculating loss for feature  8 ....
calculating loss for feature  9 ....
calculating loss for feature  10 ....


In [53]:
# df1 = pd.DataFrame({"name1" :car_dtw_data[0], "name2" :car_dtw_data[1],"name3" :car_dtw_data[2],
#                    "name4" :car_dtw_data[3], "name5" :car_dtw_data[4],"name6" :car_dtw_data[5],})
# df1.to_csv("car_dtw.csv", index=False)


In [92]:
print(np.array(car_dtw_data).shape)
car_dtw_data = [*zip(*car_dtw_data)]
print(np.array(car_dtw_data).shape)

(11, 1399)
(1399, 11)


In [93]:
import csv
with open("train_car.csv","w+") as my_csv:
    csvWriter = csv.writer(my_csv,delimiter=',')
    csvWriter.writerows(car_dtw_data)

In [95]:
train1 = pd.read_csv('train_bike.csv').values.tolist()
train2 = pd.read_csv('train_car.csv').values.tolist()
print(len(train1))
print(len(train2))

1503
1398


In [43]:

# # train1 = bike_dtw_data                 #dtw feature vector for bike
# label1 = [1]*len(train1)       #putting lebel = 1 to all bike samples

# # train2 = car_dtw_data         #dtw feature vector for car
# label2 = [0]*len(train2)       #putting lebel = 1 to all car samples

# train = [x+y for x,y in zip(train1, train2)]  #putting both traing data into one  

# label = label1 + label2  #putting both lables into one label

# # train = [*zip(*train)]   #transpose of traing data
# train = train1 + train2


In [96]:
label1 = [1]*len(train1)
label2 = [0]*len(train2)

label = label1 + label2
train = train1 + train2


In [97]:
len(train)

2901

In [98]:
RANDOM_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(train, label, test_size=0.2, random_state=RANDOM_SEED)

In [99]:
print(len(X_train))
print(len(y_train))
print(len(X_test))
print(len(y_test))


2320
2320
581
581


In [100]:
# apply svm model
print("---------SVM model---------\n")
svm_clf = svm.SVC(kernel='linear', C=1)
svm_clf.fit(X_train, y_train)
svm_predict_y = svm_clf.predict(X_test)

# get the 5-fold cross-validation score

print("cross-validation score \n")
scores = cross_val_score(svm_clf, X_train, y_train, cv=5)
print(scores, scores.mean(), scores.std())


# apply model to test set
svm_clf.fit(X_train, y_train)
svm_predict_y = svm_clf.predict(X_test)
print("\n")
# obtain accuracy score
testscore = accuracy_score(y_test, svm_predict_y)
print("Accuracy score on test set: %6.3f" % testscore)

---------SVM model---------

cross-validation score 

[ 0.9311828   0.95053763  0.9375      0.95464363  0.94816415] 0.944405641097 0.00870718798753


Accuracy score on test set:  0.974


In [101]:

# train a random forest
print("---------Random-Forest---------\n")
rf_clf = RandomForestClassifier(n_estimators=200)
# get the 5-fold cross-validation score
print("cross-validation score \n")
scores = cross_val_score(rf_clf, X_train, y_train, cv=5)
print(scores, scores.mean(), scores.std())


# apply model to test set
rf_clf.fit(X_train, y_train)
rf_predict_y = rf_clf.predict(X_test)

print("\n")
# obtain accuracy score
testscore = accuracy_score(y_test, rf_predict_y)
print("Accuracy score on test set: %6.3f" % testscore)

---------Random-Forest---------

cross-validation score 

[ 0.99784946  0.99784946  0.99353448  0.99568035  0.99784017] 0.99655078517 0.00172576885309


Accuracy score on test set:  0.997


In [103]:
pickle_path = 'rf_model2.pkl'
unpickle = open(pickle_path, 'rb')

grd_bst = pickle.load(unpickle)
pred_grd = grd_bst.predict(X_test)
# pred_grd = le.inverse_transform(pred_grd)
accuracy_grd = accuracy_score(pred_grd,y_test)
print('accuracy using svm model1 : ',accuracy_grd)

accuracy using svm model1 :  0.566265060241


In [79]:
pickle_path = 'svm_model2.pkl'
unpickle = open(pickle_path, 'rb')
grd_bst = pickle.load(unpickle)
pred_grd = grd_bst.predict(X_test)
# pred_grd = le.inverse_transform(pred_grd)
accuracy_grd = accuracy_score(pred_grd,y_test)
print('accuracy using svm model1 : ',accuracy_grd)

accuracy using svm model1 :  0.707401032702
