<a href="https://colab.research.google.com/github/farhanfuadabir/SHL2020_RedCircle/blob/master/SHL_import_featureExtraction_%5Bfuad%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Dependencies

import pandas as pd
import numpy as np
import pickle as pk
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


# Functions

def magnitude(x, y, z):
  return np.sqrt(x**2 + y**2 + z**2)


def entrop(pk, axis=0):

  from scipy.special import entr

  sumPk = np.sum(pk, axis=axis, keepdims=True)
  sumPk = np.where(sumPk == 0, 0.000001, sumPk)
  pk = pk / sumPk
  vec = entr(pk)
  S = np.sum(vec, axis=axis)
  return S

def autocorrelation(x, axis=0):

  import numpy as np
  
  def autocorr_along_axis(x):
    result = np.correlate(x, x, mode='full')
    return result[result.size // 2:]
  return np.apply_along_axis(autocorr_along_axis, axis, x)


def zero_crossing_rate(X, axis=0):

  import numpy as np
  X = np.nan_to_num(X)

  def zcr_along_axis(X):
    temp = np.where(X>=0, 1, -1)
    for i in range (0, temp.shape[0]):    
      if i == 0:
        prev_i = i
        S = 0
        continue
      S += abs(temp[i] - temp[prev_i])
      prev_i = i
    ans = 0.5*S/X.shape[0]
    
    return ans

  return np.apply_along_axis(zcr_along_axis, axis, X)


def first_zero_crossing(x, axis=0):
  
  import numpy as np
  
  def fzc_along_axis(k):    
    index = np.where(np.diff(np.sign(k)))[0] + 1
    if index.size == 0:
      return 0
    else:
      return index[0] 
      
  return np.apply_along_axis(fzc_along_axis, axis, x)



def calculateAngle(lx, ly, lz, position='', picklePrefix='', unpicklePath=''):

  import numpy as np

  gx, gy, gz = process_channels(position, ['Gra_x','Gra_y','Gra_z'], 
                                      picklePrefix=picklePrefix, 
                                      unpicklePath=unpicklePath)  
  gx = np.nan_to_num(gx)
  gy = np.nan_to_num(gy)
  gz = np.nan_to_num(gz)
  lx = np.nan_to_num(lx)
  ly = np.nan_to_num(ly)
  lz = np.nan_to_num(lz)
  
  n = gx*lx + gy*ly + gz*lz
  d = magnitude(gx, gy, gz) * magnitude(lx, ly, lz)

  # Division by zero is not allowed
  # Replacing all 0s with 0.000001
  d = np.where(d == 0, 0.000001, d)

  theta = np.arccos(n/d)
  
  return theta


def calculate_stat_features(magnitude, prefix=''):
  
  from scipy.stats import median_absolute_deviation, iqr
  import numpy as np
  import pandas as pd

  mean = np.mean(magnitude, axis=1)
  std = np.std(magnitude, axis=1)
  max = np.max(magnitude, axis=1)
  min = np.min(magnitude, axis=1)
  mad = median_absolute_deviation(np.nan_to_num(magnitude), axis=1, nan_policy='omit')
  iqr = iqr(magnitude, axis=1, nan_policy='omit')
  
  corr = autocorrelation(magnitude, axis=1)

  maxcorr = np.max(corr, axis=1)
  argmax_corr = np.argmax(corr, axis=1)
  zcr = zero_crossing_rate(corr, axis=1)
  fzc = first_zero_crossing(corr, axis=1)
  
  columnName = [prefix+'_mean', prefix+'_std', prefix+'_max', prefix+'_min', 
                prefix+'_mad', prefix+'_iqr', prefix+'_max.corr', 
                prefix+'_idx.max.corr', prefix+'_zcr', prefix+'_fzc']

  stat_feat_df = pd.DataFrame(np.stack((mean, std, max, min, mad, iqr, maxcorr, 
                                        argmax_corr, zcr, fzc), axis=1), 
                              columns=columnName)
  
  return stat_feat_df

def calculate_spectral_features(magnitude, fs=100, prefix=''):
  
  import pandas as pd
  import numpy as np
  from scipy.stats import kurtosis, skew
  from scipy import signal, stats

  FREQ,PSD = signal.welch(magnitude, fs, nperseg=magnitude.shape[1], axis=1)
  # Max PSD value
  maxPSD = np.max(PSD, axis=1)
  # Frequency Entropy
  entropy = entrop(PSD, axis=1)
  # Frequency Center
  sumPSD = np.sum(PSD, axis=1)
  sumPSD = np.where(sumPSD == 0, 0.000001, sumPSD)
  fc = np.sum((FREQ*PSD), axis=1) / sumPSD
  # Kurtosis
  kurt = kurtosis(PSD, axis=1, nan_policy='omit')
  # Skewness
  skew = skew(PSD, axis=1, nan_policy='omit')

  columnName = [prefix+'_max.psd', prefix+'_entropy', prefix+'_fc', 
                prefix+'_kurt', prefix+'_skew']

  spectral_feat_df = pd.DataFrame(np.stack((maxPSD, entropy, fc, kurt, skew), 
                                           axis=1), columns=columnName)
  
  return spectral_feat_df



def rotate_axis(ax, ay, az, position='', picklePrefix='', unpicklePath=''):
  
  import numpy as np

  w, x, y, z = process_channels(position, ['Ori_w','Ori_x','Ori_y','Ori_z'], 
                                picklePrefix=picklePrefix, 
                                unpicklePath=unpicklePath)

  if ax.shape == ay.shape == az.shape == w.shape == x.shape == y.shape == z.shape:
    num_column = ax.shape[1]
  else:
    print("Dimention mismatch. Derotation Failed.")

  w = w.ravel()
  x = x.ravel()
  y = y.ravel()
  z = z.ravel()

  ax = ax.ravel()
  ay = ay.ravel()
  az = az.ravel()

  Ax = 1 - 2*(y**2 + z**2) * ax + 2*(x*y - w*z) * ay + 2*(x*z + w*y) * az
  Ay = 2*(x*y + w*z) * ax + 1 - 2*(x**2 + z**2) * ay + 2*(y*z - w*x) * az
  Az = 2*(x*z - w*y) * ax + 2*(y*z + w*x) * ay + 1 - 2*(x**2 + y**2) * az

  Ax = Ax.reshape(-1,num_column)
  Ay = Ay.reshape(-1,num_column)
  Az = Az.reshape(-1,num_column)

  return Ax,Ay,Az

def process_channels(position, channels, picklePrefix='', unpicklePath='',
                     statFeatures=False, spectralFeatures=False, process=None,
                     processEachAxis=False, calculateComponent=None, 
                     outMagnitude=False, isLabel=False, calculateJerk=False, 
                     prefix=''): 
  
  import pandas as pd
  import numpy as np
  import pickle as pk
  from scipy.stats import mode
  
  num_channels = len(channels) 

  print("\nPosition: " + position)
  
  # Unpickle the channel[1]
  print("Unpickling From: " + unpicklePath + picklePrefix + "_" + position.lower() + "_" + 
        channels[0].lower() + ".pickle ...", end=" ") 
  with open(unpicklePath + picklePrefix + "_" + position.lower() + "_" + channels[0].lower() +
            ".pickle",'rb') as f:
    x = pk.load(f)
  print('Done')

  if num_channels >= 3:
    # Unpickle channel[2]
    print("Unpickling From: " + unpicklePath + picklePrefix + "_" + position.lower() + "_" + 
          channels[1].lower() + ".pickle ...", end=" ") 
    with open(unpicklePath + picklePrefix + "_" + position.lower() + "_" + channels[1].lower() + 
              ".pickle",'rb') as f:
      y = pk.load(f)
    print('Done')

    # Unpickle channel[3]
    print("Unpickling From: " + unpicklePath + picklePrefix + "_" + position.lower() + "_" + 
          channels[2].lower() + ".pickle ...", end=" ") 
    with open(unpicklePath + picklePrefix + "_" + position.lower() + "_" + channels[2].lower() + 
              ".pickle",'rb') as f:
      z = pk.load(f)
    print('Done')

    if num_channels == 4:
      # Unpickle channel[4]
      print("Unpickling From: " + unpicklePath + picklePrefix + "_" + position.lower() + "_" + 
            channels[3].lower() + ".pickle ...", end=" ") 
      with open(unpicklePath + picklePrefix + "_" + position.lower() + "_" + channels[3].lower() + 
                ".pickle",'rb') as f:
        w = pk.load(f)
      print('Done')

  print("Processing... ",end=" ")
  
  if num_channels == 4:
    # According to the equation the data should be read as: w,x,y,z
    print("Quarternions Returned")
    return x, y, z, w
  elif (num_channels == 1) and (isLabel==True):
      label,_ = mode(x,axis=1,nan_policy='omit')
      label = label.flatten()
      print("Labels Returned")
      label = pd.DataFrame(label, columns=['label'])
      return label                              # datatype: dataframe
  else:
    if num_channels == 1:
      sq_mag = x
    elif num_channels == 3:
      if process=='rotate':
        print("Calculating Axis Rotation...", end=" ")
        x, y, z = rotate_axis(x, y, z, position=position, 
                              picklePrefix=picklePrefix, 
                              unpicklePath=unpicklePath)
        sq_mag = magnitude(x, y, z)
      elif process is not None:
        theta = calculateAngle(x, y, z, position=position, 
                              picklePrefix=picklePrefix, 
                              unpicklePath=unpicklePath)
        if process=='horizontal':
          print("Calculating Horizontal Component...", end=" ")
          sq_mag = magnitude(x, y, z)*np.cos(theta)
          if calculateJerk==True:
            print("Calculating Jerk...", end=" ")
            sq_mag = np.diff(sq_mag,axis=1)
        elif process=='vertical':
          print("Calculating Vertical Component...", end=" ")
          sq_mag = magnitude(x, y, z)*np.sin(theta)
          if calculateJerk==True:
            print("Calculating Jerk...", end=" ")
            sq_mag = np.diff(sq_mag,axis=1)
      else:
          sq_mag = magnitude(x, y, z)

    if (statFeatures==False) and (spectralFeatures==False):
      if(outMagnitude==True):
        print("Magnitude Returned")
        return sq_mag                           # datatype: ndarray
      else:
        print("Triaxial Value Returned")
        return x, y, z                          # datatype: ndarray
    else:
      if statFeatures==True:
        stat_features_mag = calculate_stat_features(sq_mag, prefix=prefix+"M")
        if (processEachAxis==True) and (num_channels>1):
          stat_features_x = calculate_stat_features(x, prefix=prefix+"X")
          stat_features_y = calculate_stat_features(y, prefix=prefix+"Y")
          stat_features_z = calculate_stat_features(z, prefix=prefix+"Z")

          stat_features = pd.concat([stat_features_mag, stat_features_x, 
                                     stat_features_y, stat_features_z],axis=1)

          #stat_features = np.concatenate([stat_features_mag, stat_features_x, 
          #                                stat_features_y, stat_features_z], 
          #                               axis=1)
        else:
          stat_features = stat_features_mag
        
        if spectralFeatures==False:
          print("Statistical Features Returned")
          return stat_features                  # datatype: dataframe
      if spectralFeatures==True:
        spectral_features_mag = calculate_spectral_features(sq_mag,
                                                            prefix=prefix+"M")
        if (processEachAxis==True) and (num_channels>1):
          spectral_features_x = calculate_spectral_features(x, prefix=prefix+"X")
          spectral_features_y = calculate_spectral_features(y, prefix=prefix+"Y")
          spectral_features_z = calculate_spectral_features(z, prefix=prefix+"Z")

          spectral_features = pd.concat([spectral_features_mag, 
                                         spectral_features_x, 
                                         spectral_features_y, 
                                         spectral_features_z],axis=1)

          #spectral_features = np.concatenate([spectral_features_mag, 
          #                                    spectral_features_x, 
          #                                    spectral_features_y, 
          #                                    spectral_features_z],
          #                                   axis=1)
        else:
          spectral_features = spectral_features_mag

        if statFeatures==False:
          print("Spectral Features Returned")
          return spectral_features              # datatype: dataframe
      print("Statistical and Spectral Features Returned")
                                                # datatype: dataframe
      return pd.concat([stat_features, spectral_features],axis=1)
      #return np.concatenate([stat_features,spectral_features],axis=1)
    

def import_and_pickle(location, positions, filenames, picklePrefix='', 
                      picklePath=''):
  print("Importing data from: " + location + "...")
  for j in positions:
    print("\nPosition: " + j)
    for i in filenames:
      # Import from text file
      print("Importing " + i + "...", end=" ")
      value = np.loadtxt(location + j + "/" + i + ".txt")
      print("Done | Shape: ", value.shape)

      # Check for nan values
      nan_count = np.isnan(value).sum()
      if nan_count != 0:
        print("\tnan detected | : ", nan_count, " | Filename: " + i)

      # Pickle the values
      import pickle as pk
      with open(picklePath + picklePrefix + "_" + j.lower() + "_" + i.lower() + 
                ".pickle",'wb') as f:
        pk.dump(value,f)
      print("\tPickled at: " + picklePath + picklePrefix + "_" + j.lower() + 
            "_" + i.lower() + ".pickle")
      

def extract_acc_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='acc'):
  # Extract ACC
  print("Extracting ACC... ", end=" ")
  acc = process_channels(positions, ['Acc_x','Acc_y','Acc_z'], 
                         picklePrefix=picklePrefix, processEachAxis=True, 
                         statFeatures=True, spectralFeatures=True, 
                         process='rotate', unpicklePath=unpicklePath, 
                         prefix=prefix)
  print("acc Shape: ", acc.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_ACC.pickle",
            'wb') as f:
    pk.dump(acc,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_ACC.pickle\n\n")
  
def extract_lacc_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='lacc'):
  # Extract LACC
  print("Extracting LACC... ", end=" ")
  lacc = process_channels(positions, ['LAcc_x','LAcc_y','LAcc_z'], 
                          picklePrefix=picklePrefix, processEachAxis=True, 
                          statFeatures=True, spectralFeatures=True, 
                          process='rotate', unpicklePath=unpicklePath,
                          prefix=prefix)
  print("lacc Shape: ", lacc.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_LACC.pickle",
            'wb') as f:
    pk.dump(lacc,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_LACC.pickle\n\n")


def extract_acch_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='acch'):
  # Extract ACCH
  print("Extracting ACCH... ", end=" ")
  acch = process_channels(positions, ['LAcc_x','LAcc_y','LAcc_z'], 
                          picklePrefix=picklePrefix, processEachAxis=True,
                          statFeatures=True, spectralFeatures=True, 
                          process='horizontal', unpicklePath=unpicklePath,
                          prefix=prefix)
  print("acch Shape: ", acch.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_ACCH.pickle",
            'wb') as f:
    pk.dump(acch,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_ACCH.pickle\n\n")

def extract_accv_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='accv'):
  # Extract ACCV
  print("Extracting ACCV... ", end=" ")
  accv = process_channels(positions, ['LAcc_x','LAcc_y','LAcc_z'], 
                            picklePrefix=picklePrefix, processEachAxis=True,
                            statFeatures=True, spectralFeatures=True, 
                            process='vertical', unpicklePath=unpicklePath,
                          prefix=prefix)
  print("accv Shape: ", accv.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_ACCV.pickle",
            'wb') as f:
    pk.dump(accv,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_ACCV.pickle\n\n")


def extract_jerkh_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='jerkh'):
  # Extract JERKH
  print("Extracting JERKH... ", end=" ")
  jerkh = process_channels(positions, ['LAcc_x','LAcc_y','LAcc_z'], 
                          picklePrefix=picklePrefix, processEachAxis=True,
                          statFeatures=True, spectralFeatures=True, 
                          process='horizontal', unpicklePath=unpicklePath,
                          calculateJerk=True, prefix=prefix)
  print("jerkh Shape: ", jerkh.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_JERKH.pickle",
            'wb') as f:
    pk.dump(jerkh,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_JERKH.pickle\n\n")


def extract_jerkv_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='jerkv'):
  # Extract JERKV
  print("Extracting JERKV... ", end=" ")
  jerkv = process_channels(positions, ['LAcc_x','LAcc_y','LAcc_z'], 
                          picklePrefix=picklePrefix, processEachAxis=True,
                          statFeatures=True, spectralFeatures=True, 
                          process='vertical', unpicklePath=unpicklePath,
                          calculateJerk=True, prefix=prefix)
  print("jerkv Shape: ", jerkv.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_JERKV.pickle",
            'wb') as f:
    pk.dump(jerkv,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_JERKV.pickle\n\n")
  

def extract_mag_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='mag'):
  # Extract MAG
  print("Extracting MAG... ", end=" ")
  mag = process_channels(positions, ['Mag_x','Mag_y','Mag_z'], 
                         picklePrefix=picklePrefix, processEachAxis=True,
                         statFeatures=True, spectralFeatures=True, 
                         process='rotate', unpicklePath=unpicklePath, 
                         prefix=prefix)
  print("mag Shape: ", mag.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_MAG.pickle",
            'wb') as f:
    pk.dump(mag,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_MAG.pickle\n\n")


def extract_gyr_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='gyr'):
  # Extract GYR
  print("Extracting GYR... ", end=" ")
  gyr = process_channels(positions, ['Gyr_x','Gyr_y','Gyr_z'], 
                         picklePrefix=picklePrefix, processEachAxis=True,
                         statFeatures=True, spectralFeatures=True, 
                         unpicklePath=unpicklePath, prefix=prefix)
  print("gyr Shape: ", gyr.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_GYR.pickle",
            'wb') as f:
    pk.dump(gyr,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_GYR.pickle\n\n")

def extract_pres_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='pres'):
  # Extract PRES
  print("Extracting PRES... ", end=" ")
  pres = process_channels(positions, ['Pressure'], picklePrefix=picklePrefix, 
                          statFeatures=True, spectralFeatures=True, 
                          unpicklePath=unpicklePath, prefix=prefix)
  print("pres Shape: ", pres.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_PRES.pickle",
            'wb') as f:
    pk.dump(pres,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_PRES.pickle\n\n")


def extract_label_pickle(positions, picklePrefix='', unpicklePath='', 
                       picklePath='', prefix='label'):
  # Extract LABEL
  print("Extracting LABEL... ", end=" ")
  label = process_channels(positions, ['Label'], picklePrefix=picklePrefix, 
                           isLabel=True, unpicklePath=unpicklePath, 
                           prefix=prefix)
  print("Label Shape: ", label.shape)

  # Pickle 
  import pickle as pk
  with open(picklePath + picklePrefix + "_" + positions.lower() + "_LABEL.pickle",
            'wb') as f:
    pk.dump(label,f)
  print("\nPickled at: " + picklePath + picklePrefix + "_" + 
        positions.lower() + "_LABEL.pickle\n\n")





def extractFeatures_pickle(positions, picklePrefix='', unpicklePath='', 
                           picklePath=''):
  print("Importing data from: " + picklePath + "...")
  for j in positions:
    print("\nPosition: " + j)
  
    # Extract Features
    print("Calculating Features...", end=" ")
    
    acc = process_channels(j, ['Acc_x','Acc_y','Acc_z'], 
                           picklePrefix=picklePrefix, processEachAxis=True, 
                           statFeatures=True, spectralFeatures=True,  
                           process='rotate', unpicklePath=unpicklePath, 
                           prefix='acc')
    print("acc Shape: ", acc.shape)

    lacc = process_channels(j, ['LAcc_x','LAcc_y','LAcc_z'], 
                            picklePrefix=picklePrefix, processEachAxis=True, 
                            statFeatures=True, spectralFeatures=True, 
                            process='rotate', unpicklePath=unpicklePath,
                            prefix='lacc')
    print("lacc Shape: ", lacc.shape)

    acch = process_channels(j, ['LAcc_x','LAcc_y','LAcc_z'], 
                            picklePrefix=picklePrefix, processEachAxis=True,
                            statFeatures=True, spectralFeatures=True, 
                            process='horizontal', unpicklePath=unpicklePath, 
                            prefix='acch')
    print("acch Shape: ", acch.shape)

    accv = process_channels(j, ['LAcc_x','LAcc_y','LAcc_z'], 
                            picklePrefix=picklePrefix, processEachAxis=True,
                            statFeatures=True, spectralFeatures=True, 
                            process='vertical', unpicklePath=unpicklePath, 
                            prefix='accv')
    print("accv Shape: ", accv.shape)

    jerkh = process_channels(j, ['LAcc_x','LAcc_y','LAcc_z'], 
                            picklePrefix=picklePrefix, processEachAxis=True,
                            statFeatures=True, spectralFeatures=True, 
                            process='horizontal', unpicklePath=unpicklePath,
                            calculateJerk=True, prefix='jerkh')
    print("jerkh Shape: ", jerkh.shape)

    jerkv = process_channels(j, ['LAcc_x','LAcc_y','LAcc_z'], 
                            picklePrefix=picklePrefix, processEachAxis=True,
                            statFeatures=True, spectralFeatures=True, 
                            process='vertical', unpicklePath=unpicklePath,
                            calculateJerk=True, prefix='jerkv')
    print("jerkv Shape: ", jerkv.shape)

    mag = process_channels(j, ['Mag_x','Mag_y','Mag_z'], 
                           picklePrefix=picklePrefix, processEachAxis=True,
                           statFeatures=True, spectralFeatures=True, 
                           unpicklePath=unpicklePath, prefix='mag')
    print("mag Shape: ", mag.shape)

    gyr = process_channels(j, ['Gyr_x','Gyr_y','Gyr_z'], 
                           picklePrefix=picklePrefix, processEachAxis=True,
                           statFeatures=True, spectralFeatures=True, 
                           unpicklePath=unpicklePath, prefix='gyr')
    print("gyr Shape: ", gyr.shape)

    pres = process_channels(j, ['Pressure'], picklePrefix=picklePrefix, 
                            statFeatures=True, spectralFeatures=True, 
                            unpicklePath=unpicklePath, prefix='pres')
    print("pres Shape: ", pres.shape)

    print("\nFeature Calculation Done\n")

    print("Accumulating Features...",end=" ")
    X = pd.concat([acc, lacc, acch, accv, jerkh, jerkv, mag, gyr, pres], axis=1)
    print("Done | Shape: ", X.shape)

    # Check for nan values in Features
    #nan_count = np.isnan(X).sum()
    #if nan_count != 0:
    #  print("\n\tnan detected in X | ", nan_count, "\n")

    print("Processing Labels...", end=" ")
    label = process_channels(j, ['Label'], picklePrefix=picklePrefix, 
                             isLabel=True, unpicklePath=unpicklePath, 
                             prefix='label')
    print("Done | Shape: ", label.shape)
    
    # Check for nan values in Labels
    #nan_count = np.isnan(label).sum()
    #if nan_count != 0:
    #  print("\n\tnan detected in y | ", nan_count, "\n")

    # Pickle Features
    import pickle as pk
    with open(picklePath + picklePrefix + "_" + j.lower() + "_X.pickle",
              'wb') as f:
      pk.dump(X,f)
    print("\n\nPickled at: " + picklePath + picklePrefix + "_" + j.lower() + 
          "_X.pickle")
    
    # Pickle Labels
    import pickle as pk
    with open(picklePath + picklePrefix + "_" + j.lower() + "_y.pickle",
              'wb') as f:
      pk.dump(label,f)
    print("Pickled at: " + picklePath + picklePrefix + "_" + j.lower() + 
          "_y.pickle")


In [0]:
# DATASET PATH
location_train = '/content/drive/My Drive/SHL Dataset 2020/Train/'
location_cv = '/content/drive/My Drive/challenge-2020-validation/validation/'
location_test = '/content/drive/My Drive/SHL 2020/TEST_2020/'
#location_cv = '/content/drive/My Drive/validation_2020/'

#positions = ['Bag','Hand','Torso','Hips']
positions = ['Hand']

channels = ['LAcc_x', 'LAcc_y', 'LAcc_z', 'Acc_x', 'Acc_y', 'Acc_z',
            'Mag_x', 'Mag_y', 'Mag_z', 'Gyr_x', 'Gyr_y', 'Gyr_z',
            'Gra_x', 'Gra_y', 'Gra_z', 'Ori_w', 'Ori_x', 'Ori_y', 'Ori_z',
            'Pressure']#, 'Label']

picklePrefix = 'test_2020'
#picklePrefix = "train_2020"
savePath = "/content/drive/My Drive/test_2020/"
#savePath = "/content/drive/My Drive/train_2020/"
#savePath = '/content/drive/My Drive/validation_2020/'
temporaryPath = "/content/"
#temporaryPath = "/content/drive/My Drive/validation_2020/"
#picklePrefix = "validation_2020"


In [0]:
for pos in positions:

  import_and_pickle(location_test, [pos], channels, picklePrefix=picklePrefix, 
                    picklePath=temporaryPath)

  extract_acc_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_lacc_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_acch_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_accv_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_jerkh_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_jerkv_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_mag_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_gyr_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_pres_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)
  extract_label_pickle(pos, picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                        picklePath=savePath)

In [0]:
def accumulate_dataset(positions, picklePath='', picklePrefix='', addLabel=True, 
                       writeCSV=False):
  
  import pandas as pd

  for pos in positions:
  
    print('Position: ', pos)

    acc = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                         'ACC.pickle')
    lacc = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                          'LACC.pickle')
    acch = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                          'ACCH.pickle')
    accv = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                          'ACCV.pickle')
    jerkh = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                           'JERKH.pickle')
    jerkv = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                           'JERKV.pickle')
    mag = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                         'MAG.pickle')
    gyr = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                         'GYR.pickle')
    pres = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                          'PRES.pickle')
  
    if addLabel==True:
      label = pd.read_pickle(picklePath + picklePrefix + '_' + pos.lower() + 
                             '_' + 'LABEL.pickle')
      data = pd.concat([acc, lacc, acch, accv, jerkh, jerkv, mag, gyr, pres, 
                        label], axis=1)
    else:
      data = pd.concat([acc, lacc, acch, accv, jerkh, jerkv, mag, gyr, pres], 
                       axis=1)

    print('Accumulated Dataset | Shape: ', data.shape)

    print('\nPickling at: ' + picklePath + picklePrefix + '_' + pos.lower() + 
          '_' + 'DATA.pickle')
    data.to_pickle(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                   'DATA.pickle')
    if writeCSV==True:
      data.to_csv(picklePath + picklePrefix + '_' + pos.lower() + '_' + 
                  'DATA.csv', index=False)
    print('\nDone\n')

In [0]:
positions = ['Bag','Hand','Torso','Hips']
#positions = ['Hand']

#picklePrefix = 'test_2019'
picklePrefix = "train_2020"
#savePath = "/content/drive/My Drive/test_2019/"
savePath = "/content/drive/My Drive/train_2020/"
#savePath = '/content/drive/My Drive/validation_2020/'
#picklePrefix = "validation_2020"


accumulate_dataset(positions, picklePath=savePath, picklePrefix=picklePrefix, 
                   addLabel=True, writeCSV=True)

Position:  Bag
Accumulated Dataset | Shape:  (196072, 496)

Pickling at: /content/drive/My Drive/train_2020/train_2020_bag_DATA.pickle

Done

Position:  Hand
Accumulated Dataset | Shape:  (196072, 496)

Pickling at: /content/drive/My Drive/train_2020/train_2020_hand_DATA.pickle

Done

Position:  Torso
Accumulated Dataset | Shape:  (196072, 496)

Pickling at: /content/drive/My Drive/train_2020/train_2020_torso_DATA.pickle

Done

Position:  Hips
Accumulated Dataset | Shape:  (196072, 496)

Pickling at: /content/drive/My Drive/train_2020/train_2020_hips_DATA.pickle

Done



In [0]:
import_and_pickle(location_cv, ['Bag'], channels, picklePrefix=picklePrefix, 
                  picklePath=temporaryPath)

In [0]:
extract_label_pickle('Bag', picklePrefix=picklePrefix, unpicklePath=temporaryPath, 
                       picklePath=savePath)

In [0]:
import pandas as pd
nak = pd.read_pickle('/content/drive/My Drive/train_2020/train_2020_bag_MAG.pickle')
print(type(nak))
print(nak.shape)
print(nak.head())
print(list(nak.columns))

In [0]:
extractFeatures_pickle(['Bag'], picklePrefix=picklePrefix, 
                        unpicklePath=temporaryPath, picklePath=savePath)

In [0]:
picklePath = ''

# Extract Features
print("Calculating Features...", end=" ")

acc = process_channels(positions[0], ['Acc_x','Acc_y','Acc_z'], 
                        picklePrefix=picklePrefix, processEachAxis=True, 
                        statFeatures=True, spectralFeatures=True, 
                        process='rotate', picklePath=picklePath)

acch = process_channels(positions[0], ['LAcc_x','LAcc_y','LAcc_z'], 
                        picklePrefix=picklePrefix, processEachAxis=True,
                        statFeatures=True, spectralFeatures=True, 
                        process='horizontal', picklePath=picklePath)

accv = process_channels(positions[0], ['LAcc_x','LAcc_y','LAcc_z'], 
                        picklePrefix=picklePrefix, processEachAxis=True,
                        statFeatures=True, spectralFeatures=True, 
                        process='vertical', picklePath=picklePath)

mag = process_channels(positions[0], ['Mag_x','Mag_y','Mag_z'], 
                        picklePrefix=picklePrefix, processEachAxis=True,
                        statFeatures=True, spectralFeatures=True, 
                        picklePath=picklePath)

gyr = process_channels(positions[0], ['Gyr_x','Gyr_y','Gyr_z'], 
                        picklePrefix=picklePrefix, processEachAxis=True,
                        statFeatures=True, spectralFeatures=True, 
                        picklePath=picklePath)

pres = process_channels(positions[0], ['Pressure'], picklePrefix=picklePrefix, 
                        statFeatures=True, spectralFeatures=True, 
                        picklePath=picklePath)
print("Done")

label = process_channels(positions[0], ['Label'], picklePrefix=picklePrefix, 
                             isLabel=True, picklePath=picklePath)
print("Done")

Calculating Features... 

NameError: ignored

In [0]:
bag_X = np.concatenate((acc, acch, accv, mag, gyr, pres), 
                       axis=1)
print(bag_X.shape)

bag_y = label

(28789, 231)


In [0]:
# Remove NaN rows

def remove_nan_rows(X,indexfrom):
  X = X[~np.isnan(indexfrom).any(axis=1)]
  return X

def cleanXy(X,y):
  nan_count_x = np.isnan(X).sum()
  print(nan_count_x," nan found in: X")
  nan_count_y = np.isnan(X).sum()
  print(nan_count_y," nan found in: y")

  y = remove_nan_rows(y,X)
  X = remove_nan_rows(X,X)

  return X,y 

In [0]:
X_train,y_train = cleanXy(bag_X,bag_y)
#X_testC,y_testC = cleanXy(X_test,y_test)

print("X_train Shape: ",X_train.shape)
print("y_train Shape: ",y_train.shape)
#print("X_test Shape: ",X_testC.shape)
#print("y_test Shape: ",y_testC.shape)

4  nan found in: X
4  nan found in: y
X_train Shape:  (28788, 231)
y_train Shape:  (28788,)


In [0]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier


#clf = GaussianNB()
#clf = RandomForestClassifier(n_estimators=1000)
#clf = LogisticRegression()
#clf = DecisionTreeClassifier()
#clf = KNeighborsClassifier(n_neighbors=3)
#clf = SVC(kernel='linear', C=1)
#clf = SVC(kernel='rbf', C=1, gamma=0.1)
#clf = SVC(kernel='poly', degree=1, C=1)
clf = AdaBoostClassifier()

#train_test_split
X_tr, X_test, y_tr, y_test = train_test_split(X_train, y_train, test_size = 0.20, random_state=1234)

clf.fit(X_tr,y_tr)
y_pred = clf.predict(X_test)
print("Accuracy using train_test_split: %0.2f" %(accuracy_score(y_test,y_pred)))
print('\n') 

'''
print("cv = 3")
scores = cross_val_score(clf, X_train, y_train, cv=3,scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Scores: ",scores)

print("cv = 4")
scores = cross_val_score(clf, X_train, y_train, cv=4,scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Scores: ",scores)

print("cv = 6")
scores = cross_val_score(clf, X_train, y_train, cv=6,scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Scores: ",scores)

print("cv = 8")
scores = cross_val_score(clf, X_train, y_train, cv=8,scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Scores: ",scores)

print("cv = 10")
scores = cross_val_score(clf, X_train, y_train, cv=10,scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Scores: ",scores)

print("cv = 15")
scores = cross_val_score(clf, X_train, y_train, cv=15,scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Scores: ",scores)
'''

Accuracy using train_test_split: 0.49




'\nprint("cv = 3")\nscores = cross_val_score(clf, X_train, y_train, cv=3,scoring=\'accuracy\')\nprint("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))\nprint("Scores: ",scores)\n\nprint("cv = 4")\nscores = cross_val_score(clf, X_train, y_train, cv=4,scoring=\'accuracy\')\nprint("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))\nprint("Scores: ",scores)\n\nprint("cv = 6")\nscores = cross_val_score(clf, X_train, y_train, cv=6,scoring=\'accuracy\')\nprint("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))\nprint("Scores: ",scores)\n\nprint("cv = 8")\nscores = cross_val_score(clf, X_train, y_train, cv=8,scoring=\'accuracy\')\nprint("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))\nprint("Scores: ",scores)\n\nprint("cv = 10")\nscores = cross_val_score(clf, X_train, y_train, cv=10,scoring=\'accuracy\')\nprint("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))\nprint("Scores: ",scores)\n\nprint("cv = 15")\nsc

In [0]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.gaussian_process import GaussianProcessClassifier


dict_classifiers = {
    "Logistic Regression": LogisticRegression(max_iter=1000,solver='saga'),
    "Nearest Neighbors": KNeighborsClassifier(),
    #"RBF SVM": SVC(C=10000,gamma=0.1),
    #"Linear SVM": SVC(kernel='linear'),
    #"Gradient Boosting Classifier": GradientBoostingClassifier(n_estimators=1000),
    "Decision Tree": tree.DecisionTreeClassifier(),
    #"Random Forest": RandomForestClassifier(n_estimators=1000),
    "Neural Net": MLPClassifier(alpha = 1),
    "Naive Bayes": GaussianNB(),
    #"AdaBoost": AdaBoostClassifier(),
    "QDA": QuadraticDiscriminantAnalysis(),
    "Gaussian Process": GaussianProcessClassifier()
}


def batch_classify(X_train, Y_train, X_test, Y_test, no_classifiers = 5, verbose = True):
    """
    This method, takes as input the X, Y matrices of the Train and Test set.
    And fits them on all of the Classifiers specified in the dict_classifier.
    The trained models, and accuracies are saved in a dictionary. The reason to use a dictionary
    is because it is very easy to save the whole dictionary with the pickle module.

    Usually, the SVM, Random Forest and Gradient Boosting Classifier take quiet some time to train.
    So it is best to train them on a smaller dataset first and
    decide whether you want to comment them out or not based on the test accuracy score.
    """

    dict_models = {}
    for classifier_name, classifier in list(dict_classifiers.items())[:no_classifiers]:
        t_start = time.clock()
        classifier.fit(X_train, Y_train)
        t_end = time.clock()

        t_diff = t_end - t_start
        train_score = classifier.score(X_train, Y_train)
        test_score = classifier.score(X_test, Y_test)

        dict_models[classifier_name] = {'model': classifier, 'train_score': train_score, 'test_score': test_score, 'train_time': t_diff}
        if verbose:
            print("trained {c} in {f:.2f} s".format(c=classifier_name, f=t_diff))
    return dict_models



def display_dict_models(dict_models, sort_by='test_score'):
    cls = [key for key in dict_models.keys()]
    test_s = [dict_models[key]['test_score'] for key in cls]
    training_s = [dict_models[key]['train_score'] for key in cls]
    training_t = [dict_models[key]['train_time'] for key in cls]

    df_ = pd.DataFrame(data=np.zeros(shape=(len(cls),4)), columns = ['classifier', 'train_score', 'test_score', 'train_time'])
    for ii in range(0,len(cls)):
        df_.loc[ii, 'classifier'] = cls[ii]
        df_.loc[ii, 'train_score'] = training_s[ii]
        df_.loc[ii, 'test_score'] = test_s[ii]
        df_.loc[ii, 'train_time'] = training_t[ii]

    display(df_.sort_values(by=sort_by, ascending=False))


# Train-Test Split
X_tr, X_test, y_tr, y_test = train_test_split(X_train, y_train, test_size = 0.20, random_state=1234)

dict_models = batch_classify(X_tr, y_tr, X_test, y_test, no_classifiers = 10)
display_dict_models(dict_models)


  import pandas.util.testing as tm


NameError: ignored