In [44]:
import csv
import math
import psycopg2
import itertools
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from statsmodels import robust

In [2]:
def unison_shuffled_copies(a, b):
    """Return shuffled copies of ``a`` and ``b`` that share one random order.

    A single permutation is drawn from NumPy's global random generator and
    applied to both inputs, so element ``k`` of the first result still
    corresponds to element ``k`` of the second.

    Parameters:
        a, b: equal-length arrays that support indexing with an index array.

    Returns:
        A tuple ``(a_shuffled, b_shuffled)``.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    n_items = len(a)
    assert n_items == len(b)
    # One shared ordering keeps the pairs aligned.
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [3]:
def csv_reader(file_obj):
    """Parse a numeric CSV stream into a data matrix and a label vector.

    Every non-blank row must hold at least five fields that ``float`` can
    parse: the first four become one row of the data matrix and the fifth
    becomes the label for that row.

    Parameters:
        file_obj: an iterable of CSV lines (for example an open text file).

    Returns:
        A tuple ``(data, labels)`` where ``data`` is a float array of shape
        ``(n_rows, 4)`` and ``labels`` is a 1-D float array of length
        ``n_rows``. When no rows are read, the empty arrays have shapes
        ``(0, 4)`` and ``(0, 1)``.

    Raises:
        IndexError: if a non-blank row has fewer than five fields.
        ValueError: if a field cannot be converted to ``float``.
    """
    data_rows = []
    label_values = []
    for row in csv.reader(file_obj):
        if not row:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on row[0].
            continue
        label = float(row[4])
        values = [float(row[0]), float(row[1]), float(row[2]), float(row[3])]
        data_rows.append(values)
        label_values.append(label)

    if not data_rows:
        return np.empty((0, 4), float), np.empty((0, 1), float)

    # Convert once at the end: stacking inside the loop re-copies the whole
    # array for every row read.
    return np.array(data_rows, dtype=float), np.array(label_values, dtype=float)

In [4]:
def generateFeatures(Temp):
    """Compute the 15-element feature list for one window of samples.

    Only the first three columns of ``Temp`` are read. The result is ordered
    as: the three column means, the three standard deviations, the three
    variances, the three ``robust.mad`` values, and finally the absolute
    differences between the column means for the pairs (0, 1), (0, 2), (1, 2).

    Parameters:
        Temp: 2-D array with at least three columns.

    Returns:
        A list of 15 scalar values.
    """
    columns = [Temp[:, axis] for axis in range(3)]

    means = [np.mean(column) for column in columns]
    deviations = [np.std(column) for column in columns]
    variances = [np.var(column) for column in columns]
    mads = [robust.mad(column) for column in columns]

    # Distance between the mean levels of every pair of columns.
    pairs = ((0, 1), (0, 2), (1, 2))
    mean_gaps = [abs(means[first] - means[second]) for first, second in pairs]

    return means + deviations + variances + mads + mean_gaps

In [5]:
"""Se crea diccionarios en el cual se acumulan los archivos csv"""
# Per-file results, keyed by the numeric file name (1 -> "1.csv", ...).
dictTrainData = {}
dictTrainLabel = {}
if __name__ == "__main__":
    # range(1, 7) yields 1..6: one entry per CSV file currently available.
    for i in range(1, 7):
        csv_path = str(i)+".csv"
        # Text mode: on Python 3 csv.reader needs str lines and rejects the
        # bytes produced by "rb".
        with open(csv_path, "r") as f_obj:
            trainingData,trainingLabel=csv_reader(f_obj)
        dictTrainData[i]=trainingData
        dictTrainLabel[i]=trainingLabel

# Merge the per-file arrays into one data matrix and one flat label vector.
# Keys are visited in sorted order so the row order does not depend on the
# interpreter's dict ordering, and everything is concatenated in one call
# instead of re-copying the accumulated arrays once per file.
fileKeys = sorted(dictTrainData)
trainDataGroup = np.vstack(
    [np.empty((0,4),float)] + [dictTrainData[key] for key in fileKeys]
)
trainLabelGroup = np.concatenate(
    [np.empty(0,float)] + [np.ravel(dictTrainLabel[key]) for key in fileKeys]
)

In [6]:
"""Para el procesamiento de los datos se requieren de ante mano 2 cosas. 
1) Agrupar intervalos de 1 segundo y 2) Agrupar por Actividad"""

# Build fixed-size windows from the merged training data.
#
# For every integer i from 1 up to the largest value in column 0, the rows are
# scanned in order. Consecutive rows whose column 0 equals i are collected,
# and each time SAMPLES_PER_WINDOW of them have accumulated they are emitted
# as one window. A non-matching row discards the partially filled window.
# Columns 1-3 of the emitted rows go to ExactDataGroup and the matching labels
# to ExactLabelGroup.
SAMPLES_PER_WINDOW = 25

groupIds = trainDataGroup[:,0]
selectedRows = []  # indices of the emitted rows, in output order

for i in range(1,int(max(groupIds))+1):
    # Start every group id with an empty window so rows left over at the end
    # of the previous pass are never mixed with rows of the next id.
    pendingRows = []
    for ii in range(0,len(trainDataGroup)):
        if groupIds[ii]==i:
            pendingRows.append(ii)
            if len(pendingRows)==SAMPLES_PER_WINDOW:
                selectedRows.extend(pendingRows)
                pendingRows = []
        else:
            pendingRows = []

# Slice once at the end: stacking row by row re-copies the growing arrays on
# every append.
selectedRows = np.asarray(selectedRows, dtype=int)
ExactDataGroup = trainDataGroup[selectedRows, 1:4]
ExactLabelGroup = trainLabelGroup[selectedRows]
            
            

In [18]:
"""Para sacar los features. Se tienen 8== [Media Desviacion Varianza Entropia Kurtosis MAD Correlacion] per Axis and Distance"""
# Turn every complete window of SAMPLES_PER_WINDOW rows into one feature row
# (15 values from generateFeatures) and one label (the label of the window's
# first row).
# Note from the original author: the feature width should eventually be 18.
SAMPLES_PER_WINDOW = 25

# Floor division keeps the window count an int on Python 3 as well; a float
# here makes range() raise TypeError.
windowCount = len(ExactDataGroup) // SAMPLES_PER_WINDOW

featureRows = []
for i in range(0, windowCount):
    Temp = ExactDataGroup[SAMPLES_PER_WINDOW*i:SAMPLES_PER_WINDOW*(i+1), :]
    featureRows.append(generateFeatures(Temp))

# Build the arrays once instead of re-stacking them for every window.
if featureRows:
    features = np.array(featureRows, dtype=float)
else:
    features = np.empty((0,15), float)
labels = np.array(ExactLabelGroup[0:windowCount*SAMPLES_PER_WINDOW:SAMPLES_PER_WINDOW])
    

In [28]:
"""Se permutan aleatoriamente los vectores de features y labels con proposito de entrenar y probar el clasificador"""
# Seed NumPy's global generator so the shuffle (and anything after it that
# draws from the same generator) gives the same result on every full re-run.
np.random.seed(42)
featuresShuffle,labelsShuffle = unison_shuffled_copies(features,labels)

In [36]:
# Hold out 30% of the windows for testing. A fixed random_state makes the
# split itself reproducible for a given input order.
TrainFeatures,TestFeatures,TrainLabels,TestLabels=train_test_split(
    featuresShuffle,
    labelsShuffle,
    test_size=0.3,
    random_state=42,
)

In [37]:
# Report the size of each split, one number per line, in the order:
# train features, test features, train labels, test labels.
for splitPart in (TrainFeatures, TestFeatures, TrainLabels, TestLabels):
    print(len(splitPart))

759
326
759
326


In [49]:
# Fit a random forest made of a single tree on the training split.
# random_state is fixed so the fitted tree is the same on every run.
clf = RandomForestClassifier(n_estimators=1, random_state=42)
clf = clf.fit(TrainFeatures, TrainLabels)

In [51]:
# Predict a label for every held-out window and print the resulting array.
predictedTestLabels = clf.predict(TestFeatures)
print(predictedTestLabels)

[ 2.  2.  1.  2.  1.  2.  2.  1.  2.  2.  2.  1.  2.  2.  1.  1.  2.  2.
  2.  2.  2.  1.  1.  1.  1.  1.  1.  1.  1.  2.  1.  2.  2.  1.  2.  1.
  2.  1.  1.  2.  2.  1.  2.  2.  2.  1.  2.  1.  1.  1.  1.  1.  2.  2.
  1.  1.  2.  2.  2.  1.  2.  2.  1.  2.  1.  2.  1.  2.  2.  2.  2.  2.
  2.  2.  1.  2.  1.  2.  1.  1.  2.  2.  2.  2.  2.  1.  1.  2.  1.  2.
  1.  2.  2.  1.  2.  2.  1.  2.  2.  2.  2.  2.  1.  1.  2.  1.  1.  1.
  2.  2.  1.  1.  1.  2.  2.  2.  2.  2.  2.  2.  1.  1.  1.  2.  1.  1.
  2.  1.  1.  1.  2.  2.  1.  2.  2.  2.  2.  1.  2.  1.  1.  2.  2.  1.
  2.  1.  1.  2.  2.  1.  2.  1.  1.  1.  2.  1.  2.  1.  1.  2.  1.  2.
  2.  1.  2.  2.  2.  1.  2.  1.  2.  1.  1.  1.  2.  2.  1.  1.  1.  2.
  1.  2.  1.  2.  2.  1.  1.  1.  2.  2.  2.  1.  1.  2.  2.  1.  1.  2.
  1.  1.  2.  1.  2.  2.  2.  2.  1.  2.  2.  2.  2.  2.  1.  2.  2.  1.
  2.  2.  1.  1.  2.  2.  1.  1.  1.  2.  2.  2.  2.  1.  2.  1.  2.  2.
  1.  2.  2.  1.  1.  2.  1.  2.  1.  1.  2.  2.  1