# 2. Linear Discriminant Analysis

## 2.1. Importar bibliotecas estándar

In [2]:
import os
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt

from sklearn.utils   import shuffle
from sklearn.metrics import confusion_matrix as cmx

#Clasificador LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

## Ejemplo 1. Iris

In [3]:
data = pd.read_csv( '../data/iris.csv' )
data.shape

(150, 5)

In [4]:
data = shuffle(data)
data

Unnamed: 0,sepal_l,sepal_w,petal_l,petal_w,type
70,5.9,3.2,4.8,1.8,Iris-versicolor
28,5.2,3.4,1.4,0.2,Iris-setosa
117,7.7,3.8,6.7,2.2,Iris-virginica
24,4.8,3.4,1.9,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa
125,7.2,3.2,6.0,1.8,Iris-virginica
43,5.0,3.5,1.6,0.6,Iris-setosa
21,5.1,3.7,1.5,0.4,Iris-setosa
148,6.2,3.4,5.4,2.3,Iris-virginica
109,7.2,3.6,6.1,2.5,Iris-virginica


In [5]:
X_C1 = data.loc[:, 'type'] == 'Iris-setosa'
X_C2 = data.loc[:, 'type'] == 'Iris-virginica'

In [6]:
X_C1

70     False
28      True
117    False
24      True
9       True
125    False
43      True
21      True
148    False
109    False
59     False
49      True
44      True
2       True
136    False
98     False
7       True
37      True
145    False
79     False
69     False
40      True
72     False
127    False
133    False
132    False
12      True
92     False
120    False
149    False
       ...  
71     False
19      True
55     False
33      True
0       True
41      True
73     False
126    False
130    False
99     False
85     False
108    False
58     False
140    False
115    False
32      True
22      True
75     False
124    False
101    False
106    False
39      True
64     False
5       True
53     False
66     False
78     False
113    False
38      True
4       True
Name: type, Length: 150, dtype: bool

In [7]:
N        = 10
FV       = ['sepal_l','sepal_w','petal_l','petal_w']
X_Train  = pd.concat( [ data.loc[ X_C1, FV][:-N],  data.loc[ X_C2, FV][:-N] ]   )
X_Test   = pd.concat( [ data.loc[ X_C1, FV][ -N:], data.loc[ X_C2, FV][ -N:] ]  )

Y_Train = -np.ones( X_C1.sum()-N )
Y_Train =  np.concatenate( (Y_Train,np.ones( X_C2.sum()-N ) )  )

Y_Test = -np.ones( N )
Y_Test =  np.concatenate( (Y_Test,np.ones( N ) )  )

In [8]:
X_Train = X_Train.values
ext     = np.ones( (X_Train.shape[0],1) )
X_Train = np.concatenate( (X_Train,ext),axis=1 )

X_Test = X_Test.values
ext    = np.ones( (X_Test.shape[0],1) )
X_Test = np.concatenate( (X_Test,ext),axis=1 )

### Crear etiquetas

$$\textbf{W}=(\textbf{X}^T\textbf{X})^{-1}\textbf{X}^T\textbf{y}$$
$$\textbf{W}=(\textbf{X}^T\textbf{X}+\lambda\textbf{I})^{-1}\textbf{X}^T\textbf{y}$$

In [9]:
inv = np.dot( X_Train.T, X_Train )
inv = np.linalg.inv( inv )
W   = np.dot( inv,X_Train.T )
W   = np.dot( W,Y_Train )
W

array([-0.10858015, -0.19014452,  0.32929616,  0.39570001, -0.37648454])

In [10]:
L_es = np.dot( X_Test,W )
L_es = -2*(L_es<0)+1

### Implementación en sistema embebido

In [111]:
X_Test

array([[4.6, 3.6, 1. , 0.2, 1. ],
       [4.8, 3.1, 1.6, 0.2, 1. ],
       [5. , 3. , 1.6, 0.2, 1. ],
       [4.9, 3.1, 1.5, 0.1, 1. ],
       [4.8, 3. , 1.4, 0.3, 1. ],
       [5.8, 4. , 1.2, 0.2, 1. ],
       [5.4, 3.4, 1.5, 0.4, 1. ],
       [4.4, 3.2, 1.3, 0.2, 1. ],
       [5.2, 3.5, 1.5, 0.2, 1. ],
       [5. , 3.5, 1.3, 0.3, 1. ],
       [6.3, 2.5, 5. , 1.9, 1. ],
       [7.7, 2.6, 6.9, 2.3, 1. ],
       [7.2, 3.6, 6.1, 2.5, 1. ],
       [7.7, 2.8, 6.7, 2. , 1. ],
       [6.7, 3. , 5.2, 2.3, 1. ],
       [6. , 3. , 4.8, 1.8, 1. ],
       [6.2, 3.4, 5.4, 2.3, 1. ],
       [5.7, 2.5, 5. , 2. , 1. ],
       [6.5, 3. , 5.5, 1.8, 1. ],
       [6.7, 3.1, 5.6, 2.4, 1. ]])

In [114]:
W

array([-0.1181069 , -0.1672677 ,  0.3704336 ,  0.32675174, -0.4465346 ])

In [116]:
X_Test = np.array([[4.6, 3.6, 1. , 0.2, 1. ],
       [4.8, 3.1, 1.6, 0.2, 1. ],
       [5. , 3. , 1.6, 0.2, 1. ],
       [4.9, 3.1, 1.5, 0.1, 1. ],
       [4.8, 3. , 1.4, 0.3, 1. ],
       [5.8, 4. , 1.2, 0.2, 1. ],
       [5.4, 3.4, 1.5, 0.4, 1. ],
       [4.4, 3.2, 1.3, 0.2, 1. ],
       [5.2, 3.5, 1.5, 0.2, 1. ],
       [5. , 3.5, 1.3, 0.3, 1. ],
       [6.3, 2.5, 5. , 1.9, 1. ],
       [7.7, 2.6, 6.9, 2.3, 1. ],
       [7.2, 3.6, 6.1, 2.5, 1. ],
       [7.7, 2.8, 6.7, 2. , 1. ],
       [6.7, 3. , 5.2, 2.3, 1. ],
       [6. , 3. , 4.8, 1.8, 1. ],
       [6.2, 3.4, 5.4, 2.3, 1. ],
       [5.7, 2.5, 5. , 2. , 1. ],
       [6.5, 3. , 5.5, 1.8, 1. ],
       [6.7, 3.1, 5.6, 2.4, 1. ]])

W = np.array([-0.1181069 , -0.1672677 ,  0.3704336 ,  0.32675174, -0.4465346 ])

In [130]:
for x_test in X_Test:
    l = np.dot( x_test,W )
    print '{0:2.2f},\t{1}'.format( l,  -2*(l<0)+1 )

-1.16,	-1
-0.87,	-1
-0.88,	-1
-0.96,	-1
-0.90,	-1
-1.29,	-1
-0.97,	-1
-0.95,	-1
-1.03,	-1
-1.04,	-1
0.86,	1
1.52,	1
1.18,	1
1.31,	1
0.94,	1
0.71,	1
1.00,	1
0.97,	1
0.91,	1
1.10,	1


In [15]:
model = LDA( shrinkage='auto', solver='eigen')
model.fit( X_Train[:,:-1],Y_Train )

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage='auto',
              solver='eigen', store_covariance=False, tol=0.0001)

In [16]:
L_es = model.predict( X_Test[:,:-1] )
L_es

array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [137]:
cmx( Y_Test,L_es )

array([[10,  0],
       [ 0, 10]])

In [22]:
print( 'W={},b={}'.format(model.coef_,model.intercept_)  )

W=[[-0.38246903 -1.24827654  1.93451445  2.6868256 ]],b=[-3.69436187]


### Entrenar

In [138]:
from sklearn.externals import joblib

In [140]:
joblib.dump(model, 'model_lda.pkl') 

['model_lda.pkl']

In [144]:
X_Test[:,:-1]

array([[4.6, 3.6, 1. , 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5. , 3. , 1.6, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [4.8, 3. , 1.4, 0.3],
       [5.8, 4. , 1.2, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [4.4, 3.2, 1.3, 0.2],
       [5.2, 3.5, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [6.3, 2.5, 5. , 1.9],
       [7.7, 2.6, 6.9, 2.3],
       [7.2, 3.6, 6.1, 2.5],
       [7.7, 2.8, 6.7, 2. ],
       [6.7, 3. , 5.2, 2.3],
       [6. , 3. , 4.8, 1.8],
       [6.2, 3.4, 5.4, 2.3],
       [5.7, 2.5, 5. , 2. ],
       [6.5, 3. , 5.5, 1.8],
       [6.7, 3.1, 5.6, 2.4]])

In [141]:
from sklearn.externals import joblib

In [143]:
model = joblib.load('model_lda.pkl')

In [146]:
X_Test = np.array([[4.6, 3.6, 1. , 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5. , 3. , 1.6, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [4.8, 3. , 1.4, 0.3],
       [5.8, 4. , 1.2, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [4.4, 3.2, 1.3, 0.2],
       [5.2, 3.5, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [6.3, 2.5, 5. , 1.9],
       [7.7, 2.6, 6.9, 2.3],
       [7.2, 3.6, 6.1, 2.5],
       [7.7, 2.8, 6.7, 2. ],
       [6.7, 3. , 5.2, 2.3],
       [6. , 3. , 4.8, 1.8],
       [6.2, 3.4, 5.4, 2.3],
       [5.7, 2.5, 5. , 2. ],
       [6.5, 3. , 5.5, 1.8],
       [6.7, 3.1, 5.6, 2.4]])

### Parametros adicionales

In [158]:
for x_test,y_test in zip( X_Test,Y_Test ):
    l = model.predict( x_test[np.newaxis] )
    print '{0:2.2f},\t{1:2.2f}'.format( y_test,l[0] )

-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
-1.00,	-1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00
1.00,	1.00


## 2.2. Cargar archivo [Human Activity Recognition with Smartphones (Simplified)](https://www.kaggle.com/mboaglio/simplifiedhuarus)

In [None]:
data = pd.read_csv( '../data/wearable-train.csv' )

## 2.3. Despliegue de los datos

In [None]:
data

## Análisis

In [None]:
data.describe()

### Seleccionando aceleración media del cuerpo en los tres ejes

In [161]:
FV = ['tBodyAcc.mean.X', 'tBodyAcc.mean.Y', 'tBodyAcc.mean.Z', 'tGravityAcc.mean.X', 'tGravityAcc.mean.Y', 'tGravityAcc.mean.Z', 'tBodyGyro.mean.X', 'tBodyGyro.mean.Y', 'tBodyGyro.mean.Z','fBodyAcc.mean.X','fBodyAcc.mean.Y','fBodyAcc.mean.Z','angle.X.gravityMean','angle.Y.gravityMean','angle.Z.gravityMean']

## Clasificar entre caminar y estar acostado