## REGRESIÓN LINEAL MÚLTIPLE - IRIS

In [1]:
import numpy as np
from sys import float_info
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, LabelBinarizer
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

### Carga de Datos, Escalado y muestra de etiquetas diferentes

In [2]:
iris = load_iris()
scaler = MinMaxScaler()

x = scaler.fit_transform(iris.data)#solo min max sobre x

np.unique(iris.target)

array([0, 1, 2])

In [3]:
np.min(x, axis=0), np.max(x, axis=0)

(array([0., 0., 0., 0.]), array([1., 1., 1., 1.]))

### Binarización de las Etiquetas

In [5]:
d = LabelBinarizer().fit(np.unique(iris.target)).transform(iris.target)#label fit unique transform
d = d.astype(float)


### Método de HOLD-OUT (2/3 - 1/3)

In [6]:
X_train, X_test, d_train, d_test = train_test_split(x, d, test_size=0.33, 
                                                    random_state=20, stratify=iris.target)

In [7]:
X_train.shape, X_test.shape, d_train.shape, d_test.shape

((100, 4), (50, 4), (100, 3), (50, 3))

### REGRESIÓN LINEAL DE RESPUESTA MÚLTIPLE

In [8]:
y_predict = float_info.max * np.ones(d_test.shape, dtype=float)#valor maximo float por tamano test
for i in range(d_test.shape[1]):
    regresion = LinearRegression().fit(X_train, d_train[:,i])#entreno en su posicion
    y_predict[:,i] = regresion.predict(X_test)#guardo preccion
y_predict_test = np.argmax(y_predict, axis=1)#vcojo el maximo

#array=maxfloat
#reg=train
#predic=test
#cojo maximo

### RESULTADOS RLM

In [9]:
#maximo de d maximo y predict
print("Tasa de aciertos =", accuracy_score(np.argmax(d_test, axis=1), np.argmax(y_predict, axis=1)))

print("\nMatriz de Confusión:\n", confusion_matrix(np.argmax(d_test, axis=1), np.argmax(y_predict, axis=1)))

Tasa de aciertos = 0.86

Matriz de Confusión:
 [[16  0  0]
 [ 0 10  7]
 [ 0  0 17]]


### K-FOLD

In [10]:
from sklearn.model_selection import StratifiedKFold
K = 10
kfold = StratifiedKFold(n_splits=K)

In [12]:
aciertos = np.zeros(K, dtype=float)
k = 0
print("ACIERTOS\n")
for train_index, test_index in kfold.split(x, iris.target):#recorro los separados: train,test
    
    y_test = np.zeros((len(test_index), 3), dtype=float)#matrriz con 0
    for i in range(3):
        regresion = LinearRegression().fit(x[train_index], d[train_index, i])#entreno con los 3
        y_test[:,i] = regresion.predict(x[test_index])#guardo preccion
    aciertos[k] = accuracy_score(iris.target[test_index], np.argmax(y_test, axis=1))# score con el maximo
    print(aciertos[k])
    k += 1
print("\n\nTasa de aciertos =", np.mean(aciertos))    



ACIERTOS

[  0   1   2   3   4  50  51  52  53  54 100 101 102 103 104]
0.7333333333333333
[  5   6   7   8   9  55  56  57  58  59 105 106 107 108 109]
0.8
[ 10  11  12  13  14  60  61  62  63  64 110 111 112 113 114]
0.8666666666666667
[ 15  16  17  18  19  65  66  67  68  69 115 116 117 118 119]
0.8
[ 20  21  22  23  24  70  71  72  73  74 120 121 122 123 124]
0.8666666666666667
[ 25  26  27  28  29  75  76  77  78  79 125 126 127 128 129]
0.7333333333333333
[ 30  31  32  33  34  80  81  82  83  84 130 131 132 133 134]
0.7333333333333333
[ 35  36  37  38  39  85  86  87  88  89 135 136 137 138 139]
0.8
[ 40  41  42  43  44  90  91  92  93  94 140 141 142 143 144]
0.9333333333333333
[ 45  46  47  48  49  95  96  97  98  99 145 146 147 148 149]
1.0


Tasa de aciertos = 0.8266666666666668


### REGRESIÓN LOGÍSTICA

In [28]:
regresion_logistica = LogisticRegression(solver='lbfgs', multi_class='auto').fit(x, iris.target)

In [29]:
y_test = regresion_logistica.predict(X_test)
print("Tasa de aciertos =", accuracy_score(np.argmax(d_test, axis=1), y_test))#maximo

Tasa de aciertos = 0.96


In [30]:
print("Matriz de Confusión:\n", confusion_matrix(np.argmax(d_test, axis=1), y_test))

Matriz de Confusión:
 [[16  0  0]
 [ 0 15  2]
 [ 0  0 17]]


### VALIDACIÓN CRUZADA

In [35]:
aciertos = np.zeros(K, dtype=float)#tamano vecinos
k = 0
print("ACIERTOS\n")
for train_index, test_index in kfold.split(x, iris.target):
    regresion_logistica = LogisticRegression(solver='lbfgs', multi_class='auto').fit(x[train_index], iris.target[train_index])
    y_test = regresion_logistica.predict(x[test_index])#prediccion
    aciertos[k] = accuracy_score(iris.target[test_index], y_test)#score
    print(aciertos[k])
    k += 1
print("\n\nTasa de aciertos =", np.mean(aciertos))

ACIERTOS

0.8
0.8666666666666667
1.0
0.9333333333333333
0.9333333333333333
0.9333333333333333
0.8666666666666667
0.8666666666666667
1.0
1.0


Tasa de aciertos = 0.9199999999999999
