## REGRESIÓN LINEAL MÚLTIPLE - IRIS

In [1]:
import numpy as np
from sys import float_info
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, LabelBinarizer
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

### Carga de Datos, Escalado y muestra de etiquetas diferentes

In [2]:
iris = load_iris()
scaler = MinMaxScaler()

x = scaler.fit_transform(iris.data)#solo min max sobre x

np.unique(iris.target)

array([0, 1, 2])

In [3]:
np.min(x, axis=0), np.max(x, axis=0)

(array([0., 0., 0., 0.]), array([1., 1., 1., 1.]))

### Binarización de las Etiquetas

In [4]:
d = LabelBinarizer().fit(np.unique(iris.target)).transform(iris.target)#label fit unique transform
d = d.astype(float)
d
#[100
# 100
# 010
# 001]


array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0

### Método de HOLD-OUT (2/3 - 1/3)

In [5]:
X_train, X_test, d_train, d_test = train_test_split(x, d, test_size=0.33, 
                                                    random_state=20, stratify=iris.target)

In [6]:
X_train.shape, X_test.shape, d_train.shape, d_test.shape

((100, 4), (50, 4), (100, 3), (50, 3))

### REGRESIÓN LINEAL DE RESPUESTA MÚLTIPLE

In [7]:
y_predict = float_info.max * np.ones(d_test.shape, dtype=float)#valor maximo float por tamano test
for i in range(d_test.shape[1]):#columnas
    regresion = LinearRegression().fit(X_train, d_train[:,i])#entreno en su posicion
    y_predict[:,i] = regresion.predict(X_test)#guardo preccion
y_predict_test = np.argmax(y_predict, axis=1)#vcojo el maximo

#predice cada casilla
print(y_predict)

#array=maxfloat
#reg=train
#predic=test
#cojo maximo

[[-0.03155233  0.6000388   0.43151353]
 [ 1.0198519   0.00285948 -0.02271137]
 [ 0.05761055  0.01517946  0.92721   ]
 [ 0.13478549  0.51872403  0.34649048]
 [-0.02258336  0.61902441  0.40355895]
 [ 0.17474356  0.53504841  0.29020803]
 [ 0.25532725  0.28349064  0.46118211]
 [ 0.21416817  0.29514663  0.49068519]
 [ 1.05382423  0.04466651 -0.09849073]
 [-0.23420458  0.43563543  0.79856915]
 [ 0.10272483  0.47399358  0.42328159]
 [ 0.92372418  0.19754412 -0.1212683 ]
 [ 0.88817876  0.23874306 -0.12692182]
 [ 0.9592696   0.15634517 -0.11561477]
 [-0.00302358  0.39147816  0.61154542]
 [-0.07925295  0.43869911  0.64055384]
 [ 0.08062807  0.32148512  0.5978868 ]
 [ 0.0736114   0.15412343  0.77226516]
 [ 0.14665394  0.4183671   0.43497896]
 [ 0.85153537  0.357314   -0.20884937]
 [ 0.83084163  0.34262428 -0.17346591]
 [ 1.10161019 -0.15191498  0.05030479]
 [ 0.16327361  0.45561605  0.38111034]
 [ 0.05514959  0.30429769  0.64055272]
 [ 0.90720755  0.20968679 -0.11689434]
 [-0.11500038  0.31409895

### RESULTADOS RLM

In [8]:
#maximo de d maximo y predict
print("Tasa de aciertos =", accuracy_score(np.argmax(d_test, axis=1), np.argmax(y_predict, axis=1)))

print("\nMatriz de Confusión:\n", confusion_matrix(np.argmax(d_test, axis=1), np.argmax(y_predict, axis=1)))

Tasa de aciertos = 0.86

Matriz de Confusión:
 [[16  0  0]
 [ 0 10  7]
 [ 0  0 17]]


### K-FOLD

In [9]:
from sklearn.model_selection import StratifiedKFold
K = 10
kfold = StratifiedKFold(n_splits=K)

In [10]:
#FYFLPA
aciertos = np.zeros(K, dtype=float)
k = 0
print("ACIERTOS\n")
for train_index, test_index in kfold.split(x, iris.target):#recorro los separados: train,test
    
    y_test = np.zeros((len(test_index), 3), dtype=float)#matrriz con 0
    for i in range(3):
        regresion = LinearRegression().fit(x[train_index], d[train_index, i])#entreno con los 3. solo la comulna
        y_test[:,i] = regresion.predict(x[test_index])#guardo preccion
    aciertos[k] = accuracy_score(iris.target[test_index], np.argmax(y_test, axis=1))# score con el maximo
    print(aciertos[k])
    k += 1
print("\n\nTasa de aciertos =", np.mean(aciertos))    



ACIERTOS

0.7333333333333333
0.8
0.8666666666666667
0.8
0.8666666666666667
0.7333333333333333
0.7333333333333333
0.8
0.9333333333333333
1.0


Tasa de aciertos = 0.8266666666666668


### REGRESIÓN LOGÍSTICA

In [28]:
regresion_logistica = LogisticRegression(solver='lbfgs', multi_class='auto').fit(x, iris.target)

In [29]:
y_test = regresion_logistica.predict(X_test)
print("Tasa de aciertos =", accuracy_score(np.argmax(d_test, axis=1), y_test))#maximo

Tasa de aciertos = 0.96


In [30]:
print("Matriz de Confusión:\n", confusion_matrix(np.argmax(d_test, axis=1), y_test))

Matriz de Confusión:
 [[16  0  0]
 [ 0 15  2]
 [ 0  0 17]]


### VALIDACIÓN CRUZADA

In [35]:
#FLPA
aciertos = np.zeros(K, dtype=float)#tamano vecinos
k = 0
print("ACIERTOS\n")
for train_index, test_index in kfold.split(x, iris.target):
    regresion_logistica = LogisticRegression(solver='lbfgs', multi_class='auto').fit(x[train_index], iris.target[train_index])
    y_test = regresion_logistica.predict(x[test_index])#prediccion
    aciertos[k] = accuracy_score(iris.target[test_index], y_test)#score
    print(aciertos[k])
    k += 1
print("\n\nTasa de aciertos =", np.mean(aciertos))

ACIERTOS

0.8
0.8666666666666667
1.0
0.9333333333333333
0.9333333333333333
0.9333333333333333
0.8666666666666667
0.8666666666666667
1.0
1.0


Tasa de aciertos = 0.9199999999999999
