# Trabajo integrador - Parte 1
## Python y Numpy

**Nombre**:

In [None]:
import numpy as np
import pandas as pd
from tabulate import tabulate


## Ejercicio 1

Dada una matriz en formato *numpy array*, donde cada fila de la matriz representa un vector matemático, se requiere computar las normas $l_0$, $l_1$, $l_2$, $l_{\infty}$, según las siguientes definiciones:

\begin{equation}
    ||\mathbf{x}||_{p} = \bigg(\sum_{j=1}^{n}{|x_j|^p}\bigg)^{\frac{1}{p}}
\end{equation}

con los casos especiales para $p=0$ y $p=\infty$ siendo:

\begin{equation}
    \begin{array}{rcl}
        ||\mathbf{x}||_0 & = & \bigg(\sum_{j=1 \wedge x_j \neq 0}^{n}{|x_j|^0}\bigg)\\
        ||\mathbf{x}||_{\infty} & = & \max_{i}{|x_i|}\\
    \end{array}
\end{equation}

In [None]:

def calc_norms(arr):
    """Compute the l0, l1, l2 and l-infinity norms of every row of a matrix.

    Each row of ``arr`` is treated as one vector, so every reduction runs
    along ``axis=1``.

    Args:
        arr: 2-D numpy array whose rows are the vectors.

    Returns:
        Tuple ``(l0, l1, l2, linf)`` of 1-D arrays with one entry per row:
        count of non-zero entries, sum of absolute values, Euclidean norm
        and largest absolute value.
    """
    # The absolute values feed both the l1 and the l-infinity norms.
    magnitudes = np.abs(arr)
    nonzero_mask = arr != 0

    nonzero_count = np.sum(nonzero_mask, axis=1)
    abs_total = magnitudes.sum(axis=1)
    euclidean = np.linalg.norm(arr, ord=2, axis=1)
    largest = magnitudes.max(axis=1)

    return nonzero_count, abs_total, euclidean, largest

# Worked example: three row vectors mixing zeros and negative entries.
arr_ejemplo = np.array([
    [0, 2, 0, -10],
    [2, -2, 5, -4],
    [1, -1, 1, 1],
])

l0, l1, l2, linf = calc_norms(arr_ejemplo)

# Report every norm, one line per norm type.
for etiqueta, norma in (
    ("Norma l0:", l0),
    ("Norma l1:", l1),
    ("Norma l2:", l2),
    ("Norma linf:", linf),
):
    print(etiqueta, norma)

Norma l0: [2 4 4]
Norma l1: [12 13  4]
Norma l2: [10.19803903  7.          2.        ]
Norma linf: [10  5  1]


## Ejercicio 2

En clasificación contamos con dos arreglos, la “verdad” y la “predicción”. Cada elemento de los arreglos puede tomar dos valores, “True” (representado por 1) y “False” (representado por 0). Entonces podemos definir 4 variables:

* True Positive (TP): El valor verdadero es 1 y el valor predicho es 1
* True Negative (TN): El valor verdadero es 0 y el valor predicho es 0
* False Positive (FP): El valor verdadero es 0 y el valor predicho es 1
* False Negative (FN): El valor verdadero es 1 y el valor predicho es 0

A partir de esto definimos:

* Precision = TP / (TP + FP)
* Recall = TP / (TP + FN)
* Accuracy = (TP + TN) / (TP + TN + FP + FN)

Calcular las 3 métricas con Numpy y operaciones vectorizadas.

In [None]:
# Example data: ground-truth labels and predicted labels (1 = True, 0 = False).
truth = np.array([1,1,0,1,1,1,0,0,0,1])
prediction = np.array([1,1,1,1,0,0,1,1,0,0])

def calc_pra(true, pred):
    """Compute precision, recall and accuracy for binary labels.

    Labels are encoded as 1 (positive) and 0 (negative). Elements holding any
    other value are not counted in any of the four confusion-matrix cells.

    Args:
        true: Array-like with the ground-truth labels.
        pred: Array-like with the predicted labels, same shape as ``true``.

    Returns:
        Tuple ``(precision, recall, accuracy)``. A metric whose denominator
        is zero (e.g. no predicted positives, or empty input) is reported
        as 0.0 instead of dividing by zero.

    Raises:
        ValueError: If ``true`` and ``pred`` do not have the same shape.
    """
    # Convert up front: comparing a plain list against 1 yields a single
    # bool rather than an element-wise mask, which would zero every count.
    true = np.asarray(true)
    pred = np.asarray(pred)
    if true.shape != pred.shape:
        raise ValueError(
            f"true y pred deben tener la misma forma: {true.shape} != {pred.shape}"
        )

    # Confusion-matrix cells, counted with vectorized boolean masks.
    TP = np.sum((true == 1) & (pred == 1))
    TN = np.sum((true == 0) & (pred == 0))
    FP = np.sum((true == 0) & (pred == 1))
    FN = np.sum((true == 1) & (pred == 0))
    total = TP + TN + FP + FN

    # Every ratio is guarded the same way so no input triggers a 0/0.
    precision = TP / (TP + FP) if (TP + FP) != 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0.0
    accuracy = (TP + TN) / total if total != 0 else 0.0

    return precision, recall, accuracy


precision, recall, accuracy = calc_pra(truth, prediction)

# Assemble the full report and emit it with a single print call.
print("\n".join([
    f"El total de datos del array truth es: {truth.size}",
    f"El total de datos del array prediction es: {prediction.size}",
    f"La metrica Precision es: {precision:.2f}",
    f"La metrica Recall es: {recall:.2f}",
    f"La metrica Accuracy es: {accuracy:.2f}",
]))


El total de datos del array truth es: 10
El total de datos del array prediction es: 10
La metrica Precision es: 0.50
La metrica Recall es: 0.50
La metrica Accuracy es: 0.40


## Ejercicio 3

Crear una función que separe los datos en train-validation-test. Debe recibir como parámetros:

- X: Array o Dataframe que contiene los datos de entrada del sistema.
- y: Array o Dataframe que contiene la(s) variable(s) target del problema.
- train_percentage: _float_ el porcentaje de training.
- test_percentage: _float_ el porcentaje de testing.
- val_percentage: _float_ el porcentaje de validación.
- shuffle: _bool_ determina si el split debe hacerse de manera random o no.

Hints:

* Usar Indexing y slicing
* Usar np.random.[...]

In [None]:
def split(X_input,
          Y_input,
          train_size=0.7,
          val_size=0.15,
          test_size=0.15,
          random_state=42,
          shuffle=True):
    """Split inputs and targets into train, validation and test partitions.

    Args:
        X_input: Input data as a pandas object or a numpy array. A numpy
            array is transposed when converted to a DataFrame, i.e. it is
            read as (n_features, n_samples).
            NOTE(review): this is the opposite of the usual
            (n_samples, n_features) layout; kept for backward compatibility.
        Y_input: Target data as a pandas object or a numpy array with one
            entry (row) per sample.
        train_size: Fraction of samples assigned to the training set.
        val_size: Fraction of samples assigned to the validation set.
        test_size: Fraction of samples assigned to the test set.
        random_state: Seed for the permutation used when ``shuffle`` is True.
        shuffle: If True the samples are randomly permuted before slicing;
            otherwise the original order is kept.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)`` of pandas
        objects. Train and validation sizes are truncated to whole samples;
        the test set receives every remaining sample.

    Raises:
        ValueError: If a fraction is negative, the fractions do not add up
            to 1, or X and Y hold a different number of samples.
    """
    sizes = (train_size, val_size, test_size)
    if min(sizes) < 0:
        raise ValueError("Los % de splits no pueden ser negativos")
    # Tolerant comparison: exact float equality rejects valid splits such as
    # 0.6 + 0.3 + 0.1, which does not sum to exactly 1.0 in binary floats.
    if not np.isclose(sum(sizes), 1.0):
        raise ValueError("Sumatoria de % splits debe ser = 1")

    # Normalize numpy inputs to DataFrames first so the sample count below is
    # taken from the same object that gets indexed.
    if isinstance(X_input, np.ndarray):
        X_input = pd.DataFrame(X_input).T
    if isinstance(Y_input, np.ndarray):
        Y_input = pd.DataFrame(Y_input)

    # Use the argument itself, never a same-named global, to size the split.
    n_samples = len(X_input)
    if len(Y_input) != n_samples:
        raise ValueError(
            f"X e Y deben tener la misma cantidad de muestras: "
            f"{n_samples} != {len(Y_input)}"
        )

    if shuffle:
        indx = np.random.RandomState(seed=random_state).permutation(n_samples)
    else:
        indx = np.arange(n_samples)

    # Cumulative cut points into the (possibly shuffled) index array.
    train_end = int(train_size * n_samples)
    val_end = train_end + int(val_size * n_samples)

    train_indx = indx[:train_end]
    val_indx = indx[train_end:val_end]
    test_indx = indx[val_end:]

    X_train = X_input.iloc[train_indx]
    y_train = Y_input.iloc[train_indx]

    X_val = X_input.iloc[val_indx]
    y_val = Y_input.iloc[val_indx]

    X_test = X_input.iloc[test_indx]
    y_test = Y_input.iloc[test_indx]

    return X_train, y_train, X_val, y_val, X_test, y_test


In [None]:
# Usage example:
#   X, y   are DataFrames
#   X2, y2 are arrays
X = pd.DataFrame({
    'feature1': list(range(20)),
    'feature2': list(range(20, 40)),
})
X2 = np.array([list(range(20)), list(range(20, 40))])

# Class labels alternate with the row position: even = 0 / odd = 1.
y = pd.DataFrame({'class': [pos % 2 for pos in range(20)]})
y2 = np.array([pos % 2 for pos in range(20)])

X_train, y_train, X_val, y_val, X_test, y_test = split(X, y2)


In [None]:
# Print one table per partition. Each title is paired with its own X/y
# partition so the validation and testing tables do not repeat the training
# rows.
for title, X_part, y_part in (
    ("Training set:", X_train, y_train),
    ("\nValidation set:", X_val, y_val),
    ("\nTesting set:", X_test, y_test),
):
    print(title)
    # Features and targets share the same row index, so a column-wise concat
    # lines them up; the explicit headers replace the frames' column names.
    print(tabulate(pd.concat([X_part, y_part], axis=1), headers=("indx","feature1","feature2","class"), tablefmt='pretty',showindex=True))

Training set:
+------+----------+----------+-------+
| indx | feature1 | feature2 | class |
+------+----------+----------+-------+
|  0   |    0     |    20    |   0   |
|  17  |    17    |    37    |   1   |
|  15  |    15    |    35    |   1   |
|  1   |    1     |    21    |   1   |
|  8   |    8     |    28    |   0   |
|  5   |    5     |    25    |   1   |
|  11  |    11    |    31    |   1   |
|  3   |    3     |    23    |   1   |
|  18  |    18    |    38    |   0   |
|  16  |    16    |    36    |   0   |
|  13  |    13    |    33    |   1   |
|  2   |    2     |    22    |   0   |
|  9   |    9     |    29    |   1   |
|  19  |    19    |    39    |   1   |
+------+----------+----------+-------+

Validation set:
+------+----------+----------+-------+
| indx | feature1 | feature2 | class |
+------+----------+----------+-------+
|  0   |    0     |    20    |   0   |
|  17  |    17    |    37    |   1   |
|  15  |    15    |    35    |   1   |
|  1   |    1     |    21    |   