# Librerías

In [1]:
#Importamos las librerías estándar
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from google.colab import drive

In [2]:
#Importamos las librerías de Keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam, SGD

# Conexión al repositorio de datos

In [3]:
# Mount Google Drive so its files are reachable under /content/drive.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


# Lectura de datos

In [4]:
# Read the weight/height CSV from the Drive path into a dataframe and preview it.
# Note the "Gender" column, which holds categories such as "Male" and "Female".
csv_path = '/content/drive/MyDrive/Data/weight-height.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


# Conversión de datos categóricos a datos numéricos

In [None]:
#Las redes neuronales solo entienden de números, no de categorías
#Hay que transformar las categorías a números

In [5]:
# List the distinct values of the "Gender" column across the whole dataset
# (there are two categories).
gender_column = df['Gender']
gender_column.unique()

array(['Male', 'Female'], dtype=object)

In [6]:
# One-hot encode "Gender": get_dummies creates one indicator column per category,
# named "<prefix>_<category>", and removes the original categorical column.
# dtype=int is requested explicitly because pandas >= 2.0 produces boolean
# indicators by default, whereas the goal here is numeric 0/1 columns.
df2 = pd.get_dummies(df, columns=['Gender'], prefix='Gender', dtype=int)
df2

Unnamed: 0,Height,Weight,Gender_Female,Gender_Male
0,73.847017,241.893563,0,1
1,68.781904,162.310473,0,1
2,74.110105,212.740856,0,1
3,71.730978,220.042470,0,1
4,69.881796,206.349801,0,1
...,...,...,...,...
9995,66.172652,136.777454,1,0
9996,67.067155,170.867906,1,0
9997,63.867992,128.475319,1,0
9998,69.034243,163.852461,1,0


# Escalamiento de los datos

In [7]:
#Las redes neuronales funcionan mejor con rangos de valores entre 0 y 1
#Vamos a convertir los datos a un equivalente de datos entre 0 y 1

In [8]:
#Importamos el escalador
from sklearn.preprocessing import MinMaxScaler

In [9]:
# Create the scaler; the (0, 1) output range is spelled out explicitly
# (it is also MinMaxScaler's default).
mms = MinMaxScaler(feature_range=(0, 1))

In [10]:
# Scale "Weight" and store the result in df2 as a new "Weight_mms" column.
# A dedicated scaler is used because the shared `mms` instance is refit on
# "Height" in a later cell; that refit would discard the min/max learned for
# "Weight" and make it impossible to map scaled weights back to original units.
weight_mms = MinMaxScaler()
df2['Weight_mms'] = weight_mms.fit_transform(df[['Weight']])
df2

Unnamed: 0,Height,Weight,Gender_Female,Gender_Male,Weight_mms
0,73.847017,241.893563,0,1,0.863139
1,68.781904,162.310473,0,1,0.475476
2,74.110105,212.740856,0,1,0.721131
3,71.730978,220.042470,0,1,0.756699
4,69.881796,206.349801,0,1,0.689999
...,...,...,...,...,...
9995,66.172652,136.777454,1,0,0.351101
9996,67.067155,170.867906,1,0,0.517161
9997,63.867992,128.475319,1,0,0.310660
9998,69.034243,163.852461,1,0,0.482988


In [11]:
# Scale "Height" with the `mms` scaler and add the result to df2
# as a new "Height_mms" column.
height_scaled = mms.fit_transform(df[['Height']])
df2['Height_mms'] = height_scaled
df2

Unnamed: 0,Height,Weight,Gender_Female,Gender_Male,Weight_mms,Height_mms
0,73.847017,241.893563,0,1,0.863139,0.791728
1,68.781904,162.310473,0,1,0.475476,0.586958
2,74.110105,212.740856,0,1,0.721131,0.802364
3,71.730978,220.042470,0,1,0.756699,0.706182
4,69.881796,206.349801,0,1,0.689999,0.631424
...,...,...,...,...,...,...
9995,66.172652,136.777454,1,0,0.351101,0.481473
9996,67.067155,170.867906,1,0,0.517161,0.517635
9997,63.867992,128.475319,1,0,0.310660,0.388301
9998,69.034243,163.852461,1,0,0.482988,0.597160


In [12]:
# Finally, drop the unscaled columns now that their scaled versions exist.
# errors='ignore' keeps this cell re-runnable: a plain `del df2[...]` raises
# KeyError if the cell is executed a second time.
df2 = df2.drop(columns=['Height', 'Weight'], errors='ignore')

In [13]:
# Display the final dataframe
df2

Unnamed: 0,Gender_Female,Gender_Male,Weight_mms,Height_mms
0,0,1,0.863139,0.791728
1,0,1,0.475476,0.586958
2,0,1,0.721131,0.802364
3,0,1,0.756699,0.706182
4,0,1,0.689999,0.631424
...,...,...,...,...
9995,1,0,0.351101,0.481473
9996,1,0,0.517161,0.517635
9997,1,0,0.310660,0.388301
9998,1,0,0.482988,0.597160
