prepare input data for deep learning 

perform the following steps for preparing data 

1. load data into pandas dataframe
2. convert the dataframe to a NumPy array
3. scale the feature dataset
4. use one-hot encoding for the target variable
5. split into training and test datasets


In [None]:

import pandas as pd 
import os
import tensorflow as tf
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing

# Read the iris dataset from "iris.csv" (path is relative to the current
# working directory) and print its first rows as a quick sanity check.
iris_data = pd.read_csv("iris.csv")

print("\nLoaded Data :\n---", iris_data.head(), sep="\n")

# The target column ('Species') is stored as strings; replace each distinct
# value with an integer code. The fitted encoder stays available as
# `label_encoder`, e.g. to map codes back with inverse_transform().
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(iris_data['Species'])
iris_data['Species'] = label_encoder.transform(iris_data['Species'])

# Turn the whole dataframe (features + encoded target) into one NumPy array
np_iris = iris_data.to_numpy()

# Columns 0-3 are taken as the features, column 4 as the target
x_data = np_iris[:, :4]
y_data = np_iris[:, 4]

print("\nFeatures before scaling :\n---", x_data[:5], sep="\n")
print("\nTarget before scaling :\n---", y_data[:5], sep="\n")

# Standardize the feature columns. The fitted scaler stays available as
# `scaler` so the same transformation can be applied to other inputs.
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split further down, so test-set statistics influence the scaling. Consider
# fitting on the training split only.
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)

# One-hot encode the integer class codes into a 3-column array
y_data = keras.utils.to_categorical(y_data, num_classes=3)

print("\nFeatures after scaling :\n---", x_data[:5], sep="\n")
print("\nTarget after one-hot-encoding :\n---", y_data[:5], sep="\n")

# Hold out 10% of the samples as the test set.
# - random_state pins the shuffle, so the split (and anything trained on it)
#   is reproducible from run to run.
# - stratify keeps the class proportions the same in both splits, which
#   matters for such a small hold-out. y_data is one-hot encoded at this
#   point, so argmax recovers the class index of each row.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.10,
    random_state=42,
    stratify=y_data.argmax(axis=1),
)

print("\nTrain Test Dimensions:\n---")
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)