In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


### Import the data file into a pandas dataframe for some investigation

In [33]:
# Read the raw dataset from the CSV file into a dataframe.
df = pd.read_csv(filepath_or_buffer='cover_data.csv')
# For a quick look at the contents, inspect df.head(), df.info() or df.columns.

### Isolate the features columns and the label column

In [34]:
# Features: the first 54 columns of the dataframe.
x = df.iloc[:, 0:54]

# Labels: the final column of the dataframe.
y = df.iloc[:, -1]

# Tally the labels with Counter() and report how many distinct classes appear.
label_counts = Counter(y)
print(len(label_counts))

7


### Splitting data into training and testing, followed by scaling
- Split the data with train_test_split into training, validation, and testing sets
- Fit the StandardScaler to the training data only. This captures the mean and std dev of the training set
- Transform the training, validation, and testing data using the fitted StandardScaler

### Afterwards, the features data will be ready for ML

In [35]:
# Hold out 30% of the rows, then split that holdout in half to get the
# validation and test sets. random_state pins both splits for reproducibility.
x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.3, random_state=7)
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=7)

# Fit on the training split only so that no validation/test statistics leak
# into the scaling parameters.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

# transform() returns a new array and leaves its argument untouched, so the
# result has to be assigned back; otherwise the model would see unscaled data.
x_test = scaler.transform(x_test)
x_val = scaler.transform(x_val)

# Display the scaled validation features as a quick sanity check.
x_val

array([[ 0.05533431,  0.24487548, -1.35061829, ..., -0.16601016,
        -0.15589661, -0.12347927],
       [ 0.05891091, -1.02334781, -0.54827161, ..., -0.16601016,
        -0.15589661, -0.12347927],
       [ 0.23774059,  1.47737416, -1.08316939, ..., -0.16601016,
        -0.15589661, -0.12347927],
       ...,
       [-1.56843921,  0.13770168,  0.38779951, ..., -0.16601016,
        -0.15589661, -0.12347927],
       [ 0.64547227, -0.83579366,  0.52152396, ..., -0.16601016,
        -0.15589661, -0.12347927],
       [ 0.01956838,  1.18264621, -0.01337383, ..., -0.16601016,
        -0.15589661, -0.12347927]])

### Prepare the labels (y). This column has 7 classes, so use LabelEncoder to map them to integers

In [36]:
# Encode the class labels as consecutive integers. The encoder is fitted on the
# training labels only and then reused unchanged for the other two splits.
le = LabelEncoder()
y_train = le.fit_transform(y_train.astype(str))
y_val = le.transform(y_val.astype(str))
y_test = le.transform(y_test.astype(str))

# Show which original label corresponds to which integer code.
int_mapping = dict(zip(le.classes_, range(len(le.classes_))))
print(int_mapping)

{'1': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6}


### Use Keras' to_categorical() to convert the integer labels above into one-hot encoding

In [40]:
# One-hot encode the integer labels for use with categorical cross-entropy.
# num_classes is pinned to the number of classes the LabelEncoder was fitted on:
# left unset, to_categorical() infers the width from the largest label in each
# array separately, so a split that happens to lack the highest class would get
# fewer columns than the others.
num_classes = len(le.classes_)
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes, dtype='int64')
y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes, dtype='int64')
y_val = tf.keras.utils.to_categorical(y_val, num_classes=num_classes, dtype='int64')

### Create the Sequential model

In [41]:
from tensorflow.keras.layers import InputLayer, Dense

# Small feed-forward classifier: one input per feature column, a single hidden
# layer of 8 ReLU units, and a 7-way softmax output (one unit per class).
model = keras.Sequential([
    InputLayer(input_shape=(x_train.shape[1],)),
    Dense(8, activation='relu'),
    Dense(7, activation='softmax'),
])


### Setting up the optimizer and compiling the model

In [42]:
from tensorflow.keras.optimizers import Adam

# Adam optimizer with a learning rate of 0.001.
optimizer = Adam(learning_rate=0.001)

# Categorical cross-entropy matches the one-hot encoded labels; accuracy is
# tracked as the reported metric.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


### Train and evaluate the classification model