In [None]:
# Attach Google Drive to the Colab VM so the lab spreadsheets can be copied from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/lab/TestSet-withLabels.xlsx /content/dataset/
!cp /content/drive/MyDrive/lab/TrainingSet.xlsx /content/dataset/

## Import datasets

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Both workbooks are .xlsx files, so they share the same reader options.
read_opts = {'engine': 'openpyxl'}

train = pd.read_excel('dataset/TrainingSet.xlsx', **read_opts)
test = pd.read_excel('dataset/TestSet-withLabels.xlsx', **read_opts)

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,5.4,3.7,1.5,0.2,Arctica
1,4.8,3.4,1.6,0.2,Arctica
2,4.8,3.0,1.4,0.1,Arctica
3,4.3,3.0,1.1,0.1,Arctica
4,5.8,4.0,1.2,0.2,Arctica


In [None]:
# Preview the first five rows of the test frame.
test.head(n=5)

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,4.4,2.9,1.4,0.2,Arctica
1,4.6,3.1,1.5,0.2,Arctica
2,4.6,3.4,1.4,0.3,Arctica
3,4.7,3.2,1.3,0.2,Arctica
4,4.9,3.0,1.4,0.2,Arctica


In [None]:
# Number of rows in the training frame.
train.shape[0]

120

## Processing datasets

Includes standard scaling of the numerical features and one-hot encoding of the target variable (plant type).

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# Feature columns only: the training frame with the "plant" label column removed.
plant_num = train.drop(columns="plant")

# Single-step pipeline that applies StandardScaler to the numeric columns.
num_pipeline = Pipeline(steps=[('std_scaler', StandardScaler())])

# Scaled copy of the feature columns; this call also fits the scaler on them.
plant_num_tr = num_pipeline.fit_transform(plant_num)

In [None]:
# Column names routed to each branch of the transformer.
num_attribs = list(plant_num)   # numeric feature columns, in frame order
cat_attribs = ["plant"]         # label column, one-hot encoded below

# Scale the numeric columns and one-hot encode the label in a single pass.
full_pipeline = ColumnTransformer([
        ("num", num_pipeline, num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ])
plant_prepared = full_pipeline.fit_transform(train)

# The transformer lays its outputs out in the order the branches are listed:
# scaled features first, one-hot label columns after. Derive the split point
# from the feature list instead of hard-coding it so the slices stay correct
# if the number of feature columns changes.
n_features = len(num_attribs)
plant_X = plant_prepared[:, :n_features].astype(float)
plant_Y = plant_prepared[:, n_features:]

## Creating Neural Network

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Fix TensorFlow's global seed so weight initialisation (and other TF-level
# randomness that derives from the global seed) starts from the same state
# every time this cell is re-run; without it each run trains a different model.
tf.random.set_seed(42)

# Fully connected classifier: four hidden ReLU layers of 30 units each,
# followed by a 3-unit softmax output (one unit per one-hot label column).
model = keras.Sequential(
[
    layers.Dense(30, activation="relu"),
    layers.Dense(30, activation="relu"),
    layers.Dense(30, activation="relu"),
    layers.Dense(30, activation="relu"),
    layers.Dense(3, activation="softmax"),
])

In [None]:
# One-hot targets pair with categorical cross-entropy; accuracy is tracked as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",  # try changing optimizers
    metrics=["accuracy"],
)

In [None]:
# Train for 20 epochs in mini-batches of 15, holding out 20% of the rows for validation.
# NOTE(review): Keras takes the validation_split rows from the END of the arrays
# before shuffling; if TrainingSet.xlsx is ordered by plant type the hold-out
# would not be representative -- confirm the row order of the training file.
model.fit(
    x=plant_X,
    y=plant_Y,
    batch_size=15,
    epochs=20,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f981a0f0f50>

## Process Test set before evaluating model

In [None]:
# Apply the transformer that was fitted on the training set (transform only,
# no re-fit) so the test rows get the same scaling and label encoding.
plant_test = full_pipeline.transform(test)

# Same layout as the training matrix: scaled features first, one-hot label
# columns after. The split point is derived from the feature list rather than
# hard-coded so it stays in step with the columns fed to the transformer.
n_features = len(num_attribs)
plant_test_X = plant_test[:, :n_features].astype(float)
plant_test_Y = plant_test[:, n_features:]

# Displays the loss followed by the accuracy metric configured in compile().
model.evaluate(plant_test_X, plant_test_Y)



[0.22079992294311523, 0.9666666388511658]

In [None]:
# Softmax outputs for every test row: one probability per output unit.
predictions = model.predict(x=plant_test_X)
predictions

array([[9.9843627e-01, 6.1664107e-04, 9.4704417e-04],
       [9.9882835e-01, 5.1229651e-04, 6.5936375e-04],
       [9.9942517e-01, 2.6918703e-04, 3.0566542e-04],
       [9.9915016e-01, 3.8450584e-04, 4.6536259e-04],
       [9.9750358e-01, 1.0354965e-03, 1.4609876e-03],
       [9.9839741e-01, 6.9472310e-04, 9.0787240e-04],
       [1.0430496e-02, 3.7340265e-02, 9.5222920e-01],
       [2.9122152e-03, 1.2957321e-01, 8.6751455e-01],
       [9.9938881e-01, 2.7597006e-04, 3.3523492e-04],
       [9.9893171e-01, 4.8246823e-04, 5.8576383e-04],
       [9.9906701e-01, 4.1784736e-04, 5.1521783e-04],
       [1.1546876e-02, 7.7842399e-02, 9.1061074e-01],
       [9.9871337e-01, 4.7573028e-04, 8.1093714e-04],
       [6.4130273e-04, 5.7354778e-02, 9.4200391e-01],
       [8.2690213e-03, 9.8502435e-02, 8.9322859e-01],
       [1.8558435e-03, 5.7761359e-01, 4.2053056e-01],
       [1.3873868e-02, 4.1099134e-01, 5.7513481e-01],
       [2.3602064e-04, 9.6914434e-01, 3.0619631e-02],
       [1.2727907e-03, 6.403

In [None]:
# `Sequential.predict_classes` is deprecated and has been removed from newer
# TensorFlow releases. For a softmax output layer the documented replacement is
# the arg-max over the predicted probabilities, which yields the same class
# indices.
predictions = np.argmax(model.predict(plant_test_X, verbose=1), axis=-1)
predictions





array([0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 2, 2, 1, 2, 1, 1, 2, 2, 1,
       2, 1, 2, 2, 1, 1, 1, 1])

In [None]:
# Map predicted class indices back to plant names using the categories learned
# by the fitted one-hot encoder. Column i of the one-hot block corresponds to
# categories_[0][i], so the names always match the spelling in the data (the
# previous hand-typed mapping misspelled "Arctica" as "Artica", which made
# predictions and true labels impossible to compare directly).
class_names = full_pipeline.named_transformers_['cat'].categories_[0]
df = pd.DataFrame({'plant_pred': class_names[predictions]})

# Side-by-side view of the predicted and true label for each test row.
df.join(test['plant'])

Unnamed: 0,plant_pred,plant
0,Artica,Arctica
1,Artica,Arctica
2,Artica,Arctica
3,Artica,Arctica
4,Artica,Arctica
5,Artica,Arctica
6,Harlequin,Harlequin
7,Harlequin,Carolinian
8,Artica,Arctica
9,Artica,Arctica
