###  Importing the necessary libraries:

In [1]:
import tensorflow as tf
import sklearn
from sklearn import model_selection, datasets, preprocessing
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt

# A. Iris Classification Model

### 1. Loading the Iris dataset:

In [6]:
#Loading the Iris dataset directly as a (features, targets) pair: x holds the features, y holds the class labels.
x, y = datasets.load_iris(return_X_y=True)

### 2. Splitting the data and preparing it for training:

In [8]:
#Holding out 10% of the samples for testing and keeping 90% for training.
#stratify=y keeps the class proportions equal in both splits; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

### 3. Performing data preprocessing:

In [None]:
#QuantileTransformer() performs a non-parametric transformation: it estimates the empirical quantiles of
#every feature and maps the values to a uniform distribution between 0 and 1, to facilitate the training process.
#n_quantiles cannot exceed the number of training samples; the default (1000) is larger than this training
#set, which makes scikit-learn emit a warning and fall back to the sample count, so it is set explicitly here.
quantile_transformer = preprocessing.QuantileTransformer(
    n_quantiles=min(1000, x_train.shape[0]),
    random_state=42,
)

#fit_transform() learns the per-feature quantiles from the training data only, then transforms that data.
x_train_trans = quantile_transformer.fit_transform(x_train)
#transform() re-uses the quantiles learned from the training data, so the test data is never used for fitting.
x_test_trans = quantile_transformer.transform(x_test)

#Exploring the shapes of the prepared arrays so the network can be built to match them.
print(x_train_trans.shape)
print(y_train.shape)
print(x_test_trans.shape)
print(y_test.shape)

### 4. Encoding the target (y) values:

In [10]:
#One-hot encoding the class labels so the model does not assume a natural ordering between the categories.
#The number of classes is taken once from the training labels and shared by both splits: computing it
#separately per split would give the test labels a different width if a class were missing from the test set.
num_classes = len(set(y_train))
y_train_onehot = tf.one_hot(y_train, depth=num_classes)
y_test_onehot = tf.one_hot(y_test, depth=num_classes)

### 5. Building, Compiling and Training the classification model using deep neural networks:



In [12]:
#Resetting Keras' global state so this model starts from a clean slate.
#(Deleting and re-importing tensorflow does not achieve that: the module stays cached, so the
#re-import only rebinds the very same module object.)
tf.keras.backend.clear_session()

tf.random.set_seed(42) #Setting the global random seed so the weight initialisation is reproducible between runs.

#Building the model using tensorflow's Sequential API for easier implementation.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[4]), #Input layer holding the 4 features of the dataset.
    tf.keras.layers.Dense(10, activation=tf.nn.leaky_relu), #Hidden layer with 10 trainable units; leaky_relu adds non-linearity.
    tf.keras.layers.Dense(3, activation=tf.nn.softmax), #Output layer with 3 units, one per class, producing class probabilities.
])

In [None]:
#Compiling: Adam as the optimizer and categorical cross-entropy as the loss (multi-class problem with one-hot targets).
optimizer = tf.optimizers.Adam()
loss_fn = tf.losses.CategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['acc'])

#Training for 250 epochs in mini-batches of 16; the held-out test split is used as the validation data.
history = model.fit(
    x=x_train_trans,
    y=y_train_onehot,
    batch_size=16,
    epochs=250,
    validation_data=(x_test_trans, y_test_onehot),
)

### 6. Plotting the results:

In [None]:
#Plotting the training curves to look for overfitting (training vs. validation loss and accuracy).
#The per-epoch metrics are converted into a pandas data frame, one column per metric, and plotted on one Axes.
ax = pd.DataFrame(history.history).plot(figsize=(10, 7))

ax.grid(True)
ax.set_ylim(0, 1.2) #Setting the y-axis range (metric values).
ax.set_xlim(0, len(history.history['loss']) - 1) #Setting the x-axis range to the epochs actually trained.
ax.set_xlabel('Epoch')

plt.show()

### 7. Model evaluation:

In [None]:
#Evaluating the trained model on the held-out test (validation) samples; evaluate() returns the loss followed by the accuracy.
test_metrics = model.evaluate(x_test_trans, y_test_onehot)
print('Model Loss and Accuracy:', test_metrics)

### Notes:

1. StandardScaler() was not used, as the model did not generalize well.

2. QuantileTransformer() showed the best results.

3. Many architectures were used to train the model, (1 hidden, with 8 or 16 or 32 or 64), (2 hidden with 8 or 16 or 32 or 64)...etc. 

4. Different batch sizes were also used (16, 32, 64).

5. Using 10 units with one hidden layer and a batch of 16, showed the best results.

6. A network with only one hidden layer and two units was used, but it took 2500 epochs to reach a loss of 0.2 without overfitting.

7. Dropout layer was not used in the model, as it affected the results badly with every rate (0.1, 0.2, 0.3 ...1.0), the validation loss curve was noticed to experience a lot of spiking with the addition of Dropout layer.

8. The EarlyStopping callback was also not used, because it stopped training at an early stage even though the model eventually converged after a certain point.

# B. Wine Classification Model

### 1. Loading the Wine dataset:

In [None]:
#Loading the Wine dataset directly as a (features, targets) pair: x holds the features, y holds the class labels.
x, y = datasets.load_wine(return_X_y=True)

### 2. Performing data normalization:

In [None]:
#Standardizing every feature to zero mean and unit variance.
#NOTE(review): the scaler is fitted on the full dataset before the train/test split, so statistics of the
#test samples leak into the training data - consider fitting on the training split only.
x = StandardScaler().fit_transform(x)

### 3. Splitting the data and preparing it for training:

In [None]:
#Holding out 10% of the samples for testing and keeping 90% for training.
#stratify=y keeps the class proportions equal in both splits; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

#Exploring the shapes of the splits so the network can be built to match them.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

### 4. Encoding the target (y) values:

In [None]:
#One-hot encoding the class labels so the model does not assume a natural ordering between the categories.
#The number of classes is taken once from the training labels and shared by both splits: computing it
#separately per split would give the test labels a different width if a class were missing from the test set.
num_classes = len(set(y_train))
y_train_onehot = tf.one_hot(y_train, depth=num_classes)
y_test_onehot = tf.one_hot(y_test, depth=num_classes)

### 5. Building, Compiling and Training the classification model using deep neural networks:

In [None]:
#Resetting Keras' global state so this model starts from a clean slate.
#(Deleting and re-importing tensorflow does not achieve that: the module stays cached, so the
#re-import only rebinds the very same module object.)
tf.keras.backend.clear_session()

tf.random.set_seed(42) #Setting the global random seed so the weight initialisation is reproducible between runs.

#Building the model using tensorflow's Sequential API for easier implementation.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[13]), #Input layer holding the 13 features of the dataset.
    tf.keras.layers.Dense(8, activation=tf.nn.leaky_relu), #Hidden layer with 8 trainable units; leaky_relu adds non-linearity.
    tf.keras.layers.Dense(3, activation=tf.nn.softmax), #Output layer with 3 units, one per class, producing class probabilities.
])

In [None]:
#Compiling: Adam as the optimizer and categorical cross-entropy as the loss (multi-class problem with one-hot targets).
optimizer = tf.optimizers.Adam()
loss_fn = tf.losses.CategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['acc'])

#Training for 250 epochs in mini-batches of 64; the held-out test split is used as the validation data.
history = model.fit(
    x=x_train,
    y=y_train_onehot,
    batch_size=64,
    epochs=250,
    validation_data=(x_test, y_test_onehot),
)

### 6. Plotting the results:

In [None]:
#Plotting the training curves to look for overfitting (training vs. validation loss and accuracy).
#The per-epoch metrics are converted into a pandas data frame, one column per metric, and plotted on one Axes.
ax = pd.DataFrame(history.history).plot(figsize=(10, 7))

ax.grid(True)
ax.set_ylim(0, 1.2) #Setting the y-axis range (metric values).
ax.set_xlim(0, len(history.history['loss']) - 1) #Setting the x-axis range to the epochs actually trained.
ax.set_xlabel('Epoch')

plt.show()

### 7. Model evaluation:

In [None]:
#Evaluating the trained model on the held-out Wine test (validation) samples; evaluate() returns the loss followed by the accuracy.
#x_test is the Wine test split (13 features). x_test_trans must not be used here: it holds the Iris test
#features from section A, which do not match this model's input or these labels.
print('Model Loss and Accuracy:', model.evaluate(x_test, y_test_onehot))

### Notes:

1. Many architectures were used to train the model, (1 hidden, with 8 or 16 or 32 or 64), (2 hidden with 8 or 16 or 32 or 64)...etc.

2. Different batch sizes were also used (16, 32, 64).

3. Using 8 units with one hidden layer and a batch of 64, showed the best results.

4. Dropout layer was not used in the model, as it affected the results badly with every rate (0.1, 0.2, 0.3 ...1.0), the validation loss curve was noticed to experience a lot of spiking with the addition of Dropout layer.

5. The EarlyStopping callback was also not used, because it stopped training at an early stage even though the model eventually converged after a certain point.