In [None]:
# Mount Google Drive into the Colab filesystem so later cells can read files from it.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, MaxPooling2D, Conv2D, BatchNormalization, Dropout
from keras import initializers
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers import LeakyReLU, ReLU
from keras.regularizers import l1, l2, l1_l2
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse
from keras.datasets import mnist
from keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report as CR
from sklearn.preprocessing import StandardScaler

### Question 1
Implement a neural network model for regression on the concrete strength dataset (concrete.csv). The dependent variable y is the "strength" column, and the input variables are the remaining columns. Measure the external validity with 5-fold cross-validation. In this exercise you must design the neural network yourself, experimenting with the number of layers, neurons per layer, choice of activation functions, kernel regularization, etc. As guidance, your model is doing reasonably well if the external validity (average MSE on the test sets) is less than 40.

In [None]:
# Read the concrete-strength CSV from the mounted Drive folder and display it.
# The path is relative, so it is resolved against the current working directory.
concrete_csv = 'drive/MyDrive/Data Sets/concrete(1).csv'
df = pd.read_csv(concrete_csv)
df

Unnamed: 0,cement,slag,ash,water,superplastic,coarseagg,fineagg,age,strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,32.77


In [None]:
# Split the frame positionally: every column except the last is an input
# feature, and the last column is the regression target.
# NOTE(review): if the CSV carries a saved index column, it would be part of
# the feature matrix here - confirm against df.columns.
feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X = feature_frame.values
y = target_series.values

In [None]:
# 5-fold splitter; shuffling with a fixed seed keeps the fold assignment
# identical between runs.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=123,
)

In [None]:
# 5-fold cross-validated regression network for the concrete-strength data.
# `scores` ends up holding one test-fold MSE per fold.
N_EPOCHS = 1000   # training epochs per fold
BATCH_SIZE = 8    # mini-batch size used for fitting


def createRegressionModel(n_features):
  """Build and compile a fresh fully connected regression network.

  Architecture: Dense(20) -> Dense(10) -> Dense(5), each followed by a
  LeakyReLU with negative slope 0.001, then a single linear output unit.

  Args:
    n_features: number of input columns fed to the first layer.

  Returns:
    A compiled Sequential model (MSE loss, Adam with gradient values
    clipped to +/-0.5).
  """
  model = Sequential()
  # Only the first layer needs the input dimension; later layers infer it.
  model.add(Dense(20, input_dim=n_features))
  model.add(LeakyReLU(alpha=0.001))
  model.add(Dense(10))
  model.add(LeakyReLU(alpha=0.001))
  model.add(Dense(5))
  model.add(LeakyReLU(alpha=0.001))
  # Regression head: plain linear output, so predictions are not squashed
  # by a non-linearity.
  model.add(Dense(1, activation='linear'))
  model.compile(loss='mse', optimizer=Adam(clipvalue=0.5))
  return model


scores = []
for idxTrain, idxTest in kf.split(X):
  XTrain, yTrain = X[idxTrain], y[idxTrain]
  XTest, yTest = X[idxTest], y[idxTest]

  # Standardise the inputs. The scaler is fitted on the training fold only,
  # so no statistics from the held-out fold leak into training.
  scaler = StandardScaler()
  XTrainScaled = scaler.fit_transform(XTrain)
  XTestScaled = scaler.transform(XTest)

  # A new model per fold, so weights never carry over between folds.
  model = createRegressionModel(X.shape[1])
  model.fit(XTrainScaled, yTrain, epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)

  # The model is compiled without extra metrics, so evaluate() returns the
  # MSE loss as a single scalar. The target is left unscaled, so the MSE is
  # in the original strength units.
  scores.append(model.evaluate(XTestScaled, yTest, verbose=0))



In [None]:
# Average of the per-fold test MSE values collected in `scores`.
average_mse = np.mean(scores)
print(average_mse)

44.98619689941406


### Question 2
Based on the class examples, design and implement a convolutional neural network model for classification in the case of the MNIST dataset and measure the validity in the test sets. The average accuracy on the test sets is expected to be at least 99%.

In [None]:
from tensorflow.keras.datasets import mnist

In [None]:
# Load MNIST; load_data() yields a train pair and a test pair, each unpacked
# into its inputs (X*) and labels (y*).
train_split, test_split = mnist.load_data()
XTrain, yTrain = train_split
XTest, yTest = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
#function to create new cnn model every iteration
def createModel():
    """Build and compile a fresh CNN classifier for 28x28x1 inputs.

    Architecture: one 3x3 convolution with 32 filters (ReLU), 2x2 max
    pooling, flatten, a 128-unit ReLU dense layer and a 10-way softmax.

    Returns:
        A compiled Sequential model using Adam, categorical cross-entropy
        (so labels must be one-hot encoded) and the accuracy metric.
    """
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=10, activation='softmax'))
    opt = Adam()
    # `metrics` is documented as a list; a bare string is rejected by newer
    # Keras releases, so pass it in list form.
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
#reshaping the data
# Add the trailing channel axis expected by Conv2D and scale the 0-255 pixel
# intensities to [0, 1] so the optimiser works on well-conditioned inputs.
xtrain = XTrain.reshape((XTrain.shape[0], 28, 28, 1)).astype('float32') / 255.0
# The test images must come from XTest. Building them from XTrain produced
# 60000 samples that did not match the 10000 labels in ytest.
xtest = XTest.reshape((XTest.shape[0], 28, 28, 1)).astype('float32') / 255.0
# One-hot encode the digit labels into 10 classes.
ytrain = to_categorical(yTrain, 10)
ytest = to_categorical(yTest, 10)

In [None]:
# Standardising scaler instance.
# NOTE(review): `scale` is not referenced by any other cell visible here -
# confirm whether it is still needed.
scale = StandardScaler()

In [None]:
# Stratified 5-fold cross-validation of the CNN on the training images.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)

# Stratification needs integer class labels, recovered here from the one-hot rows.
class_labels = np.argmax(ytrain, axis=1)

accuracies = []
for fit_idx, holdout_idx in skf.split(xtrain, class_labels):
    fold_inputs, fold_targets = xtrain[fit_idx], ytrain[fit_idx]
    holdout_inputs, holdout_targets = xtrain[holdout_idx], ytrain[holdout_idx]

    # Fresh model per fold so no weights carry over between folds.
    model = createModel()
    model.fit(fold_inputs, fold_targets, epochs=5, batch_size=32, verbose=0)

    # evaluate() returns [loss, accuracy]; keep only the accuracy.
    fold_metrics = model.evaluate(holdout_inputs, holdout_targets, verbose=0)
    accuracy = fold_metrics[1]
    accuracies.append(accuracy)

Average accuracy: 97.40%


In [None]:
# Average held-out accuracy across the cross-validation folds.
mean_accuracy = np.mean(accuracies)
print(mean_accuracy)

0.9818750023841858
