# Task 1

In [None]:
from sklearn import datasets
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [None]:
# Load the iris dataset; its `.data` and `.target` attributes are read in the next cell

iris = datasets.load_iris()

In [None]:
# Separate the feature matrix (X) from the class labels (y)

X, y = iris.data, iris.target

In [None]:
# Split the dataset into stratified training and test sets, one-hot encode the
# labels and standardize the features

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42  # fixed seed so the split is reproducible
)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Fit the scaler on the training data only and reuse those statistics for the
# test data; re-fitting on the test set would scale it with its own mean/std
# and make the test score incomparable to the training pipeline.
ss = StandardScaler()
X_train_ss = ss.fit_transform(X_train)
X_test_ss = ss.transform(X_test)

In [None]:
# Create a function called make_model for GridSearch

def make_model(optimizer="adam", hidden_size=5, drop_out=0.0):
    """Build and compile a small dense classifier for the 4-feature iris data.

    Parameters
    ----------
    optimizer : str or keras optimizer
        Optimizer passed to ``model.compile``.
    hidden_size : int
        Number of units in the first hidden layer.
    drop_out : float
        Dropout rate applied to the second hidden layer.

    Returns
    -------
    A compiled ``Sequential`` model with a 3-way softmax output.
    """
    model = Sequential([
        Dense(hidden_size, input_shape=(4,)),
        Activation('relu'),
        Dense(8),
        Dropout(drop_out),
        Activation('relu'),
        # No Dropout after the output layer: dropping logits would randomly
        # zero out class scores right before the softmax during training.
        Dense(3),
        Activation("softmax")
    ])
    model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                  metrics=['accuracy'])
    return model


# scikit-learn compatible wrapper so the model can be used with GridSearchCV
clf = KerasClassifier(make_model)


In [None]:
# Fit the wrapped classifier to the standardized training data for 10 epochs

clf.fit(X_train_ss, y_train, epochs=10)

In [None]:
# GridSearch for the best tuning parameters

param_grid = {'epochs': np.arange(50,80,10),
              'hidden_size': [64,128,256],
              'drop_out' : [0.1, 0.25, 0.5]
             }


cv = StratifiedShuffleSplit(n_splits=2, test_size=0.5, random_state=42)
# return_train_score=True makes cv_results_ include 'mean_train_score', which
# the results table built from cv_results_ reads; it is not computed by default.
grid = GridSearchCV(clf, param_grid=param_grid, cv=cv, return_train_score=True)

In [None]:
# Run the grid search on the standardized training data

grid.fit(X_train_ss, y_train)

In [None]:
# Tabulate the mean train/test score for every parameter combination

res = pd.DataFrame(grid.cv_results_)
param_columns = ["param_epochs", "param_hidden_size", "param_drop_out"]
score_columns = ['mean_train_score', "mean_test_score"]
res.pivot_table(index=param_columns, values=score_columns)



In [None]:
# Report the best tuning parameter combination.
# Only the last expression of a cell is displayed, so print it explicitly.

print("Best parameters: {}".format(grid.best_params_))

# Score the refitted best estimator on the held-out test set

score = grid.score(X_test_ss, y_test)

# Display the test score

score

# Task2

In [None]:
# Import necessary libraries

from sklearn import datasets
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from keras.datasets import fashion_mnist
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV

In [None]:
import tensorflow as tf

# Abort early unless TensorFlow reports the first GPU device
device_name = tf.test.gpu_device_name()
gpu_found = device_name == '/device:GPU:0'
if not gpu_found:
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [None]:
# Load Fashion-MNIST as (images, labels) pairs for the training and test sets
# 60000 samples, 28 x 28 pixel

train_pair, test_pair = fashion_mnist.load_data()
X_train, y_train = train_pair
X_test, y_test = test_pair

In [None]:
# Draw a reproducible subsample of 10,000 distinct row indices out of 60,000.
# A seeded generator makes the subsample repeatable; replace=False avoids
# duplicate rows, and the range starts at 0 so the first image can be picked.

SUBSAMPLE_SEED = 42
idx = np.random.RandomState(SUBSAMPLE_SEED).choice(60000, size=10000, replace=False)

In [None]:
# Pick the subsampled images and their labels with the same index array

X_sub_train, y_sub_train = X_train[idx], y_train[idx]

In [None]:
# Visual sanity check: show the first subsampled image in grayscale

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

first_image = X_sub_train[0]
plt.imshow(first_image, cmap=plt.get_cmap('gray'))

In [None]:
# Flatten every image into one row vector (samples x pixels)

X_sub_train = X_sub_train.reshape(X_sub_train.shape[0], -1)
X_sub_test = X_test.reshape(X_test.shape[0], -1)

In [None]:
# One-hot encode the integer class labels

n_classes = len(set(y_sub_train))
y_sub_train, y_sub_test = (
    to_categorical(y_sub_train, num_classes=n_classes),
    to_categorical(y_test, num_classes=n_classes),
)

In [None]:
# Hold out 30% of the subsample as a stratified validation set

X_sub_train, X_sub_val, y_sub_train, y_sub_val = train_test_split(
    X_sub_train,
    y_sub_train,
    stratify=y_sub_train,
    test_size=0.3,
    random_state=42,
)


### Vanilla Model

In [None]:
# Build a dense network: three ReLU hidden layers (32, 16 and 12 units)
# followed by a 10-way softmax output layer

model = Sequential()
model.add(Dense(32, input_shape=(784,)))
model.add(Activation("relu"))
model.add(Dense(16))
model.add(Activation("relu"))
model.add(Dense(12))
model.add(Activation("relu"))
model.add(Dense(10))
model.add(Activation("softmax"))

In [None]:
# Configure the model for training: Adam, cross-entropy loss, accuracy metric

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train silently for 75 epochs, holding out 30% of the data for validation

model.fit(
    X_sub_train,
    y_sub_train,
    validation_split=0.3,
    batch_size=32,
    epochs=75,
    verbose=0,
)

In [None]:
# Evaluate on the test set and keep loss/accuracy for the summary table

score = model.evaluate(X_sub_test, y_sub_test, verbose=0)
model1_loss, model1_acc = score[0], score[1]
print("Test loss: {:.3f}".format(model1_loss))
print("Test Accuracy: {:.3f}".format(model1_acc))

In [None]:
# Visualize a learning curve


epochs = 75

def learning_curve(model, X_train, y_train, epochs):
    """Compile and fit ``model``, then plot training vs. validation accuracy.

    Parameters
    ----------
    model : Keras model
        Compiled (again) with Adam / categorical cross-entropy and trained here.
        NOTE: if the model was already fitted, this call trains it further, so
        the curve shows the additional epochs rather than training from scratch.
    X_train, y_train : array-like
        Training data; 30% is held out as validation data via
        ``validation_split``.
    epochs : int
        Number of epochs to train for.

    Returns
    -------
    The ``matplotlib.pyplot`` module (kept for backward compatibility).

    Raises
    ------
    KeyError
        If the training history contains no accuracy metric.
    """
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])
    model_hist = model.fit(X_train, y_train, validation_split=0.3, batch_size=32,
                           epochs=epochs, verbose=0)

    # Older Keras versions log accuracy as 'acc', newer ones as 'accuracy'.
    history = model_hist.history
    for acc_key in ('acc', 'accuracy'):
        if acc_key in history:
            break
    else:
        raise KeyError("No accuracy metric found in training history: {}".format(sorted(history)))

    plt.plot(history[acc_key])
    plt.plot(history['val_' + acc_key])
    plt.title("Accuracy vs Epoch")
    plt.ylabel("Accuracy")
    plt.xlabel("Number of Epoch")
    plt.xticks(np.arange(0, epochs+1, 10))
    # The second curve comes from validation_split, i.e. validation data, not the test set.
    plt.legend(['train', 'validation'], loc='upper right')
    plt.show()

    return plt

In [None]:
# Plot the accuracy curves for the vanilla model.
# NOTE: learning_curve() compiles and fits the model it receives, so `model` is trained for `epochs` more epochs here.
learning_curve(model,X_sub_train, y_sub_train, epochs)

### Vanilla model using drop-out

In [None]:
# Build a dense network with 25% drop-out after each hidden layer

from keras.layers import Dropout

dropout_model = Sequential()
dropout_model.add(Dense(100, input_shape=(784,), activation='relu'))
dropout_model.add(Dropout(.25))
dropout_model.add(Dense(50, activation='relu'))
dropout_model.add(Dropout(.25))
dropout_model.add(Dense(25, activation='relu'))
dropout_model.add(Dropout(.25))
dropout_model.add(Dense(10, activation='softmax'))

In [None]:
# Compile the drop-out model and train it silently for 75 epochs

dropout_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

dropout_model.fit(
    X_sub_train,
    y_sub_train,
    validation_split=0.3,
    batch_size=32,
    epochs=75,
    verbose=0,
)

In [None]:
# Compute and print the test loss and accuracy

# Evaluate on the held-out test set (X_sub_test / y_sub_test) rather than the
# validation split, so the numbers printed as "Test ..." and collected in
# model2_loss / model2_acc really are test-set results, comparable with the
# vanilla model's evaluation on the same arrays.
score = dropout_model.evaluate(X_sub_test, y_sub_test, verbose=0)
model2_loss = score[0]
model2_acc = score[1]
print("Test loss: {:.3f}".format(score[0]))
print("Test Accuracy: {:.3f}".format(score[1]))

In [None]:
# Draw a learning curve for the drop-out model.
# NOTE: learning_curve() compiles and fits the model it receives, so dropout_model is trained for `epochs` more epochs here.

epochs = 75
learning_curve(dropout_model, X_sub_train, y_sub_train, epochs)

### Model using batch normalization

In [None]:
# Data preprocessing

from keras.utils.np_utils import to_categorical

# Append a trailing channel axis of size 1 so the images can be fed to Conv2D.
# Each array is reshaped using its own dimensions (the test set previously
# borrowed its width from X_train).
X_train_img = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test_img = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)


# One-hot encode the labels
n_classes = len(set(y_train))
y_train_img = to_categorical(y_train, n_classes)
y_test_img = to_categorical(y_test, n_classes)


In [None]:
# Hold out 30% of the image data as a stratified evaluation set

X_train_bn, X_test_bn, y_train_bn, y_test_bn = train_test_split(
    X_train_img,
    y_train_img,
    stratify=y_train_img,
    test_size=0.3,
    random_state=11,
)

In [None]:
# Build a small convolutional model with batch normalization

from keras.layers import BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, Flatten

input_shape = (28,28,1)

cnv_bn = Sequential([
    # Only the first layer declares the input shape; later layers infer theirs.
    Conv2D(8, kernel_size = (3,3),
          input_shape = input_shape, activation = 'relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    # The stray input_shape=(784,) was removed here: it does not describe this
    # layer's 4D input and has no place on a non-first Sequential layer.
    Conv2D(8, kernel_size = (3,3), activation = 'relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(10, activation = 'softmax')

])

In [None]:
# Compile the convolutional model and train it silently for 75 epochs

cnv_bn.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

cnv_bn.fit(
    X_train_bn,
    y_train_bn,
    validation_split=0.3,
    batch_size=32,
    epochs=75,
    verbose=0,
)

In [None]:
# Evaluate on the held-out split and keep loss/accuracy for the summary table

score = cnv_bn.evaluate(X_test_bn, y_test_bn, verbose=0)
model3_loss, model3_acc = score[0], score[1]
print("Test loss: {:.3f}".format(model3_loss))
print("Test Accuracy: {:.3f}".format(model3_acc))

In [None]:
# Draw a learning curve for the batch-normalization model.
# NOTE: learning_curve() compiles and fits the model it receives, so cnv_bn is trained for `epochs` more epochs here.

epochs = 75
learning_curve(cnv_bn, X_train_bn, y_train_bn, epochs)

### Model using residual connections without drop-out

In [None]:
# Flatten the full image sets into row vectors and one-hot encode the labels

X_train_rcnn = X_train.reshape(X_train.shape[0], -1)
X_test_rcnn = X_test.reshape(X_test.shape[0], -1)


n_classes = len(set(y_train))
y_train_rcnn, y_test_rcnn = (
    to_categorical(y_train, n_classes),
    to_categorical(y_test, n_classes),
)

In [None]:
# Hold out 30% of the flattened data as a stratified evaluation set

X_train_rcnn_fit, X_test_rcnn_fit, y_train_rcnn_fit, y_test_rcnn_fit = train_test_split(
    X_train_rcnn,
    y_train_rcnn,
    stratify=y_train_rcnn,
    test_size=0.3,
    random_state=17,
)

In [None]:
# Build the model with the functional API

from keras.layers import Input, Dense
from keras.models import Model

inputs = Input(shape=(784,))

# Each BatchNormalization layer must be *called* on the running tensor.
# Assigning the bare layer object to `x` would hand a layer, not a tensor,
# to the next Dense call.
x = Dense(64, activation='relu')(inputs)
x = BatchNormalization()(x)
x = Dense(32, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(16, activation='relu')(x)
x = BatchNormalization()(x)
predictions = Dense(10, activation='softmax')(x)
# NOTE(review): this graph is a plain feed-forward stack; it contains no
# skip/residual connection despite the section title - confirm intent.

rcnn_model = Model(inputs=inputs, outputs=predictions)
rcnn_model.compile(optimizer= 'adam',
              loss="categorical_crossentropy",
              metrics=['accuracy'])


In [None]:
# Train silently for 75 epochs on the subsampled training split.
# NOTE(review): the X_train_rcnn_fit / y_train_rcnn_fit arrays prepared for this section are not used here - confirm this is intended.
rcnn_model.fit(X_sub_train, y_sub_train, batch_size = 32, epochs = 75, verbose = 0)

In [None]:
# Evaluate on the test set and keep loss/accuracy for the summary table

score = rcnn_model.evaluate(X_sub_test, y_sub_test, verbose=0)
model4_loss, model4_acc = score[0], score[1]
print("Test loss: {:.3f}".format(model4_loss))
print("Test Accuracy: {:.3f}".format(model4_acc))

In [None]:
# Draw a learning curve for the functional-API model.
# NOTE: learning_curve() compiles and fits the model it receives, so rcnn_model is trained for `epochs` more epochs here.

epochs = 75
learning_curve(rcnn_model, X_sub_train, y_sub_train, epochs)

### Loss and Score Table

In [None]:
# Collect the loss and accuracy of all four models into one table

model_names = ['Vanila Model', 'Vanila Model Using Drop-out',
               'Batch Normalization', 'Residual Connections']

loss = {'loss': [model1_loss, model2_loss, model3_loss, model4_loss]}
acc = {'test accuracy': [model1_acc, model2_acc, model3_acc, model4_acc]}

loss_df = pd.DataFrame(data=loss, index=model_names)
acc_df = pd.DataFrame(data=acc, index=model_names)

# Place the two single-column frames side by side
df_concat = pd.concat([loss_df, acc_df], axis=1)
df = df_concat

# Display the dataframe

df

# Task3

### 3.1

In [None]:
from google.colab import drive

# Mount Google Drive under /content/gdrive (the mount point was misspelled as
# '/contaent'). The relative data path "gdrive/My Drive/hw5/" used below
# resolves against Colab's default working directory /content.
drive.mount('/content/gdrive', force_remount=True)


In [None]:
import zipfile, os
from scipy import misc
import imageio
import matplotlib.pyplot as pl
import os
import fnmatch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
import cv2
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from sklearn.preprocessing import StandardScaler
from keras.preprocessing.image import ImageDataGenerator

In [None]:
import tensorflow as tf

# Abort early unless TensorFlow reports the first GPU device
EXPECTED_GPU = '/device:GPU:0'
device_name = tf.test.gpu_device_name()
if not device_name == EXPECTED_GPU:
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [None]:
# Base directory of the homework data (relative path); image files are searched under path + "new"
path =  "gdrive/My Drive/hw5/"

In [None]:
# Recursively collect the path of every .png file below <path>new
png_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk(path + "new")
    for file_name in fnmatch.filter(file_names, '*.png')
)
configfiles = list(png_paths)

In [None]:
# Sub-sample size: number of images loaded per class in the cells below
sample_size = 5000


In [None]:
#show 10 random images
for i in range(10):
    # Draw the index from the actual number of files, so it can never run past
    # the end of the list and every file can be selected.
    n = np.random.randint(len(configfiles))
    f = cv2.imread(configfiles[n])
    print(configfiles[n])
    print(f.shape)
    # OpenCV returns images in BGR channel order, matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(f, cv2.COLOR_BGR2RGB))
    plt.show()

In [None]:
# Partition the image paths by the class label embedded in each path
class0 = [img_path for img_path in configfiles if "class0" in img_path]
class1 = [img_path for img_path in configfiles if "class1" in img_path]
for label, paths in ((0, class0), (1, class1)):
    print("Number of sample in class {} is {}".format(label, len(paths)))

In [None]:
sample_size = 5000

from keras.preprocessing import image

# Load the first `sample_size` class-0 images as one (N, 50, 50, 3) float32 array.
#
# Every image is read with cv2.imread, the loader also used for class 1, so
# both classes share the same (BGR) channel order. Mixing OpenCV and Keras
# loaders would give the classes different channel orders, which the network
# could exploit instead of learning from image content.
if len(class0) < sample_size:
    raise ValueError("Need {} class0 images but only {} are available"
                     .format(sample_size, len(class0)))

class0_images = []
for count, img_path in enumerate(class0[:sample_size]):
    if count and count % 100 == 0:
        print(count)
    img = cv2.imread(img_path)
    if img is None:
        raise IOError("Could not read image: {}".format(img_path))
    if img.shape[:2] != (50, 50):
        # Bring odd-sized patches to the common 50x50 input size
        img = cv2.resize(img, (50, 50))
    class0_images.append(img)

# Stack once at the end; concatenating inside the loop copies the whole array
# on every iteration (quadratic cost).
class0_arr = np.stack(class0_images).astype("float32")

class0_arr.shape

In [None]:
sample_size = 5000

# Load class-1 images until `sample_size` patches of shape (50, 50, 3) have
# been collected; files that are unreadable or have another shape are skipped.
class1_images = []
count = 0  # number of candidate files inspected so far
while len(class1_images) < sample_size:
    if count >= len(class1):
        raise ValueError("Only {} usable class1 images found, {} required"
                         .format(len(class1_images), sample_size))
    if count and count % 100 == 0:
        print(count)
    current = cv2.imread(class1[count])
    count += 1
    # The shape check applies to every image, including the first one.
    if current is not None and current.shape == (50, 50, 3):
        class1_images.append(current)

# Stack once at the end; concatenating inside the loop copies the whole array
# on every iteration (quadratic cost).
class1_arr = np.stack(class1_images)

class1_arr.shape

In [None]:
# Build a balanced dataset: `sample_size` labels of 0 followed by `sample_size`
# labels of 1, with the image arrays stacked in the same order
y_0 = np.zeros(sample_size)
y_1 = 1 + y_0
y = np.append(y_0, y_1)

X = np.concatenate((class0_arr, class1_arr), axis=0)

In [None]:
# Stratified 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from keras.utils.np_utils import to_categorical

# Shape of one input image: 50 x 50 pixels, 3 channels
input_shape = (50, 50, 3)

# One-hot encode the binary labels
y_train_cat, y_test_cat = to_categorical(y_train), to_categorical(y_test)

In [None]:
from keras.layers import BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, Flatten, SeparableConv2D
from keras.optimizers import SGD, Adam, Adagrad


# Convolutional baseline with batch normalization.
# Only the first layer declares `input_shape`; Sequential infers the input of
# every later layer, so the argument was redundant (and misleading) there.
cnv_bn = Sequential([
    Conv2D(128, kernel_size = (3,3),
           input_shape = input_shape, activation = 'relu', padding="same"),
    # Dense on a 4D tensor acts on the last (channel) axis of every pixel
    Dense(64, activation="relu"),
    BatchNormalization(),
    Conv2D(128, kernel_size = (3,3), activation = 'relu', padding="same"),
    BatchNormalization(),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(2, activation = 'softmax')

])

# Plain SGD; Adam and Adagrad are imported above as alternatives to try
opt = SGD(lr=0.01)
cnv_bn.compile(optimizer = opt, loss="categorical_crossentropy",
                      metrics=['accuracy'])



In [None]:
# Train for 30 epochs with progress output, holding out 20% of the training data for validation
cnv_bn.fit(X_train, y_train_cat, validation_split=0.2, batch_size = 30, epochs = 30, verbose = 1)

In [None]:
# Report loss and accuracy on the held-out test set
score = cnv_bn.evaluate(X_test, y_test_cat, verbose=0)
print(
    "Test loss: {:.3f}".format(score[0]),
    "Test Accuracy: {:.3f}".format(score[1]),
    sep="\n",
)

### 3.2

In [None]:
# Image data generator that normalizes and randomly distorts the images
augmentation_options = dict(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=50,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)
generator = ImageDataGenerator(**augmentation_options)

In [None]:
# Compute the feature-wise statistics needed for centering / std normalization
generator.fit(X_train)

# Train on augmented batches of 32 images for 30 epochs
augmented_batches = generator.flow(X_train, y_train_cat, batch_size=32)
cnv_bn.fit_generator(
    augmented_batches,
    steps_per_epoch=X_train.shape[0]//32,
    epochs=30,
    verbose=1,
)

In [None]:
# The augmented training batches were centered and scaled by `generator`
# (featurewise_center / featurewise_std_normalization), so the test images
# need the same normalization before evaluation. `standardize` modifies its
# argument in place, hence the float copy created by astype.
X_test_std = generator.standardize(X_test.astype("float32"))
score = cnv_bn.evaluate(X_test_std, y_test_cat, verbose=0)
print("Test loss: {:.3f}".format(score[0]))
print("Test Accuracy: {:.3f}".format(score[1]))

### 3.3

In [None]:
from keras.optimizers import SGD, Adam, Adagrad


def _conv_stage(width, **conv_kwargs):
    """Return one stage: Conv2D(128) -> 3 x Dense(width) -> BatchNormalization.

    Extra keyword arguments are forwarded to the Conv2D layer; this is used to
    give `input_shape` to the first stage only.
    """
    return [
        Conv2D(128, kernel_size=(3, 3), activation='relu', padding="same",
               **conv_kwargs),
        Dense(width, activation="relu"),
        Dense(width, activation="relu"),
        Dense(width, activation="relu"),
        BatchNormalization(),
    ]


# Deeper model: five identical stages that differ only in their Dense width.
# Only the very first layer declares `input_shape`; Sequential infers the
# input of every later layer, so the argument was redundant there.
stage_layers = _conv_stage(256, input_shape=input_shape)
for stage_width in (128, 64, 64, 64):
    stage_layers += _conv_stage(stage_width)

# Classification head
stage_layers += [
    Flatten(),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax'),
]

cnv_bn = Sequential(stage_layers)

# Plain SGD; Adam and Adagrad are imported above as alternatives to try
opt = SGD(lr=0.01)
cnv_bn.compile(optimizer = opt, loss="categorical_crossentropy",
                      metrics=['accuracy'])


In [None]:
# Train the deeper model for 30 epochs with progress output, holding out 20% of the training data for validation
cnv_bn.fit(X_train, y_train_cat, validation_split=0.2, batch_size = 30, epochs = 30, verbose = 1)

In [None]:
# Report loss and accuracy of the deeper model on the held-out test set
score = cnv_bn.evaluate(X_test, y_test_cat, verbose=0)
print(
    "Test loss: {:.3f}".format(score[0]),
    "Test Accuracy: {:.3f}".format(score[1]),
    sep="\n",
)