In [None]:
# Mount Google Drive so the dataset and log directories under /content/drive
# (used by the path configuration below) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Commented out IPython magic to ensure Python compatibility.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import print_function

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
# %matplotlib inline

from keras.engine.base_layer import Layer
from keras.layers import Activation, Dense, ReLU
from keras import backend as K
from sklearn.model_selection import train_test_split
from keras.datasets import mnist
from keras.optimizers import SGD
from keras.utils import np_utils

import keras
from keras.models import Sequential
from keras.layers.core import Flatten
from keras.layers import Dropout
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras import models
from keras import layers
import numpy as np
import time
from tqdm import tqdm
import pandas as pd
import os
import gc
import math
from matplotlib import pyplot

# Directory holding one CSV file per dataset, and the directory that receives
# the per-dataset result tables and figures.
home_dir = '/content/drive/MyDrive/liparelu_classification/multiclass'
log_dir = '/content/drive/MyDrive/liparelu_classification/logs'

# Directory entries in alphabetical order, so datasets are processed in a stable order.
datasets_to_use = sorted(os.listdir(home_dir))

In [None]:

for datasets in datasets_to_use:
	# Only files whose name ends in '.csv' are datasets. A substring test would
	# also accept names such as 'foo.csv.bak', for which stripping four
	# characters and re-appending '.csv' yields a path that does not exist.
	if not datasets.endswith('.csv'):
		continue

	# Skip the 'Indian Liver' dataset.
	if 'Indian Liver' in datasets:
		continue

	# Dataset name without the '.csv' extension; used to name the output files.
	dset = datasets[0:-4]
	print(dset)
	K.clear_session()

	# The files have no header row; the last column holds the target.
	df = pd.read_csv(os.path.join(home_dir, datasets), header=None)

	# Preprocessing training data
	_, c = df.shape
	y = np.asarray(df[c-1])
	x = np.asarray(df.drop([c-1], axis=1))

	# Fixed random_state keeps the 80/20 split identical across runs.
	# train_test_split is already imported at the top of the notebook.
	x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

	from keras.utils import to_categorical

	# The callbacks and tensorflow are imported here because the classes
	# defined further down in this loop body look these names up as globals.
	from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler

	import tensorflow as tf

	class ApproxRelu(Layer):
		"""
		Element-wise power-law rectifier.

		Where the input is strictly positive the output is ``k * input ** n``;
		where the input is zero or negative the output is zero. The output has
		the same shape as the input.

		:param k: multiplicative coefficient (default 0.5)
		:param n: exponent applied to the positive part (default 1.15)
		"""

		def __init__(self, k=0.5, n=1.15, **kwargs):
			super().__init__(**kwargs)
			self.supports_masking = True
			# Both hyper-parameters are stored in the backend's float type.
			self.k = K.cast_to_floatx(k)
			self.n = K.cast_to_floatx(n)

		def call(self, inputs):
			is_positive = tf.greater(inputs, 0.0)
			# Zero out the non-positive entries first so that the power below is
			# never evaluated on a negative base.
			rectified = tf.where(inputs <= 0.0, tf.zeros_like(inputs), inputs)
			powered = self.k * tf.pow(rectified, self.n)
			return tf.where(is_positive, powered, rectified)

		def get_config(self):
			# Base layer configuration extended with this layer's two parameters.
			merged = dict(super().get_config())
			merged.update({'k': float(self.k), 'n': float(self.n)})
			return merged

		def compute_output_shape(self, input_shape):
			# Purely element-wise: the shape is unchanged.
			return input_shape

	class AbstractModel:
		"""
		Fully-connected Keras network with ApproxRelu hidden activations and an
		optional Lipschitz-based adaptive learning rate.

		The constructor validates the hyper-parameters, stores the data and
		min-max scales the features. `fit` builds, compiles and trains the network.

		:param n_classes: number of target classes, at least 2
		:param activation: stored on the instance; not read elsewhere in this class
		:param classification: stored on the instance; not read elsewhere in this class
		:param optimizer: only 'sgd' is implemented
		:param bs: batch size, a positive int
		:param epochs: number of epochs; replaced by a very large value when `convergence` is True
		:param loss: 'categorical_crossentropy', 'mae' or 'mse' ('quadratic' is accepted as an alias of 'mse')
		:param adaptive: if True, the learning rate is recomputed every epoch by `_lr_schedule`
		:param convergence: if True, train until early stopping on the validation loss
		:param num_hiddden_layers: number of hidden Dense+ApproxRelu pairs
		:param num_hidden_neurons: width of every hidden layer
		:param x_train: training features, 2-D array-like (required)
		:param x_test: validation/test features
		:param y_train: training targets
		:param y_test: validation/test targets
		:raises ValueError: on invalid hyper-parameters or missing x_train
		:raises NotImplementedError: for any optimizer other than 'sgd'
		"""

		# 'mse' is the name the rest of this class (and Keras) uses for the squared
		# error; 'quadratic' is kept so that existing callers keep passing validation.
		_VALID_LOSSES = ("categorical_crossentropy", "mae", "mse", "quadratic")

		def __init__(self, n_classes = 2, activation='arelu', classification=True, optimizer='sgd',
					 bs = 8, epochs= 100, loss = 'categorical_crossentropy', adaptive = True,
					 convergence = True, num_hiddden_layers=1, num_hidden_neurons=10, x_train=None, x_test=None, y_train=None, y_test=None):

			# Pure argument validation comes first, before any Keras object is created.
			if n_classes < 2:
				raise ValueError('n_classes must be at least 2.')
			if optimizer != 'sgd':
				raise NotImplementedError('Only SGD optimizer is implemented!')
			# Type check before the comparison so a non-numeric bs gives this error
			# rather than a TypeError from `<`.
			if not isinstance(bs, int) or bs < 1:
				raise ValueError('Improper batch size')
			if epochs < 1:
				raise ValueError('Invalid number of epochs')
			if loss not in self._VALID_LOSSES:
				raise ValueError('Invalid loss function specified')
			if x_train is None:
				raise ValueError('x_train is required: its second dimension defines the input shape')

			self.optimizer = SGD(lr=1e-1, decay=1e-4)

			self.activation = activation
			self.classification = classification
			self.bs = bs

			self.n_classes = n_classes
			self.epochs = epochs

			self.adaptive = adaptive
			self.convergence = convergence
			self.num_hiddden_layers = num_hiddden_layers
			self.num_hidden_neurons = num_hidden_neurons

			self.lr_history = []
			self.model_history = None
			self.K_z = []

			self.model = None
			self.callbacks = None

			if self.convergence or self.adaptive:
				self.callbacks = []

			if self.convergence:
				self.epochs = 20000000 #very large value

			# Normalise the alias so every later comparison only has to know 'mse'.
			self.loss = 'mse' if loss == 'quadratic' else loss

			# Two classes are handled with a single sigmoid unit.
			if loss=="categorical_crossentropy" and self.n_classes==2:
				self.loss = "binary_crossentropy"

			if loss=="categorical_crossentropy":
				self.metrics = ["accuracy"]
			else:
				self.metrics = [self.loss]

			self.x_train = x_train
			self.inp_shape = x_train.shape[1]
			self.x_test = x_test
			self.y_train = y_train
			self.y_test = y_test

			self.K_a = None

			self._scale_data()

		def _scale_data(self):
			"""
			Min-max scale the features.

			The scaler is fitted on the training features only and the same
			transformation is applied to the test features, so both sets live on
			one common scale. Test values outside the training range therefore
			map outside [0, 1].
			:return: None
			"""
			print("Scaling...")
			from sklearn.preprocessing import MinMaxScaler
			scaler = MinMaxScaler()
			self.x_train = scaler.fit_transform(self.x_train)
			# A missing test set is reported later by fit()/score().
			if self.x_test is not None:
				self.x_test = scaler.transform(self.x_test)

		def _get_model(self):
			"""
			Get a model instance.
			:return: Keras Model instance
			"""
			network = models.Sequential()
			network.add(layers.Dense(self.num_hidden_neurons, input_shape=(self.inp_shape, )))
			network.add(ApproxRelu())
			for _ in range(self.num_hiddden_layers-1):
				network.add(layers.Dense(self.num_hidden_neurons))
				network.add(ApproxRelu())
			if self.loss=='categorical_crossentropy':
				network.add(layers.Dense(self.n_classes, activation='softmax'))
			elif self.loss=='binary_crossentropy':
				network.add(layers.Dense(1, activation='sigmoid'))
			else:
				# mae and mse use a single linear output unit
				network.add(layers.Dense(1, activation='linear'))
			return network

		def _max_penultimate_norm(self):
			"""
			Largest norm of the penultimate layer's output over all training batches.
			:return: the maximum norm, or -1.0 if there are no training batches
			"""
			penultimate_activ_func = K.function([self.model.layers[0].input], [self.model.layers[-2].output])

			K_max = -1.0
			n_batches = (len(self.x_train) - 1) // self.bs + 1
			for i in range(n_batches):
				xb = self.x_train[i * self.bs:(i + 1) * self.bs]
				activ = np.linalg.norm(penultimate_activ_func([xb]))
				if activ > K_max:
					K_max = activ
			return K_max

		@staticmethod
		def _lr_from_constant(k_const, scale):
			"""
			Turn a Lipschitz-constant estimate into a learning rate.
			:param k_const: the estimated constant
			:param scale: factor applied to 1 / k_const
			:return: scale / k_const, or 0.1 when k_const is zero or the result exceeds 20
			"""
			if k_const != 0:
				lr = float(1 / k_const) * scale
			else:
				lr = 0.1
			# Guard against huge steps when the estimate is tiny.
			if lr > 20.:
				lr = 0.1
			return lr

		def _lr_schedule(self, epoch: int):
			"""
			Get the learning rate for a given epoch. Note that this uses the LipschitzLR policy, so the epoch
			number doesn't actually matter.
			:param epoch: int. Epoch number
			:return: learning rate
			"""
			if self.x_train is None:
				raise ValueError('x_train is None')

			if self.loss == 'categorical_crossentropy' or self.loss == 'binary_crossentropy':
				K_max = self._max_penultimate_norm()
				K_ = (((self.n_classes - 1) * K_max) / (self.n_classes * self.bs))
				scale = 0.1
			elif self.loss == 'mae':
				K_max = self._max_penultimate_norm()
				K_ = K_max/self.bs
				scale = 0.01
			else:
				# No adaptive rule is implemented for the remaining losses (e.g. 'mse').
				return 0.1

			lr = self._lr_from_constant(K_, scale)
			self.lr_history.append(lr)
			self.K_z.append(K_max)
			return lr

		def fit(self):
			"""
			Fit to data
			:return: None
			"""

			if self.x_train is None or self.x_test is None or \
				 self.y_train is None or self.y_test is None:
				raise ValueError('Data is None')

			self.model = self._get_model()

			# The model is rebuilt from scratch, so the callbacks and the recorded
			# learning-rate history are rebuilt as well. Appending to lists created
			# in __init__ would duplicate the callbacks on a second fit().
			self.lr_history = []
			self.K_z = []
			callbacks = []

			if self.convergence:
				callbacks.append(EarlyStopping(monitor='val_loss', patience=10, mode='min', min_delta=0.0001))

			if self.adaptive:
				callbacks.append(LearningRateScheduler(self._lr_schedule))

			self.callbacks = callbacks if callbacks else None

			self.model.compile(self.optimizer, loss=self.loss, metrics=self.metrics)
			self.model_history = self.model.fit(
				self.x_train, self.y_train,
				epochs=self.epochs,
				batch_size=self.bs,
				callbacks=self.callbacks,
				validation_data=(self.x_test, self.y_test),
				verbose=0)

		def predict(self, x: np.ndarray):
			"""
			Predict on new data
			:param x: array-like
			:return: predictions:  array-like
			"""
			return self.model.predict(x)

		def _labels_from_predictions(self, preds, y_ref):
			"""
			Convert raw network outputs to class labels.

			Multi-column (softmax) outputs use the arg-max column. A single sigmoid
			output is thresholded at 0.5. Any other single output is floored and
			clipped to the range [0, max(y_ref)].
			:param preds: raw model outputs
			:param y_ref: reference labels; their maximum is the upper clip bound
			:return: integer numpy array of labels
			:raises ValueError: if the predictions contain NaN or infinity
			"""
			preds = np.asarray(preds)
			if not np.all(np.isfinite(preds)):
				raise ValueError('Model produced non-finite predictions')
			if preds.ndim > 1 and preds.shape[-1] > 1:
				return np.argmax(preds, axis=-1)
			flat = preds.reshape(-1)
			if self.loss == 'binary_crossentropy':
				# A sigmoid output lies in [0, 1]; flooring it would almost always give 0.
				return (flat >= 0.5).astype(int)
			return np.clip(np.floor(flat), 0, np.max(y_ref)).astype(int)

		def get_acc(self):
			"""
			Accuracy of the predicted labels on the test and training sets.
			:return: (test accuracy, train accuracy)
			"""
			y_pred_test = self.model.predict(self.x_test)
			y_pred_train = self.model.predict(self.x_train)

			y_final_train = self._labels_from_predictions(y_pred_train, self.y_train)
			y_final_test = self._labels_from_predictions(y_pred_test, self.y_test)

			return accuracy_score(self.y_test, y_final_test), accuracy_score(self.y_train, y_final_train)

		def score(self):
			"""
			Returns model performance on the train and test sets
			:return: (train_loss, train_metric, test_loss, test_metric)
			"""

			if self.x_test is None or self.y_test is None:
				raise ValueError('Test data is None')

			# evaluate() returns [loss, metric]; only the loss is used here, the
			# metric comes from get_acc().
			train_loss, _ = self.model.evaluate(self.x_train, self.y_train)
			test_loss, _ = self.model.evaluate(self.x_test, self.y_test)

			test_metric, train_metric = self.get_acc()

			return train_loss, train_metric, test_loss, test_metric

		def plot_lr(self):
			"""
			Plots learning rate history
			:return: None
			"""
			plt.style.use('ggplot')
			# One point per recorded epoch: self.epochs is only an upper bound
			# (and a huge one when training runs until convergence).
			plt.plot(range(len(self.lr_history)), self.lr_history)

		def get_history(self):
			"""
			returns the per-epoch training loss recorded by the last fit()
			:return: list of loss values
			"""

			return self.model_history.history["loss"]

	# Result tables, one per loss; each is filled only if that loss is run.
	arelu_cc = None
	arelu_mse = None
	columns = ['Experiment','Train_Accuracy','Test_Accuracy','Train_loss','Test_loss']

	# Experiment grid: every combination of depth, width and learning-rate policy.
	hidden_layers = [1]
	hidden_width = [4, 5, 8, 10,20,50]
	loss_metric = [['mae','mae']]

	# Loss curves per experiment, one list per loss. history_mse is initialised
	# as well so that adding 'mse' to loss_metric does not raise a NameError.
	history_cce = []
	history_mse = []
	history_mae = []
	stored_lr_hist = []

	# The batch size depends only on the training-set size, so it is chosen
	# once per dataset instead of once per experiment.
	shape1, _ = x_train.shape
	if shape1<500:
		bs=2
	elif shape1 < 1000:
		bs=4
	elif shape1 < 5000:
		bs = 16
	else:
		bs = 32

	for lm in loss_metric:
		arelu_scores = []
		for layer in hidden_layers:
			for width in hidden_width:
				# 'lalr' uses the adaptive Lipschitz learning rate, 'base' the fixed one.
				for lr in ["lalr", "base"]:
					print('arelu',layer,width,lr)
					K.clear_session()
					# AbstractModel has no adaptive rule for mse, so that combination is skipped.
					if lm[0]=='mse' and lr=='lalr':
						continue

					classification = lm[0]=='categorical_crossentropy'
					adaptive = lr=='lalr'

					network = AbstractModel(bs=bs, loss=lm[0], adaptive=adaptive, num_hiddden_layers=layer, num_hidden_neurons=width, classification=classification, x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)
					network.fit()
					train_loss, train_metric, test_loss, test_metric = network.score()
					history = network.get_history()
					exp_name = str(layer)+'_'+str(width)+'_'+lr
					arelu_scores.append([exp_name, train_metric, test_metric, train_loss, test_loss])

					if lm[0]=='categorical_crossentropy':
						history_cce.append([exp_name, history])
					elif lm[0]=='mse':
						history_mse.append([exp_name, history])
					else:
						history_mae.append([exp_name, history])
					if lr=='lalr':
						stored_lr_hist.append([exp_name, network.lr_history])

		# Build the table for this loss inside the loop, so every entry of
		# loss_metric gets its own frame rather than only the last one.
		if lm[0]=='categorical_crossentropy':
			arelu_cc = pd.DataFrame(arelu_scores[:],columns=columns)
		elif lm[0]=='mse':
			arelu_mse = pd.DataFrame(arelu_scores[:],columns=columns)
		else:
			arelu_mae = pd.DataFrame(arelu_scores[:],columns=columns)

	# Learning-rate trajectories of the adaptive runs. A dedicated figure keeps
	# this plot separate from the next one and from those of other datasets.
	pyplot.figure()
	pyplot.title('LR')
	pyplot.xlabel('Epoch')
	pyplot.ylabel('LR')
	for lr_name, lr_values in stored_lr_hist:
		pyplot.plot(lr_values, label=lr_name)
	pyplot.legend()
	pyplot.savefig(os.path.join(log_dir,dset+'_mae_lr.png'))
	pyplot.show()
	pyplot.close()

	# Epochs actually run by each experiment (length of its loss curve).
	epochs_taken = [len(i[1]) for i in history_mae]
	arelu_mae['epochs_conv'] = epochs_taken

	arelu_mae.to_csv(os.path.join(log_dir,dset+'_mae.csv'), index=False)

	# Training-loss curves of all MAE experiments.
	pyplot.figure()
	pyplot.title('Learning Curves')
	pyplot.xlabel('Epoch')
	pyplot.ylabel('MAE Loss')
	for history in history_mae:
		pyplot.plot(history[1], label=history[0])
	pyplot.legend()
	pyplot.savefig(os.path.join(log_dir,dset+'_mae_history.png'))
	pyplot.show()
	pyplot.close()