In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import metrics 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression

import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense


In [None]:
# Load Dataset 1 into a DataFrame; the file is expected in the notebook's working directory.
# Download the csv with name "SelfShiksha_ANN_MCQ47_ANN_Dataset1.csv"
df=pd.read_csv("SelfShiksha_ANN_MCQ47_ANN_Dataset1.csv")

In [None]:
# Preview the first few rows of the loaded frame
df.head()

In [None]:
# Column names, dtypes and non-null counts
df.info()

In [None]:
# Summary statistics of the numeric columns
df.describe()

In [None]:
# `is_anomaly` is the class label; every other column is used as an input feature.
# To experiment with a subset of features instead, select them explicitly, e.g.
# x = df[['param1','param3','param6','param8','param11','param13']]
x = df.drop(['is_anomaly'], axis=1)
y = df['is_anomaly'].to_numpy()

# Split FIRST, so the held-out rows play no part in fitting the scaler.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=4)

# Learn the per-feature min/max from the training rows only, then apply that
# same transform to both splits. Fitting the scaler on the full dataset would
# leak test-set statistics into training and make the test score optimistic.
# Consequence: scaled test values may fall slightly outside [0, 1].
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# LOGISTIC REGRESSION (baseline)

# fit() returns the estimator itself, so construction and training can be chained
LogReg = LogisticRegression().fit(x_train, y_train)

# Fraction of held-out samples whose predicted label matches the true label
y_pred = LogReg.predict(x_test)
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

In [None]:
# (number of training samples, number of input features)
x_train.shape

In [None]:
# One label per training sample
y_train.shape

In [None]:
# ARTIFICIAL NEURAL NETWORK
# A fully connected network with 3 hidden layers of 32 nodes each.
# The hidden layers use ReLU activation and the single output node uses Sigmoid.
# Binary Cross Entropy is the cost/loss function and
# Stochastic Gradient Descent is the optimizer.

# Seed TensorFlow's global RNG before any layer is created, so a fresh
# "Restart & Run All" starts from the same initial weights every time.
# (Assumes Keras runs on the TensorFlow backend imported at the top; exact
# numbers can still vary across hardware / library versions.)
tf.random.set_seed(4)

model = Sequential()
# input_dim fixes the width of the input layer to the number of feature columns
model.add(Dense(32, activation='relu', input_dim=x_train.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [None]:
# Train the network and keep the per-epoch metrics in `history`.
#   epochs     : number of passes over the training set
#   batch_size : number of data points used in each gradient step
#   verbose=2  : one summary line per epoch
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=2,
)

In [None]:
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; the accuracy is therefore the last entry.
loss_accuracy = model.evaluate(x=x_test, y=y_test)
print("Accuracy = ", loss_accuracy[-1])

In [None]:
# Plot how the training accuracy changes with epochs.
# This is a fairly trivial problem, so the accuracy is expected to be
# close to 100% from the first epoch onwards.

fig, ax = plt.subplots()
ax.plot(history.history['accuracy'])
ax.set_title('model accuracy')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
ax.legend(['accuracy'], loc='upper left')
plt.show()

In [None]:
# Plot the training loss recorded at the end of each epoch.
# Even when the accuracy is already near 100%, the loss can keep
# decreasing slightly as the model is trained for more epochs.

fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_title('model loss')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['loss'], loc='upper left')
plt.show()

## DATASET 2

### Dataset 1 was fairly simple, and we got almost 100% accuracy with both Logistic Regression and ANN.
### Now let's take a slightly more complicated problem and see if ANN can give us better accuracy.

In [None]:
# Source : https://www.kaggle.com/code/caghank/logistic-regression-vs-neural-network-vs-cnn/notebook

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential # initialize neural network library
from keras.utils import to_categorical
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
from sklearn.metrics import confusion_matrix
import itertools

from keras.layers import Dense,Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop,Adam,Adamax
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [None]:
# Download the csv file with name "SelfShiksha_ANN_MCQ47_ANN_Dataset2.csv"
# NOTE: this rebinds `df`, replacing the Dataset 1 frame loaded earlier in the notebook.

df =  pd.read_csv("SelfShiksha_ANN_MCQ47_ANN_Dataset2.csv")

In [None]:
# Preview the first few rows of Dataset 2
df.head()

In [None]:
# Each image consists of 784 pixels, i.e. 28 * 28.
# Assumes every CSV row stores one flattened square image — TODO confirm against the data.

img_size = 28 

In [None]:
# Separate the target column from the remaining (pixel) columns
Y = df['label']
X = df.drop(columns=['label'])

In [None]:
# Normalize the data by dividing by 255 (assumes 8-bit pixel values in 0..255).
# The features are recomputed from `df` instead of from `X`, so re-running
# this cell does not divide the pixels by 255 a second time.
X = df.drop(['label'], axis=1) / 255.0

In [None]:
# Show the first 25 images of the full dataset (the train/test split has not
# happened yet) in a 5 x 5 grid, each captioned with its label.
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    # Positional lookup for both the label and the pixels, so that row i of X
    # is always paired with row i of Y regardless of the frame's index.
    ax.set_xlabel(Y.iloc[i])
    ax.imshow(X.iloc[i, :].values.reshape(img_size, img_size))

In [None]:
# Hold out 15% of the samples for testing; the fixed random_state keeps the split repeatable

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.15,
    random_state=42,
)
# Turn the training labels into a column vector of shape (n_samples, 1)
Y_train = Y_train.to_numpy().reshape(-1, 1)

In [None]:
# Report the shape of each of the four arrays produced by the split
for caption, split in (
    ("X_train shape is : ", X_train),
    ("Y_train shape is : ", Y_train),
    ("X_test shape is ", X_test),
    ("Y_test shape is ", Y_test),
):
    print(caption, split.shape)

In [None]:
# LOGISTIC REGRESSION

# SAG solver with a loose tolerance (tol=0.1) and at most 200 iterations.
LogReg = LogisticRegression(solver='sag', random_state=42, max_iter=200, tol=0.1)

# Y_train is held as an (n_samples, 1) column vector. scikit-learn expects a
# 1-D label array and raises a DataConversionWarning for column vectors,
# so the labels are flattened with ravel() wherever they are passed in.
LogReg.fit(X_train, Y_train.ravel())
Y_pred = LogReg.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, Y_pred)
print(accuracy)

# score() reports mean accuracy on the given data and labels
print("Train accuracy: {} ".format(LogReg.score(X_train, Y_train.ravel())))
print("Test accuracy: {} ".format(LogReg.score(X_test, Y_test)))

In [None]:
# ARTIFICIAL NEURAL NETWORK
# Two hidden layers of 240 ReLU units and a 10-unit softmax output
# (one unit per class), trained with Adam on categorical cross-entropy.

# Seed TensorFlow's global RNG before any layer is created, so a fresh
# "Restart & Run All" starts from the same initial weights every time.
# (Assumes Keras runs on the TensorFlow backend imported at the top; exact
# numbers can still vary across hardware / library versions.)
tf.random.set_seed(42)

model = Sequential() # initialize neural network
model.add(Dense(units=240, kernel_initializer='uniform', activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(units=240, kernel_initializer='uniform', activation='relu'))
model.add(Dense(units=10, kernel_initializer='uniform', activation='softmax')) # softmax yields one probability per class

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# One-hot encode the integer training labels, as required by categorical_crossentropy.
# The guard makes the cell safe to re-run: once Y_train is already an
# (n_samples, 10) one-hot matrix it is left untouched instead of being encoded again.
# num_classes is pinned to the 10 output units of the network rather than
# inferred from the largest label that happens to be present.
if Y_train.ndim == 1 or Y_train.shape[-1] == 1:
    Y_train = to_categorical(Y_train, num_classes=10)

# Train for 10 epochs with mini-batches of 150 samples; `history` keeps the per-epoch metrics
history = model.fit(X_train, Y_train, epochs=10, batch_size=150)

In [None]:
# Evaluate on the held-out test split (not on the training data), so the number
# printed here is a genuine test accuracy and can be compared directly with the
# Logistic Regression test accuracy to see whether the ANN outperforms it.
# Y_test still holds integer labels, so it is one-hot encoded to match the
# 10-unit softmax output / categorical_crossentropy loss.

test_loss, test_acc = model.evaluate(X_test, to_categorical(Y_test, num_classes=10)) #evaluating our model
print('Test accuracy:', test_acc)

In [None]:
# Training accuracy recorded at the end of each epoch

fig, ax = plt.subplots()
ax.plot(history.history['accuracy'])
ax.set_title('model accuracy')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
ax.legend(['accuracy'], loc='upper left')
plt.show()

In [None]:
# Training loss recorded at the end of each epoch

fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_title('model loss')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend(['loss'], loc='upper left')
plt.show()