In [1]:
# Connecting google colab with google drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Importing required packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Reading the 'Sonar_data.csv' file as df
df = pd.read_csv('/content/drive/MyDrive/Datasets/sonar_dataset.csv', header = None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [4]:
# Get the shape of the data frame objcet
df.shape

(208, 61)

In [5]:
# Check any of the columns contain any missing value or not
df.isna().sum()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
56,0
57,0
58,0
59,0


In [6]:
# Get the column names of the data frame object
df.columns

Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60],
      dtype='int64')

### Task
* Using the first 60 columns named as 0,1,2,...,59, we have to determine the output of the last column. So, column named 60 is out target variable. The column 60 contains R and M as values. Our target is to predict that. So, first we do some EDA on that particular column. M means metals and R means rough surface

In [7]:
# Check entrywise value count
df[60].value_counts()

Unnamed: 0_level_0,count
60,Unnamed: 1_level_1
M,111
R,97


In [8]:
# Separating the independent set of variable as X and dependent set of variable as y
X = df.drop(60, axis = 'columns')
y = df[60]

In [9]:
# Check the set of dependent variable y
y.unique() # Contains R and M as entries

array(['R', 'M'], dtype=object)

In [10]:
# Deep learning algos can't understand text. So, we need to convert them in nueric sense
# We replace the values of y as: Map R to 1 and map M to 0
y.replace({'R': 1, 'M': 0}, inplace = True)
y.unique()

array([1, 0])

In [11]:
# Train test split the model with 0.25 test sample and random state is 1
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 1)

# Check the length of each train and test sample
print('Shape of X train is:', X_train.shape)
print('Shape of X test is:', X_test.shape)
print('Shape of y train is:', y_train.shape)
print('Shape of y test is:', y_test.shape)

Shape of X train is: (156, 60)
Shape of X test is: (52, 60)
Shape of y train is: (156,)
Shape of y test is: (52,)


### Artificial Intelligence model building:

* The model name is model
* Use keras sequential
* The input layer has 60 neurons
* The first hidden layer also has 60 neurons with relu
* Second hidden layer has 30 neurons with relu
* Third hidden layer has 15 neurons with relu
* Output layer has 1 node with sigmoid
* Compile the model with adam optimizer, binary cross entropy loss and accuracy metric
* Fit the model with train and test data with 100 epochs and batch_size 8

In [12]:
# Define the model as model
model = keras.Sequential([
    keras.layers.Input(shape = (60,)),
    keras.layers.Dense(60, activation = 'relu'),
    keras.layers.Dense(30, activation = 'relu'),
    keras.layers.Dense(15, activation = 'relu'),
    keras.layers.Dense(1, activation = 'sigmoid'),
])

# Compile the model
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

# Fit the training data in the model
model.fit(X_train, y_train, epochs = 100, batch_size = 8) # Mini-batch

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5009 - loss: 0.7068
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6873 - loss: 0.6552
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7159 - loss: 0.6174 
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8170 - loss: 0.5776 
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7401 - loss: 0.5408 
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7693 - loss: 0.5386  
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8185 - loss: 0.4612 
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7552 - loss: 0.4988 
Epoch 9/100
[1m20/20[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7a112dda3fd0>

In [13]:
# Check the performance of the model on test data
model.evaluate(X_test, y_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7396 - loss: 1.0571  


[0.9493933916091919, 0.75]

In [14]:
# Get the first 10 predictions using the model as y_pred_10
y_pred_10 = model.predict(X_test)[:10].reshape(-1)
y_pred_10 = np.round(y_pred_10)
print('First ten predicted outcomes are:', y_pred_10)
print('Get the actual values as:', y_test[:10].values)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
First ten predicted outcomes are: [0. 1. 1. 0. 1. 1. 0. 1. 0. 1.]
Get the actual values as: [0 0 0 0 1 1 0 1 0 1]


### The model is good. But we may achieve high performance on the test data by introducing regularization.

* Use dropout regularization after the hidden layers with dropout probability of 0.5
* Name the model as modeld

In [15]:
# Define the model as model
modeld = keras.Sequential([
    keras.layers.Input(shape = (60,)),
    keras.layers.Dense(60, activation = 'relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(30, activation = 'relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(15, activation = 'relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation = 'sigmoid'),
])

# Compile the model
modeld.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

# Fit the training data in the model
modeld.fit(X_train, y_train, epochs = 100, batch_size = 8) # Mini-batch

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5453 - loss: 0.7054
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4908 - loss: 0.7217 
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5207 - loss: 0.7107 
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5111 - loss: 0.7152 
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5121 - loss: 0.6985 
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5396 - loss: 0.6896 
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4411 - loss: 0.7020 
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4158 - loss: 0.7410 
Epoch 9/100
[1m20/20[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7a112c33cbd0>

In [16]:
# Calculate test set accuracy
modeld.evaluate(X_test, y_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7396 - loss: 0.4858  


[0.4545029103755951, 0.75]

### Check classification report:
* Get the classification report for both the models
* For model and modeld

In [17]:
# Import classification matrix from sklearn
from sklearn.metrics import classification_report

# Get classification report for model named 'model'
y_pred = np.round(model.predict(X_test))
print('Classification Report for the Normal model')
print(classification_report(y_pred, y_test))

print('----------------------------------------------------------------------------')

# Get classification report for model named 'modeld'
y_predd = np.round(modeld.predict(X_test))
print('Classification Report for the Dropout model')
print(classification_report(y_predd, y_test))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Classification Report for the Normal model
              precision    recall  f1-score   support

         0.0       0.85      0.72      0.78        32
         1.0       0.64      0.80      0.71        20

    accuracy                           0.75        52
   macro avg       0.75      0.76      0.75        52
weighted avg       0.77      0.75      0.75        52

----------------------------------------------------------------------------
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Classification Report for the Dropout model
              precision    recall  f1-score   support

         0.0       0.89      0.71      0.79        34
         1.0       0.60      0.83      0.70        18

    accuracy                           0.75        52
   macro avg       0.74      0.77      0.74        52
weighted avg       0.79      0.75      0.76        52

