Dropout regularization is a technique used in training neural networks to prevent overfitting. It's especially useful when you have a deep neural network with a large number of parameters.

## What is Dropout?

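Dropout works by randomly "dropping out" (i.e., setting to zero) the outputs of a fraction of the neurons in a layer during each training iteration. The dropped neurons contribute nothing to the forward pass, and because their output is zero, no gradient flows through them in the backward pass, so their weights are not updated in that iteration. A different random subset is dropped at each iteration, and dropout is switched off at inference time so that every neuron is used.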

In [29]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [30]:
import warnings
warnings.filterwarnings('ignore')


In [31]:
# Read the sonar CSV. header=None makes pandas treat the first row as data and
# label the columns with integers instead of names.
df = pd.read_csv(filepath_or_buffer='sonar_dataset.csv', header=None)

In [32]:
# (rows, columns) of the loaded frame.
df.shape

(208, 61)

In [33]:
# Preview five randomly chosen rows. The seed is fixed (the same value the
# train/test split uses) so the preview is identical on every re-run instead
# of changing each time the cell executes.
df.sample(5, random_state=1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
116,0.0094,0.0333,0.0306,0.0376,0.1296,0.1795,0.1909,0.1692,0.187,0.1725,...,0.0153,0.0112,0.0241,0.0164,0.0055,0.0078,0.0055,0.0091,0.0067,M
38,0.0123,0.0022,0.0196,0.0206,0.018,0.0492,0.0033,0.0398,0.0791,0.0475,...,0.0125,0.0134,0.0026,0.0038,0.0018,0.0113,0.0058,0.0047,0.0071,R
58,0.0225,0.0019,0.0075,0.0097,0.0445,0.0906,0.0889,0.0655,0.1624,0.1452,...,0.0034,0.0129,0.01,0.0044,0.0057,0.003,0.0035,0.0021,0.0027,R
181,0.0423,0.0321,0.0709,0.0108,0.107,0.0973,0.0961,0.1323,0.2462,0.2696,...,0.0176,0.0035,0.0093,0.0121,0.0075,0.0056,0.0021,0.0043,0.0017,M
166,0.0411,0.0277,0.0604,0.0525,0.0489,0.0385,0.0611,0.1117,0.1237,0.23,...,0.0217,0.0038,0.0019,0.0065,0.0132,0.0108,0.005,0.0085,0.0044,M


In [34]:
# Count the missing values in each column.
df.isna().sum()

0     0
1     0
2     0
3     0
4     0
     ..
56    0
57    0
58    0
59    0
60    0
Length: 61, dtype: int64

In [35]:
# Column labels of the frame; they are integers because the CSV was read with
# header=None.
df.columns

Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60],
      dtype='int64')

In [36]:
# Column 60 is the target (y). Its two classes are roughly balanced rather than
# skewed (111 M vs 97 R in the recorded output).
df[60].value_counts()

60
M    111
R     97
Name: count, dtype: int64

In [37]:
# Separate features from the target: every column except 60 goes into X, and
# column 60 on its own becomes y.
X, y = df.drop(columns=60), df[60]

In [38]:
# First five target labels, still in their raw (object dtype) form.
y.head()

0    R
1    R
2    R
3    R
4    R
Name: 60, dtype: object

In [39]:
# One-hot encode the labels and keep a single indicator column: with
# drop_first=True the first category (M) is dropped, leaving column "R", which
# is True for R samples and False for M samples.
# Note: y changes here from a Series of labels to a one-column boolean DataFrame.
y = pd.get_dummies(y, drop_first=True)
# Seeded so the five-row preview is the same on every re-run.
y.sample(5, random_state=1)

Unnamed: 0,R
109,False
11,True
41,True
205,False
131,False


In [40]:
# Counts of each encoded value; in the recorded output they match the raw
# label counts (False = 111 M, True = 97 R).
y.value_counts()

R    
False    111
True      97
Name: count, dtype: int64

In [41]:
# First five rows of the feature frame (columns 0-59; the target column 60 is gone).
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0125,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0033,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0241,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0156,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094


In [42]:
# Hold out 25% of the rows as a test set; random_state pins the shuffle so the
# same split is produced on every run.
# NOTE(review): the split is not stratified on y — consider whether it should
# be for a dataset of only 208 rows.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)