# Building a system in Python that can predict whether an object is either Rock or Mine with SONAR Data. 

### Here, we would be using Logistic Regression Model for our prediction. 

#### Work Flow
     
     1. Collecting SONAR Data
     2. Data pre processing
     3. Splitting into Train and Test 
     4. Feeding into ML model - Logistic Regression Model here
     5. Receive a trained Logistic Regression model
     
          -- When we give any new data the model will predict whether the object is Rock(R) or Mine(M).

In [1]:
# Importing Libraries

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [6]:
# Data Collection and Data Processing

# Loading Dataset
sonar_data = pd.read_csv('SonarData.csv')
sonar_data.head()

Unnamed: 0,0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032,R
0,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
1,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
2,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
3,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R
4,0.0286,0.0453,0.0277,0.0174,0.0384,0.099,0.1201,0.1833,0.2105,0.3039,...,0.0045,0.0014,0.0038,0.0013,0.0089,0.0057,0.0027,0.0051,0.0062,R


In [15]:
labels = [str(i) for i in range(0, 61)]
sonar_data.columns=labels
sonar_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.0140,0.0049,0.0052,0.0044,R
1,0.0262,0.0582,0.1099,0.1083,0.0974,0.2280,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.0180,0.0244,0.0316,0.0164,0.0095,0.0078,R
2,0.0100,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.0150,0.0085,0.0073,0.0050,0.0044,0.0040,0.0117,R
3,0.0762,0.0666,0.0481,0.0394,0.0590,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.0110,0.0015,0.0072,0.0048,0.0107,0.0094,R
4,0.0286,0.0453,0.0277,0.0174,0.0384,0.0990,0.1201,0.1833,0.2105,0.3039,...,0.0045,0.0014,0.0038,0.0013,0.0089,0.0057,0.0027,0.0051,0.0062,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202,0.0187,0.0346,0.0168,0.0177,0.0393,0.1630,0.2028,0.1694,0.2328,0.2684,...,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157,M
203,0.0323,0.0101,0.0298,0.0564,0.0760,0.0958,0.0990,0.1018,0.1030,0.2154,...,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067,M
204,0.0522,0.0437,0.0180,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.0160,0.0029,0.0051,0.0062,0.0089,0.0140,0.0138,0.0077,0.0031,M
205,0.0303,0.0353,0.0490,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048,M


In [16]:
sonar_data.shape

(207, 61)

In [17]:
sonar_data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,...,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0,207.0
mean,0.029208,0.038443,0.043837,0.054053,0.075105,0.104599,0.121591,0.134677,0.177361,0.208245,...,0.016034,0.013472,0.010729,0.010917,0.0093,0.008181,0.007771,0.007947,0.007936,0.006523
std,0.023038,0.03304,0.038521,0.046583,0.055669,0.059247,0.061897,0.08534,0.118311,0.134741,...,0.012027,0.009628,0.007071,0.00731,0.007103,0.005719,0.005756,0.006485,0.006196,0.005038
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.0133,0.0164,0.0189,0.02445,0.0377,0.06695,0.0806,0.08035,0.09675,0.11115,...,0.00835,0.00735,0.00505,0.00535,0.0041,0.0044,0.0037,0.0036,0.00365,0.0031
50%,0.0228,0.0308,0.0342,0.0441,0.062,0.0921,0.1056,0.1119,0.1522,0.181,...,0.0138,0.0115,0.0096,0.0093,0.0075,0.0068,0.0059,0.0058,0.0063,0.0053
75%,0.0358,0.0481,0.0582,0.0657,0.10105,0.13415,0.15305,0.1698,0.2315,0.269,...,0.0207,0.01675,0.0149,0.01445,0.0121,0.01035,0.01035,0.0104,0.01035,0.00855
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


In [19]:
sonar_data.iloc[:,60].value_counts()

M    111
R     96
Name: 60, dtype: int64

In [20]:
# Here 'M' is Mine and 'R' is Rock
sonar_data.groupby(sonar_data.iloc[:,60]).mean()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
60,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M,0.034989,0.045544,0.05072,0.064768,0.086715,0.111864,0.128359,0.149832,0.213492,0.251022,...,0.019352,0.016014,0.011643,0.012185,0.009923,0.008914,0.007825,0.00906,0.008695,0.00693
R,0.022524,0.030232,0.035879,0.041664,0.06168,0.096199,0.113767,0.117153,0.135584,0.158785,...,0.012198,0.010533,0.009673,0.009451,0.008581,0.007333,0.007708,0.006659,0.007058,0.006053


In [34]:
# Seperating Data and Labels
X = sonar_data.drop(columns=['60'], axis=1)
Y = sonar_data['60']

print(X)
print(Y)

          0       1       2       3       4       5       6       7       8  \
0    0.0453  0.0523  0.0843  0.0689  0.1183  0.2583  0.2156  0.3481  0.3337   
1    0.0262  0.0582  0.1099  0.1083  0.0974  0.2280  0.2431  0.3771  0.5598   
2    0.0100  0.0171  0.0623  0.0205  0.0205  0.0368  0.1098  0.1276  0.0598   
3    0.0762  0.0666  0.0481  0.0394  0.0590  0.0649  0.1209  0.2467  0.3564   
4    0.0286  0.0453  0.0277  0.0174  0.0384  0.0990  0.1201  0.1833  0.2105   
..      ...     ...     ...     ...     ...     ...     ...     ...     ...   
202  0.0187  0.0346  0.0168  0.0177  0.0393  0.1630  0.2028  0.1694  0.2328   
203  0.0323  0.0101  0.0298  0.0564  0.0760  0.0958  0.0990  0.1018  0.1030   
204  0.0522  0.0437  0.0180  0.0292  0.0351  0.1171  0.1257  0.1178  0.1258   
205  0.0303  0.0353  0.0490  0.0608  0.0167  0.1354  0.1465  0.1123  0.1945   
206  0.0260  0.0363  0.0136  0.0272  0.0214  0.0338  0.0655  0.1400  0.1843   

          9  ...      50      51      52      53   

In [47]:
# Splitting into Training and Testing data

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1, stratify=Y, random_state=1 ) 
# test_size means splitting ratio 0.2 means 80-20 distribution
# stratify = Y means splitting the data in equal number means equal number of mine and rock in training and testing data
# random_state = 1 means splitting the data in a particylar way if state = 2 it is splitted inna different manner

print(X.shape, X_train.shape, X_test.shape)

(207, 60) (186, 60) (21, 60)


In [48]:
# Creating the Logistic Regression model
model = LogisticRegression()

In [49]:
# Training the model with training data
model.fit(X_train, Y_train)

LogisticRegression()

In [50]:
# Evaluating the model 

# Finding accuracy of training data

X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
training_data_accuracy

0.8440860215053764

In [51]:
# Finding accuracy of testing data

X_test_prediction = model.predict(X_test)
training_data_accuracy = accuracy_score(X_test_prediction, Y_test)
training_data_accuracy

0.7619047619047619

#### Making the Prediction System

In [59]:
#sample input for Mine '0.0100,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,0.0881,0.1992,0.0184,0.2261,0.1729,0.2131,0.0693,0.2281,0.4060,0.3973,0.2741,0.3690,0.5556,0.4846,0.3140,0.5334,0.5256,0.2520,0.2090,0.3559,0.6260,0.7340,0.6120,0.3497,0.3953,0.3012,0.5408,0.8814,0.9857,0.9167,0.6121,0.5006,0.3210,0.3202,0.4295,0.3654,0.2655,0.1576,0.0681,0.0294,0.0241,0.0121,0.0036,0.0150,0.0085,0.0073,0.0050,0.0044,0.0040,0.0117'
#Sample input for Rock '0.0762,0.0666,0.0481,0.0394,0.0590,0.0649,0.1209,0.2467,0.3564,0.4459,0.4152,0.3952,0.4256,0.4135,0.4528,0.5326,0.7306,0.6193,0.2032,0.4636,0.4148,0.4292,0.5730,0.5399,0.3161,0.2285,0.6995,1.0000,0.7262,0.4724,0.5103,0.5459,0.2881,0.0981,0.1951,0.4181,0.4604,0.3217,0.2828,0.2430,0.1979,0.2444,0.1847,0.0841,0.0692,0.0528,0.0357,0.0085,0.0230,0.0046,0.0156,0.0031,0.0054,0.0105,0.0110,0.0015,0.0072,0.0048,0.0107,0.0094'


input_data = () #enter you input here
#converting the input data into a numpy array
input_data_as_numpy_array = np.asarray(input_data)

#reshaping the numpy array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

prediction = model.predict(input_data_reshaped)
print(prediction)

if (prediction[0]=='R'):
    print("Object is a Rock")
else:
    print("Object is a Mine")


['M']
Object is a Mine


### Feature Importance Visualization:
##### Create visualizations to showcase the importance of different features in your model's predictions.

In [None]:
# Higher the importance score/weight for a feature, the more impact that feature has on the model's predictions. 
# A high importance score/weight suggests that shuffling the values of that feature significantly affects the model's accuracy, indicating that the feature contains valuable information for the model.

In [80]:
import eli5
from eli5.sklearn import PermutationImportance

# Train your chosen model
model.fit(X_train, y_train)

feature_names = [str(i) for i in range(1, 61)]
# Calculate feature importances
perm_importance = PermutationImportance(model, random_state=42).fit(X_test, y_test)

# Visualize feature importances
eli5.show_weights(perm_importance, feature_names=feature_names)

Weight,Feature
0.0429  ± 0.0555,35
0.0333  ± 0.0571,12
0.0333  ± 0.0381,36
0.0333  ± 0.0381,25
0.0286  ± 0.0356,17
0.0286  ± 0.0467,45
0.0238  ± 0.0426,28
0.0238  ± 0.0673,16
0.0238  ± 0.0000,48
0.0190  ± 0.0632,15
