## Importing dependencies

In [311]:
import csv
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Data collecting and processing

In [312]:
sonar_data = 'C:/Users/crist/Desktop/Python_Stuff/sonar_data.csv'
sonar = pd.read_csv(sonar_data, header = None)
test_data = 'C:/Users/crist/Desktop/My Gits/Sonar-Rocks-vs-Mines-predictions-with-Python/test.csv'
test = pd.read_csv(test_data)
test.shape

(208, 60)

In [313]:
#Counting only the last column with values M(mines) and R(rocks)
sonar[60].value_counts()

M    111
R     97
Name: 60, dtype: int64

In [314]:
sonar.groupby(60).mean()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
60,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M,0.034989,0.045544,0.05072,0.064768,0.086715,0.111864,0.128359,0.149832,0.213492,0.251022,...,0.019352,0.016014,0.011643,0.012185,0.009923,0.008914,0.007825,0.00906,0.008695,0.00693
R,0.022498,0.030303,0.035951,0.041447,0.062028,0.096224,0.11418,0.117596,0.137392,0.159325,...,0.012311,0.010453,0.00964,0.009518,0.008567,0.00743,0.007814,0.006677,0.007078,0.006024


## Separating data and labels

In [315]:
X = sonar.drop(columns = 60,  axis = 1)
y = sonar[60]

In [316]:
print(X.head())
print(y.head())

       0       1       2       3       4       5       6       7       8   \
0  0.0200  0.0371  0.0428  0.0207  0.0954  0.0986  0.1539  0.1601  0.3109   
1  0.0453  0.0523  0.0843  0.0689  0.1183  0.2583  0.2156  0.3481  0.3337   
2  0.0262  0.0582  0.1099  0.1083  0.0974  0.2280  0.2431  0.3771  0.5598   
3  0.0100  0.0171  0.0623  0.0205  0.0205  0.0368  0.1098  0.1276  0.0598   
4  0.0762  0.0666  0.0481  0.0394  0.0590  0.0649  0.1209  0.2467  0.3564   

       9   ...      50      51      52      53      54      55      56  \
0  0.2111  ...  0.0232  0.0027  0.0065  0.0159  0.0072  0.0167  0.0180   
1  0.2872  ...  0.0125  0.0084  0.0089  0.0048  0.0094  0.0191  0.0140   
2  0.6194  ...  0.0033  0.0232  0.0166  0.0095  0.0180  0.0244  0.0316   
3  0.1264  ...  0.0241  0.0121  0.0036  0.0150  0.0085  0.0073  0.0050   
4  0.4459  ...  0.0156  0.0031  0.0054  0.0105  0.0110  0.0015  0.0072   

       57      58      59  
0  0.0084  0.0090  0.0032  
1  0.0049  0.0052  0.0044  
2  0.016

## Training and test the data

In [317]:
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size= 0.1, stratify= y, random_state= 150)

In [318]:
print(X.shape, X_train.shape, X_test.shape)

(208, 60) (187, 60) (21, 60)


## Model training with Logistic Regression

In [319]:
model = LogisticRegression()

## Training the Logistic Regression model with training data

In [320]:
model.fit(X_train, y_train)

LogisticRegression()

## Model evaluation, accurary on training data

In [321]:
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, y_train)
print('Accuracy on training data: ', training_data_accuracy)

Accuracy on training data:  0.8342245989304813


## Model evaluation, accurary on training data

In [322]:
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, y_test)
print('Accuracy on test data: ', test_data_accuracy)

Accuracy on test data:  0.7619047619047619


## Making a predictive system for sonar data

In [323]:
# Taking a random row from csv file as an imput
'''input_data = (0.0206,0.0132,0.0533,0.0569,0.0647,0.1432,0.1344,0.2041,0.1571,0.1573,0.2327,0.1785,0.1507,0.1916,0.2061,0.2307,0.2360,0.1299,0.3812,0.5858,0.4497,0.4876,1.0000,0.8675,0.4718,0.5341,0.6197,0.7143,0.5605,0.3728,0.2481,0.1921,0.1386,0.3325,0.2883,0.3228,0.2607,0.2040,0.2396,0.1319,0.0683,0.0334,0.0716,0.0976,0.0787,0.0522,0.0500,0.0231,0.0221,0.0144,0.0307,0.0386,0.0147,0.0018,0.0100,0.0096,0.0077,0.0180,0.0109,0.0070)'''

# Generating 60 input random numbers
from random import uniform
random_values = []

for v in range(1):
    for i in range(60):
        num = uniform(0.0010, 0.5)
        random_values.append(num)

   
# Changind the input_data into a numpy array

#inp_data_array = np.array(input_data)

#input_data_reshaped = imp_data_array.reshape(1, -1)

input_val = np.array(random_values)

inp_reshape= input_val.reshape(1, -1)

prediction = model.predict(inp_reshape)

print(prediction)

if prediction[0] == 'R':
    print('The object is a rock.')
else:
    print('The object is a mine.')


['M']
The object is a mine.


In [324]:
model_full = LogisticRegression()
model_full.fit(test, y)

LogisticRegression()

In [326]:

predict_full = model.predict(test)
data_accuracy = accuracy_score(predict_full, y)
print('Accuracy on test data: ', test_data_accuracy)

Accuracy on test data:  0.7619047619047619
