# Logistic Regression in python

In [2]:
import pandas as pd 
import numpy as np 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split  
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

### Importing data for analysis
- We are using the **Sonar Dataset**
- This dataset contains the radar returns of rocks or simulated mines
- [Sonar Dataset Description](https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.names)
- [Sonar Dataset Source](https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv)



In [3]:
# Load dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv"
dataset = pd.read_csv(url, header=None)

In [4]:
# Inspecting the dataset
dataset.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


In [5]:
X = dataset.drop([60], axis=1)
y = dataset[60]

### Standardization

In [6]:
ss = StandardScaler()
X_ss = ss.fit_transform(X)

### Train test split 

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_ss, y, test_size=0.1)  

### Training Model 

In [8]:
logreg = LogisticRegression()
logreg.fit(X_train,y_train)


LogisticRegression()

### Obtaining Intercept and Coefficients

In [9]:
print('Logreg intercept:', logreg.intercept_)
print('Logreg coef(s):', logreg.coef_)

Logreg intercept: [-0.64050954]
Logreg coef(s): [[-6.40309956e-01 -4.69102201e-02  5.78572695e-01 -7.07098548e-01
   1.01656341e-01 -4.64802055e-02  1.40367929e-01  7.93641188e-01
  -6.60587373e-01  1.14636954e-03 -5.54149724e-01 -1.11575810e+00
   9.99872659e-02  1.47753470e-01  1.22145486e-01  7.04217082e-01
   4.24217268e-01 -3.58297273e-01  1.38830966e-01 -2.72990813e-01
  -8.29973363e-02 -5.97216095e-01 -5.63927671e-01 -6.00917049e-01
   1.21380792e-01  4.55786847e-01 -2.63532788e-01  1.07509476e-01
  -2.76440687e-01 -7.21815723e-01  1.54426385e+00 -4.42532895e-01
  -5.17058376e-01  3.16017060e-01  8.85072147e-02  7.05146491e-01
   6.81118417e-01 -2.65248920e-01 -4.14142736e-01  9.18714380e-01
   1.80588756e-01 -1.52765875e-01 -1.55891830e-01 -3.50989627e-01
  -7.45078863e-01 -6.14243083e-01 -2.33624618e-01 -6.95282486e-01
  -9.21919162e-01  1.26502821e+00 -2.29448690e-01 -6.20358345e-01
  -3.69946641e-01 -4.51822071e-01  4.03990868e-02  3.90026058e-02
   3.72917135e-01 -4.6850697

### Obtaining predictions and prediction probabilities 

In [10]:
logreg.predict(X_test)

array(['M', 'M', 'R', 'M', 'M', 'R', 'R', 'M', 'M', 'M', 'M', 'R', 'R',
       'R', 'R', 'M', 'M', 'M', 'M', 'R', 'M'], dtype=object)

In [11]:
logreg.predict_proba(X_test)

array([[9.99968866e-01, 3.11341829e-05],
       [9.99368406e-01, 6.31594340e-04],
       [3.23607830e-02, 9.67639217e-01],
       [9.72888174e-01, 2.71118262e-02],
       [8.32648607e-01, 1.67351393e-01],
       [7.15604609e-02, 9.28439539e-01],
       [1.20197589e-02, 9.87980241e-01],
       [8.56822103e-01, 1.43177897e-01],
       [5.21955704e-01, 4.78044296e-01],
       [6.59639706e-01, 3.40360294e-01],
       [6.86542830e-01, 3.13457170e-01],
       [1.98559277e-03, 9.98014407e-01],
       [4.85482978e-01, 5.14517022e-01],
       [4.55503841e-01, 5.44496159e-01],
       [3.78518035e-01, 6.21481965e-01],
       [9.99813164e-01, 1.86836486e-04],
       [9.91983692e-01, 8.01630812e-03],
       [5.31925551e-01, 4.68074449e-01],
       [6.44482903e-01, 3.55517097e-01],
       [2.16572045e-02, 9.78342795e-01],
       [7.49885849e-01, 2.50114151e-01]])

### Exercise 1
- Compare the model performance using logistic regression as well as knn.
- Which model is faster 
- Which model has a higher accuracy

Using KNN

In [12]:
classifier = KNeighborsClassifier()  
classifier.fit(X_train, y_train)

KNeighborsClassifier()

In [13]:
classifier.predict(X_test)

array(['M', 'M', 'R', 'M', 'M', 'R', 'R', 'M', 'R', 'M', 'R', 'R', 'M',
       'M', 'M', 'M', 'R', 'M', 'M', 'R', 'M'], dtype=object)

In [17]:
y_pred_knn =classifier.predict_proba(X_test)
y_pred_knn

array([[1. , 0. ],
       [1. , 0. ],
       [0. , 1. ],
       [1. , 0. ],
       [0.6, 0.4],
       [0.4, 0.6],
       [0. , 1. ],
       [1. , 0. ],
       [0.4, 0.6],
       [0.8, 0.2],
       [0.4, 0.6],
       [0. , 1. ],
       [1. , 0. ],
       [0.6, 0.4],
       [1. , 0. ],
       [1. , 0. ],
       [0.4, 0.6],
       [0.8, 0.2],
       [0.8, 0.2],
       [0. , 1. ],
       [1. , 0. ]])

Accuray

In [19]:
acc_knn = accuracy_score(y_test, y_pred_knn)
acc_knn


ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets

LogisticRegression

In [22]:
y_pred_lg = logreg.predict_proba(X_test)

In [23]:
acc_lg= accuracy_score(y_test, y_pred_lg)
acc_lg

ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets