# Logistic Regression

## Importing the libraries and dataset

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the dataset: every column except the last is used as a feature,
# and the last column is used as the class label.
dataset = pd.read_csv('pulsar.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Splitting

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [4]:
# Inspect the training features (raw values; scaling happens in a later cell).
print(X_train)

[[ 1.20640625e+02  4.78429616e+01  2.57962577e-01 ...  3.51982345e+01
   4.81978426e+00  2.35283829e+01]
 [ 1.16554688e+02  4.87029915e+01  1.97625250e-01 ...  1.66106785e+01
   8.16618510e+00  8.48467094e+01]
 [ 6.67734375e+01  5.82062892e+01  1.83873406e+00 ...  8.06734938e+01
   6.49639086e-01 -9.18078374e-01]
 ...
 [ 1.33164062e+02  5.54754700e+01 -9.44232330e-02 ...  1.26535027e+01
   1.21759792e+01  1.81458434e+02]
 [ 9.81718750e+01  4.88055948e+01  3.11807442e-01 ...  1.62043130e+01
   1.02032095e+01  1.22776632e+02]
 [ 1.06046875e+02  4.20750556e+01  4.21143692e-01 ...  1.99249204e+01
   7.31348277e+00  5.87921276e+01]]


In [5]:
# Inspect the training labels.
print(y_train)

[0 0 1 ... 0 0 0]


In [6]:
# Inspect the test features (raw values; scaling happens in a later cell).
print(X_test)

[[1.02289062e+02 4.57590757e+01 2.31697754e-01 ... 1.43430197e+01
  9.50036830e+00 1.17057724e+02]
 [1.20093750e+02 5.17653114e+01 1.52884958e-01 ... 1.96742682e+01
  7.96200404e+00 7.36250634e+01]
 [1.08820312e+02 5.80159506e+01 3.96478925e-01 ... 1.94694241e+01
  9.30173415e+00 9.46556370e+01]
 ...
 [1.02867188e+02 4.77706714e+01 3.24098342e-01 ... 1.83772548e+01
  8.34428670e+00 8.12848948e+01]
 [1.44312500e+02 4.87919291e+01 3.69933000e-03 ... 3.23990870e+01
  4.51396826e+00 1.98693710e+01]
 [1.20273438e+02 4.44043415e+01 1.78299756e-01 ... 1.50265056e+01
  8.48738922e+00 9.57230890e+01]]


In [7]:
# Inspect the test labels.
print(y_test)

[0 0 0 ... 0 0 0]


In [8]:
# Show the first row with its column names; these feature values are reused
# as the single-sample input in the prediction cell further down.
dataset.head(1)

Unnamed: 0,Mean of the integrated profile,Standard deviation of the integrated profile,Excess kurtosis of the integrated profile,Skewness of the integrated profile,Mean of the DM-SNR curve,Standard deviation of the DM-SNR curve,Excess kurtosis of the DM-SNR curve,Skewness of the DM-SNR curve,target_class
0,140.5625,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225,0


## Feature Scaling

In [9]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics influence the scaling.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its own
# would refit `sc` on already-scaled data; re-run from the split cell instead.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [10]:
# Inspect the training features after standardization.
print(X_train)

[[ 0.37710226  0.18643192 -0.21304291 ...  0.44549441 -0.76498842
  -0.76290015]
 [ 0.21958464  0.31196312 -0.26882602 ... -0.50223528 -0.02369848
  -0.18468066]
 [-1.69954017  1.69907806  1.24841321 ...  2.76415609 -1.68875303
  -0.99342536]
 ...
 [ 0.85989527  1.30048377 -0.53883092 ... -0.70400108  0.86454539
   0.72634848]
 [-0.48909406  0.32693924 -0.16326221 ... -0.52295477  0.42754025
   0.17299088]
 [-0.1855037  -0.6554599  -0.06217857 ... -0.33325097 -0.21258789
  -0.43037012]]


In [11]:
# Inspect the test features after applying the scaler fitted on the training split.
print(X_test)

[[-0.33037172 -0.11773505 -0.23732528 ... -0.61785713  0.27184788
   0.11906273]
 [ 0.35601959  0.75894371 -0.31018935 ... -0.34603105 -0.06892838
  -0.29049852]
 [-0.07858447  1.67129596 -0.08498166 ... -0.3564755   0.22784672
  -0.09218444]
 ...
 [-0.30808433  0.17588034 -0.15189902 ... -0.41216229  0.01575433
  -0.21826785]
 [ 1.28968044  0.32494457 -0.44811457 ...  0.30277338 -0.83273235
  -0.79740389]
 [ 0.36294675 -0.31547399 -0.28669284 ... -0.58300801  0.0474542
  -0.08211858]]


## Training

In [12]:
from sklearn.linear_model import LogisticRegression

# Fit the classifier on the standardized training split.
# The bare `fit` call stays on the last line so the fitted estimator is displayed.
classifier = LogisticRegression(random_state=0)
classifier.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Predicting and evaluating

In [13]:
# Classify one raw observation. It must go through the fitted scaler first,
# because the classifier was trained on standardized features.
print(
    classifier.predict(
        sc.transform(
            [[140.5625, 55.683782, -0.234571, -0.699648, 3.199833, 19.110426, 7.975532, 74.242225]]
        )
    )
)

[0]


In [14]:
# Predict labels for the whole test split and show them side by side:
# left column = predicted label, right column = true label.
y_pred = classifier.predict(X_test)
print(np.column_stack((y_pred, y_test)))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [15]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix: rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Overall fraction of correct predictions. Read it together with the matrix above:
# when one class dominates the test split, accuracy alone can hide missed positives.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[3289   17]
 [  43  231]]


0.9832402234636871