# Logistic Regression

## Importing the libraries

In [23]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [25]:
dataset = pd.read_csv('/content/data.csv')
X = dataset.iloc[:, 1:].values
y = dataset.iloc[:, 0].values
dataset.head()
dataset.shape

(6819, 96)

## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [11]:
print(X_train)

[[-0.58566954 -0.34412577 -0.51362042 ...  0.04866632  0.
  -0.26714477]
 [ 0.02453203 -0.00563253  0.12339955 ... -0.01506966  0.
   1.34607941]
 [ 0.05553157  0.14593161 -0.04848066 ... -0.00632642  0.
  -0.50236886]
 ...
 [ 0.68939069  0.79007919  0.70991862 ... -0.0111      0.
  -0.07956969]
 [ 0.00984803  0.38085602  0.00467816 ... -0.01063666  0.
   3.118217  ]
 [ 1.45540576  1.23129924  1.29289378 ... -0.00755453  0.
   0.03649519]]


In [12]:
print(y_train)

[0 0 0 ... 0 0 0]


In [13]:
print(X_test)

[[-0.39559338 -1.37223584 -0.6642371  ... -0.05120416  0.
   0.06901742]
 [-0.0505195  -0.00563253 -0.00949752 ... -0.01528091  0.
  -0.18542477]
 [ 0.21297663 -0.32054913  0.10922386 ...  0.01812839  0.
   0.57024106]
 ...
 [-0.05296683  0.01878614  0.1526369  ...  0.06730125  0.
  -0.5704947 ]
 [ 0.13384621  0.11309271  0.00999405 ...  0.0208476   0.
  -0.38241728]
 [ 0.59394472  0.76397648  0.77725314 ... -0.01528091  0.
   0.74781105]]


In [14]:
print(y_test)

[0 0 0 ... 0 0 0]


## Feature Scaling

In [15]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [16]:
print(X_train)

[[-0.58566954 -0.34412577 -0.51362042 ...  0.04866632  0.
  -0.26714477]
 [ 0.02453203 -0.00563253  0.12339955 ... -0.01506966  0.
   1.34607941]
 [ 0.05553157  0.14593161 -0.04848066 ... -0.00632642  0.
  -0.50236886]
 ...
 [ 0.68939069  0.79007919  0.70991862 ... -0.0111      0.
  -0.07956969]
 [ 0.00984803  0.38085602  0.00467816 ... -0.01063666  0.
   3.118217  ]
 [ 1.45540576  1.23129924  1.29289378 ... -0.00755453  0.
   0.03649519]]


In [17]:
print(X_test)

[[-0.39559338 -1.37223584 -0.6642371  ... -0.05120416  0.
   0.06901742]
 [-0.0505195  -0.00563253 -0.00949752 ... -0.01528091  0.
  -0.18542477]
 [ 0.21297663 -0.32054913  0.10922386 ...  0.01812839  0.
   0.57024106]
 ...
 [-0.05296683  0.01878614  0.1526369  ...  0.06730125  0.
  -0.5704947 ]
 [ 0.13384621  0.11309271  0.00999405 ...  0.0208476   0.
  -0.38241728]
 [ 0.59394472  0.76397648  0.77725314 ... -0.01528091  0.
   0.74781105]]


## Training the Logistic Regression model on the Training set

In [18]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(random_state=0)

## Predicting the Test set results

In [20]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Making the Confusion Matrix

In [21]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1640   13]
 [  41   11]]


0.9683284457478006

## Visualising the Training set results

In [26]:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_train), y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Logistic Regression (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

ValueError: ignored

## Visualising the Test set results

In [27]:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Logistic Regression (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

ValueError: ignored

Training SVM model on the linear kernel

In [28]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

In [29]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [30]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1650    3]
 [  47    5]]


0.9706744868035191

Visualization the test result SVM

Training SVM model on the RBF kernel

In [31]:
from sklearn.svm import SVC
classifier_rbf = SVC(kernel = 'rbf', random_state = 0)
classifier_rbf.fit(X_train, y_train)

SVC(random_state=0)

In [32]:
y_pred_rbf = classifier_rbf.predict(X_test)
print(np.concatenate((y_pred_rbf.reshape(len(y_pred_rbf),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [34]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm_rbf = confusion_matrix(y_test, y_pred_rbf)
print(cm_rbf)
accuracy_score(y_test, y_pred_rbf)

[[1651    2]
 [  50    2]]


0.9695014662756598

Training SVM model on the sigmoid kernel

In [35]:
from sklearn.svm import SVC
classifier_sg = SVC(kernel = 'sigmoid', random_state = 0)
classifier_sg.fit(X_train, y_train)

SVC(kernel='sigmoid', random_state=0)

In [36]:
y_pred_sg = classifier_sg.predict(X_test)
print(np.concatenate((y_pred_sg.reshape(len(y_pred_sg),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [37]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm_sg = confusion_matrix(y_test, y_pred_sg)
print(cm_sg)
accuracy_score(y_test, y_pred_sg)

[[1633   20]
 [  40   12]]


0.9648093841642229