<a href="https://colab.research.google.com/github/carlos-alves-one/-ML-Logistic-Regression/blob/main/logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

## Import the libraries

In [23]:
# Imports the NumPy library, which is a popular numerical computing library in Python
import numpy as np

# Imports the pyplot module of the matplotlib library in Python used for data visualization
import matplotlib.pyplot as plt

# Imports the pandas library, which provides data structures and functions for data manipulation and analysis
import pandas as pd

## Import the dataset

In [24]:
# Read the CSV file 'data.csv' (path relative to the working directory) into a pandas DataFrame
dataset = pd.read_csv('data.csv')

# Feature matrix: all rows, and the two columns immediately before the last one
# (the slice -3:-1 starts at the third-from-last column and stops before the last column).
# .values converts the selection to a NumPy array.
X = dataset.iloc[:, -3:-1].values

# Target vector: all rows of the last column, as a one-dimensional array
y = dataset.iloc[:, -1].values

In [31]:
# Show the first 5 rows of the feature matrix X.
# X holds at most two columns (see how it is sliced from the dataset), so no column slice is needed.
print(X[:5])

[[ 4648  4719]
 [ 2657 19366]
 [ 2487 15500]
 [ 3100 12380]
 [ 1596  7946]]


In [35]:
# Show the first 5 entries of the target vector y
print(y[:5])

['Rejected' 'Rejected' 'Rejected' 'Rejected' 'Rejected']


## Splitting the dataset into the Training set and Test set

In [25]:
# train_test_split lives in scikit-learn's model_selection module
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set and keep the remaining 80% for training.
# random_state=0 fixes the shuffling seed, so every run produces the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Feature Scaling

In [26]:
# StandardScaler standardizes each feature by removing the mean and scaling to unit variance
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only, so that no
# information from the test set influences the scaler
sc = StandardScaler().fit(X_train)

# Standardize the training features with the statistics just learned
X_train = sc.transform(X_train)

# Standardize the test features with the same training-set statistics (transform only, no re-fitting)
X_test = sc.transform(X_test)

In [27]:
# Show the first 5 rows of the standardized training features
print(X_train[:5])

[[-0.72069539 -1.6156983 ]
 [-1.5513517  -0.46106989]
 [-1.14720215  0.29874223]
 [-0.20475979 -1.43747292]
 [ 1.08765888 -1.18269085]]


In [28]:
# Show the first 5 rows of the standardized test features
print(X_test[:5])

[[ 1.63885007 -1.69696349]
 [-1.0233776   0.16254455]
 [ 0.51497037  0.00611778]
 [-1.13860322  1.37960417]
 [ 0.55022597 -1.52902705]]


## Training the Logistic Regression model on the Training set

In [29]:
# LogisticRegression is scikit-learn's logistic regression classifier
from sklearn.linear_model import LogisticRegression

# Build the model with a fixed random_state.
# NOTE(review): per the scikit-learn docs, random_state is only used by solvers that
# shuffle the data; with the default solver it likely has no effect - confirm for the installed version.
classifier = LogisticRegression(random_state=0)

# Fit the model on the standardized training features and their labels
classifier.fit(X_train, y_train)

## Predicting a new result

In [37]:
# Predict the class label for one new sample given as raw feature values.
# The sample must go through the same fitted scaler (sc) as the training data before prediction.
# [4648, 4719] matches the first row of X printed earlier in this notebook.
print(classifier.predict(sc.transform([[4648, 4719]])))

['Rejected']


In [39]:
# Predict a class label for every sample of the (already standardized) test set
y_pred = classifier.predict(X_test)

# Print predictions and true labels side by side as a two-column array:
# column 0 is the predicted label, column 1 is the actual label
print(np.column_stack((y_pred, y_test)))

[['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Approved']
 ['Rejected' 'Approved']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Approved']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Approved']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Approved']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']
 ['Rejected' 'Rejected']


## Making the Confusion Matrix

In [40]:
# confusion_matrix (scikit-learn metrics module) tabulates predicted labels against actual labels
from sklearn.metrics import confusion_matrix

# Compare the actual test labels (first argument) with the predicted labels (second argument).
# Per the scikit-learn docs, rows correspond to actual labels and columns to predicted labels,
# with labels in sorted order when none are given - presumably 'Approved' then 'Rejected' here.
cm = confusion_matrix(y_test, y_pred)

# Print the confusion matrix.
# NOTE(review): in the recorded output the first column is all zeros, i.e. no test sample
# was predicted as the first label; the model only ever predicts one class.
print(cm)

[[  0  28]
 [  0 172]]


In [42]:
# accuracy_score (scikit-learn metrics module) returns the fraction of predictions
# that match the actual labels
from sklearn.metrics import accuracy_score

# Accuracy on the test set; as the last expression of the cell, its value is shown as the cell output.
# NOTE(review): the recorded value 0.86 equals 172/200 from the confusion matrix above, which is
# what a model predicting a single class for every sample would score - interpret with care.
accuracy_score(y_test, y_pred)

0.86