# In this practice session, we will learn to code a Logistic Regression classifier.
# We will perform the following steps to build a simple classifier using the popular Iris dataset.

 
 
  - **Data Preprocessing**

    - Importing the libraries.
    - Importing dataset (Dataset Link https://archive.ics.uci.edu/ml/datasets/iris).
    - Dealing with the categorical variable.
    - Classifying dependent and independent variables.
    - Splitting the data into a training set and test set.
    - Feature scaling.
 

  -  **Logistic Regression Classification**

    - Create a Logistic Regression classifier.
    - Feed the training data to the classifier.
    - Predicting the species for the test set.
    - Using the confusion matrix to find accuracy.

# Load the Dependencies

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Shared `style` handed to the widgets created further down;
# `description_width='initial'` is meant to keep long descriptions from being cut off.
style = dict(description_width='initial')

In [None]:
#1 Importing essential libraries
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import seaborn as sns
# Load scikit-learn's bundled Iris data and pull out the pieces used below:
# the feature matrix, the class labels and the class names.
iris = load_iris()
data, target, names = iris.data, iris.target, iris.target_names

# Load the Dataset

In [None]:
# The measurements come from the arrays loaded via `load_iris`, not from a
# local 'iris.data' file. Species is appended as the last column.
dataset = pd.DataFrame(
    data,
    columns=["sepal length", "sepal width", "petal length", "petal width"],
).assign(Species=target)
dataset.head()

In [None]:
# Report the table size (rows, columns).
print("Dataset has {} rows and {} columns.".format(*dataset.shape))

In [None]:
# Dropdown for choosing which measurement column (sepal*/petal*) to plot
# against Species in the cells below.
wig_col = widgets.Dropdown(
    options=[name for name in dataset.columns if name.startswith(('sepal', 'petal'))],
    description='Choose a Column to Plot vs. Attributes',
    disabled=False,
    layout=widgets.Layout(width='40%', height='40px'),
    style=style,
)

# Plot Variables

In [None]:
display(wig_col)

# Boxen plot of the currently selected measurement, one box per species.
# The value is read when this cell runs, so re-run it after changing the dropdown.
sns.catplot(
    data=dataset,
    x="Species",
    y=wig_col.value,
    kind="boxen",
    height=8.27,
    aspect=11.7/8.27,
);

In [None]:
# Violin outline of the selected measurement per species ...
g = sns.catplot(
    data=dataset,
    x="Species",
    y=wig_col.value,
    kind="violin",
    inner=None,
    height=8.27,
    aspect=11.7/8.27,
)
# ... with the individual observations drawn on top of the same axes.
sns.swarmplot(
    data=dataset,
    x="Species",
    y=wig_col.value,
    color="k",
    size=3,
    ax=g.ax,
);

display(wig_col)

In [None]:
#3 Separate the independent and dependent variables
X = dataset.iloc[:, :-1].to_numpy()  # independent variables: every column except the last
y = dataset.iloc[:, -1].to_numpy()   # dependent variable: the last column (Species)

In [None]:
# Preview the first five rows of the feature matrix and of the labels.
# (Fixes the "Idependent" typo in the printed heading.)
print("\nIndependent Variable (Sepal and Petal Attributes):\n\n", X[:5])
print("\nDependent Variable (Species):\n\n", y[:5])

# Encode Classes

In [None]:
from sklearn.preprocessing import LabelEncoder
# Replace the Species column of `dataset` with integer class codes.
# NOTE(review): X and y were already extracted from `dataset` in an earlier
# cell, so this re-encoding does not affect the `y` used for training.
# Presumably harmless because Species was filled from `iris.target` — confirm.
labelencoder = LabelEncoder()
dataset['Species'] = labelencoder.fit_transform(dataset['Species'])

In [None]:
# Show the distinct class codes now stored in the Species column.
dataset['Species'].unique()

# Create Train and Test Sets

In [None]:
#4 Creating training set and testing set
from sklearn.model_selection import train_test_split

# Slider for the fraction of rows held out for testing (default 0.2).
# NOTE(review): `tooltips` does not look like a FloatSlider option (the
# hover-text option is `tooltip` / `description_tooltip`, depending on the
# ipywidgets version) — confirm the hint is actually shown.
test_size = widgets.FloatSlider(
    min=0.01,
    max=0.6,
    value=0.2,
    description="Test Size :",
    tooltips=['Usually 20-30%'],
)
display(test_size)

In [None]:
# Split into train and test sets using the fraction chosen on the slider.
# The seed is fixed (random_state=0) so re-running the cell gives the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size.value,
    random_state=0,
)

In [None]:
# Print the first five feature rows and labels of each split.
for heading, features, labels in (
    ("Training Set :", X_train, y_train),
    ("\n\nTest Set :", X_test, y_test),
):
    print(heading + "\n----------------\n")
    print("X = \n", features[:5])
    print("y = \n", labels[:5])

In [None]:
# Report the (rows, columns) shape of each split.
print("Shape of Training set is", X_train.shape)
print("Shape of Testing set is", X_test.shape)

# Normalise Features

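The features are measured on different scales, so let's standardize them with `StandardScaler` (z-score normalization), which rescales each feature to zero mean and unit variance. The class labels were already label-encoded in the "Encode Classes" step above.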

In [None]:
#Feature scaling
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same transform
# to both splits, so the test rows do not influence the scaling statistics.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

print("\n-------------------------\nDataset after Scaling:\n-------------------------\n")

print("\nX_train :\n", X_train[:5])
print("-------------------------")
print("\nX_test :\n", X_test[:5])

# Logistic Regression

In [None]:
# import Logistic Regression library
from sklearn.linear_model import LogisticRegression

# Hyper-parameter pickers; their values are read when the classifier is built.
penalty_wig = widgets.Dropdown(
    options=["l2", "l1"],
    description="Penalty Type = ",
    style=style,
)
njobs_wig = widgets.Dropdown(
    options=[('One', 1), ('Two', 2), ('Three', 3), ('All Cores', -1)],
    description="Number of CPU Cores = ",
    style=style,
)
c_wig = widgets.Dropdown(
    options=[1.0, 10.0, 100.0, 1000.0, 10000.0],
    description="Penalty parameter C = ",
    style=style,
)

# Show the three pickers in the same order as before.
display(penalty_wig, njobs_wig, c_wig)

# Predict and Evaluate the Model 

In [None]:
# The solver scikit-learn uses by default ("lbfgs") supports only the "l2"
# penalty, so picking "l1" in the dropdown made fit() raise a ValueError.
# For "l1" switch to "saga", which supports that penalty and multiclass
# targets. saga tends to converge more slowly, hence the larger iteration
# budget. The "l2" path keeps the library defaults.
solver_options = {"solver": "saga", "max_iter": 5000} if penalty_wig.value == "l1" else {}

# Build the model from the values currently selected in the widgets.
classifier = LogisticRegression(C=c_wig.value, penalty=penalty_wig.value,
                                n_jobs=njobs_wig.value, random_state=0,
                                **solver_options)

#Feed the training data to the classifier
classifier.fit(X_train,y_train)

#Predicting the species for test set
y_pred = classifier.predict(X_test)

print("\n---------------------------\n")
print("Predicted Values for Test Set :\n",y_pred)
print("\n---------------------------\n")
print("Actual Values for Test Set :\n",y_test)


In [None]:
#8 Calculating the accuracy of the predictions
from sklearn import metrics
print("Prediction Accuracy = ", metrics.accuracy_score(y_test, y_pred))

#9 Comparing actual and predicted species for the test set
# (The heading used to say "Salaries", left over from a regression example.)
print("\nActual vs Predicted Species \n------------------------------\n")
error_df = pd.DataFrame({"Actual" : y_test,
                         "Predicted" : y_pred})

# Last expression of the cell, so the notebook renders the table.
error_df

# Actual vs. Predicted 

In [None]:
#Using confusion matrix to find the accuracy
from sklearn.metrics import confusion_matrix, classification_report

# Rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)

# Correct predictions lie on the diagonal, so accuracy = trace / total count.
accuracy = cm.trace() / cm.sum()

print("\n---------------------------\n")
print("Accuracy of Predictions = ", accuracy)

# Per-class precision, recall and F1.
print("\n---------------------------\n")
print(classification_report(y_test, y_pred))