In [62]:
#importing the required libraries 
import os
import numpy as np # importing for numeric operations
import pandas as pd # importing for data analysis

from sklearn.model_selection import train_test_split #for splitting the data into train and test

## for standardizing the data
from sklearn.preprocessing import StandardScaler

# for model Building 
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# for evaluating the model performance
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score,classification_report

## Mounting the drive

In [63]:
# Mount Google Drive at /content/drive so the dataset stored there can be read (Google Colab only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [64]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by pandas / scikit-learn;
# consider narrowing it to the specific categories that are known to be noise.
warnings.filterwarnings("ignore")

## Load the data

In [65]:
# Read the banknote-authentication CSV from the mounted Drive into a DataFrame.
# NOTE(review): absolute, account-specific path — the notebook only runs where this exact Drive layout exists.
Bank_note =pd.read_csv("/content/drive/MyDrive/Turingminds/DevOps/Activity/BankNote_Authentication.csv")

## EDA

In [66]:
# preview the first 5 records
Bank_note.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [67]:
# preview the last 5 records
Bank_note.tail()

Unnamed: 0,variance,skewness,curtosis,entropy,class
1367,0.40614,1.3492,-1.4501,-0.55949,1
1368,-1.3887,-4.8773,6.4774,0.34179,1
1369,-3.7503,-13.4586,17.5932,-2.7771,1
1370,-3.5637,-8.3827,12.393,-1.2823,1
1371,-2.5419,-0.65804,2.6842,1.1952,1


In [68]:
# dataset dimensions as (rows, columns)
Bank_note.shape

(1372, 5)

In [69]:
# listing the column names
Bank_note.columns

Index(['variance', 'skewness', 'curtosis', 'entropy', 'class'], dtype='object')

In [70]:
# checking the data type of each column
Bank_note.dtypes

variance    float64
skewness    float64
curtosis    float64
entropy     float64
class         int64
dtype: object

In [71]:
# summary statistics (count, mean, std, min, quartiles, max) for each column
Bank_note.describe()

Unnamed: 0,variance,skewness,curtosis,entropy,class
count,1372.0,1372.0,1372.0,1372.0,1372.0
mean,0.433735,1.922353,1.397627,-1.191657,0.444606
std,2.842763,5.869047,4.31003,2.101013,0.497103
min,-7.0421,-13.7731,-5.2861,-8.5482,0.0
25%,-1.773,-1.7082,-1.574975,-2.41345,0.0
50%,0.49618,2.31965,0.61663,-0.58665,0.0
75%,2.821475,6.814625,3.17925,0.39481,1.0
max,6.8248,12.9516,17.9274,2.4495,1.0


In [72]:
# number of distinct values in each column
Bank_note.nunique() 

variance    1338
skewness    1256
curtosis    1270
entropy     1156
class          2
dtype: int64

In [73]:
# count of missing (null) values in each column
Bank_note.isnull().sum()

variance    0
skewness    0
curtosis    0
entropy     0
class       0
dtype: int64

In [74]:
# class distribution of the target, expressed as a percentage of all rows
Bank_note['class'].value_counts(normalize=True)*100

0    55.539359
1    44.460641
Name: class, dtype: float64

## Split the data into train & test sets

In [75]:
# Separate the 'class' target from the predictors, then hold out 30% of the rows
# for testing. Stratifying on y keeps the class ratio the same in both splits,
# and the fixed random_state makes the split repeatable.
y = Bank_note['class']
X = Bank_note.drop(columns=['class'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=5271,
    stratify=y,
)

In [76]:
# Report the dimensions of each split: features first (train, test), then targets (train, test)
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

(960, 4)
(412, 4)
(960,)
(412,)


In [77]:
# Standardise the features. The scaler learns its statistics from the training
# split only and is then applied to both splits, so nothing about the test data
# influences the fitted transformation.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [78]:
# Empty comparison table: one row per model will be added by get_metrics.
scores = pd.DataFrame(
    columns=[
        'Model',
        # metrics computed on the training split
        'Train_Accuracy',
        'Train_Recall',
        'Train_Precision',
        'Train_F1_Score',
        # metrics computed on the test split
        'Test_Accuracy',
        'Test_Recall',
        'Test_Precision',
        'Test_F1_Score',
    ]
)

def get_metrics(train_actual, train_predicted, test_actual, test_predicted, model_description, dataframe):
    """Return ``dataframe`` with one extra row of train/test metrics for a model.

    For each split the row holds accuracy followed by weighted-average recall,
    precision and F1, in that order, preceded by ``model_description``.

    Parameters
    ----------
    train_actual, train_predicted : array-like
        True and predicted labels for the training split.
    test_actual, test_predicted : array-like
        True and predicted labels for the test split.
    model_description : str
        Label stored in the first column of the new row.
    dataframe : pandas.DataFrame
        Existing results table. Its nine columns (model name, four train
        metrics, four test metrics) name the values of the new row.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra row appended and a fresh 0..n-1 index;
        the frame passed in is not modified.

    Raises
    ------
    ValueError
        If ``dataframe`` does not have exactly nine columns.
    """

    def _split_metrics(actual, predicted):
        # Accuracy, then weighted recall / precision / F1 for one split.
        return [
            accuracy_score(actual, predicted),
            recall_score(actual, predicted, average="weighted"),
            precision_score(actual, predicted, average="weighted"),
            f1_score(actual, predicted, average="weighted"),
        ]

    row = [
        model_description,
        *_split_metrics(train_actual, train_predicted),
        *_split_metrics(test_actual, test_predicted),
    ]

    # Use the columns of the frame that was passed in rather than the global
    # `scores`, so the function works for any results table.
    new_row = pd.DataFrame([row], columns=dataframe.columns)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # An empty table is not concatenated, so the metric columns keep the float
    # dtype of the new row.
    if dataframe.empty:
        return new_row
    return pd.concat([dataframe, new_row], ignore_index=True)

## Build the Models

## LogisticRegression

In [79]:
# Fit a logistic-regression classifier with scikit-learn's default settings on the scaled training split
model1 = LogisticRegression()
model1.fit(X_train,y_train)

In [80]:
# Predict class labels for both splits so train and test metrics can be compared
train_pred = model1.predict(X_train)
test_pred = model1.predict(X_test)

In [81]:
# Add the logistic-regression metrics as a new row of the comparison table and display it.
# NOTE: scores is reassigned from itself, so re-running this cell adds another "LogisticRegression" row.
scores = get_metrics(y_train, train_pred, y_test, test_pred, "LogisticRegression", scores)
scores

Unnamed: 0,Model,Train_Accuracy,Train_Recall,Train_Precision,Train_F1_Score,Test_Accuracy,Test_Recall,Test_Precision,Test_F1_Score
0,LogisticRegression,0.984375,0.984375,0.984671,0.984393,0.975728,0.975728,0.976986,0.97578


## RandomForest_classifier

In [82]:
# Fit a 30-tree random forest on the scaled training split.
# random_state pins the forest's internal randomness so that a fresh
# Restart & Run All rebuilds the same model and the same metrics; without it
# every run can produce slightly different scores. The seed matches the one
# used for the train/test split.
model2 = RandomForestClassifier(n_estimators=30, random_state=5271)
model2.fit(X_train,y_train)

In [83]:
# Class predictions from the random forest for both splits.
# NOTE: this overwrites the train_pred / test_pred produced by the logistic-regression step.
train_pred = model2.predict(X_train)
test_pred = model2.predict(X_test)

In [84]:
# Add the random-forest metrics as a new row of the comparison table and display it.
# NOTE: scores is reassigned from itself, so re-running this cell adds another "RandomForest" row.
scores = get_metrics(y_train, train_pred, y_test, test_pred, "RandomForest", scores)
scores

Unnamed: 0,Model,Train_Accuracy,Train_Recall,Train_Precision,Train_F1_Score,Test_Accuracy,Test_Recall,Test_Precision,Test_F1_Score
0,LogisticRegression,0.984375,0.984375,0.984671,0.984393,0.975728,0.975728,0.976986,0.97578
1,RandomForest,1.0,1.0,1.0,1.0,0.995146,0.995146,0.995146,0.995146


## Save the model as a pickle file

In [85]:
import pickle

# Serialise the fitted logistic-regression model to model1.pkl in the working directory.
# The context manager closes the file even if pickle.dump raises, which the
# manual open()/close() pair did not guarantee.
# NOTE(review): model1 was fit on StandardScaler-transformed features and the
# fitted `scaler` is not saved here, so anything loading model1.pkl must scale
# its inputs the same way — consider persisting the scaler alongside the model.
with open("model1.pkl", "wb") as pickle_out:
    pickle.dump(model1, pickle_out)