## Importing Required Library

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.compose import ColumnTransformer

## Reading Data Files

In [2]:
df = pd.read_csv('iris.csv')
df.rename(columns={'sepal.length':'SepalLength','sepal.width':'SepalWidth','petal.length':'PetalLength','petal.width':'PetalWidth','variety':'Variety'}, inplace=True)
df['Variety'] = df['Variety'].map({'Setosa':1,'Versicolor':2, 'Virginica':3})
df.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Variety
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1


In [3]:
df['Variety'].value_counts()

1    50
2    50
3    50
Name: Variety, dtype: int64

In [4]:
X = df.drop(columns='Variety')
y = df['Variety']

## Sperating Variable (Input & Target) due to Data Leakage

In [5]:
from sklearn.model_selection import train_test_split 

X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.3, random_state=None)

## Applying Preprocessing Technique

In [6]:
## Feature Scaling
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer

scale = ColumnTransformer(transformers=
                          [('scale', MinMaxScaler(), [0,1,2,3])])


In [7]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(penalty='elasticnet', solver='saga',max_iter=100,multi_class='multinomial',l1_ratio=0.7)

In [8]:
from sklearn.pipeline import Pipeline
pipe = Pipeline([('tnf1',scale),('tnf2',lr)])

In [9]:
pipe.fit(X_train,y_train)

In [10]:
y_pred = pipe.predict(X_test)

In [11]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        16
           2       1.00      0.86      0.92        14
           3       0.88      1.00      0.94        15

    accuracy                           0.96        45
   macro avg       0.96      0.95      0.95        45
weighted avg       0.96      0.96      0.96        45



In [12]:
from sklearn.model_selection import cross_val_score,KFold

cv = KFold(n_splits=5, shuffle=True,random_state=None)

cross_val_score(pipe, X, y, cv=10, scoring='accuracy').mean()

0.9533333333333334

## Hyperparameter Tunning

In [21]:
from sklearn.model_selection import GridSearchCV, KFold

In [22]:
cv = KFold(n_splits=5, shuffle=True,random_state=None)

In [23]:
param_lst = {
    'tnf2__penalty':['l1', 'l2', 'elasticnet'],
    'tnf2__solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'tnf2__max_iter':[100,200,300,400],
    'tnf2__C':[0.1,0.3,0.4,0.5,0.6],
    'tnf2__multi_class':['multinomial'],
    'tnf2__l1_ratio':[0.1,0.3,0.4,0.5,0.7]}

In [24]:
hyp = GridSearchCV(estimator=pipe, param_grid=param_lst,cv=cv,n_jobs=-1,scoring='accuracy',)

In [25]:
hyp.fit(X_train,y_train)



































































4500 fits failed out of a total of 7500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
500 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/pipeline.py", line 382, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/linear_model/_logistic.py", line 1094, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/linear_model/_logistic.py", line 61, in _check_

In [26]:
hyp.best_score_

0.9523809523809523

In [27]:
hyp.best_params_

{'tnf2__C': 0.6,
 'tnf2__l1_ratio': 0.1,
 'tnf2__max_iter': 100,
 'tnf2__multi_class': 'multinomial',
 'tnf2__penalty': 'l1',
 'tnf2__solver': 'saga'}

In [287]:
import pickle
pathurl = open('pipe.pkl','wb')
pickle.dump(pipe, pathurl)
pathurl.close()

