# The famous Iris flower dataset introducing in 1936 is used for this analysis. In this project, we build a Multi-Class Classifier Model using Support Vector Machine and deploy the model using Streamlit.

## Acquisition: Read data

In [None]:
import pandas as pd

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_data = pd.read_csv(url, header=None)
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris_data.columns = columns
iris_data.head()

In [None]:
iris_data.shape

In [None]:
iris_data.info()

In [None]:
# Check the number of samples of each Iris flower species
iris_data.species.value_counts()

In [None]:
# Plot the frequency distribution of data
import matplotlib.pyplot as plt

iris_data.hist(edgecolor='red', linewidth=1.2)
fig = plt.gcf()
fig.set_size_inches(12, 8)
plt.show()

#### We see that the sepal_width feature has distributed

### Create Scatter plot to understand the relationship of features among species

In [None]:
def plot_iris(plot , colors, species, xlabel, ylabel):
    for i in range(3):
        df = iris_data[iris_data.species == species[i]]
        plot.scatter(df[xlabel], df[ylabel], c=colors[i], label=species[i])
    plot.xlabel(xlabel)
    plot.ylabel(ylabel)
    plot.legend()

colors = ['red', 'green', 'blue']
species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
plt.figure(figsize=(14, 15))

plt.subplot(221)
plot_iris(plot=plt, colors=colors, species=species, xlabel='sepal_length', ylabel='sepal_width')

plt.subplot(222)
plot_iris(plot=plt, colors=colors, species=species, xlabel='petal_length', ylabel='petal_width')

plt.subplot(223)
plot_iris(plot=plt, colors=colors, species=species, xlabel='sepal_length', ylabel='petal_length')

plt.subplot(224)
plot_iris(plot=plt, colors=colors, species=species, xlabel='sepal_width', ylabel='petal_width')
    

#### The class Iris-setosa is linearly separable from the other 2

### Use the Correlation Heatmap to visualize the relationship among features in dataset

In [None]:
import seaborn as sns

corr = iris_data.corr()
fig, ax = plt.subplots(figsize=(13, 7))
plt.title('Correlation Heat Map', fontsize=18)
sns.heatmap(corr, annot=True, cmap='coolwarm')

#### The most correlated features are: petal_length and petal_width (0.96), sepal_length and petal_length (0.87)

## Preparing Data

In [None]:
iris_data.isna().sum()

In [52]:
# One Hot Encoding
from sklearn.preprocessing import LabelEncoder
import numpy as np

df = iris_data.drop(columns=['species'])

le = LabelEncoder()
y = le.fit_transform(iris_data['species'])

## Modeling

In [53]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

transform = StandardScaler()
X = transform.fit_transform(df)

In [54]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=2)
print(Xtrain.shape, ytrain.shape)
print(Xtest.shape, ytest.shape)

(120, 4) (120,)
(30, 4) (30,)


#### Find the hyperparameter for Support Vector Machine (SVM)

In [55]:
from sklearn.model_selection import GridSearchCV

parameters = {'kernel':('linear', 'rbf','poly','rbf', 'sigmoid'),
              'C': np.logspace(-3, 3, 5),
              'gamma':np.logspace(-3, 3, 5),
              'probability': (True, False)}
svm = SVC()

In [56]:
svm_cv = GridSearchCV(svm, parameters, cv=10)
svm_cv.fit(Xtrain, ytrain)

GridSearchCV(cv=10, estimator=SVC(),
             param_grid={'C': array([1.00000000e-03, 3.16227766e-02, 1.00000000e+00, 3.16227766e+01,
       1.00000000e+03]),
                         'gamma': array([1.00000000e-03, 3.16227766e-02, 1.00000000e+00, 3.16227766e+01,
       1.00000000e+03]),
                         'kernel': ('linear', 'rbf', 'poly', 'rbf', 'sigmoid'),
                         'probability': (True, False)})

In [57]:
print("tuned hpyerparameters :(best parameters) ",svm_cv.best_params_)
print("accuracy :",svm_cv.best_score_)

tuned hpyerparameters :(best parameters)  {'C': 31.622776601683793, 'gamma': 0.001, 'kernel': 'rbf', 'probability': True}
accuracy : 0.9666666666666666


In [58]:
y_pred = svm_cv.predict(Xtest)

## Evaluation

In [59]:
from sklearn.metrics import accuracy_score

print(f'Accuracy: {accuracy_score(y_pred, ytest)*100:.2f}')

Accuracy: 96.67


In [60]:
# Check precision, recall, f1-score
from sklearn.metrics import classification_report

print(classification_report(y_pred, ytest))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       0.88      1.00      0.93         7
           2       1.00      0.89      0.94         9

    accuracy                           0.97        30
   macro avg       0.96      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30



## Deployment

In [None]:
!pip install pickle-mixin

In [61]:
import pickle

#Store svm model, label encoder, standard scaler in a pickle file
# It will serialize the object and convert it into byte stream
filename = 'svm_model.pkl'
data = {'model': svm_cv, 'le': le, 'scaler': transform}
with open('svm_model.pkl', 'wb') as file:
    pickle.dump(data, file)

In [None]:
!pip install streamlit