In [1]:
#import required packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Load the Iris dataset bundled with scikit-learn. The returned object provides
# the measurements (iris.data), the numeric species codes (iris.target) and the
# feature/target names used by the cells below.
from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
# Column labels of the four measurements stored in the dataset
iris["feature_names"]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
# Species labels, i.e. the classes the model will learn to predict
iris["target_names"]

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [5]:
# Build a DataFrame of the measurements, naming each column after its feature.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# iris.target stores each species as the code 0, 1 or 2. Indexing a lookup array
# with those codes translates every row at once, replacing a manual
# if/elif loop that appended one label at a time.
species_lookup = np.array(['Iris-Setosa', 'Iris-Versicolor', 'Iris-Virginica'])
np_array_species = species_lookup[iris.target]
# Attach the species name of every row as the target column.
df['classification'] = np_array_species
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),classification
0,5.1,3.5,1.4,0.2,Iris-Setosa
1,4.9,3.0,1.4,0.2,Iris-Setosa
2,4.7,3.2,1.3,0.2,Iris-Setosa
3,4.6,3.1,1.5,0.2,Iris-Setosa
4,5.0,3.6,1.4,0.2,Iris-Setosa


In [6]:
# Separate the label vector y (species name) from the feature matrix X
# (every remaining measurement column).
y = df['classification']
X = df.drop(columns='classification')

In [7]:
# Hold out 30% of the rows for testing and train on the remaining 70%;
# the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
)

In [8]:
# Train a support vector classifier with default settings on the training split.
# fit() returns the estimator itself, so it can be bound in the same statement.
model = SVC().fit(X_train, y_train)
model

SVC()

In [9]:
# Predict the species of every row in the held-out test split (X_test)
# using the classifier fitted above.
prediction = model.predict(X_test)

In [10]:
# Share of test rows whose predicted species matches the true one, as a percentage
100 * accuracy_score(y_true=y_test, y_pred=prediction)

97.77777777777777

In [16]:
# Serialize the trained classifier to disk so it can be loaded from Django.
# The path is relative to the notebook's working directory.
pd.to_pickle(model,"../pickles/iris_scv.pkl")

In [12]:
# Reload the classifier from the same file the pickling cell writes
# ("../pickles/iris_scv.pkl"), so the model tested below is the one that was
# just saved rather than whatever happens to exist at a different path.
# NOTE: unpickling runs code stored in the file; only load pickles you created.
model = pd.read_pickle("../pickles/iris_scv.pkl")

In [13]:
# Try the reloaded model on measurements typed in by hand.
# Each prompt names the measurement it expects; the order follows the
# columns the model was trained on.
sp_l = float(input('Enter sepal length (cm) : '))
sp_2 = float(input('Enter sepal width (cm) : '))
pt_1 = float(input('Enter petal length (cm) : '))
pt_2 = float(input('Enter petal width (cm) : '))

# The model was fitted on a DataFrame with named columns, so hand it a
# one-row DataFrame carrying the same column names instead of a bare list.
sample = pd.DataFrame([[sp_l, sp_2, pt_1, pt_2]], columns=iris.feature_names)
results = model.predict(sample)
print(results)

Enter sp l : 2
Enter sp l : 2
Enter sp l : 2
Enter sp l : 2
['Iris-Setosa']


In [14]:
# Largest value in every column of the DataFrame, e.g. to bound valid inputs.
# For the text column `classification` the maximum is by string comparison.
df.max()

sepal length (cm)               7.9
sepal width (cm)                4.4
petal length (cm)               6.9
petal width (cm)                2.5
classification       Iris-Virginica
dtype: object

In [15]:
# Smallest value in every column of the DataFrame.
# For the text column `classification` the minimum is by string comparison.
df.min()

sepal length (cm)            4.3
sepal width (cm)               2
petal length (cm)              1
petal width (cm)             0.1
classification       Iris-Setosa
dtype: object