In [1]:
# Import the libraries used throughout this notebook
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
import os

In [2]:
# Load the iris data set; the path is relative to the notebook's working directory.
# NOTE(review): read_csv treats the first line of the file as the header by default —
# confirm that iris.data actually starts with a header row.
# Alternative kept for reference: change into the data folder before reading.
# os.chdir("../dataset")
df = pd.read_csv("../dataset/iris.data")

In [3]:
# Preview the first five rows of the data frame
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Give every column an explicit, descriptive name:
# the four measurements followed by the class label.
col_name = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
df.columns = col_name

# Preview the frame with the new column names
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
# df.head(90)

In [6]:
# List the distinct class labels present in the 'species' column
df['species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [7]:
# Feature matrix: only the two petal measurements are used as model inputs
col = ['petal_length', 'petal_width']
X = df[col]

In [8]:
# Encode each species label as an integer class code (0, 1, 2)
species_to_num = {
    name: code
    for code, name in enumerate(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
}

# Store the encoded label as an extra column and use it as the target vector
df['tmp'] = df.species.map(species_to_num)
y = df.tmp

# Hold out 10% of the samples; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.9,
    random_state=0,
)



In [9]:
# Scaler that standardizes each feature column (zero mean, unit variance)
sc_x = StandardScaler()

# Learn the scaling statistics from the training split and apply them to it
X_std_train = sc_x.fit_transform(X_train)

# Scale the held-out split with the *training* statistics (transform only, never
# re-fit) so that fitted models can be evaluated on data they have not seen.
X_std_test = sc_x.transform(X_test)

In [10]:
# Train a linear-kernel support vector classifier on the standardized training features.
# verbose=True makes the solver report progress (the "[LibSVM]" line in the cell output).
clf = svm.SVC(kernel='linear', C=1.0, verbose=True)
clf.fit(X_std_train, y_train)

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=True)

In [11]:
# Estimate generalization with 3-fold cross-validation (cv=3): cross_val_predict returns,
# for every training sample, the prediction made while that sample was in the held-out fold.
from sklearn.model_selection import cross_val_predict
y_train_pred = cross_val_predict(clf, X_std_train, y_train, cv=3)
# In-sample alternative (not cross-validated), kept for reference:
# y_train_pred = clf.predict(X_std_train)
# print(y_train_pred)

[LibSVM][LibSVM][LibSVM]

In [12]:
# Confusion matrix of the true training labels against the cross-validated predictions
# (scikit-learn convention: rows are true classes, columns are predicted classes)
confusion_matrix(y_train, y_train_pred)

array([[47,  0,  0],
       [ 0, 38,  4],
       [ 0,  2, 44]])

In [13]:
# Mean accuracy of the fitted classifier on the same data it was trained on.
# This is an in-sample figure, so it is an optimistic estimate of real-world accuracy.
clf.score(X_std_train, y_train)

0.9703703703703703

In [14]:
from sklearn.svm import SVR

# Linear support vector regressor fitted on the numeric class codes.
# It is trained on the standardized features, like every other model in this
# notebook, so it matches the data used for its cross-validation.
svr = SVR(kernel='linear')
svr.fit(X_std_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [15]:
# 3-fold cross-validated predictions of the support vector regressor on the standardized
# training features.
# NOTE(review): this rebinds y_train_pred, replacing the SVC predictions computed earlier;
# an SVR is a regressor, so these values are presumably continuous rather than class codes.
from sklearn.model_selection import cross_val_predict
y_train_pred = cross_val_predict(svr, X_std_train, y_train, cv=3)
# In-sample alternative using the classifier, kept for reference:
# y_train_pred = clf.predict(X_std_train)
# print(y_train_pred)

In [16]:
from sklearn import tree

# Fit a decision tree classifier with default settings on the standardized training features.
# NOTE(review): no random_state is passed, so repeated runs may not build an identical
# tree — consider fixing a seed for reproducibility.
clf_tree = tree.DecisionTreeClassifier()
clf_tree.fit(X_std_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [17]:
# 3-fold cross-validated predictions of the decision tree on the standardized training
# features; this rebinds y_train_pred to the tree's predictions.
from sklearn.model_selection import cross_val_predict
y_train_pred = cross_val_predict(clf_tree, X_std_train, y_train, cv=3)

In [18]:
# Confusion matrix of the true training labels against the tree's cross-validated predictions
confusion_matrix(y_train, y_train_pred)

array([[47,  0,  0],
       [ 0, 39,  3],
       [ 0,  3, 43]])

In [19]:
# Mean accuracy of the fitted tree on its own training data (in-sample).
# NOTE(review): a perfect in-sample score here likely reflects the unrestricted tree
# depth rather than true generalization — compare with the cross-validated matrix.
clf_tree.score(X_std_train, y_train)

1.0

In [51]:
import pickle
import datetime

# Timestamp suffix (yymmddHHMMSS) so repeated exports do not overwrite each other
dstr = datetime.datetime.now().strftime("%y%m%d%H%M%S")
pkl_filename = "".join(["iris.sklearn_", dstr, ".pkl"])

# Serialize the classifier `clf` into a file in the current working directory
with open(pkl_filename, 'wb') as file:
    file.write(pickle.dumps(clf))

# Read the bytes back and rebuild the model to confirm the export round-trips
with open(pkl_filename, 'rb') as file:
    pickle_model = pickle.loads(file.read())

In [60]:
# Load a previously exported classifier from the weight folder.
# NOTE(review): this path is not the timestamped file written by the export cell above —
# confirm that '../weight/iris.sklearn.pkl' exists and holds the intended model.
# Unpickling can execute arbitrary code, so only load files from a trusted source.
svm_clf = pd.read_pickle('../weight/iris.sklearn.pkl')

In [24]:
# Preview the first ten rows (including the encoded 'tmp' label column) instead of
# dumping the whole frame into the notebook output.
df.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,tmp
0,5.1,3.5,1.4,0.2,Iris-setosa,0
1,4.9,3.0,1.4,0.2,Iris-setosa,0
2,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5.0,3.6,1.4,0.2,Iris-setosa,0
5,5.4,3.9,1.7,0.4,Iris-setosa,0
6,4.6,3.4,1.4,0.3,Iris-setosa,0
7,5.0,3.4,1.5,0.2,Iris-setosa,0
8,4.4,2.9,1.4,0.2,Iris-setosa,0
9,4.9,3.1,1.5,0.1,Iris-setosa,0


In [25]:
def predict_iris(sepal_length, sepal_width, petal_length, petal_width,
                 model=None, model_path='../weight/iris.sklearn.pkl'):
    """Predict the class code of a single iris sample.

    Parameters
    ----------
    sepal_length, sepal_width, petal_length, petal_width : float
        The four measurements of the flower, passed to the model in this order.
    model : object, optional
        An already-loaded estimator exposing ``predict``. When given, nothing is
        read from disk, so repeated calls can reuse a single model instance.
    model_path : str, optional
        Pickle file to load when ``model`` is not supplied. Defaults to the
        path this function has always read from.

    Returns
    -------
    The first (and only) element of the model's prediction for the sample.

    Notes
    -----
    NOTE(review): the classifier trained earlier in this notebook uses only the
    two standardized petal features, while this function feeds four raw
    features — confirm the pickled model was trained on that input layout.
    """
    if model is None:
        # Unpickling can execute arbitrary code: only load trusted model files.
        model = pd.read_pickle(model_path)

    # Estimators expect a 2-D array: one row per sample, one column per feature.
    test = np.array([[sepal_length, sepal_width, petal_length, petal_width]])
    pred = model.predict(test)
    return pred[0]

# Smoke test with a single sample; the result is the model's class code
# (2 maps to 'Iris-virginica' in species_to_num, assuming the stored model uses that encoding).
predict_iris(5.9, 3.0, 5.1, 1.8)

2