In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

## *Data set loading*

In [2]:
iris = sns.load_dataset("iris")

## *Information about the data*

In [3]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## *Splitting the data into a training set and testing set*

*Creating the set for training and testing the data*

In [46]:
from sklearn.model_selection import train_test_split

In [47]:
X = iris.drop('species',axis=1)
y = iris['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

### *SVM*

In [48]:
from sklearn.svm import SVC

*Model training*

In [49]:
model = SVC()

model.fit(X_train,y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

*Model Prediction & Evaluation*

In [50]:
from sklearn.metrics import confusion_matrix, classification_report

In [51]:
predictions = model.predict(X_test)

*Results*

In [52]:
print('Confusion Matrix:','\n','\n', confusion_matrix(y_test,predictions))
print('\n')
print('Classification Report:','\n','\n',classification_report(y_test,predictions))

Confusion Matrix: 
 
 [[14  0  0]
 [ 0 13  0]
 [ 0  1 17]]


Classification Report: 
 
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        14
  versicolor       0.93      1.00      0.96        13
   virginica       1.00      0.94      0.97        18

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



*The outputs are great. We have results over 90 percent.*
*The SVM model worked really well.*

### *GridSearch*

In [53]:
from sklearn.model_selection import GridSearchCV

In [55]:
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001]}

*Create a GridSearchCV object and fit it to the training data.*

In [56]:
grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=2)

grid.fit(X_train,y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ................................... C=0.1, gamma=1, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ................................... C=0.1, gamma=1, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ................................... C=0.1, gamma=1, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ................................... C=0.1, gamma=1, total=   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ................................... C=0.1, gamma=1, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ................................. C=0.1, gamma=0.1, total=   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ...........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s



[CV] C=10, gamma=1 ...................................................
[CV] .................................... C=10, gamma=1, total=   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] .................................. C=10, gamma=0.1, total=   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] .................................. C=10, gamma=0.1, total=   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] .................................. C=10, gamma=0.1, total=   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] .................................. C=10, gamma=0.1, total=   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] .................................. C=10, gamma=0.1, total=   0.0s
[CV] C=10, gamma=0.01 ................................................
[CV] ................................. C=10, gamma=0.01, total=   0.0s
[CV] 

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    0.2s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100],
                         'gamma': [1, 0.1, 0.01, 0.001]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=2)

*Model Prediction & Evaluation*

In [57]:
grid_predictions = grid.predict(X_test)

In [58]:
print('Confusion Matrix:','\n','\n', confusion_matrix(y_test,grid_predictions))
print('\n')
print('Classification Report:','\n','\n',classification_report(y_test,grid_predictions))

Confusion Matrix: 
 
 [[14  0  0]
 [ 0 12  1]
 [ 0  1 17]]


Classification Report: 
 
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        14
  versicolor       0.92      0.92      0.92        13
   virginica       0.94      0.94      0.94        18

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45



## *Decision Tree*

*Creating the set for training and testing the data*

In [34]:
from sklearn.tree import DecisionTreeClassifier

In [36]:
X = iris.drop('species',axis=1)
y = iris['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=101)

*Model training*

In [37]:
dtree = DecisionTreeClassifier()

dtree.fit(X_train,y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

*Model Prediction & Evaluation*

In [38]:
dtree_pred = dtree.predict(X_test)

*Results*

In [39]:
print('Confusion Matrix:','\n','\n', confusion_matrix(y_test,dtree_pred))
print('\n')
print('Classification Report:','\n','\n',classification_report(y_test,dtree_pred))

Confusion Matrix: 
 
 [[13  0  0]
 [ 0 19  1]
 [ 0  1 11]]


Classification Report: 
 
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.95      0.95      0.95        20
   virginica       0.92      0.92      0.92        12

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45



### *Random Forests*

In [40]:
from sklearn.ensemble import RandomForestClassifier

*Model training*

In [41]:
rfc = RandomForestClassifier(n_estimators=200)

rfc.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

*Model Prediction & Evaluation*

In [42]:
rfc_pred = rfc.predict(X_test)

*Results*

In [43]:
print('Confusion Matrix:','\n','\n', confusion_matrix(y_test,rfc_pred))
print('\n')
print('Classification Report:','\n','\n',classification_report(y_test,rfc_pred))

Confusion Matrix: 
 
 [[13  0  0]
 [ 0 19  1]
 [ 0  1 11]]


Classification Report: 
 
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.95      0.95      0.95        20
   virginica       0.92      0.92      0.92        12

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45



*Random Forest model worked well too, but SVM model had a better overall perfomance result for this particular case*