# Import necessary libraries

In [76]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix 
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [2]:
from sklearn.datasets import load_iris

# Fetch the Iris dataset bundled with scikit-learn, then unpack it into the
# feature matrix `x` and the class-label vector `y`.
iris = load_iris()
x, y = iris.data, iris.target

In [4]:
# Hold out 30% of the samples as a test split; random_state pins the shuffle
# so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=100,
)

# Decision tree modelling

#### The Gini index measures how often a randomly chosen element would be incorrectly classified if it were labelled at random according to the class distribution of the node. A split that yields a lower Gini index is therefore preferred.

In [5]:
# Depth-limited decision tree (max_depth=3, at least 5 samples per leaf)
# that selects splits using the Gini criterion.
clf_gini = DecisionTreeClassifier(
    criterion="gini",
    max_depth=3,
    min_samples_leaf=5,
    random_state=100,
)

In [6]:
# Same tree configuration as the Gini model, but splits are chosen using the
# entropy criterion instead.
clf_entropy = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=3,
    min_samples_leaf=5,
    random_state=100,
)

In [7]:
# Train the Gini tree on the training split.
clf_gini.fit(X=X_train, y=y_train)

In [8]:
# Train the entropy tree on the training split.
clf_entropy.fit(X=X_train, y=y_train)

In [9]:
# Predict the held-out samples with both trees (Gini first, then entropy).
y_pred_gini, y_pred_entropy = (
    clf_gini.predict(X_test),
    clf_entropy.predict(X_test),
)

In [10]:
# Mean accuracy of the entropy tree on the *training* split (the data it was
# fitted on), not on the held-out test split.
clf_entropy.score(X=X_train, y=y_train)

0.9619047619047619

In [11]:
# Mean accuracy of the Gini tree on the *training* split (the data it was
# fitted on), not on the held-out test split.
clf_gini.score(X=X_train, y=y_train)

0.9619047619047619

# Random Forest modelling

In [12]:
from sklearn.ensemble import RandomForestClassifier

In [13]:
# Random forest with otherwise default hyperparameters; n_jobs=2 requests two
# parallel jobs and random_state=0 makes the run repeatable.
clf = RandomForestClassifier(
    random_state=0,
    n_jobs=2,
)

In [14]:
# Train the random forest on the training split.
clf.fit(X=X_train, y=y_train)

In [15]:
# Mean accuracy of the random forest on the *training* split (the data it
# was fitted on), not on the held-out test split.
clf.score(X=X_train, y=y_train)

1.0

# SVM modelling

In [77]:
from sklearn import svm

In [78]:
# Linear-kernel SVM with C=1. `gamma` is a coefficient only for the 'rbf',
# 'poly' and 'sigmoid' kernels, so it is intentionally not passed here:
# supplying it has no effect on a linear kernel and only suggests a tuning
# knob that does not exist.
svm_linear = svm.SVC(kernel='linear', C=1)

In [79]:
# Train the linear-kernel SVM on the training split.
svm_linear.fit(X=X_train, y=y_train)

In [80]:
# Mean accuracy of the linear-kernel SVM on the training split.
svm_linear.score(X=X_train, y=y_train)

0.9714285714285714

# Kernel options are linear, rbf, poly, sigmoid

In [81]:
# RBF-kernel SVM (C=1, gamma=1): build and fit in one step, then report the
# mean accuracy on the training split as the cell output.
svm_rbf = svm.SVC(C=1, gamma=1, kernel='rbf').fit(X_train, y_train)
svm_rbf.score(X=X_train, y=y_train)

0.9809523809523809

In [82]:
# Polynomial-kernel SVM (C=1, gamma=1): build and fit in one step, then
# report the mean accuracy on the training split as the cell output.
svm_poly = svm.SVC(C=1, gamma=1, kernel='poly').fit(X_train, y_train)
svm_poly.score(X=X_train, y=y_train)

0.9904761904761905

In [83]:
# Sigmoid-kernel SVM (C=1, gamma=1): build and fit in one step, then report
# the mean accuracy on the training split as the cell output.
# NOTE(review): the recorded training accuracy for this kernel (~0.37) is far
# below the other kernels; presumably gamma=1 on unscaled features suits the
# sigmoid kernel poorly - confirm before drawing conclusions from it.
svm_sig = svm.SVC(C=1, gamma=1, kernel='sigmoid').fit(X_train, y_train)
svm_sig.score(X=X_train, y=y_train)

0.37142857142857144

#### gamma: The higher the value of gamma, the more closely the model tries to fit the training data set exactly, which increases the generalization error and causes over-fitting.

In [84]:
# RBF-kernel SVM with a large gamma (10) and C=1; the cell output is the mean
# accuracy on the training split.
svm_rbf_10 = svm.SVC(C=1, gamma=10, kernel='rbf').fit(X_train, y_train)
svm_rbf_10.score(X=X_train, y=y_train)

1.0

In [85]:
# RBF-kernel SVM with gamma=1.5 and C=1; the cell output is the mean accuracy
# on the training split.
# NOTE(review): the variable name suggests gamma=5, but the value passed is
# 1.5 - confirm which was intended.
svm_rbf_5 = svm.SVC(C=1, gamma=1.5, kernel='rbf').fit(X_train, y_train)
svm_rbf_5.score(X=X_train, y=y_train)

0.9809523809523809

In [86]:
# Predict the held-out test samples with the gamma=1.5 RBF model.
y_svm_rbf_5 = svm_rbf_5.predict(X=X_test)

In [87]:
# Test-split accuracy of the gamma=1.5 RBF model, expressed as a percentage.
100 * accuracy_score(y_true=y_test, y_pred=y_svm_rbf_5)

95.55555555555556

#### C: Penalty parameter of the error term. It controls the trade-off between a smooth decision boundary and classifying the training points correctly.

In [88]:
# RBF-kernel SVM with gamma=2 and C=1; the cell output is the mean accuracy
# on the training split.
svm_rbf_2 = svm.SVC(C=1, gamma=2, kernel='rbf').fit(X_train, y_train)
svm_rbf_2.score(X=X_train, y=y_train)

0.9809523809523809

In [89]:
# RBF-kernel SVM with C=1.5 and gamma=1; the cell output is the mean accuracy
# on the training split.
# NOTE(review): the variable name suggests C=10, but the value passed is
# 1.5 - confirm which was intended.
svm_rbf_c10 = svm.SVC(C=1.5, gamma=1, kernel='rbf').fit(X_train, y_train)
svm_rbf_c10.score(X=X_train, y=y_train)

0.9809523809523809

In [90]:
# Predict the held-out test samples with the C=1.5 RBF model and report the
# test-split accuracy as a percentage.
y_svm_rbf_c10 = svm_rbf_c10.predict(X=X_test)
100 * accuracy_score(y_true=y_test, y_pred=y_svm_rbf_c10)

95.55555555555556

In [91]:
# RBF-kernel SVM with C=1.75 and gamma=1; the cell output is the mean
# accuracy on the training split.
svm_rbf_c175 = svm.SVC(C=1.75, gamma=1, kernel='rbf').fit(X_train, y_train)
svm_rbf_c175.score(X=X_train, y=y_train)

0.9809523809523809