In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from sklearn.model_selection import train_test_split

In [None]:
# Load the Iris CSV and drop its "Id" column, which none of the later cells use.
df = pd.read_csv("../data/iris.csv").drop(columns=["Id"])
# Last expression of the cell: preview the first rows.
df.head()

In [None]:
# Sepal length vs. sepal width, one scatter layer per species on shared axes.
# Each entry is (value in the Species column, marker colour, legend label).
species_layers = [
    ("Iris-setosa", "orange", "Setosa"),
    ("Iris-versicolor", "blue", "versicolor"),
    ("Iris-virginica", "green", "virginica"),
]

# ax=None on the first pass lets pandas create the axes; every later pass
# draws onto the axes returned by the previous call.
ax = None
for species_name, point_color, legend_label in species_layers:
    ax = df[df.Species == species_name].plot(
        kind="scatter",
        x="SepalLengthCm",
        y="SepalWidthCm",
        color=point_color,
        label=legend_label,
        ax=ax,
    )

ax.set(
    xlabel="Sepal Length",
    ylabel="Sepal Width",
    title="Sepal Length VS Width",
)

# Resize the figure that owns the axes, then render it.
fig = plt.gcf()
fig.set_size_inches(12, 8)
plt.show()

In [None]:
# Hold out 30% of the rows for evaluation.
# The split is seeded so that a fresh "Restart & Run All" reproduces the same
# partition, and therefore the same metrics in every cell below.
SEED = 42
train, test = train_test_split(df, test_size=0.3, random_state=SEED)

# The four measurement columns are the model inputs; "Species" is the target.
features_columns = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
]
X_train = train[features_columns]
y_train = train.Species
X_test = test[features_columns]
y_test = test.Species

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn import metrics

# Fit a logistic-regression classifier (allowing up to 150 solver iterations)
# and score it on the held-out rows.
logreg = LogisticRegression(max_iter=150)
logreg.fit(X_train, y_train)
pred = logreg.predict(X_test)
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy happens
# to be symmetric, but keeping the order right matches the other metric calls.
print("accuracy_score", metrics.accuracy_score(y_test, pred))

In [None]:
# Support-vector classifier with an RBF kernel and C=1.0, trained on the
# training split. The fit call stays last so the cell displays the estimator.
model = svm.SVC(kernel="rbf", C=1.0)
model.fit(X=X_train, y=y_train)

In [None]:
# Evaluate the SVC on the held-out rows.
pred = model.predict(X_test)
# classification_report expects (y_true, y_pred). With the arguments reversed,
# the precision and recall columns are swapped and "support" counts the
# predictions instead of the true labels.
print(metrics.classification_report(y_test, pred))

In [None]:
# Report how many support vectors the fitted SVC holds (one per row of the array).
print('support_vectors', model.support_vectors_.shape[0])

# GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# Cross-validated search over SVC hyperparameters.
# The grid is split in two because the linear kernel ignores `gamma`: crossing
# it with every gamma value would fit the same linear model five times per C.
# Consequence: `grid.best_params_` has no "gamma" key if a linear kernel wins.
c_values = [0.1, 1, 10, 100, 1000]
param_grid = [
    {"kernel": ["linear"], "C": c_values},
    {
        "kernel": ["poly", "rbf", "sigmoid"],
        "C": c_values,
        "gamma": [1, 0.1, 0.01, 0.001, 0.0001],
    },
]

# refit=True retrains the best configuration on the whole training split so
# `grid` can be used directly for prediction; verbose=3 logs every fit.
grid = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=3)
grid.fit(X_train, y_train)

In [None]:
# Summarise the search result, one item per line: the refitted estimator,
# its parameter values, and its cross-validation score.
print(grid.best_estimator_, grid.best_params_, grid.best_score_, sep="\n")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Evaluate the refitted best estimator on the held-out rows.
grid_pred = grid.predict(X_test)

# Pin the class order so matrix rows/columns line up with the tick labels,
# even if some class is missing from the test rows or the predictions.
class_labels = grid.classes_
cm = confusion_matrix(y_test, grid_pred, labels=class_labels)

# Rows of the confusion matrix are true classes, columns are predictions.
ax = sn.heatmap(
    cm,
    annot=True,
    fmt="d",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
ax.set_xlabel("Predicted species")
ax.set_ylabel("True species")
ax.set_title("Confusion matrix: grid-searched SVC")

print(classification_report(y_test, grid_pred))