#### Import packages

In [None]:
import numpy as np
import pandas as pd

#### Load data

In [None]:
# Load the Iris table from a CSV addressed relative to the working directory.
df = pd.read_csv('../datasets/iris.csv')

#### Data inspection and preparation

In [None]:
# (rows, columns) of the loaded table, as a quick sanity check.
df.shape

In [None]:
# Preview the first rows of the table.
df.head()

In [None]:
# Column names as a plain list, to check them against the positional
# indexing used in the following cells.
list(df.columns)

In [None]:
# Take the columns at positions 1 through 4 as the feature table.
# NOTE(review): presumably position 0 is an id column and position 5 is
# 'Species' (used as the label below) - confirm against the CSV header.
data = df.iloc[:, 1:5]
data

In [None]:
# Preview the column at position 5, which is later extracted as the labels.
df.iloc[:, 5]

In [None]:
# Encode the species names as integer class ids.
# The whole column is replaced in a single assignment rather than writing
# ints into individual cells, so 'Species' becomes a real int64 column
# instead of a column holding a mix of Python ints and strings.
species_codes = {'Iris-setosa': 0, 'Iris-virginica': 1, 'Iris-versicolor': 2}
# Values that are not species names (for example codes left behind by an
# earlier run of this cell) pass through unchanged, so re-running is a no-op.
# astype raises ValueError if any unmapped, non-integer value remains.
df['Species'] = (
    df['Species']
    .map(lambda name: species_codes.get(name, name))
    .astype('int64')
)
labels = df.iloc[:, 5]
labels

In [None]:
# Replace the feature DataFrame with its NumPy array form; the array is what
# gets passed to the scikit-learn estimators below.
data = data.to_numpy()
data

In [None]:
# Convert the label Series to a NumPy array.
labels = labels.to_numpy()
labels

In [None]:
# Bind the names used from here on: X is the feature matrix, y the labels.
X = data
y = labels

#### Clustering

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Fit k-means with three clusters on the feature matrix. random_state=0
# fixes the seed so repeated runs give the same clustering.
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)

In [None]:
# Cluster index assigned to each row of X.
kmeans.labels_

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Scatter the first two feature columns, coloured by the encoded species.
plt.scatter(X[:,0], X[:,1], c=y)
plt.show()

In [None]:
# Scatter the first two feature columns, coloured by k-means cluster.
# Cluster indices are arbitrary, so colours need not line up with the
# species colours in the plot of the true labels.
plt.scatter(X[:,0], X[:,1], c=kmeans.labels_)
plt.show()

#### Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA

In [None]:
# PCA configured to keep two components.
pca = PCA(n_components=2)

In [None]:
# Fit the PCA on X and project X onto the two retained components.
X_new = pca.fit_transform(X)

In [None]:
# Should be (n_samples, 2) given n_components=2.
X_new.shape

In [None]:
# Cluster again, this time in the 2-D PCA space.
# This rebinds `kmeans`: the model fitted on the original features is no
# longer reachable after this cell runs.
kmeans = KMeans(n_clusters=3, random_state=0).fit(X_new)

In [None]:
# Scatter the two PCA components, coloured by the encoded species.
plt.scatter(X_new[:,0], X_new[:,1], c=y)
plt.show()

In [None]:
# Scatter the two PCA components, coloured by the cluster found in PCA space.
plt.scatter(X_new[:,0], X_new[:,1], c=kmeans.labels_)
plt.show()

#### Classification

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows as a test set; random_state=42 makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Inspect the training features.
X_train

In [None]:
# Number of training rows and feature columns.
X_train.shape

In [None]:
# Element type of the training features.
X_train.dtype

In [None]:
# Inspect the training labels.
y_train

In [None]:
# Number of training labels; should match the row count of X_train.
y_train.shape

In [None]:
# Check the label dtype; the following cell casts the labels to int64.
y_train.dtype

In [None]:
# Cast the training labels to int64.
# NOTE(review): presumably needed because the labels come out of the
# DataFrame as an object-dtype array - confirm with the dtype shown above.
y_train = y_train.astype('int64')

In [None]:
# Inspect the held-out test features.
X_test

In [None]:
# Number of test rows and feature columns.
X_test.shape

In [None]:
# Element type of the test features.
X_test.dtype

In [None]:
# Inspect the held-out test labels.
y_test

In [None]:
# Number of test labels; should match the row count of X_test.
y_test.shape

In [None]:
# Check the label dtype; the following cell casts the labels to int64.
y_test.dtype

In [None]:
# Cast the test labels to int64, mirroring the cast applied to y_train.
# NOTE(review): presumably needed because the labels come out of the
# DataFrame as an object-dtype array - confirm with the dtype shown above.
y_test = y_test.astype('int64')

In [None]:
from sklearn.svm import SVC

In [None]:
# Support-vector classifier with scikit-learn's default settings.
svm = SVC()

In [None]:
# Train the classifier on the training split.
svm.fit(X_train, y_train)

In [None]:
# Mean accuracy on the held-out test split.
svm.score(X_test, y_test)

In [None]:
# Predicted class code for each row of the test split.
predictions = svm.predict(X_test)

In [None]:
def to_string(val):
    """Return the species name for an integer class code.

    0 gives 'Iris-setosa' and 1 gives 'Iris-virginica'. Every other value,
    including 2, gives 'Iris-versicolor'.
    """
    # Codes are compared with == in this order; the first match wins.
    for code, species in ((0, 'Iris-setosa'), (1, 'Iris-virginica')):
        if val == code:
            return species
    return 'Iris-versicolor'

In [None]:
# Print the true species next to the predicted species for every test row.
# The header names the columns in the order they are printed: real label
# first, prediction second.
print('Real labels and predictions:')
print('\n')
# Walk both label sequences in lockstep rather than indexing by position.
for actual, predicted in zip(y_test, predictions):
    print(to_string(actual), to_string(predicted))
    print('\n')