In [11]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [12]:
# Read the raw CSV text; every line becomes one list of string fields.
with open("flower_dataset.csv") as f:
    lines = f.readlines()

# Skip blank lines (e.g. a stray empty line at the end of the file): they would
# otherwise turn into [''] rows and break the float()/int() conversion later on.
rows = [line.strip().split(",") for line in lines if line.strip()]

# The first row is the header (Petal Length,Petal Width,Type); keep only the samples.
data = rows[1:]

data


[['2.882026172983832', '2.2000786041836116', '0'],
 ['2.4893689920528694', '3.120446599600729', '0'],
 ['2.9337789950749835', '1.5113610600617946', '0'],
 ['2.4750442087627946', '1.9243213958511511', '0'],
 ['1.948390574103221', '2.205299250969186', '0'],
 ['2.072021785580439', '2.7271367534814877', '0'],
 ['2.3805188625734965', '2.060837508246414', '0'],
 ['2.221931616372713', '2.1668371636871333', '0'],
 ['2.747039536578803', '1.8974208681170996', '0'],
 ['2.1565338508254506', '1.5729521303491376', '0'],
 ['0.7235050920829607', '2.3268092977201804', '0'],
 ['2.432218099429753', '1.628917489796779', '0'],
 ['3.1348773119938036', '1.2728171627006177', '0'],
 ['2.022879258650723', '1.906408074987083', '0'],
 ['2.766389607179229', '2.7346793849501427', '0'],
 ['2.077473712848458', '2.1890812598010867', '0'],
 ['1.5561071261849437', '1.0096017658880365', '0'],
 ['1.8260439253369236', '2.07817448455199', '0'],
 ['2.6151453403638603', '2.6011899243922056', '0'],
 ['1.8063365912960239', '1.8

In [13]:
# Split each row into its two numeric features and its integer class label.
X = [[float(row[0]), float(row[1])] for row in data]
y = [int(row[2]) for row in data]

# Hold out 30% of the samples for testing. The seed is fixed so that the split,
# and therefore the predictions and accuracy below, are reproducible across
# kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a linear-kernel support vector classifier on the training portion only.
svm = SVC(kernel="linear")
svm.fit(X_train, y_train)


In [14]:
# Predict a class label for each held-out test sample with the fitted SVM.
y_pred = svm.predict(X_test)

# Display the predicted labels.
y_pred

array([1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 0, 0])

In [15]:
# Score the test-set predictions against the true test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Display the resulting accuracy.
accuracy

1.0

In [16]:
# 5-fold cross-validation of the linear SVM over the full dataset (one score per fold).
cross_val_score(estimator=svm, X=X, y=y, cv=5)


array([1., 1., 1., 1., 1.])

In [17]:
# Same 5-fold cross-validation, with the metric named explicitly as accuracy.
cross_val_score(estimator=svm, X=X, y=y, cv=5, scoring="accuracy")

array([1., 1., 1., 1., 1.])

In [18]:
# Write every sample, its true label and the model's prediction to a results CSV.
# NOTE: X also contains the samples the model was trained on, so the "Predicted"
# column is not a held-out evaluation.

# Predict all rows in one batched call instead of one predict() call per row.
# The guard keeps the header-only output for an empty dataset.
predictions = svm.predict(X) if X else []

with open("flower_dataset_results.csv", "w") as f:
    f.write("Petal Length,Petal Width,Type,Predicted\n")
    for features, label, predicted in zip(X, y, predictions):
        f.write(f"{features[0]},{features[1]},{label},{predicted}\n")