In [1]:
import numpy as np
from scipy.io import arff
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Parse the Iris ARFF file; loadarff hands back a (records, metadata) pair.
arff_contents = arff.loadarff('dataset/iris.arff')
data, meta = arff_contents

In [3]:
# Build the frame from the ARFF records and, in the same chain, decode the
# 'class' column from bytes to UTF-8 strings.
df = pd.DataFrame(data).assign(
    **{'class': lambda frame: frame['class'].str.decode('utf-8')}
)

In [4]:
# Show the decoded frame as the cell output to check the feature columns and labels.
df

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [5]:
# The label vector is the 'class' column; the feature matrix is every other column.
y = df['class']
X = df.drop(columns='class')

In [6]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# shuffle (and therefore every accuracy below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information leaks in.
# NOTE: X_train / X_test are overwritten with the scaled values, so re-running
# this cell on its own would scale already-scaled data; re-run the split first.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# k-NN Classifier

In [11]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# k-NN baseline: 3 neighbours, fitted on the scaled training split.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred_knn = knn.predict(X_test)
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print(f'k-NN Accuracy: {accuracy_knn:.4f}')

k-NN Accuracy: 1.0000


# Decision Tree Classifier

In [12]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default settings; random_state pins any tie-breaking so
# the fitted tree is reproducible.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred_dt = dt.predict(X_test)
accuracy_dt = accuracy_score(y_true=y_test, y_pred=y_pred_dt)
print(f'Decision Tree Accuracy: {accuracy_dt:.4f}')


Decision Tree Accuracy: 1.0000


# Neural Network

In [13]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with a single hidden layer of 5 units; max_iter is
# set to 1000 and random_state fixes the weight initialisation.
mlp = MLPClassifier(
    hidden_layer_sizes=(5,),
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred_mlp = mlp.predict(X_test)
accuracy_mlp = accuracy_score(y_true=y_test, y_pred=y_pred_mlp)
print(f'Neural Network Accuracy: {accuracy_mlp:.4f}')


Neural Network Accuracy: 1.0000




In [14]:
# Summarise the three test accuracies side by side: build the report lines
# first, then emit them in a single write.
comparison_lines = [
    'Accuracy Comparison:',
    f'k-NN Accuracy: {accuracy_knn:.4f}',
    f'Decision Tree Accuracy: {accuracy_dt:.4f}',
    f'Neural Network Accuracy: {accuracy_mlp:.4f}',
]
print('\n'.join(comparison_lines))


Accuracy Comparison:
k-NN Accuracy: 1.0000
Decision Tree Accuracy: 1.0000
Neural Network Accuracy: 1.0000


# Feature selection

In [15]:
from sklearn.base import clone

# Drop one feature (sepal width). X_train / X_test were replaced by the
# scaler's output earlier, so the column is removed by position rather than by
# name: index 1 is the second feature, sepalwidth.
X_train_reduced = np.delete(X_train, 1, axis=1)  # Remove second feature (Sepal Width)
X_test_reduced = np.delete(X_test, 1, axis=1)

# Re-train and evaluate on unfitted clones (same hyper-parameters) instead of
# refitting knn / dt / mlp in place. Refitting the originals would leave them
# expecting three features, so any later or re-run call that passes the
# four-feature X_test to them would fail.
knn_reduced = clone(knn).fit(X_train_reduced, y_train)
y_pred_knn_reduced = knn_reduced.predict(X_test_reduced)
accuracy_knn_reduced = accuracy_score(y_test, y_pred_knn_reduced)

dt_reduced = clone(dt).fit(X_train_reduced, y_train)
y_pred_dt_reduced = dt_reduced.predict(X_test_reduced)
accuracy_dt_reduced = accuracy_score(y_test, y_pred_dt_reduced)

mlp_reduced = clone(mlp).fit(X_train_reduced, y_train)
y_pred_mlp_reduced = mlp_reduced.predict(X_test_reduced)
accuracy_mlp_reduced = accuracy_score(y_test, y_pred_mlp_reduced)

# Print reduced feature accuracies
print(f'k-NN Accuracy (Reduced Features): {accuracy_knn_reduced:.4f}')
print(f'Decision Tree Accuracy (Reduced Features): {accuracy_dt_reduced:.4f}')
print(f'Neural Network Accuracy (Reduced Features): {accuracy_mlp_reduced:.4f}')


k-NN Accuracy (Reduced Features): 1.0000
Decision Tree Accuracy (Reduced Features): 1.0000
Neural Network Accuracy (Reduced Features): 1.0000




# Hyperparameter tuning

In [16]:
# Hidden-layer size experiment: same MLP settings as the 5-unit model, but
# with 10 hidden units, trained on the full scaled feature set.
mlp_2 = MLPClassifier(
    hidden_layer_sizes=(10,),
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred_mlp_2 = mlp_2.predict(X_test)
accuracy_mlp_2 = accuracy_score(y_true=y_test, y_pred=y_pred_mlp_2)
print(f'Neural Network (10 neurons) Accuracy: {accuracy_mlp_2:.4f}')


Neural Network (10 neurons) Accuracy: 1.0000


